cleanup: use the deque type where appropriate...
Bryan O'Sullivan
r16803:107a3270 default
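This commit replaces list-as-queue idioms with collections.deque in two modules (the changelog bisection helper and patch.py, per the hunks below). Popping from the front of a Python list is O(n), because every remaining element shifts left; deque.popleft() is O(1). A minimal sketch of the difference, illustrative only and not part of the commit:

    import collections

    queue = [1, 2, 3]
    first = queue.pop(0)              # O(n): shifts the remaining items left

    dq = collections.deque([1, 2, 3])
    first = dq.popleft()              # O(1)
    dq.append(4)                      # O(1) appends and pops at either end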
@@ -1,258 +1,258 @@
1 1 # changelog bisection for mercurial
2 2 #
3 3 # Copyright 2007 Matt Mackall
4 4 # Copyright 2005, 2006 Benoit Boissinot <benoit.boissinot@ens-lyon.org>
5 5 #
6 6 # Inspired by git bisect, extension skeleton taken from mq.py.
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10
11 11 import os, error
12 12 from i18n import _
13 13 from node import short, hex
14 import util
14 import collections, util
15 15
16 16 def bisect(changelog, state):
17 17 """find the next node (if any) for testing during a bisect search.
18 18 returns a (nodes, number, good) tuple.
19 19
20 20 'nodes' is the final result of the bisect if 'number' is 0.
21 21 Otherwise 'number' indicates the remaining possible candidates for
22 22 the search and 'nodes' contains the next bisect target.
23 23 'good' is True if bisect is searching for a first good changeset, False
24 24 if searching for a first bad one.
25 25 """
26 26
27 27 clparents = changelog.parentrevs
28 28 skip = set([changelog.rev(n) for n in state['skip']])
29 29
30 30 def buildancestors(bad, good):
31 31 # only the earliest bad revision matters
32 32 badrev = min([changelog.rev(n) for n in bad])
33 33 goodrevs = [changelog.rev(n) for n in good]
34 34 goodrev = min(goodrevs)
35 35 # build visit array
36 36 ancestors = [None] * (len(changelog) + 1) # an extra for [-1]
37 37
38 38 # set nodes descended from goodrevs
39 39 for rev in goodrevs:
40 40 ancestors[rev] = []
41 41 for rev in xrange(goodrev + 1, len(changelog)):
42 42 for prev in clparents(rev):
43 43 if ancestors[prev] == []:
44 44 ancestors[rev] = []
45 45
46 46 # clear good revs from array
47 47 for rev in goodrevs:
48 48 ancestors[rev] = None
49 49 for rev in xrange(len(changelog), goodrev, -1):
50 50 if ancestors[rev] is None:
51 51 for prev in clparents(rev):
52 52 ancestors[prev] = None
53 53
54 54 if ancestors[badrev] is None:
55 55 return badrev, None
56 56 return badrev, ancestors
57 57
58 58 good = False
59 59 badrev, ancestors = buildancestors(state['bad'], state['good'])
60 60 if not ancestors: # looking for bad to good transition?
61 61 good = True
62 62 badrev, ancestors = buildancestors(state['good'], state['bad'])
63 63 bad = changelog.node(badrev)
64 64 if not ancestors: # now we're confused
65 65 if len(state['bad']) == 1 and len(state['good']) == 1:
66 66 raise util.Abort(_("starting revisions are not directly related"))
67 67 raise util.Abort(_("inconsistent state, %s:%s is good and bad")
68 68 % (badrev, short(bad)))
69 69
70 70 # build children dict
71 71 children = {}
72 visit = [badrev]
72 visit = collections.deque([badrev])
73 73 candidates = []
74 74 while visit:
75 rev = visit.pop(0)
75 rev = visit.popleft()
76 76 if ancestors[rev] == []:
77 77 candidates.append(rev)
78 78 for prev in clparents(rev):
79 79 if prev != -1:
80 80 if prev in children:
81 81 children[prev].append(rev)
82 82 else:
83 83 children[prev] = [rev]
84 84 visit.append(prev)
85 85
86 86 candidates.sort()
87 87 # have we narrowed it down to one entry?
88 88 # or have all other possible candidates besides 'bad' been skipped?
89 89 tot = len(candidates)
90 90 unskipped = [c for c in candidates if (c not in skip) and (c != badrev)]
91 91 if tot == 1 or not unskipped:
92 92 return ([changelog.node(rev) for rev in candidates], 0, good)
93 93 perfect = tot // 2
94 94
95 95 # find the best node to test
96 96 best_rev = None
97 97 best_len = -1
98 98 poison = set()
99 99 for rev in candidates:
100 100 if rev in poison:
101 101 # poison children
102 102 poison.update(children.get(rev, []))
103 103 continue
104 104
105 105 a = ancestors[rev] or [rev]
106 106 ancestors[rev] = None
107 107
108 108 x = len(a) # number of ancestors
109 109 y = tot - x # number of non-ancestors
110 110 value = min(x, y) # how good is this test?
111 111 if value > best_len and rev not in skip:
112 112 best_len = value
113 113 best_rev = rev
114 114 if value == perfect: # found a perfect candidate? quit early
115 115 break
116 116
117 117 if y < perfect and rev not in skip: # all downhill from here?
118 118 # poison children
119 119 poison.update(children.get(rev, []))
120 120 continue
121 121
122 122 for c in children.get(rev, []):
123 123 if ancestors[c]:
124 124 ancestors[c] = list(set(ancestors[c] + a))
125 125 else:
126 126 ancestors[c] = a + [c]
127 127
128 128 assert best_rev is not None
129 129 best_node = changelog.node(best_rev)
130 130
131 131 return ([best_node], tot, good)
132 132
133 133
134 134 def load_state(repo):
135 135 state = {'current': [], 'good': [], 'bad': [], 'skip': []}
136 136 if os.path.exists(repo.join("bisect.state")):
137 137 for l in repo.opener("bisect.state"):
138 138 kind, node = l[:-1].split()
139 139 node = repo.lookup(node)
140 140 if kind not in state:
141 141 raise util.Abort(_("unknown bisect kind %s") % kind)
142 142 state[kind].append(node)
143 143 return state
144 144
145 145
146 146 def save_state(repo, state):
147 147 f = repo.opener("bisect.state", "w", atomictemp=True)
148 148 wlock = repo.wlock()
149 149 try:
150 150 for kind in state:
151 151 for node in state[kind]:
152 152 f.write("%s %s\n" % (kind, hex(node)))
153 153 f.close()
154 154 finally:
155 155 wlock.release()
156 156
157 157 def get(repo, status):
158 158 """
159 159 Return a list of revision(s) that match the given status:
160 160
161 161 - ``good``, ``bad``, ``skip``: csets explicitly marked as good/bad/skip
162 162 - ``goods``, ``bads`` : csets topologically good/bad
163 163 - ``range`` : csets taking part in the bisection
164 164 - ``pruned`` : csets that are goods, bads or skipped
165 165 - ``untested`` : csets whose fate is yet unknown
166 166 - ``ignored`` : csets ignored due to DAG topology
167 167 - ``current`` : the cset currently being bisected
168 168 """
169 169 state = load_state(repo)
170 170 if status in ('good', 'bad', 'skip', 'current'):
171 171 return map(repo.changelog.rev, state[status])
172 172 else:
173 173 # In the following sets, we do *not* call 'bisect()' with more
174 174 # than one level of recursion, because that can be very, very
175 175 # time consuming. Instead, we always develop the expression as
176 176 # much as possible.
177 177
178 178 # 'range' is all csets that make the bisection:
179 179 # - have a good ancestor and a bad descendant, or conversely
180 180 # that's because the bisection can go either way
181 181 range = '( bisect(bad)::bisect(good) | bisect(good)::bisect(bad) )'
182 182
183 183 _t = repo.revs('bisect(good)::bisect(bad)')
184 184 # The sets of topologically good or bad csets
185 185 if len(_t) == 0:
186 186 # Goods are topologically after bads
187 187 goods = 'bisect(good)::' # Pruned good csets
188 188 bads = '::bisect(bad)' # Pruned bad csets
189 189 else:
190 190 # Goods are topologically before bads
191 191 goods = '::bisect(good)' # Pruned good csets
192 192 bads = 'bisect(bad)::' # Pruned bad csets
193 193
194 194 # 'pruned' is all csets whose fate is already known: good, bad, skip
195 195 skips = 'bisect(skip)' # Pruned skipped csets
196 196 pruned = '( (%s) | (%s) | (%s) )' % (goods, bads, skips)
197 197
198 198 # 'untested' is all csets that are in 'range', but not in 'pruned'
199 199 untested = '( (%s) - (%s) )' % (range, pruned)
200 200
201 201 # 'ignored' is all csets that were not used during the bisection
202 202 # due to DAG topology, but may however have had an impact.
203 203 # E.g., a branch merged between bads and goods, but whose branch-
204 204 # point is outside of the range.
205 205 iba = '::bisect(bad) - ::bisect(good)' # Ignored bads' ancestors
206 206 iga = '::bisect(good) - ::bisect(bad)' # Ignored goods' ancestors
207 207 ignored = '( ( (%s) | (%s) ) - (%s) )' % (iba, iga, range)
208 208
209 209 if status == 'range':
210 210 return repo.revs(range)
211 211 elif status == 'pruned':
212 212 return repo.revs(pruned)
213 213 elif status == 'untested':
214 214 return repo.revs(untested)
215 215 elif status == 'ignored':
216 216 return repo.revs(ignored)
217 217 elif status == "goods":
218 218 return repo.revs(goods)
219 219 elif status == "bads":
220 220 return repo.revs(bads)
221 221 else:
222 222 raise error.ParseError(_('invalid bisect state'))
223 223
224 224 def label(repo, node):
225 225 rev = repo.changelog.rev(node)
226 226
227 227 # Try explicit sets
228 228 if rev in get(repo, 'good'):
229 229 # i18n: bisect changeset status
230 230 return _('good')
231 231 if rev in get(repo, 'bad'):
232 232 # i18n: bisect changeset status
233 233 return _('bad')
234 234 if rev in get(repo, 'skip'):
235 235 # i18n: bisect changeset status
236 236 return _('skipped')
237 237 if rev in get(repo, 'untested') or rev in get(repo, 'current'):
238 238 # i18n: bisect changeset status
239 239 return _('untested')
240 240 if rev in get(repo, 'ignored'):
241 241 # i18n: bisect changeset status
242 242 return _('ignored')
243 243
244 244 # Try implicit sets
245 245 if rev in get(repo, 'goods'):
246 246 # i18n: bisect changeset status
247 247 return _('good (implicit)')
248 248 if rev in get(repo, 'bads'):
249 249 # i18n: bisect changeset status
250 250 return _('bad (implicit)')
251 251
252 252 return None
253 253
254 254 def shortlabel(label):
255 255 if label:
256 256 return label[0].upper()
257 257
258 258 return None
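The deque introduced above backs the children-dict build in bisect(). Below is a condensed sketch of that traversal pattern with a toy parent map standing in for changelog.parentrevs; the seen set is an addition of the sketch so each rev is enqueued once, where the original simply walks until the queue drains. The patch.py hunk that follows makes the matching import change.

    import collections

    def buildchildren(start, parentmap):
        children = {}
        visit = collections.deque([start])   # was: visit = [start]
        seen = set([start])
        while visit:
            rev = visit.popleft()            # was: rev = visit.pop(0)
            for prev in parentmap.get(rev, []):
                if prev == -1:
                    continue
                children.setdefault(prev, []).append(rev)
                if prev not in seen:
                    seen.add(prev)
                    visit.append(prev)
        return children

    # toy DAG: rev 0 is the root, rev 3 merges revs 1 and 2
    print buildchildren(3, {3: [1, 2], 2: [0], 1: [0], 0: [-1]})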
@@ -1,1890 +1,1890 @@
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 import cStringIO, email.Parser, os, errno, re
10 10 import tempfile, zlib, shutil
11 11
12 12 from i18n import _
13 13 from node import hex, nullid, short
14 14 import base85, mdiff, scmutil, util, diffhelpers, copies, encoding, error
15 import context
15 import collections, context
16 16
17 17 gitre = re.compile('diff --git a/(.*) b/(.*)')
18 18
19 19 class PatchError(Exception):
20 20 pass
21 21
22 22
23 23 # public functions
24 24
25 25 def split(stream):
26 26 '''return an iterator of individual patches from a stream'''
27 27 def isheader(line, inheader):
28 28 if inheader and line[0] in (' ', '\t'):
29 29 # continuation
30 30 return True
31 31 if line[0] in (' ', '-', '+'):
32 32 # diff line - don't check for header pattern in there
33 33 return False
34 34 l = line.split(': ', 1)
35 35 return len(l) == 2 and ' ' not in l[0]
36 36
37 37 def chunk(lines):
38 38 return cStringIO.StringIO(''.join(lines))
39 39
40 40 def hgsplit(stream, cur):
41 41 inheader = True
42 42
43 43 for line in stream:
44 44 if not line.strip():
45 45 inheader = False
46 46 if not inheader and line.startswith('# HG changeset patch'):
47 47 yield chunk(cur)
48 48 cur = []
49 49 inheader = True
50 50
51 51 cur.append(line)
52 52
53 53 if cur:
54 54 yield chunk(cur)
55 55
56 56 def mboxsplit(stream, cur):
57 57 for line in stream:
58 58 if line.startswith('From '):
59 59 for c in split(chunk(cur[1:])):
60 60 yield c
61 61 cur = []
62 62
63 63 cur.append(line)
64 64
65 65 if cur:
66 66 for c in split(chunk(cur[1:])):
67 67 yield c
68 68
69 69 def mimesplit(stream, cur):
70 70 def msgfp(m):
71 71 fp = cStringIO.StringIO()
72 72 g = email.Generator.Generator(fp, mangle_from_=False)
73 73 g.flatten(m)
74 74 fp.seek(0)
75 75 return fp
76 76
77 77 for line in stream:
78 78 cur.append(line)
79 79 c = chunk(cur)
80 80
81 81 m = email.Parser.Parser().parse(c)
82 82 if not m.is_multipart():
83 83 yield msgfp(m)
84 84 else:
85 85 ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
86 86 for part in m.walk():
87 87 ct = part.get_content_type()
88 88 if ct not in ok_types:
89 89 continue
90 90 yield msgfp(part)
91 91
92 92 def headersplit(stream, cur):
93 93 inheader = False
94 94
95 95 for line in stream:
96 96 if not inheader and isheader(line, inheader):
97 97 yield chunk(cur)
98 98 cur = []
99 99 inheader = True
100 100 if inheader and not isheader(line, inheader):
101 101 inheader = False
102 102
103 103 cur.append(line)
104 104
105 105 if cur:
106 106 yield chunk(cur)
107 107
108 108 def remainder(cur):
109 109 yield chunk(cur)
110 110
111 111 class fiter(object):
112 112 def __init__(self, fp):
113 113 self.fp = fp
114 114
115 115 def __iter__(self):
116 116 return self
117 117
118 118 def next(self):
119 119 l = self.fp.readline()
120 120 if not l:
121 121 raise StopIteration
122 122 return l
123 123
124 124 inheader = False
125 125 cur = []
126 126
127 127 mimeheaders = ['content-type']
128 128
129 129 if not util.safehasattr(stream, 'next'):
130 130 # http responses, for example, have readline but not next
131 131 stream = fiter(stream)
132 132
133 133 for line in stream:
134 134 cur.append(line)
135 135 if line.startswith('# HG changeset patch'):
136 136 return hgsplit(stream, cur)
137 137 elif line.startswith('From '):
138 138 return mboxsplit(stream, cur)
139 139 elif isheader(line, inheader):
140 140 inheader = True
141 141 if line.split(':', 1)[0].lower() in mimeheaders:
142 142 # let email parser handle this
143 143 return mimesplit(stream, cur)
144 144 elif line.startswith('--- ') and inheader:
145 145 # No evil headers seen before diff start, split by hand
146 146 return headersplit(stream, cur)
147 147 # Not enough info, keep reading
148 148
149 149 # if we are here, we have a very plain patch
150 150 return remainder(cur)
151 151
152 152 def extract(ui, fileobj):
153 153 '''extract patch from data read from fileobj.
154 154
155 155 patch can be a normal patch or contained in an email message.
156 156
157 157 return tuple (filename, message, user, date, branch, node, p1, p2).
158 158 Any item in the returned tuple can be None. If filename is None,
159 159 fileobj did not contain a patch. Caller must unlink filename when done.'''
160 160
161 161 # attempt to detect the start of a patch
162 162 # (this heuristic is borrowed from quilt)
163 163 diffre = re.compile(r'^(?:Index:[ \t]|diff[ \t]|RCS file: |'
164 164 r'retrieving revision [0-9]+(\.[0-9]+)*$|'
165 165 r'---[ \t].*?^\+\+\+[ \t]|'
166 166 r'\*\*\*[ \t].*?^---[ \t])', re.MULTILINE|re.DOTALL)
167 167
168 168 fd, tmpname = tempfile.mkstemp(prefix='hg-patch-')
169 169 tmpfp = os.fdopen(fd, 'w')
170 170 try:
171 171 msg = email.Parser.Parser().parse(fileobj)
172 172
173 173 subject = msg['Subject']
174 174 user = msg['From']
175 175 if not subject and not user:
176 176 # Not an email, restore parsed headers if any
177 177 subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n'
178 178
179 179 gitsendmail = 'git-send-email' in msg.get('X-Mailer', '')
180 180 # should try to parse msg['Date']
181 181 date = None
182 182 nodeid = None
183 183 branch = None
184 184 parents = []
185 185
186 186 if subject:
187 187 if subject.startswith('[PATCH'):
188 188 pend = subject.find(']')
189 189 if pend >= 0:
190 190 subject = subject[pend + 1:].lstrip()
191 191 subject = re.sub(r'\n[ \t]+', ' ', subject)
192 192 ui.debug('Subject: %s\n' % subject)
193 193 if user:
194 194 ui.debug('From: %s\n' % user)
195 195 diffs_seen = 0
196 196 ok_types = ('text/plain', 'text/x-diff', 'text/x-patch')
197 197 message = ''
198 198 for part in msg.walk():
199 199 content_type = part.get_content_type()
200 200 ui.debug('Content-Type: %s\n' % content_type)
201 201 if content_type not in ok_types:
202 202 continue
203 203 payload = part.get_payload(decode=True)
204 204 m = diffre.search(payload)
205 205 if m:
206 206 hgpatch = False
207 207 hgpatchheader = False
208 208 ignoretext = False
209 209
210 210 ui.debug('found patch at byte %d\n' % m.start(0))
211 211 diffs_seen += 1
212 212 cfp = cStringIO.StringIO()
213 213 for line in payload[:m.start(0)].splitlines():
214 214 if line.startswith('# HG changeset patch') and not hgpatch:
215 215 ui.debug('patch generated by hg export\n')
216 216 hgpatch = True
217 217 hgpatchheader = True
218 218 # drop earlier commit message content
219 219 cfp.seek(0)
220 220 cfp.truncate()
221 221 subject = None
222 222 elif hgpatchheader:
223 223 if line.startswith('# User '):
224 224 user = line[7:]
225 225 ui.debug('From: %s\n' % user)
226 226 elif line.startswith("# Date "):
227 227 date = line[7:]
228 228 elif line.startswith("# Branch "):
229 229 branch = line[9:]
230 230 elif line.startswith("# Node ID "):
231 231 nodeid = line[10:]
232 232 elif line.startswith("# Parent "):
233 233 parents.append(line[9:].lstrip())
234 234 elif not line.startswith("# "):
235 235 hgpatchheader = False
236 236 elif line == '---' and gitsendmail:
237 237 ignoretext = True
238 238 if not hgpatchheader and not ignoretext:
239 239 cfp.write(line)
240 240 cfp.write('\n')
241 241 message = cfp.getvalue()
242 242 if tmpfp:
243 243 tmpfp.write(payload)
244 244 if not payload.endswith('\n'):
245 245 tmpfp.write('\n')
246 246 elif not diffs_seen and message and content_type == 'text/plain':
247 247 message += '\n' + payload
248 248 except: # re-raises
249 249 tmpfp.close()
250 250 os.unlink(tmpname)
251 251 raise
252 252
253 253 if subject and not message.startswith(subject):
254 254 message = '%s\n%s' % (subject, message)
255 255 tmpfp.close()
256 256 if not diffs_seen:
257 257 os.unlink(tmpname)
258 258 return None, message, user, date, branch, None, None, None
259 259 p1 = parents and parents.pop(0) or None
260 260 p2 = parents and parents.pop(0) or None
261 261 return tmpname, message, user, date, branch, nodeid, p1, p2
262 262
263 263 class patchmeta(object):
264 264 """Patched file metadata
265 265
266 266 'op' is the performed operation within ADD, DELETE, RENAME, MODIFY
267 267 or COPY. 'path' is patched file path. 'oldpath' is set to the
268 268 origin file when 'op' is either COPY or RENAME, None otherwise. If
269 269 file mode is changed, 'mode' is a tuple (islink, isexec) where
270 270 'islink' is True if the file is a symlink and 'isexec' is True if
271 271 the file is executable. Otherwise, 'mode' is None.
272 272 """
273 273 def __init__(self, path):
274 274 self.path = path
275 275 self.oldpath = None
276 276 self.mode = None
277 277 self.op = 'MODIFY'
278 278 self.binary = False
279 279
280 280 def setmode(self, mode):
281 281 islink = mode & 020000
282 282 isexec = mode & 0100
283 283 self.mode = (islink, isexec)
284 284
285 285 def copy(self):
286 286 other = patchmeta(self.path)
287 287 other.oldpath = self.oldpath
288 288 other.mode = self.mode
289 289 other.op = self.op
290 290 other.binary = self.binary
291 291 return other
292 292
293 293 def _ispatchinga(self, afile):
294 294 if afile == '/dev/null':
295 295 return self.op == 'ADD'
296 296 return afile == 'a/' + (self.oldpath or self.path)
297 297
298 298 def _ispatchingb(self, bfile):
299 299 if bfile == '/dev/null':
300 300 return self.op == 'DELETE'
301 301 return bfile == 'b/' + self.path
302 302
303 303 def ispatching(self, afile, bfile):
304 304 return self._ispatchinga(afile) and self._ispatchingb(bfile)
305 305
306 306 def __repr__(self):
307 307 return "<patchmeta %s %r>" % (self.op, self.path)
308 308
309 309 def readgitpatch(lr):
310 310 """extract git-style metadata about patches from <patchname>"""
311 311
312 312 # Filter patch for git information
313 313 gp = None
314 314 gitpatches = []
315 315 for line in lr:
316 316 line = line.rstrip(' \r\n')
317 317 if line.startswith('diff --git'):
318 318 m = gitre.match(line)
319 319 if m:
320 320 if gp:
321 321 gitpatches.append(gp)
322 322 dst = m.group(2)
323 323 gp = patchmeta(dst)
324 324 elif gp:
325 325 if line.startswith('--- '):
326 326 gitpatches.append(gp)
327 327 gp = None
328 328 continue
329 329 if line.startswith('rename from '):
330 330 gp.op = 'RENAME'
331 331 gp.oldpath = line[12:]
332 332 elif line.startswith('rename to '):
333 333 gp.path = line[10:]
334 334 elif line.startswith('copy from '):
335 335 gp.op = 'COPY'
336 336 gp.oldpath = line[10:]
337 337 elif line.startswith('copy to '):
338 338 gp.path = line[8:]
339 339 elif line.startswith('deleted file'):
340 340 gp.op = 'DELETE'
341 341 elif line.startswith('new file mode '):
342 342 gp.op = 'ADD'
343 343 gp.setmode(int(line[-6:], 8))
344 344 elif line.startswith('new mode '):
345 345 gp.setmode(int(line[-6:], 8))
346 346 elif line.startswith('GIT binary patch'):
347 347 gp.binary = True
348 348 if gp:
349 349 gitpatches.append(gp)
350 350
351 351 return gitpatches
352 352
353 353 class linereader(object):
354 354 # simple class to allow pushing lines back into the input stream
355 355 def __init__(self, fp):
356 356 self.fp = fp
357 357 self.buf = []
358 358
359 359 def push(self, line):
360 360 if line is not None:
361 361 self.buf.append(line)
362 362
363 363 def readline(self):
364 364 if self.buf:
365 365 l = self.buf[0]
366 366 del self.buf[0]
367 367 return l
368 368 return self.fp.readline()
369 369
370 370 def __iter__(self):
371 371 while True:
372 372 l = self.readline()
373 373 if not l:
374 374 break
375 375 yield l
376 376
377 377 class abstractbackend(object):
378 378 def __init__(self, ui):
379 379 self.ui = ui
380 380
381 381 def getfile(self, fname):
382 382 """Return target file data and flags as a (data, (islink,
383 383 isexec)) tuple.
384 384 """
385 385 raise NotImplementedError
386 386
387 387 def setfile(self, fname, data, mode, copysource):
388 388 """Write data to target file fname and set its mode. mode is a
389 389 (islink, isexec) tuple. If data is None, the file content should
390 390 be left unchanged. If the file is modified after being copied,
391 391 copysource is set to the original file name.
392 392 """
393 393 raise NotImplementedError
394 394
395 395 def unlink(self, fname):
396 396 """Unlink target file."""
397 397 raise NotImplementedError
398 398
399 399 def writerej(self, fname, failed, total, lines):
400 400 """Write rejected lines for fname. total is the number of hunks
401 401 which failed to apply and total the total number of hunks for this
402 402 files.
403 403 """
404 404 pass
405 405
406 406 def exists(self, fname):
407 407 raise NotImplementedError
408 408
409 409 class fsbackend(abstractbackend):
410 410 def __init__(self, ui, basedir):
411 411 super(fsbackend, self).__init__(ui)
412 412 self.opener = scmutil.opener(basedir)
413 413
414 414 def _join(self, f):
415 415 return os.path.join(self.opener.base, f)
416 416
417 417 def getfile(self, fname):
418 418 path = self._join(fname)
419 419 if os.path.islink(path):
420 420 return (os.readlink(path), (True, False))
421 421 isexec = False
422 422 try:
423 423 isexec = os.lstat(path).st_mode & 0100 != 0
424 424 except OSError, e:
425 425 if e.errno != errno.ENOENT:
426 426 raise
427 427 return (self.opener.read(fname), (False, isexec))
428 428
429 429 def setfile(self, fname, data, mode, copysource):
430 430 islink, isexec = mode
431 431 if data is None:
432 432 util.setflags(self._join(fname), islink, isexec)
433 433 return
434 434 if islink:
435 435 self.opener.symlink(data, fname)
436 436 else:
437 437 self.opener.write(fname, data)
438 438 if isexec:
439 439 util.setflags(self._join(fname), False, True)
440 440
441 441 def unlink(self, fname):
442 442 try:
443 443 util.unlinkpath(self._join(fname))
444 444 except OSError, inst:
445 445 if inst.errno != errno.ENOENT:
446 446 raise
447 447
448 448 def writerej(self, fname, failed, total, lines):
449 449 fname = fname + ".rej"
450 450 self.ui.warn(
451 451 _("%d out of %d hunks FAILED -- saving rejects to file %s\n") %
452 452 (failed, total, fname))
453 453 fp = self.opener(fname, 'w')
454 454 fp.writelines(lines)
455 455 fp.close()
456 456
457 457 def exists(self, fname):
458 458 return os.path.lexists(self._join(fname))
459 459
460 460 class workingbackend(fsbackend):
461 461 def __init__(self, ui, repo, similarity):
462 462 super(workingbackend, self).__init__(ui, repo.root)
463 463 self.repo = repo
464 464 self.similarity = similarity
465 465 self.removed = set()
466 466 self.changed = set()
467 467 self.copied = []
468 468
469 469 def _checkknown(self, fname):
470 470 if self.repo.dirstate[fname] == '?' and self.exists(fname):
471 471 raise PatchError(_('cannot patch %s: file is not tracked') % fname)
472 472
473 473 def setfile(self, fname, data, mode, copysource):
474 474 self._checkknown(fname)
475 475 super(workingbackend, self).setfile(fname, data, mode, copysource)
476 476 if copysource is not None:
477 477 self.copied.append((copysource, fname))
478 478 self.changed.add(fname)
479 479
480 480 def unlink(self, fname):
481 481 self._checkknown(fname)
482 482 super(workingbackend, self).unlink(fname)
483 483 self.removed.add(fname)
484 484 self.changed.add(fname)
485 485
486 486 def close(self):
487 487 wctx = self.repo[None]
488 488 addremoved = set(self.changed)
489 489 for src, dst in self.copied:
490 490 scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
491 491 if self.removed:
492 492 wctx.forget(sorted(self.removed))
493 493 for f in self.removed:
494 494 if f not in self.repo.dirstate:
495 495 # File was deleted and no longer belongs to the
496 496 # dirstate, it was probably marked added then
497 497 # deleted, and should not be considered by
498 498 # addremove().
499 499 addremoved.discard(f)
500 500 if addremoved:
501 501 cwd = self.repo.getcwd()
502 502 if cwd:
503 503 addremoved = [util.pathto(self.repo.root, cwd, f)
504 504 for f in addremoved]
505 505 scmutil.addremove(self.repo, addremoved, similarity=self.similarity)
506 506 return sorted(self.changed)
507 507
508 508 class filestore(object):
509 509 def __init__(self, maxsize=None):
510 510 self.opener = None
511 511 self.files = {}
512 512 self.created = 0
513 513 self.maxsize = maxsize
514 514 if self.maxsize is None:
515 515 self.maxsize = 4*(2**20)
516 516 self.size = 0
517 517 self.data = {}
518 518
519 519 def setfile(self, fname, data, mode, copied=None):
520 520 if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
521 521 self.data[fname] = (data, mode, copied)
522 522 self.size += len(data)
523 523 else:
524 524 if self.opener is None:
525 525 root = tempfile.mkdtemp(prefix='hg-patch-')
526 526 self.opener = scmutil.opener(root)
527 527 # Avoid filename issues with these simple names
528 528 fn = str(self.created)
529 529 self.opener.write(fn, data)
530 530 self.created += 1
531 531 self.files[fname] = (fn, mode, copied)
532 532
533 533 def getfile(self, fname):
534 534 if fname in self.data:
535 535 return self.data[fname]
536 536 if not self.opener or fname not in self.files:
537 537 raise IOError
538 538 fn, mode, copied = self.files[fname]
539 539 return self.opener.read(fn), mode, copied
540 540
541 541 def close(self):
542 542 if self.opener:
543 543 shutil.rmtree(self.opener.base)
544 544
545 545 class repobackend(abstractbackend):
546 546 def __init__(self, ui, repo, ctx, store):
547 547 super(repobackend, self).__init__(ui)
548 548 self.repo = repo
549 549 self.ctx = ctx
550 550 self.store = store
551 551 self.changed = set()
552 552 self.removed = set()
553 553 self.copied = {}
554 554
555 555 def _checkknown(self, fname):
556 556 if fname not in self.ctx:
557 557 raise PatchError(_('cannot patch %s: file is not tracked') % fname)
558 558
559 559 def getfile(self, fname):
560 560 try:
561 561 fctx = self.ctx[fname]
562 562 except error.LookupError:
563 563 raise IOError
564 564 flags = fctx.flags()
565 565 return fctx.data(), ('l' in flags, 'x' in flags)
566 566
567 567 def setfile(self, fname, data, mode, copysource):
568 568 if copysource:
569 569 self._checkknown(copysource)
570 570 if data is None:
571 571 data = self.ctx[fname].data()
572 572 self.store.setfile(fname, data, mode, copysource)
573 573 self.changed.add(fname)
574 574 if copysource:
575 575 self.copied[fname] = copysource
576 576
577 577 def unlink(self, fname):
578 578 self._checkknown(fname)
579 579 self.removed.add(fname)
580 580
581 581 def exists(self, fname):
582 582 return fname in self.ctx
583 583
584 584 def close(self):
585 585 return self.changed | self.removed
586 586
587 587 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
588 588 unidesc = re.compile('@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
589 589 contextdesc = re.compile('(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
590 590 eolmodes = ['strict', 'crlf', 'lf', 'auto']
591 591
592 592 class patchfile(object):
593 593 def __init__(self, ui, gp, backend, store, eolmode='strict'):
594 594 self.fname = gp.path
595 595 self.eolmode = eolmode
596 596 self.eol = None
597 597 self.backend = backend
598 598 self.ui = ui
599 599 self.lines = []
600 600 self.exists = False
601 601 self.missing = True
602 602 self.mode = gp.mode
603 603 self.copysource = gp.oldpath
604 604 self.create = gp.op in ('ADD', 'COPY', 'RENAME')
605 605 self.remove = gp.op == 'DELETE'
606 606 try:
607 607 if self.copysource is None:
608 608 data, mode = backend.getfile(self.fname)
609 609 self.exists = True
610 610 else:
611 611 data, mode = store.getfile(self.copysource)[:2]
612 612 self.exists = backend.exists(self.fname)
613 613 self.missing = False
614 614 if data:
615 615 self.lines = mdiff.splitnewlines(data)
616 616 if self.mode is None:
617 617 self.mode = mode
618 618 if self.lines:
619 619 # Normalize line endings
620 620 if self.lines[0].endswith('\r\n'):
621 621 self.eol = '\r\n'
622 622 elif self.lines[0].endswith('\n'):
623 623 self.eol = '\n'
624 624 if eolmode != 'strict':
625 625 nlines = []
626 626 for l in self.lines:
627 627 if l.endswith('\r\n'):
628 628 l = l[:-2] + '\n'
629 629 nlines.append(l)
630 630 self.lines = nlines
631 631 except IOError:
632 632 if self.create:
633 633 self.missing = False
634 634 if self.mode is None:
635 635 self.mode = (False, False)
636 636 if self.missing:
637 637 self.ui.warn(_("unable to find '%s' for patching\n") % self.fname)
638 638
639 639 self.hash = {}
640 640 self.dirty = 0
641 641 self.offset = 0
642 642 self.skew = 0
643 643 self.rej = []
644 644 self.fileprinted = False
645 645 self.printfile(False)
646 646 self.hunks = 0
647 647
648 648 def writelines(self, fname, lines, mode):
649 649 if self.eolmode == 'auto':
650 650 eol = self.eol
651 651 elif self.eolmode == 'crlf':
652 652 eol = '\r\n'
653 653 else:
654 654 eol = '\n'
655 655
656 656 if self.eolmode != 'strict' and eol and eol != '\n':
657 657 rawlines = []
658 658 for l in lines:
659 659 if l and l[-1] == '\n':
660 660 l = l[:-1] + eol
661 661 rawlines.append(l)
662 662 lines = rawlines
663 663
664 664 self.backend.setfile(fname, ''.join(lines), mode, self.copysource)
665 665
666 666 def printfile(self, warn):
667 667 if self.fileprinted:
668 668 return
669 669 if warn or self.ui.verbose:
670 670 self.fileprinted = True
671 671 s = _("patching file %s\n") % self.fname
672 672 if warn:
673 673 self.ui.warn(s)
674 674 else:
675 675 self.ui.note(s)
676 676
677 677
678 678 def findlines(self, l, linenum):
679 679 # looks through the hash and finds candidate lines. The
680 680 # result is a list of line numbers sorted based on distance
681 681 # from linenum
682 682
683 683 cand = self.hash.get(l, [])
684 684 if len(cand) > 1:
685 685 # resort our list of potentials forward then back.
686 686 cand.sort(key=lambda x: abs(x - linenum))
687 687 return cand
688 688
689 689 def write_rej(self):
690 690 # our rejects are a little different from patch(1). This always
691 691 # creates rejects in the same form as the original patch. A file
692 692 # header is inserted so that you can run the reject through patch again
693 693 # without having to type the filename.
694 694 if not self.rej:
695 695 return
696 696 base = os.path.basename(self.fname)
697 697 lines = ["--- %s\n+++ %s\n" % (base, base)]
698 698 for x in self.rej:
699 699 for l in x.hunk:
700 700 lines.append(l)
701 701 if l[-1] != '\n':
702 702 lines.append("\n\ No newline at end of file\n")
703 703 self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)
704 704
705 705 def apply(self, h):
706 706 if not h.complete():
707 707 raise PatchError(_("bad hunk #%d %s (%d %d %d %d)") %
708 708 (h.number, h.desc, len(h.a), h.lena, len(h.b),
709 709 h.lenb))
710 710
711 711 self.hunks += 1
712 712
713 713 if self.missing:
714 714 self.rej.append(h)
715 715 return -1
716 716
717 717 if self.exists and self.create:
718 718 if self.copysource:
719 719 self.ui.warn(_("cannot create %s: destination already "
720 720 "exists\n" % self.fname))
721 721 else:
722 722 self.ui.warn(_("file %s already exists\n") % self.fname)
723 723 self.rej.append(h)
724 724 return -1
725 725
726 726 if isinstance(h, binhunk):
727 727 if self.remove:
728 728 self.backend.unlink(self.fname)
729 729 else:
730 730 self.lines[:] = h.new()
731 731 self.offset += len(h.new())
732 732 self.dirty = True
733 733 return 0
734 734
735 735 horig = h
736 736 if (self.eolmode in ('crlf', 'lf')
737 737 or self.eolmode == 'auto' and self.eol):
738 738 # If new eols are going to be normalized, then normalize
739 739 # hunk data before patching. Otherwise, preserve input
740 740 # line-endings.
741 741 h = h.getnormalized()
742 742
743 743 # fast case first, no offsets, no fuzz
744 744 old, oldstart, new, newstart = h.fuzzit(0, False)
745 745 oldstart += self.offset
746 746 orig_start = oldstart
747 747 # if there's skew we want to emit the "(offset %d lines)" even
748 748 # when the hunk cleanly applies at start + skew, so skip the
749 749 # fast case code
750 750 if (self.skew == 0 and
751 751 diffhelpers.testhunk(old, self.lines, oldstart) == 0):
752 752 if self.remove:
753 753 self.backend.unlink(self.fname)
754 754 else:
755 755 self.lines[oldstart:oldstart + len(old)] = new
756 756 self.offset += len(new) - len(old)
757 757 self.dirty = True
758 758 return 0
759 759
760 760 # ok, we couldn't match the hunk. Let's look for offsets and fuzz it
761 761 self.hash = {}
762 762 for x, s in enumerate(self.lines):
763 763 self.hash.setdefault(s, []).append(x)
764 764
765 765 for fuzzlen in xrange(3):
766 766 for toponly in [True, False]:
767 767 old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
768 768 oldstart = oldstart + self.offset + self.skew
769 769 oldstart = min(oldstart, len(self.lines))
770 770 if old:
771 771 cand = self.findlines(old[0][1:], oldstart)
772 772 else:
773 773 # Only adding lines with no or fuzzed context, just
774 774 # take the skew into account
775 775 cand = [oldstart]
776 776
777 777 for l in cand:
778 778 if not old or diffhelpers.testhunk(old, self.lines, l) == 0:
779 779 self.lines[l : l + len(old)] = new
780 780 self.offset += len(new) - len(old)
781 781 self.skew = l - orig_start
782 782 self.dirty = True
783 783 offset = l - orig_start - fuzzlen
784 784 if fuzzlen:
785 785 msg = _("Hunk #%d succeeded at %d "
786 786 "with fuzz %d "
787 787 "(offset %d lines).\n")
788 788 self.printfile(True)
789 789 self.ui.warn(msg %
790 790 (h.number, l + 1, fuzzlen, offset))
791 791 else:
792 792 msg = _("Hunk #%d succeeded at %d "
793 793 "(offset %d lines).\n")
794 794 self.ui.note(msg % (h.number, l + 1, offset))
795 795 return fuzzlen
796 796 self.printfile(True)
797 797 self.ui.warn(_("Hunk #%d FAILED at %d\n") % (h.number, orig_start))
798 798 self.rej.append(horig)
799 799 return -1
800 800
801 801 def close(self):
802 802 if self.dirty:
803 803 self.writelines(self.fname, self.lines, self.mode)
804 804 self.write_rej()
805 805 return len(self.rej)
806 806
807 807 class hunk(object):
808 808 def __init__(self, desc, num, lr, context):
809 809 self.number = num
810 810 self.desc = desc
811 811 self.hunk = [desc]
812 812 self.a = []
813 813 self.b = []
814 814 self.starta = self.lena = None
815 815 self.startb = self.lenb = None
816 816 if lr is not None:
817 817 if context:
818 818 self.read_context_hunk(lr)
819 819 else:
820 820 self.read_unified_hunk(lr)
821 821
822 822 def getnormalized(self):
823 823 """Return a copy with line endings normalized to LF."""
824 824
825 825 def normalize(lines):
826 826 nlines = []
827 827 for line in lines:
828 828 if line.endswith('\r\n'):
829 829 line = line[:-2] + '\n'
830 830 nlines.append(line)
831 831 return nlines
832 832
833 833 # Dummy object; it is rebuilt manually
834 834 nh = hunk(self.desc, self.number, None, None)
835 835 nh.number = self.number
836 836 nh.desc = self.desc
837 837 nh.hunk = self.hunk
838 838 nh.a = normalize(self.a)
839 839 nh.b = normalize(self.b)
840 840 nh.starta = self.starta
841 841 nh.startb = self.startb
842 842 nh.lena = self.lena
843 843 nh.lenb = self.lenb
844 844 return nh
845 845
846 846 def read_unified_hunk(self, lr):
847 847 m = unidesc.match(self.desc)
848 848 if not m:
849 849 raise PatchError(_("bad hunk #%d") % self.number)
850 850 self.starta, self.lena, self.startb, self.lenb = m.groups()
851 851 if self.lena is None:
852 852 self.lena = 1
853 853 else:
854 854 self.lena = int(self.lena)
855 855 if self.lenb is None:
856 856 self.lenb = 1
857 857 else:
858 858 self.lenb = int(self.lenb)
859 859 self.starta = int(self.starta)
860 860 self.startb = int(self.startb)
861 861 diffhelpers.addlines(lr, self.hunk, self.lena, self.lenb, self.a,
862 862 self.b)
863 863 # if we hit EOF before finishing out the hunk, the last line will
864 864 # be zero length. Let's try to fix it up.
865 865 while len(self.hunk[-1]) == 0:
866 866 del self.hunk[-1]
867 867 del self.a[-1]
868 868 del self.b[-1]
869 869 self.lena -= 1
870 870 self.lenb -= 1
871 871 self._fixnewline(lr)
872 872
873 873 def read_context_hunk(self, lr):
874 874 self.desc = lr.readline()
875 875 m = contextdesc.match(self.desc)
876 876 if not m:
877 877 raise PatchError(_("bad hunk #%d") % self.number)
878 878 self.starta, aend = m.groups()
879 879 self.starta = int(self.starta)
880 880 if aend is None:
881 881 aend = self.starta
882 882 self.lena = int(aend) - self.starta
883 883 if self.starta:
884 884 self.lena += 1
885 885 for x in xrange(self.lena):
886 886 l = lr.readline()
887 887 if l.startswith('---'):
888 888 # lines addition, old block is empty
889 889 lr.push(l)
890 890 break
891 891 s = l[2:]
892 892 if l.startswith('- ') or l.startswith('! '):
893 893 u = '-' + s
894 894 elif l.startswith(' '):
895 895 u = ' ' + s
896 896 else:
897 897 raise PatchError(_("bad hunk #%d old text line %d") %
898 898 (self.number, x))
899 899 self.a.append(u)
900 900 self.hunk.append(u)
901 901
902 902 l = lr.readline()
903 903 if l.startswith('\ '):
904 904 s = self.a[-1][:-1]
905 905 self.a[-1] = s
906 906 self.hunk[-1] = s
907 907 l = lr.readline()
908 908 m = contextdesc.match(l)
909 909 if not m:
910 910 raise PatchError(_("bad hunk #%d") % self.number)
911 911 self.startb, bend = m.groups()
912 912 self.startb = int(self.startb)
913 913 if bend is None:
914 914 bend = self.startb
915 915 self.lenb = int(bend) - self.startb
916 916 if self.startb:
917 917 self.lenb += 1
918 918 hunki = 1
919 919 for x in xrange(self.lenb):
920 920 l = lr.readline()
921 921 if l.startswith('\ '):
922 922 # XXX: the only way to hit this is with an invalid line range.
923 923 # The no-eol marker is not counted in the line range, but I
924 924 # guess there are diff(1) implementations which behave differently.
925 925 s = self.b[-1][:-1]
926 926 self.b[-1] = s
927 927 self.hunk[hunki - 1] = s
928 928 continue
929 929 if not l:
930 930 # line deletions, new block is empty and we hit EOF
931 931 lr.push(l)
932 932 break
933 933 s = l[2:]
934 934 if l.startswith('+ ') or l.startswith('! '):
935 935 u = '+' + s
936 936 elif l.startswith(' '):
937 937 u = ' ' + s
938 938 elif len(self.b) == 0:
939 939 # line deletions, new block is empty
940 940 lr.push(l)
941 941 break
942 942 else:
943 943 raise PatchError(_("bad hunk #%d old text line %d") %
944 944 (self.number, x))
945 945 self.b.append(s)
946 946 while True:
947 947 if hunki >= len(self.hunk):
948 948 h = ""
949 949 else:
950 950 h = self.hunk[hunki]
951 951 hunki += 1
952 952 if h == u:
953 953 break
954 954 elif h.startswith('-'):
955 955 continue
956 956 else:
957 957 self.hunk.insert(hunki - 1, u)
958 958 break
959 959
960 960 if not self.a:
961 961 # this happens when lines were only added to the hunk
962 962 for x in self.hunk:
963 963 if x.startswith('-') or x.startswith(' '):
964 964 self.a.append(x)
965 965 if not self.b:
966 966 # this happens when lines were only deleted from the hunk
967 967 for x in self.hunk:
968 968 if x.startswith('+') or x.startswith(' '):
969 969 self.b.append(x[1:])
970 970 # @@ -start,len +start,len @@
971 971 self.desc = "@@ -%d,%d +%d,%d @@\n" % (self.starta, self.lena,
972 972 self.startb, self.lenb)
973 973 self.hunk[0] = self.desc
974 974 self._fixnewline(lr)
975 975
976 976 def _fixnewline(self, lr):
977 977 l = lr.readline()
978 978 if l.startswith('\ '):
979 979 diffhelpers.fix_newline(self.hunk, self.a, self.b)
980 980 else:
981 981 lr.push(l)
982 982
983 983 def complete(self):
984 984 return len(self.a) == self.lena and len(self.b) == self.lenb
985 985
986 986 def _fuzzit(self, old, new, fuzz, toponly):
987 987 # this removes context lines from the top and bottom of the hunk. It
988 988 # checks the hunk to make sure only context lines are removed, and then
989 989 # returns the shortened 'old' and 'new' lists plus the top trim count.
990 990 fuzz = min(fuzz, len(old))
991 991 if fuzz:
992 992 top = 0
993 993 bot = 0
994 994 hlen = len(self.hunk)
995 995 for x in xrange(hlen - 1):
996 996 # the hunk starts with the @@ line, so use x+1
997 997 if self.hunk[x + 1][0] == ' ':
998 998 top += 1
999 999 else:
1000 1000 break
1001 1001 if not toponly:
1002 1002 for x in xrange(hlen - 1):
1003 1003 if self.hunk[hlen - bot - 1][0] == ' ':
1004 1004 bot += 1
1005 1005 else:
1006 1006 break
1007 1007
1008 1008 bot = min(fuzz, bot)
1009 1009 top = min(fuzz, top)
1010 1010 return old[top:len(old)-bot], new[top:len(new)-bot], top
1011 1011 return old, new, 0
1012 1012
1013 1013 def fuzzit(self, fuzz, toponly):
1014 1014 old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
1015 1015 oldstart = self.starta + top
1016 1016 newstart = self.startb + top
1017 1017 # zero length hunk ranges already have their start decremented
1018 1018 if self.lena and oldstart > 0:
1019 1019 oldstart -= 1
1020 1020 if self.lenb and newstart > 0:
1021 1021 newstart -= 1
1022 1022 return old, oldstart, new, newstart
1023 1023
1024 1024 class binhunk(object):
1025 1025 'A binary patch file. Only understands literals so far.'
1026 1026 def __init__(self, lr, fname):
1027 1027 self.text = None
1028 1028 self.hunk = ['GIT binary patch\n']
1029 1029 self._fname = fname
1030 1030 self._read(lr)
1031 1031
1032 1032 def complete(self):
1033 1033 return self.text is not None
1034 1034
1035 1035 def new(self):
1036 1036 return [self.text]
1037 1037
1038 1038 def _read(self, lr):
1039 1039 def getline(lr, hunk):
1040 1040 l = lr.readline()
1041 1041 hunk.append(l)
1042 1042 return l.rstrip('\r\n')
1043 1043
1044 1044 while True:
1045 1045 line = getline(lr, self.hunk)
1046 1046 if not line:
1047 1047 raise PatchError(_('could not extract "%s" binary data')
1048 1048 % self._fname)
1049 1049 if line.startswith('literal '):
1050 1050 break
1051 1051 size = int(line[8:].rstrip())
1052 1052 dec = []
1053 1053 line = getline(lr, self.hunk)
1054 1054 while len(line) > 1:
1055 1055 l = line[0]
1056 1056 if l <= 'Z' and l >= 'A':
1057 1057 l = ord(l) - ord('A') + 1
1058 1058 else:
1059 1059 l = ord(l) - ord('a') + 27
1060 1060 try:
1061 1061 dec.append(base85.b85decode(line[1:])[:l])
1062 1062 except ValueError, e:
1063 1063 raise PatchError(_('could not decode "%s" binary patch: %s')
1064 1064 % (self._fname, str(e)))
1065 1065 line = getline(lr, self.hunk)
1066 1066 text = zlib.decompress(''.join(dec))
1067 1067 if len(text) != size:
1068 1068 raise PatchError(_('"%s" length is %d bytes, should be %d')
1069 1069 % (self._fname, len(text), size))
1070 1070 self.text = text
1071 1071
1072 1072 def parsefilename(str):
1073 1073 # --- filename \t|space stuff
1074 1074 s = str[4:].rstrip('\r\n')
1075 1075 i = s.find('\t')
1076 1076 if i < 0:
1077 1077 i = s.find(' ')
1078 1078 if i < 0:
1079 1079 return s
1080 1080 return s[:i]
1081 1081
1082 1082 def pathstrip(path, strip):
1083 1083 pathlen = len(path)
1084 1084 i = 0
1085 1085 if strip == 0:
1086 1086 return '', path.rstrip()
1087 1087 count = strip
1088 1088 while count > 0:
1089 1089 i = path.find('/', i)
1090 1090 if i == -1:
1091 1091 raise PatchError(_("unable to strip away %d of %d dirs from %s") %
1092 1092 (count, strip, path))
1093 1093 i += 1
1094 1094 # consume '//' in the path
1095 1095 while i < pathlen - 1 and path[i] == '/':
1096 1096 i += 1
1097 1097 count -= 1
1098 1098 return path[:i].lstrip(), path[i:].rstrip()
1099 1099
1100 1100 def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip):
1101 1101 nulla = afile_orig == "/dev/null"
1102 1102 nullb = bfile_orig == "/dev/null"
1103 1103 create = nulla and hunk.starta == 0 and hunk.lena == 0
1104 1104 remove = nullb and hunk.startb == 0 and hunk.lenb == 0
1105 1105 abase, afile = pathstrip(afile_orig, strip)
1106 1106 gooda = not nulla and backend.exists(afile)
1107 1107 bbase, bfile = pathstrip(bfile_orig, strip)
1108 1108 if afile == bfile:
1109 1109 goodb = gooda
1110 1110 else:
1111 1111 goodb = not nullb and backend.exists(bfile)
1112 1112 missing = not goodb and not gooda and not create
1113 1113
1114 1114 # some diff programs apparently produce patches where the afile is
1115 1115 # not /dev/null, but afile starts with bfile
1116 1116 abasedir = afile[:afile.rfind('/') + 1]
1117 1117 bbasedir = bfile[:bfile.rfind('/') + 1]
1118 1118 if (missing and abasedir == bbasedir and afile.startswith(bfile)
1119 1119 and hunk.starta == 0 and hunk.lena == 0):
1120 1120 create = True
1121 1121 missing = False
1122 1122
1123 1123 # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
1124 1124 # diff is between a file and its backup. In this case, the original
1125 1125 # file should be patched (see original mpatch code).
1126 1126 isbackup = (abase == bbase and bfile.startswith(afile))
1127 1127 fname = None
1128 1128 if not missing:
1129 1129 if gooda and goodb:
1130 1130 fname = isbackup and afile or bfile
1131 1131 elif gooda:
1132 1132 fname = afile
1133 1133
1134 1134 if not fname:
1135 1135 if not nullb:
1136 1136 fname = isbackup and afile or bfile
1137 1137 elif not nulla:
1138 1138 fname = afile
1139 1139 else:
1140 1140 raise PatchError(_("undefined source and destination files"))
1141 1141
1142 1142 gp = patchmeta(fname)
1143 1143 if create:
1144 1144 gp.op = 'ADD'
1145 1145 elif remove:
1146 1146 gp.op = 'DELETE'
1147 1147 return gp
1148 1148
1149 1149 def scangitpatch(lr, firstline):
1150 1150 """
1151 1151 Git patches can emit:
1152 1152 - rename a to b
1153 1153 - change b
1154 1154 - copy a to c
1155 1155 - change c
1156 1156
1157 1157 We cannot apply this sequence as-is: the renamed 'a' could not be
1158 1158 found, since it would have been renamed already. And we cannot copy
1159 1159 from 'b' instead because 'b' would have been changed already. So
1160 1160 we scan the git patch for copy and rename commands so we can
1161 1161 perform the copies ahead of time.
1162 1162 """
1163 1163 pos = 0
1164 1164 try:
1165 1165 pos = lr.fp.tell()
1166 1166 fp = lr.fp
1167 1167 except IOError:
1168 1168 fp = cStringIO.StringIO(lr.fp.read())
1169 1169 gitlr = linereader(fp)
1170 1170 gitlr.push(firstline)
1171 1171 gitpatches = readgitpatch(gitlr)
1172 1172 fp.seek(pos)
1173 1173 return gitpatches
1174 1174
1175 1175 def iterhunks(fp):
1176 1176 """Read a patch and yield the following events:
1177 1177 - ("file", afile, bfile, firsthunk): select a new target file.
1178 1178 - ("hunk", hunk): a new hunk is ready to be applied, follows a
1179 1179 "file" event.
1180 1180 - ("git", gitchanges): current diff is in git format, gitchanges
1181 1181 maps filenames to gitpatch records. Unique event.
1182 1182 """
1183 1183 afile = ""
1184 1184 bfile = ""
1185 1185 state = None
1186 1186 hunknum = 0
1187 1187 emitfile = newfile = False
1188 1188 gitpatches = None
1189 1189
1190 1190 # our states
1191 1191 BFILE = 1
1192 1192 context = None
1193 1193 lr = linereader(fp)
1194 1194
1195 1195 while True:
1196 1196 x = lr.readline()
1197 1197 if not x:
1198 1198 break
1199 1199 if state == BFILE and (
1200 1200 (not context and x[0] == '@')
1201 1201 or (context is not False and x.startswith('***************'))
1202 1202 or x.startswith('GIT binary patch')):
1203 1203 gp = None
1204 1204 if (gitpatches and
1205 1205 gitpatches[-1].ispatching(afile, bfile)):
1206 1206 gp = gitpatches.pop()
1207 1207 if x.startswith('GIT binary patch'):
1208 1208 h = binhunk(lr, gp.path)
1209 1209 else:
1210 1210 if context is None and x.startswith('***************'):
1211 1211 context = True
1212 1212 h = hunk(x, hunknum + 1, lr, context)
1213 1213 hunknum += 1
1214 1214 if emitfile:
1215 1215 emitfile = False
1216 1216 yield 'file', (afile, bfile, h, gp and gp.copy() or None)
1217 1217 yield 'hunk', h
1218 1218 elif x.startswith('diff --git'):
1219 1219 m = gitre.match(x.rstrip(' \r\n'))
1220 1220 if not m:
1221 1221 continue
1222 1222 if gitpatches is None:
1223 1223 # scan whole input for git metadata
1224 1224 gitpatches = scangitpatch(lr, x)
1225 1225 yield 'git', [g.copy() for g in gitpatches
1226 1226 if g.op in ('COPY', 'RENAME')]
1227 1227 gitpatches.reverse()
1228 1228 afile = 'a/' + m.group(1)
1229 1229 bfile = 'b/' + m.group(2)
1230 1230 while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
1231 1231 gp = gitpatches.pop()
1232 1232 yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
1233 1233 if not gitpatches:
1234 1234 raise PatchError(_('failed to synchronize metadata for "%s"')
1235 1235 % afile[2:])
1236 1236 gp = gitpatches[-1]
1237 1237 newfile = True
1238 1238 elif x.startswith('---'):
1239 1239 # check for a unified diff
1240 1240 l2 = lr.readline()
1241 1241 if not l2.startswith('+++'):
1242 1242 lr.push(l2)
1243 1243 continue
1244 1244 newfile = True
1245 1245 context = False
1246 1246 afile = parsefilename(x)
1247 1247 bfile = parsefilename(l2)
1248 1248 elif x.startswith('***'):
1249 1249 # check for a context diff
1250 1250 l2 = lr.readline()
1251 1251 if not l2.startswith('---'):
1252 1252 lr.push(l2)
1253 1253 continue
1254 1254 l3 = lr.readline()
1255 1255 lr.push(l3)
1256 1256 if not l3.startswith("***************"):
1257 1257 lr.push(l2)
1258 1258 continue
1259 1259 newfile = True
1260 1260 context = True
1261 1261 afile = parsefilename(x)
1262 1262 bfile = parsefilename(l2)
1263 1263
1264 1264 if newfile:
1265 1265 newfile = False
1266 1266 emitfile = True
1267 1267 state = BFILE
1268 1268 hunknum = 0
1269 1269
1270 1270 while gitpatches:
1271 1271 gp = gitpatches.pop()
1272 1272 yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
1273 1273
1274 1274 def applydiff(ui, fp, backend, store, strip=1, eolmode='strict'):
1275 1275 """Reads a patch from fp and tries to apply it.
1276 1276
1277 1277 Returns 0 for a clean patch, -1 if any rejects were found and 1 if
1278 1278 there was any fuzz.
1279 1279
1280 1280 If 'eolmode' is 'strict', the patch content and patched file are
1281 1281 read in binary mode. Otherwise, line endings are ignored when
1282 1282 patching then normalized according to 'eolmode'.
1283 1283 """
1284 1284 return _applydiff(ui, fp, patchfile, backend, store, strip=strip,
1285 1285 eolmode=eolmode)
1286 1286
1287 1287 def _applydiff(ui, fp, patcher, backend, store, strip=1,
1288 1288 eolmode='strict'):
1289 1289
1290 1290 def pstrip(p):
1291 1291 return pathstrip(p, strip - 1)[1]
1292 1292
1293 1293 rejects = 0
1294 1294 err = 0
1295 1295 current_file = None
1296 1296
1297 1297 for state, values in iterhunks(fp):
1298 1298 if state == 'hunk':
1299 1299 if not current_file:
1300 1300 continue
1301 1301 ret = current_file.apply(values)
1302 1302 if ret > 0:
1303 1303 err = 1
1304 1304 elif state == 'file':
1305 1305 if current_file:
1306 1306 rejects += current_file.close()
1307 1307 current_file = None
1308 1308 afile, bfile, first_hunk, gp = values
1309 1309 if gp:
1310 1310 gp.path = pstrip(gp.path)
1311 1311 if gp.oldpath:
1312 1312 gp.oldpath = pstrip(gp.oldpath)
1313 1313 else:
1314 1314 gp = makepatchmeta(backend, afile, bfile, first_hunk, strip)
1315 1315 if gp.op == 'RENAME':
1316 1316 backend.unlink(gp.oldpath)
1317 1317 if not first_hunk:
1318 1318 if gp.op == 'DELETE':
1319 1319 backend.unlink(gp.path)
1320 1320 continue
1321 1321 data, mode = None, None
1322 1322 if gp.op in ('RENAME', 'COPY'):
1323 1323 data, mode = store.getfile(gp.oldpath)[:2]
1324 1324 if gp.mode:
1325 1325 mode = gp.mode
1326 1326 if gp.op == 'ADD':
1327 1327 # Added files without content have no hunk and
1328 1328 # must be created
1329 1329 data = ''
1330 1330 if data or mode:
1331 1331 if (gp.op in ('ADD', 'RENAME', 'COPY')
1332 1332 and backend.exists(gp.path)):
1333 1333 raise PatchError(_("cannot create %s: destination "
1334 1334 "already exists") % gp.path)
1335 1335 backend.setfile(gp.path, data, mode, gp.oldpath)
1336 1336 continue
1337 1337 try:
1338 1338 current_file = patcher(ui, gp, backend, store,
1339 1339 eolmode=eolmode)
1340 1340 except PatchError, inst:
1341 1341 ui.warn(str(inst) + '\n')
1342 1342 current_file = None
1343 1343 rejects += 1
1344 1344 continue
1345 1345 elif state == 'git':
1346 1346 for gp in values:
1347 1347 path = pstrip(gp.oldpath)
1348 1348 data, mode = backend.getfile(path)
1349 1349 store.setfile(path, data, mode)
1350 1350 else:
1351 1351 raise util.Abort(_('unsupported parser state: %s') % state)
1352 1352
1353 1353 if current_file:
1354 1354 rejects += current_file.close()
1355 1355
1356 1356 if rejects:
1357 1357 return -1
1358 1358 return err
1359 1359
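Since iterhunks() and _applydiff() are split along a small event protocol ('git', 'file', 'hunk', as documented on iterhunks above), the parser can be driven independently of any backend. A minimal consumer, for illustration only; error handling is omitted and fp is any patch file object:

    def summarize(fp):
        files, hunks = [], 0
        for state, values in iterhunks(fp):
            if state == 'file':
                afile, bfile, firsthunk, gp = values
                files.append(bfile)
            elif state == 'hunk':
                hunks += 1
            elif state == 'git':
                pass   # list of gitpatch records describing copies/renames
        return files, hunks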
1360 1360 def _externalpatch(ui, repo, patcher, patchname, strip, files,
1361 1361 similarity):
1362 1362 """use <patcher> to apply <patchname> to the working directory.
1363 1363 returns whether patch was applied with fuzz factor."""
1364 1364
1365 1365 fuzz = False
1366 1366 args = []
1367 1367 cwd = repo.root
1368 1368 if cwd:
1369 1369 args.append('-d %s' % util.shellquote(cwd))
1370 1370 fp = util.popen('%s %s -p%d < %s' % (patcher, ' '.join(args), strip,
1371 1371 util.shellquote(patchname)))
1372 1372 try:
1373 1373 for line in fp:
1374 1374 line = line.rstrip()
1375 1375 ui.note(line + '\n')
1376 1376 if line.startswith('patching file '):
1377 1377 pf = util.parsepatchoutput(line)
1378 1378 printed_file = False
1379 1379 files.add(pf)
1380 1380 elif line.find('with fuzz') >= 0:
1381 1381 fuzz = True
1382 1382 if not printed_file:
1383 1383 ui.warn(pf + '\n')
1384 1384 printed_file = True
1385 1385 ui.warn(line + '\n')
1386 1386 elif line.find('saving rejects to file') >= 0:
1387 1387 ui.warn(line + '\n')
1388 1388 elif line.find('FAILED') >= 0:
1389 1389 if not printed_file:
1390 1390 ui.warn(pf + '\n')
1391 1391 printed_file = True
1392 1392 ui.warn(line + '\n')
1393 1393 finally:
1394 1394 if files:
1395 1395 cfiles = list(files)
1396 1396 cwd = repo.getcwd()
1397 1397 if cwd:
1398 1398 cfiles = [util.pathto(repo.root, cwd, f)
1399 1399 for f in cfiles]
1400 1400 scmutil.addremove(repo, cfiles, similarity=similarity)
1401 1401 code = fp.close()
1402 1402 if code:
1403 1403 raise PatchError(_("patch command failed: %s") %
1404 1404 util.explainexit(code)[0])
1405 1405 return fuzz
1406 1406
1407 1407 def patchbackend(ui, backend, patchobj, strip, files=None, eolmode='strict'):
1408 1408 if files is None:
1409 1409 files = set()
1410 1410 if eolmode is None:
1411 1411 eolmode = ui.config('patch', 'eol', 'strict')
1412 1412 if eolmode.lower() not in eolmodes:
1413 1413 raise util.Abort(_('unsupported line endings type: %s') % eolmode)
1414 1414 eolmode = eolmode.lower()
1415 1415
1416 1416 store = filestore()
1417 1417 try:
1418 1418 fp = open(patchobj, 'rb')
1419 1419 except TypeError:
1420 1420 fp = patchobj
1421 1421 try:
1422 1422 ret = applydiff(ui, fp, backend, store, strip=strip,
1423 1423 eolmode=eolmode)
1424 1424 finally:
1425 1425 if fp != patchobj:
1426 1426 fp.close()
1427 1427 files.update(backend.close())
1428 1428 store.close()
1429 1429 if ret < 0:
1430 1430 raise PatchError(_('patch failed to apply'))
1431 1431 return ret > 0
1432 1432
1433 1433 def internalpatch(ui, repo, patchobj, strip, files=None, eolmode='strict',
1434 1434 similarity=0):
1435 1435 """use builtin patch to apply <patchobj> to the working directory.
1436 1436 returns whether patch was applied with fuzz factor."""
1437 1437 backend = workingbackend(ui, repo, similarity)
1438 1438 return patchbackend(ui, backend, patchobj, strip, files, eolmode)
1439 1439
1440 1440 def patchrepo(ui, repo, ctx, store, patchobj, strip, files=None,
1441 1441 eolmode='strict'):
1442 1442 backend = repobackend(ui, repo, ctx, store)
1443 1443 return patchbackend(ui, backend, patchobj, strip, files, eolmode)
1444 1444
1445 1445 def makememctx(repo, parents, text, user, date, branch, files, store,
1446 1446 editor=None):
1447 1447 def getfilectx(repo, memctx, path):
1448 1448 data, (islink, isexec), copied = store.getfile(path)
1449 1449 return context.memfilectx(path, data, islink=islink, isexec=isexec,
1450 1450 copied=copied)
1451 1451 extra = {}
1452 1452 if branch:
1453 1453 extra['branch'] = encoding.fromlocal(branch)
1454 1454 ctx = context.memctx(repo, parents, text, files, getfilectx, user,
1455 1455 date, extra)
1456 1456 if editor:
1457 1457 ctx._text = editor(repo, ctx, [])
1458 1458 return ctx
1459 1459
1460 1460 def patch(ui, repo, patchname, strip=1, files=None, eolmode='strict',
1461 1461 similarity=0):
1462 1462 """Apply <patchname> to the working directory.
1463 1463
1464 1464 'eolmode' specifies how end of lines should be handled. It can be:
1465 1465 - 'strict': inputs are read in binary mode, EOLs are preserved
1466 1466 - 'crlf': EOLs are ignored when patching and reset to CRLF
1467 1467 - 'lf': EOLs are ignored when patching and reset to LF
1468 1468 - None: get it from user settings, default to 'strict'
1469 1469 'eolmode' is ignored when using an external patcher program.
1470 1470
1471 1471 Returns whether patch was applied with fuzz factor.
1472 1472 """
1473 1473 patcher = ui.config('ui', 'patch')
1474 1474 if files is None:
1475 1475 files = set()
1476 1476 try:
1477 1477 if patcher:
1478 1478 return _externalpatch(ui, repo, patcher, patchname, strip,
1479 1479 files, similarity)
1480 1480 return internalpatch(ui, repo, patchname, strip, files, eolmode,
1481 1481 similarity)
1482 1482 except PatchError, err:
1483 1483 raise util.Abort(str(err))
1484 1484
1485 1485 def changedfiles(ui, repo, patchpath, strip=1):
1486 1486 backend = fsbackend(ui, repo.root)
1487 1487 fp = open(patchpath, 'rb')
1488 1488 try:
1489 1489 changed = set()
1490 1490 for state, values in iterhunks(fp):
1491 1491 if state == 'file':
1492 1492 afile, bfile, first_hunk, gp = values
1493 1493 if gp:
1494 1494 gp.path = pathstrip(gp.path, strip - 1)[1]
1495 1495 if gp.oldpath:
1496 1496 gp.oldpath = pathstrip(gp.oldpath, strip - 1)[1]
1497 1497 else:
1498 1498 gp = makepatchmeta(backend, afile, bfile, first_hunk, strip)
1499 1499 changed.add(gp.path)
1500 1500 if gp.op == 'RENAME':
1501 1501 changed.add(gp.oldpath)
1502 1502 elif state not in ('hunk', 'git'):
1503 1503 raise util.Abort(_('unsupported parser state: %s') % state)
1504 1504 return changed
1505 1505 finally:
1506 1506 fp.close()
1507 1507
1508 1508 def b85diff(to, tn):
1509 1509 '''print base85-encoded binary diff'''
1510 1510 def gitindex(text):
1511 1511 if not text:
1512 1512 return hex(nullid)
1513 1513 l = len(text)
1514 1514 s = util.sha1('blob %d\0' % l)
1515 1515 s.update(text)
1516 1516 return s.hexdigest()
1517 1517
1518 1518 def fmtline(line):
1519 1519 l = len(line)
1520 1520 if l <= 26:
1521 1521 l = chr(ord('A') + l - 1)
1522 1522 else:
1523 1523 l = chr(l - 26 + ord('a') - 1)
1524 1524 return '%c%s\n' % (l, base85.b85encode(line, True))
1525 1525
1526 1526 def chunk(text, csize=52):
1527 1527 l = len(text)
1528 1528 i = 0
1529 1529 while i < l:
1530 1530 yield text[i:i + csize]
1531 1531 i += csize
1532 1532
1533 1533 tohash = gitindex(to)
1534 1534 tnhash = gitindex(tn)
1535 1535 if tohash == tnhash:
1536 1536 return ""
1537 1537
1538 1538 # TODO: deltas
1539 1539 ret = ['index %s..%s\nGIT binary patch\nliteral %s\n' %
1540 1540 (tohash, tnhash, len(tn))]
1541 1541 for l in chunk(zlib.compress(tn)):
1542 1542 ret.append(fmtline(l))
1543 1543 ret.append('\n')
1544 1544 return ''.join(ret)
1545 1545
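b85diff above emits git's binary-patch framing: the payload is zlib-compressed, sliced into 52-byte chunks, and each base85 line carries a one-character prefix encoding the chunk's raw length, 'A'-'Z' for 1-26 bytes and 'a'-'z' for 27-52. A sketch of a decoder for just that length byte (the helper name is made up; this shows the convention, not Mercurial's API):

    def linelen(prefix):
        """invert fmtline's length byte: 'A'..'Z' -> 1..26, 'a'..'z' -> 27..52"""
        if 'A' <= prefix <= 'Z':
            return ord(prefix) - ord('A') + 1
        return ord(prefix) - ord('a') + 27

    assert linelen('A') == 1 and linelen('Z') == 26
    assert linelen('a') == 27 and linelen('z') == 52
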
1546 1546 class GitDiffRequired(Exception):
1547 1547 pass
1548 1548
1549 1549 def diffopts(ui, opts=None, untrusted=False, section='diff'):
1550 1550 def get(key, name=None, getter=ui.configbool):
1551 1551 return ((opts and opts.get(key)) or
1552 1552 getter(section, name or key, None, untrusted=untrusted))
1553 1553 return mdiff.diffopts(
1554 1554 text=opts and opts.get('text'),
1555 1555 git=get('git'),
1556 1556 nodates=get('nodates'),
1557 1557 showfunc=get('show_function', 'showfunc'),
1558 1558 ignorews=get('ignore_all_space', 'ignorews'),
1559 1559 ignorewsamount=get('ignore_space_change', 'ignorewsamount'),
1560 1560 ignoreblanklines=get('ignore_blank_lines', 'ignoreblanklines'),
1561 1561 context=get('unified', getter=ui.config))
1562 1562
1563 1563 def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None,
1564 1564 losedatafn=None, prefix=''):
1565 1565 '''yields diff of changes to files between two nodes, or node and
1566 1566 working directory.
1567 1567
1568 1568 if node1 is None, use first dirstate parent instead.
1569 1569 if node2 is None, compare node1 with working directory.
1570 1570
1571 1571 losedatafn(**kwarg) is a callable run when opts.upgrade=True and
1572 1572 every time some change cannot be represented with the current
1573 1573 patch format. Return False to upgrade to git patch format, True to
1574 1574 accept the loss or raise an exception to abort the diff. It is
1575 1575 called with the name of the current file being diffed as 'fn'. If set
1576 1576 to None, patches will always be upgraded to git format when
1577 1577 necessary.
1578 1578
1579 1579 prefix is a filename prefix that is prepended to all filenames on
1580 1580 display (used for subrepos).
1581 1581 '''
1582 1582
1583 1583 if opts is None:
1584 1584 opts = mdiff.defaultopts
1585 1585
1586 1586 if not node1 and not node2:
1587 1587 node1 = repo.dirstate.p1()
1588 1588
1589 1589 def lrugetfilectx():
1590 1590 cache = {}
1591 order = []
1591 order = collections.deque()
1592 1592 def getfilectx(f, ctx):
1593 1593 fctx = ctx.filectx(f, filelog=cache.get(f))
1594 1594 if f not in cache:
1595 1595 if len(cache) > 20:
1596 del cache[order.pop(0)]
1596 del cache[order.popleft()]
1597 1597 cache[f] = fctx.filelog()
1598 1598 else:
1599 1599 order.remove(f)
1600 1600 order.append(f)
1601 1601 return fctx
1602 1602 return getfilectx
1603 1603 getfilectx = lrugetfilectx()
1604 1604
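The lrugetfilectx hunk just above is the heart of this cleanup: the filelog cache keeps its eviction order in a deque, so dropping the oldest entry is popleft() in O(1) rather than list.pop(0), which shifts every remaining element. A minimal standalone sketch of the same pattern (the cache size and key names are illustrative, not from the patch):

    import collections

    def make_lru(maxsize, compute):
        """tiny memo in the style of lrugetfilectx: a dict holds the
        values, a deque holds the eviction order (oldest at the left)"""
        cache = {}
        order = collections.deque()
        def get(key):
            if key not in cache:
                if len(cache) > maxsize:
                    del cache[order.popleft()]  # O(1); list.pop(0) is O(n)
                cache[key] = compute(key)
            else:
                order.remove(key)  # promote a hit; O(n), as in the patch
            order.append(key)
            return cache[key]
        return get

    get = make_lru(3, len)
    for key in ['a', 'bb', 'ccc', 'dddd', 'a', 'eeeee']:
        get(key)
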
1605 1605 ctx1 = repo[node1]
1606 1606 ctx2 = repo[node2]
1607 1607
1608 1608 if not changes:
1609 1609 changes = repo.status(ctx1, ctx2, match=match)
1610 1610 modified, added, removed = changes[:3]
1611 1611
1612 1612 if not modified and not added and not removed:
1613 1613 return []
1614 1614
1615 1615 revs = None
1616 1616 if not repo.ui.quiet:
1617 1617 hexfunc = repo.ui.debugflag and hex or short
1618 1618 revs = [hexfunc(node) for node in [node1, node2] if node]
1619 1619
1620 1620 copy = {}
1621 1621 if opts.git or opts.upgrade:
1622 1622 copy = copies.pathcopies(ctx1, ctx2)
1623 1623
1624 1624 difffn = (lambda opts, losedata:
1625 1625 trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
1626 1626 copy, getfilectx, opts, losedata, prefix))
1627 1627 if opts.upgrade and not opts.git:
1628 1628 try:
1629 1629 def losedata(fn):
1630 1630 if not losedatafn or not losedatafn(fn=fn):
1631 1631 raise GitDiffRequired
1632 1632 # Buffer the whole output until we are sure it can be generated
1633 1633 return list(difffn(opts.copy(git=False), losedata))
1634 1634 except GitDiffRequired:
1635 1635 return difffn(opts.copy(git=True), None)
1636 1636 else:
1637 1637 return difffn(opts, None)
1638 1638
1639 1639 def difflabel(func, *args, **kw):
1640 1640 '''yields 2-tuples of (output, label) based on the output of func()'''
1641 1641 headprefixes = [('diff', 'diff.diffline'),
1642 1642 ('copy', 'diff.extended'),
1643 1643 ('rename', 'diff.extended'),
1644 1644 ('old', 'diff.extended'),
1645 1645 ('new', 'diff.extended'),
1646 1646 ('deleted', 'diff.extended'),
1647 1647 ('---', 'diff.file_a'),
1648 1648 ('+++', 'diff.file_b')]
1649 1649 textprefixes = [('@', 'diff.hunk'),
1650 1650 ('-', 'diff.deleted'),
1651 1651 ('+', 'diff.inserted')]
1652 1652 head = False
1653 1653 for chunk in func(*args, **kw):
1654 1654 lines = chunk.split('\n')
1655 1655 for i, line in enumerate(lines):
1656 1656 if i != 0:
1657 1657 yield ('\n', '')
1658 1658 if head:
1659 1659 if line.startswith('@'):
1660 1660 head = False
1661 1661 else:
1662 1662 if line and line[0] not in ' +-@\\':
1663 1663 head = True
1664 1664 stripline = line
1665 1665 if not head and line and line[0] in '+-':
1666 1666 # highlight trailing whitespace, but only in changed lines
1667 1667 stripline = line.rstrip()
1668 1668 prefixes = textprefixes
1669 1669 if head:
1670 1670 prefixes = headprefixes
1671 1671 for prefix, label in prefixes:
1672 1672 if stripline.startswith(prefix):
1673 1673 yield (stripline, label)
1674 1674 break
1675 1675 else:
1676 1676 yield (line, '')
1677 1677 if line != stripline:
1678 1678 yield (line[len(stripline):], 'diff.trailingwhitespace')
1679 1679
1680 1680 def diffui(*args, **kw):
1681 1681 '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
1682 1682 return difflabel(diff, *args, **kw)
1683 1683
1684 1684
1685 1685 def _addmodehdr(header, omode, nmode):
1686 1686 if omode != nmode:
1687 1687 header.append('old mode %s\n' % omode)
1688 1688 header.append('new mode %s\n' % nmode)
1689 1689
1690 1690 def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
1691 1691 copy, getfilectx, opts, losedatafn, prefix):
1692 1692
1693 1693 def join(f):
1694 1694 return os.path.join(prefix, f)
1695 1695
1696 1696 date1 = util.datestr(ctx1.date())
1697 1697 man1 = ctx1.manifest()
1698 1698
1699 1699 gone = set()
1700 1700 gitmode = {'l': '120000', 'x': '100755', '': '100644'}
1701 1701
1702 1702 copyto = dict([(v, k) for k, v in copy.items()])
1703 1703
1704 1704 if opts.git:
1705 1705 revs = None
1706 1706
1707 1707 for f in sorted(modified + added + removed):
1708 1708 to = None
1709 1709 tn = None
1710 1710 dodiff = True
1711 1711 header = []
1712 1712 if f in man1:
1713 1713 to = getfilectx(f, ctx1).data()
1714 1714 if f not in removed:
1715 1715 tn = getfilectx(f, ctx2).data()
1716 1716 a, b = f, f
1717 1717 if opts.git or losedatafn:
1718 1718 if f in added:
1719 1719 mode = gitmode[ctx2.flags(f)]
1720 1720 if f in copy or f in copyto:
1721 1721 if opts.git:
1722 1722 if f in copy:
1723 1723 a = copy[f]
1724 1724 else:
1725 1725 a = copyto[f]
1726 1726 omode = gitmode[man1.flags(a)]
1727 1727 _addmodehdr(header, omode, mode)
1728 1728 if a in removed and a not in gone:
1729 1729 op = 'rename'
1730 1730 gone.add(a)
1731 1731 else:
1732 1732 op = 'copy'
1733 1733 header.append('%s from %s\n' % (op, join(a)))
1734 1734 header.append('%s to %s\n' % (op, join(f)))
1735 1735 to = getfilectx(a, ctx1).data()
1736 1736 else:
1737 1737 losedatafn(f)
1738 1738 else:
1739 1739 if opts.git:
1740 1740 header.append('new file mode %s\n' % mode)
1741 1741 elif ctx2.flags(f):
1742 1742 losedatafn(f)
1743 1743 # In theory, if tn was copied or renamed we should check
1744 1744 # if the source is binary too, but the copy record already
1745 1745 # forces git mode.
1746 1746 if util.binary(tn):
1747 1747 if opts.git:
1748 1748 dodiff = 'binary'
1749 1749 else:
1750 1750 losedatafn(f)
1751 1751 if not opts.git and not tn:
1752 1752 # regular diffs cannot represent new empty file
1753 1753 losedatafn(f)
1754 1754 elif f in removed:
1755 1755 if opts.git:
1756 1756 # have we already reported a copy above?
1757 1757 if ((f in copy and copy[f] in added
1758 1758 and copyto[copy[f]] == f) or
1759 1759 (f in copyto and copyto[f] in added
1760 1760 and copy[copyto[f]] == f)):
1761 1761 dodiff = False
1762 1762 else:
1763 1763 header.append('deleted file mode %s\n' %
1764 1764 gitmode[man1.flags(f)])
1765 1765 elif not to or util.binary(to):
1766 1766 # regular diffs cannot represent empty file deletion
1767 1767 losedatafn(f)
1768 1768 else:
1769 1769 oflag = man1.flags(f)
1770 1770 nflag = ctx2.flags(f)
1771 1771 binary = util.binary(to) or util.binary(tn)
1772 1772 if opts.git:
1773 1773 _addmodehdr(header, gitmode[oflag], gitmode[nflag])
1774 1774 if binary:
1775 1775 dodiff = 'binary'
1776 1776 elif binary or nflag != oflag:
1777 1777 losedatafn(f)
1778 1778 if opts.git:
1779 1779 header.insert(0, mdiff.diffline(revs, join(a), join(b), opts))
1780 1780
1781 1781 if dodiff:
1782 1782 if dodiff == 'binary':
1783 1783 text = b85diff(to, tn)
1784 1784 else:
1785 1785 text = mdiff.unidiff(to, date1,
1786 1786 # ctx2 date may be dynamic
1787 1787 tn, util.datestr(ctx2.date()),
1788 1788 join(a), join(b), revs, opts=opts)
1789 1789 if header and (text or len(header) > 1):
1790 1790 yield ''.join(header)
1791 1791 if text:
1792 1792 yield text
1793 1793
1794 1794 def diffstatsum(stats):
1795 1795 maxfile, maxtotal, addtotal, removetotal, binary = 0, 0, 0, 0, False
1796 1796 for f, a, r, b in stats:
1797 1797 maxfile = max(maxfile, encoding.colwidth(f))
1798 1798 maxtotal = max(maxtotal, a + r)
1799 1799 addtotal += a
1800 1800 removetotal += r
1801 1801 binary = binary or b
1802 1802
1803 1803 return maxfile, maxtotal, addtotal, removetotal, binary
1804 1804
1805 1805 def diffstatdata(lines):
1806 1806 diffre = re.compile('^diff .*-r [a-z0-9]+\s(.*)$')
1807 1807
1808 1808 results = []
1809 1809 filename, adds, removes, isbinary = None, 0, 0, False
1810 1810
1811 1811 def addresult():
1812 1812 if filename:
1813 1813 results.append((filename, adds, removes, isbinary))
1814 1814
1815 1815 for line in lines:
1816 1816 if line.startswith('diff'):
1817 1817 addresult()
1818 1818 # set numbers to 0 anyway when starting new file
1819 1819 adds, removes, isbinary = 0, 0, False
1820 1820 if line.startswith('diff --git'):
1821 1821 filename = gitre.search(line).group(1)
1822 1822 elif line.startswith('diff -r'):
1823 1823 # format: "diff -r ... -r ... filename"
1824 1824 filename = diffre.search(line).group(1)
1825 1825 elif line.startswith('+') and not line.startswith('+++ '):
1826 1826 adds += 1
1827 1827 elif line.startswith('-') and not line.startswith('--- '):
1828 1828 removes += 1
1829 1829 elif (line.startswith('GIT binary patch') or
1830 1830 line.startswith('Binary file')):
1831 1831 isbinary = True
1832 1832 addresult()
1833 1833 return results
1834 1834
1835 1835 def diffstat(lines, width=80, git=False):
1836 1836 output = []
1837 1837 stats = diffstatdata(lines)
1838 1838 maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)
1839 1839
1840 1840 countwidth = len(str(maxtotal))
1841 1841 if hasbinary and countwidth < 3:
1842 1842 countwidth = 3
1843 1843 graphwidth = width - countwidth - maxname - 6
1844 1844 if graphwidth < 10:
1845 1845 graphwidth = 10
1846 1846
1847 1847 def scale(i):
1848 1848 if maxtotal <= graphwidth:
1849 1849 return i
1850 1850 # If diffstat runs out of room it doesn't print anything,
1851 1851 # which isn't very useful, so always print at least one + or -
1852 1852 # if there were at least some changes.
1853 1853 return max(i * graphwidth // maxtotal, int(bool(i)))
1854 1854
1855 1855 for filename, adds, removes, isbinary in stats:
1856 1856 if isbinary:
1857 1857 count = 'Bin'
1858 1858 else:
1859 1859 count = adds + removes
1860 1860 pluses = '+' * scale(adds)
1861 1861 minuses = '-' * scale(removes)
1862 1862 output.append(' %s%s | %*s %s%s\n' %
1863 1863 (filename, ' ' * (maxname - encoding.colwidth(filename)),
1864 1864 countwidth, count, pluses, minuses))
1865 1865
1866 1866 if stats:
1867 1867 output.append(_(' %d files changed, %d insertions(+), '
1868 1868 '%d deletions(-)\n')
1869 1869 % (len(stats), totaladds, totalremoves))
1870 1870
1871 1871 return ''.join(output)
1872 1872
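scale() above guarantees a visible bar: plain integer division would round small counts down to an empty graph, so int(bool(i)) forces at least one character for any nonzero count. For example, with an illustrative maxtotal of 1000 and graphwidth of 50:

    def scale(i, graphwidth=50, maxtotal=1000):
        if maxtotal <= graphwidth:
            return i
        return max(i * graphwidth // maxtotal, int(bool(i)))

    assert scale(0) == 0       # no changes, no bar
    assert scale(3) == 1       # 3 * 50 // 1000 == 0, but one '+' still shows
    assert scale(1000) == 50   # the largest file fills the graph width
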
1873 1873 def diffstatui(*args, **kw):
1874 1874 '''like diffstat(), but yields 2-tuples of (output, label) for
1875 1875 ui.write()
1876 1876 '''
1877 1877
1878 1878 for line in diffstat(*args, **kw).splitlines():
1879 1879 if line and line[-1] in '+-':
1880 1880 name, graph = line.rsplit(' ', 1)
1881 1881 yield (name + ' ', '')
1882 1882 m = re.search(r'\++', graph)
1883 1883 if m:
1884 1884 yield (m.group(0), 'diffstat.inserted')
1885 1885 m = re.search(r'-+', graph)
1886 1886 if m:
1887 1887 yield (m.group(0), 'diffstat.deleted')
1888 1888 else:
1889 1889 yield (line, '')
1890 1890 yield ('\n', '')
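The lrugetfilectx hunk above is one instance of the change this whole patch makes: a list used as a FIFO queue pays O(n) for every pop(0), because CPython shifts all remaining elements left, while collections.deque pops from either end in O(1). The same substitution repeats in revlog.py and revset.py below. A rough illustration (timings are machine-dependent and only meant to show the quadratic-versus-linear shape):

    import collections, timeit

    def drain_list(n):
        q = list(range(n))
        while q:
            q.pop(0)          # O(n) per pop: O(n**2) overall

    def drain_deque(n):
        q = collections.deque(range(n))
        while q:
            q.popleft()       # O(1) per pop: O(n) overall

    for fn in (drain_list, drain_deque):
        t = timeit.timeit(lambda: fn(10000), number=3)
        print('%s: %.3fs' % (fn.__name__, t))
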
@@ -1,1321 +1,1321 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 import struct, zlib, errno
18 import struct, zlib, errno, collections
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
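hash() above is the core integrity primitive: a nodeid is the SHA-1 of the two parent ids in sorted order followed by the text, with a precomputed-hash fast path for the common single-parent case (nullid is all zero bytes, so it always sorts first). A plain restatement without the fast path (illustrative helper, not the module's API):

    import hashlib

    nullid = b'\0' * 20

    def nodehash(text, p1, p2=nullid):
        # sorted parents make the hash independent of parent order
        lo, hi = sorted([p1, p2])
        s = hashlib.sha1(lo)
        s.update(hi)
        s.update(text)
        return s.digest()

    # identical text with different history hashes differently
    root = nodehash(b'hello\n', nullid, nullid)
    child = nodehash(b'hello\n', root, nullid)
    assert root != child
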
78 78 def compress(text):
79 79 """ generate a possibly-compressed representation of text """
80 80 if not text:
81 81 return ("", text)
82 82 l = len(text)
83 83 bin = None
84 84 if l < 44:
85 85 pass
86 86 elif l > 1000000:
87 87 # zlib makes an internal copy, thus doubling memory usage for
88 88 # large files, so let's do this in pieces
89 89 z = zlib.compressobj()
90 90 p = []
91 91 pos = 0
92 92 while pos < l:
93 93 pos2 = pos + 2**20
94 94 p.append(z.compress(text[pos:pos2]))
95 95 pos = pos2
96 96 p.append(z.flush())
97 97 if sum(map(len, p)) < l:
98 98 bin = "".join(p)
99 99 else:
100 100 bin = _compress(text)
101 101 if bin is None or len(bin) > l:
102 102 if text[0] == '\0':
103 103 return ("", text)
104 104 return ('u', text)
105 105 return ("", bin)
106 106
107 107 def decompress(bin):
108 108 """ decompress the given input """
109 109 if not bin:
110 110 return bin
111 111 t = bin[0]
112 112 if t == '\0':
113 113 return bin
114 114 if t == 'x':
115 115 return _decompress(bin)
116 116 if t == 'u':
117 117 return bin[1:]
118 118 raise RevlogError(_("unknown compression type %r") % t)
119 119
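The pair above encodes the chunk type in the first byte of the stored data: zlib output always begins with 'x' (0x78), a literal chunk is prefixed with 'u', and text that itself starts with a NUL byte is stored verbatim since neither marker can collide with it. A round-trip check of that convention (a sketch that mirrors the marker logic; it is not the module itself, which returns the marker and payload separately):

    import zlib

    def store(text):
        if not text:
            return text
        comp = zlib.compress(text)      # always starts with 'x' (0x78)
        if len(comp) < len(text):
            return comp
        if text.startswith(b'\0'):      # NUL cannot be mistaken for a marker
            return text
        return b'u' + text

    def load(blob):
        if not blob or blob.startswith(b'\0'):
            return blob
        if blob.startswith(b'x'):
            return zlib.decompress(blob)
        if blob.startswith(b'u'):
            return blob[1:]
        raise ValueError('unknown compression type')

    for text in (b'', b'\0raw', b'short', b'a' * 1000):
        assert load(store(text)) == text
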
120 120 indexformatv0 = ">4l20s20s20s"
121 121 v0shaoffset = 56
122 122
123 123 class revlogoldio(object):
124 124 def __init__(self):
125 125 self.size = struct.calcsize(indexformatv0)
126 126
127 127 def parseindex(self, data, inline):
128 128 s = self.size
129 129 index = []
130 130 nodemap = {nullid: nullrev}
131 131 n = off = 0
132 132 l = len(data)
133 133 while off + s <= l:
134 134 cur = data[off:off + s]
135 135 off += s
136 136 e = _unpack(indexformatv0, cur)
137 137 # transform to revlogv1 format
138 138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 140 index.append(e2)
141 141 nodemap[e[6]] = n
142 142 n += 1
143 143
144 144 # add the magic null revision at -1
145 145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146 146
147 147 return index, nodemap, None
148 148
149 149 def packentry(self, entry, node, version, rev):
150 150 if gettype(entry[0]):
151 151 raise RevlogError(_("index entry flags need RevlogNG"))
152 152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 153 node(entry[5]), node(entry[6]), entry[7])
154 154 return _pack(indexformatv0, *e2)
155 155
156 156 # index ng:
157 157 # 6 bytes: offset
158 158 # 2 bytes: flags
159 159 # 4 bytes: compressed length
160 160 # 4 bytes: uncompressed length
161 161 # 4 bytes: base rev
162 162 # 4 bytes: link rev
163 163 # 4 bytes: parent 1 rev
164 164 # 4 bytes: parent 2 rev
165 165 # 32 bytes: nodeid
166 166 indexformatng = ">Qiiiiii20s12x"
167 167 ngshaoffset = 32
168 168 versionformat = ">I"
169 169
170 170 class revlogio(object):
171 171 def __init__(self):
172 172 self.size = struct.calcsize(indexformatng)
173 173
174 174 def parseindex(self, data, inline):
175 175 # call the C implementation to parse the index data
176 176 index, cache = parsers.parse_index2(data, inline)
177 177 return index, getattr(index, 'nodemap', None), cache
178 178
179 179 def packentry(self, entry, node, version, rev):
180 180 p = _pack(indexformatng, *entry)
181 181 if rev == 0:
182 182 p = _pack(versionformat, version) + p[4:]
183 183 return p
184 184
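Each RevlogNG index record is a fixed 64-byte struct; that fixed size is what makes record lookup by revision number a simple multiplication. A quick check of the layout spelled out in the comment above (field values are illustrative):

    import struct

    indexformatng = ">Qiiiiii20s12x"
    assert struct.calcsize(indexformatng) == 64   # fixed-size records

    # offset and flags share the leading quad: offset << 16 | flags
    offset_flags = (1234 << 16) | 0
    node = b'\xaa' * 20
    entry = (offset_flags, 120, 300, 5, 7, 4, -1, node)
    packed = struct.pack(indexformatng, *entry)
    assert len(packed) == 64
    assert struct.unpack(indexformatng, packed) == entry
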
185 185 class revlog(object):
186 186 """
187 187 the underlying revision storage object
188 188
189 189 A revlog consists of two parts, an index and the revision data.
190 190
191 191 The index is a file with a fixed record size containing
192 192 information on each revision, including its nodeid (hash), the
193 193 nodeids of its parents, the position and offset of its data within
194 194 the data file, and the revision it's based on. Finally, each entry
195 195 contains a linkrev entry that can serve as a pointer to external
196 196 data.
197 197
198 198 The revision data itself is a linear collection of data chunks.
199 199 Each chunk represents a revision and is usually represented as a
200 200 delta against the previous chunk. To bound lookup time, runs of
201 201 deltas are limited to about 2 times the length of the original
202 202 version data. This makes retrieval of a version proportional to
203 203 its size, or O(1) relative to the number of revisions.
204 204
205 205 Both pieces of the revlog are written to in an append-only
206 206 fashion, which means we never need to rewrite a file to insert or
207 207 remove data, and can use some simple techniques to avoid the need
208 208 for locking while reading.
209 209 """
210 210 def __init__(self, opener, indexfile):
211 211 """
212 212 create a revlog object
213 213
214 214 opener is a function that abstracts the file opening operation
215 215 and can be used to implement COW semantics or the like.
216 216 """
217 217 self.indexfile = indexfile
218 218 self.datafile = indexfile[:-2] + ".d"
219 219 self.opener = opener
220 220 self._cache = None
221 221 self._basecache = (0, 0)
222 222 self._chunkcache = (0, '')
223 223 self.index = []
224 224 self._pcache = {}
225 225 self._nodecache = {nullid: nullrev}
226 226 self._nodepos = None
227 227
228 228 v = REVLOG_DEFAULT_VERSION
229 229 opts = getattr(opener, 'options', None)
230 230 if opts is not None:
231 231 if 'revlogv1' in opts:
232 232 if 'generaldelta' in opts:
233 233 v |= REVLOGGENERALDELTA
234 234 else:
235 235 v = 0
236 236
237 237 i = ''
238 238 self._initempty = True
239 239 try:
240 240 f = self.opener(self.indexfile)
241 241 i = f.read()
242 242 f.close()
243 243 if len(i) > 0:
244 244 v = struct.unpack(versionformat, i[:4])[0]
245 245 self._initempty = False
246 246 except IOError, inst:
247 247 if inst.errno != errno.ENOENT:
248 248 raise
249 249
250 250 self.version = v
251 251 self._inline = v & REVLOGNGINLINEDATA
252 252 self._generaldelta = v & REVLOGGENERALDELTA
253 253 flags = v & ~0xFFFF
254 254 fmt = v & 0xFFFF
255 255 if fmt == REVLOGV0 and flags:
256 256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
257 257 % (self.indexfile, flags >> 16))
258 258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
259 259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt > REVLOGNG:
262 262 raise RevlogError(_("index %s unknown format %d")
263 263 % (self.indexfile, fmt))
264 264
265 265 self._io = revlogio()
266 266 if self.version == REVLOGV0:
267 267 self._io = revlogoldio()
268 268 try:
269 269 d = self._io.parseindex(i, self._inline)
270 270 except (ValueError, IndexError):
271 271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
272 272 self.index, nodemap, self._chunkcache = d
273 273 if nodemap is not None:
274 274 self.nodemap = self._nodecache = nodemap
275 275 if not self._chunkcache:
276 276 self._chunkclear()
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __len__(self):
281 281 return len(self.index) - 1
282 282 def __iter__(self):
283 283 for i in xrange(len(self)):
284 284 yield i
285 285
286 286 @util.propertycache
287 287 def nodemap(self):
288 288 self.rev(self.node(0))
289 289 return self._nodecache
290 290
291 291 def hasnode(self, node):
292 292 try:
293 293 self.rev(node)
294 294 return True
295 295 except KeyError:
296 296 return False
297 297
298 298 def clearcaches(self):
299 299 try:
300 300 self._nodecache.clearcaches()
301 301 except AttributeError:
302 302 self._nodecache = {nullid: nullrev}
303 303 self._nodepos = None
304 304
305 305 def rev(self, node):
306 306 try:
307 307 return self._nodecache[node]
308 308 except RevlogError:
309 309 # parsers.c radix tree lookup failed
310 310 raise LookupError(node, self.indexfile, _('no node'))
311 311 except KeyError:
312 312 # pure python cache lookup failed
313 313 n = self._nodecache
314 314 i = self.index
315 315 p = self._nodepos
316 316 if p is None:
317 317 p = len(i) - 2
318 318 for r in xrange(p, -1, -1):
319 319 v = i[r][7]
320 320 n[v] = r
321 321 if v == node:
322 322 self._nodepos = r - 1
323 323 return r
324 324 raise LookupError(node, self.indexfile, _('no node'))
325 325
326 326 def node(self, rev):
327 327 return self.index[rev][7]
328 328 def linkrev(self, rev):
329 329 return self.index[rev][4]
330 330 def parents(self, node):
331 331 i = self.index
332 332 d = i[self.rev(node)]
333 333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
334 334 def parentrevs(self, rev):
335 335 return self.index[rev][5:7]
336 336 def start(self, rev):
337 337 return int(self.index[rev][0] >> 16)
338 338 def end(self, rev):
339 339 return self.start(rev) + self.length(rev)
340 340 def length(self, rev):
341 341 return self.index[rev][1]
342 342 def chainbase(self, rev):
343 343 index = self.index
344 344 base = index[rev][3]
345 345 while base != rev:
346 346 rev = base
347 347 base = index[rev][3]
348 348 return base
349 349 def flags(self, rev):
350 350 return self.index[rev][0] & 0xFFFF
351 351 def rawsize(self, rev):
352 352 """return the length of the uncompressed text for a given revision"""
353 353 l = self.index[rev][2]
354 354 if l >= 0:
355 355 return l
356 356
357 357 t = self.revision(self.node(rev))
358 358 return len(t)
359 359 size = rawsize
360 360
361 361 def reachable(self, node, stop=None):
362 362 """return the set of all nodes ancestral to a given node, including
363 363 the node itself, stopping when stop is matched"""
364 364 reachable = set((node,))
365 visit = [node]
365 visit = collections.deque([node])
366 366 if stop:
367 367 stopn = self.rev(stop)
368 368 else:
369 369 stopn = 0
370 370 while visit:
371 n = visit.pop(0)
371 n = visit.popleft()
372 372 if n == stop:
373 373 continue
374 374 if n == nullid:
375 375 continue
376 376 for p in self.parents(n):
377 377 if self.rev(p) < stopn:
378 378 continue
379 379 if p not in reachable:
380 380 reachable.add(p)
381 381 visit.append(p)
382 382 return reachable
383 383
384 384 def ancestors(self, *revs):
385 385 """Generate the ancestors of 'revs' in reverse topological order.
386 386
387 387 Yield a sequence of revision numbers starting with the parents
388 388 of each revision in revs, i.e., each revision is *not* considered
389 389 an ancestor of itself. Results are in breadth-first order:
390 390 parents of each rev in revs, then parents of those, etc. Result
391 391 does not include the null revision."""
392 visit = list(revs)
392 visit = collections.deque(revs)
393 393 seen = set([nullrev])
394 394 while visit:
395 for parent in self.parentrevs(visit.pop(0)):
395 for parent in self.parentrevs(visit.popleft()):
396 396 if parent not in seen:
397 397 visit.append(parent)
398 398 seen.add(parent)
399 399 yield parent
400 400
401 401 def descendants(self, *revs):
402 402 """Generate the descendants of 'revs' in revision order.
403 403
404 404 Yield a sequence of revision numbers starting with a child of
405 405 some rev in revs, i.e., each revision is *not* considered a
406 406 descendant of itself. Results are ordered by revision number (a
407 407 topological sort)."""
408 408 first = min(revs)
409 409 if first == nullrev:
410 410 for i in self:
411 411 yield i
412 412 return
413 413
414 414 seen = set(revs)
415 415 for i in xrange(first + 1, len(self)):
416 416 for x in self.parentrevs(i):
417 417 if x != nullrev and x in seen:
418 418 seen.add(i)
419 419 yield i
420 420 break
421 421
422 422 def findcommonmissing(self, common=None, heads=None):
423 423 """Return a tuple of the ancestors of common and the ancestors of heads
424 424 that are not ancestors of common. In revset terminology, we return the
425 425 tuple:
426 426
427 427 ::common, (::heads) - (::common)
428 428
429 429 The list is sorted by revision number, meaning it is
430 430 topologically sorted.
431 431
432 432 'heads' and 'common' are both lists of node IDs. If heads is
433 433 not supplied, uses all of the revlog's heads. If common is not
434 434 supplied, uses nullid."""
435 435 if common is None:
436 436 common = [nullid]
437 437 if heads is None:
438 438 heads = self.heads()
439 439
440 440 common = [self.rev(n) for n in common]
441 441 heads = [self.rev(n) for n in heads]
442 442
443 443 # we want the ancestors, but inclusive
444 444 has = set(self.ancestors(*common))
445 445 has.add(nullrev)
446 446 has.update(common)
447 447
448 448 # take all ancestors from heads that aren't in has
449 449 missing = set()
450 visit = [r for r in heads if r not in has]
450 visit = collections.deque(r for r in heads if r not in has)
451 451 while visit:
452 r = visit.pop(0)
452 r = visit.popleft()
453 453 if r in missing:
454 454 continue
455 455 else:
456 456 missing.add(r)
457 457 for p in self.parentrevs(r):
458 458 if p not in has:
459 459 visit.append(p)
460 460 missing = list(missing)
461 461 missing.sort()
462 462 return has, [self.node(r) for r in missing]
463 463
464 464 def findmissing(self, common=None, heads=None):
465 465 """Return the ancestors of heads that are not ancestors of common.
466 466
467 467 More specifically, return a list of nodes N such that every N
468 468 satisfies the following constraints:
469 469
470 470 1. N is an ancestor of some node in 'heads'
471 471 2. N is not an ancestor of any node in 'common'
472 472
473 473 The list is sorted by revision number, meaning it is
474 474 topologically sorted.
475 475
476 476 'heads' and 'common' are both lists of node IDs. If heads is
477 477 not supplied, uses all of the revlog's heads. If common is not
478 478 supplied, uses nullid."""
479 479 _common, missing = self.findcommonmissing(common, heads)
480 480 return missing
481 481
482 482 def nodesbetween(self, roots=None, heads=None):
483 483 """Return a topological path from 'roots' to 'heads'.
484 484
485 485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
486 486 topologically sorted list of all nodes N that satisfy both of
487 487 these constraints:
488 488
489 489 1. N is a descendant of some node in 'roots'
490 490 2. N is an ancestor of some node in 'heads'
491 491
492 492 Every node is considered to be both a descendant and an ancestor
493 493 of itself, so every reachable node in 'roots' and 'heads' will be
494 494 included in 'nodes'.
495 495
496 496 'outroots' is the list of reachable nodes in 'roots', i.e., the
497 497 subset of 'roots' that is returned in 'nodes'. Likewise,
498 498 'outheads' is the subset of 'heads' that is also in 'nodes'.
499 499
500 500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
501 501 unspecified, uses nullid as the only root. If 'heads' is
502 502 unspecified, uses list of all of the revlog's heads."""
503 503 nonodes = ([], [], [])
504 504 if roots is not None:
505 505 roots = list(roots)
506 506 if not roots:
507 507 return nonodes
508 508 lowestrev = min([self.rev(n) for n in roots])
509 509 else:
510 510 roots = [nullid] # Everybody's a descendant of nullid
511 511 lowestrev = nullrev
512 512 if (lowestrev == nullrev) and (heads is None):
513 513 # We want _all_ the nodes!
514 514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
515 515 if heads is None:
516 516 # All nodes are ancestors, so the latest ancestor is the last
517 517 # node.
518 518 highestrev = len(self) - 1
519 519 # Set ancestors to None to signal that every node is an ancestor.
520 520 ancestors = None
521 521 # Set heads to an empty dictionary for later discovery of heads
522 522 heads = {}
523 523 else:
524 524 heads = list(heads)
525 525 if not heads:
526 526 return nonodes
527 527 ancestors = set()
528 528 # Turn heads into a dictionary so we can remove 'fake' heads.
529 529 # Also, later we will be using it to filter out the heads we can't
530 530 # find from roots.
531 531 heads = dict.fromkeys(heads, False)
532 532 # Start at the top and keep marking parents until we're done.
533 533 nodestotag = set(heads)
534 534 # Remember where the top was so we can use it as a limit later.
535 535 highestrev = max([self.rev(n) for n in nodestotag])
536 536 while nodestotag:
537 537 # grab a node to tag
538 538 n = nodestotag.pop()
539 539 # Never tag nullid
540 540 if n == nullid:
541 541 continue
542 542 # A node's revision number represents its place in a
543 543 # topologically sorted list of nodes.
544 544 r = self.rev(n)
545 545 if r >= lowestrev:
546 546 if n not in ancestors:
547 547 # If we are possibly a descendant of one of the roots
548 548 # and we haven't already been marked as an ancestor
549 549 ancestors.add(n) # Mark as ancestor
550 550 # Add non-nullid parents to list of nodes to tag.
551 551 nodestotag.update([p for p in self.parents(n) if
552 552 p != nullid])
553 553 elif n in heads: # We've seen it before, is it a fake head?
554 554 # So it is, real heads should not be the ancestors of
555 555 # any other heads.
556 556 heads.pop(n)
557 557 if not ancestors:
558 558 return nonodes
559 559 # Now that we have our set of ancestors, we want to remove any
560 560 # roots that are not ancestors.
561 561
562 562 # If one of the roots was nullid, everything is included anyway.
563 563 if lowestrev > nullrev:
564 564 # But, since we weren't, let's recompute the lowest rev to not
565 565 # include roots that aren't ancestors.
566 566
567 567 # Filter out roots that aren't ancestors of heads
568 568 roots = [n for n in roots if n in ancestors]
569 569 # Recompute the lowest revision
570 570 if roots:
571 571 lowestrev = min([self.rev(n) for n in roots])
572 572 else:
573 573 # No more roots? Return empty list
574 574 return nonodes
575 575 else:
576 576 # We are descending from nullid, and don't need to care about
577 577 # any other roots.
578 578 lowestrev = nullrev
579 579 roots = [nullid]
580 580 # Transform our roots list into a set.
581 581 descendants = set(roots)
582 582 # Also, keep the original roots so we can filter out roots that aren't
583 583 # 'real' roots (i.e. are descended from other roots).
584 584 roots = descendants.copy()
585 585 # Our topologically sorted list of output nodes.
586 586 orderedout = []
587 587 # Don't start at nullid since we don't want nullid in our output list,
588 588 # and if nullid shows up in descendants, empty parents will look like
589 589 # they're descendants.
590 590 for r in xrange(max(lowestrev, 0), highestrev + 1):
591 591 n = self.node(r)
592 592 isdescendant = False
593 593 if lowestrev == nullrev: # Everybody is a descendant of nullid
594 594 isdescendant = True
595 595 elif n in descendants:
596 596 # n is already a descendant
597 597 isdescendant = True
598 598 # This check only needs to be done here because all the roots
599 599 # will start being marked as descendants before the loop.
600 600 if n in roots:
601 601 # If n was a root, check if it's a 'real' root.
602 602 p = tuple(self.parents(n))
603 603 # If any of its parents are descendants, it's not a root.
604 604 if (p[0] in descendants) or (p[1] in descendants):
605 605 roots.remove(n)
606 606 else:
607 607 p = tuple(self.parents(n))
608 608 # A node is a descendant if either of its parents is a
609 609 # descendant. (We seeded the descendants set with the roots
610 610 # up there, remember?)
611 611 if (p[0] in descendants) or (p[1] in descendants):
612 612 descendants.add(n)
613 613 isdescendant = True
614 614 if isdescendant and ((ancestors is None) or (n in ancestors)):
615 615 # Only include nodes that are both descendants and ancestors.
616 616 orderedout.append(n)
617 617 if (ancestors is not None) and (n in heads):
618 618 # We're trying to figure out which heads are reachable
619 619 # from roots.
620 620 # Mark this head as having been reached
621 621 heads[n] = True
622 622 elif ancestors is None:
623 623 # Otherwise, we're trying to discover the heads.
624 624 # Assume this is a head because if it isn't, the next step
625 625 # will eventually remove it.
626 626 heads[n] = True
627 627 # But, obviously its parents aren't.
628 628 for p in self.parents(n):
629 629 heads.pop(p, None)
630 630 heads = [n for n, flag in heads.iteritems() if flag]
631 631 roots = list(roots)
632 632 assert orderedout
633 633 assert roots
634 634 assert heads
635 635 return (orderedout, roots, heads)
636 636
637 637 def headrevs(self):
638 638 try:
639 639 return self.index.headrevs()
640 640 except AttributeError:
641 641 pass
642 642 count = len(self)
643 643 if not count:
644 644 return [nullrev]
645 645 ishead = [1] * (count + 1)
646 646 index = self.index
647 647 for r in xrange(count):
648 648 e = index[r]
649 649 ishead[e[5]] = ishead[e[6]] = 0
650 650 return [r for r in xrange(count) if ishead[r]]
651 651
652 652 def heads(self, start=None, stop=None):
653 653 """return the list of all nodes that have no children
654 654
655 655 if start is specified, only heads that are descendants of
656 656 start will be returned
657 657 if stop is specified, it will consider all the revs from stop
658 658 as if they had no children
659 659 """
660 660 if start is None and stop is None:
661 661 if not len(self):
662 662 return [nullid]
663 663 return [self.node(r) for r in self.headrevs()]
664 664
665 665 if start is None:
666 666 start = nullid
667 667 if stop is None:
668 668 stop = []
669 669 stoprevs = set([self.rev(n) for n in stop])
670 670 startrev = self.rev(start)
671 671 reachable = set((startrev,))
672 672 heads = set((startrev,))
673 673
674 674 parentrevs = self.parentrevs
675 675 for r in xrange(startrev + 1, len(self)):
676 676 for p in parentrevs(r):
677 677 if p in reachable:
678 678 if r not in stoprevs:
679 679 reachable.add(r)
680 680 heads.add(r)
681 681 if p in heads and p not in stoprevs:
682 682 heads.remove(p)
683 683
684 684 return [self.node(r) for r in heads]
685 685
686 686 def children(self, node):
687 687 """find the children of a given node"""
688 688 c = []
689 689 p = self.rev(node)
690 690 for r in range(p + 1, len(self)):
691 691 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
692 692 if prevs:
693 693 for pr in prevs:
694 694 if pr == p:
695 695 c.append(self.node(r))
696 696 elif p == nullrev:
697 697 c.append(self.node(r))
698 698 return c
699 699
700 700 def descendant(self, start, end):
701 701 if start == nullrev:
702 702 return True
703 703 for i in self.descendants(start):
704 704 if i == end:
705 705 return True
706 706 elif i > end:
707 707 break
708 708 return False
709 709
710 710 def ancestor(self, a, b):
711 711 """calculate the least common ancestor of nodes a and b"""
712 712
713 713 # fast path, check if it is a descendant
714 714 a, b = self.rev(a), self.rev(b)
715 715 start, end = sorted((a, b))
716 716 if self.descendant(start, end):
717 717 return self.node(start)
718 718
719 719 def parents(rev):
720 720 return [p for p in self.parentrevs(rev) if p != nullrev]
721 721
722 722 c = ancestor.ancestor(a, b, parents)
723 723 if c is None:
724 724 return nullid
725 725
726 726 return self.node(c)
727 727
728 728 def _match(self, id):
729 729 if isinstance(id, int):
730 730 # rev
731 731 return self.node(id)
732 732 if len(id) == 20:
733 733 # possibly a binary node
734 734 # odds of a binary node being all hex in ASCII are 1 in 10**25
735 735 try:
736 736 node = id
737 737 self.rev(node) # quick search the index
738 738 return node
739 739 except LookupError:
740 740 pass # may be partial hex id
741 741 try:
742 742 # str(rev)
743 743 rev = int(id)
744 744 if str(rev) != id:
745 745 raise ValueError
746 746 if rev < 0:
747 747 rev = len(self) + rev
748 748 if rev < 0 or rev >= len(self):
749 749 raise ValueError
750 750 return self.node(rev)
751 751 except (ValueError, OverflowError):
752 752 pass
753 753 if len(id) == 40:
754 754 try:
755 755 # a full hex nodeid?
756 756 node = bin(id)
757 757 self.rev(node)
758 758 return node
759 759 except (TypeError, LookupError):
760 760 pass
761 761
762 762 def _partialmatch(self, id):
763 763 try:
764 764 return self.index.partialmatch(id)
765 765 except RevlogError:
766 766 # parsers.c radix tree lookup gave multiple matches
767 767 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
768 768 except (AttributeError, ValueError):
769 769 # we are pure python, or key was too short to search radix tree
770 770 pass
771 771
772 772 if id in self._pcache:
773 773 return self._pcache[id]
774 774
775 775 if len(id) < 40:
776 776 try:
777 777 # hex(node)[:...]
778 778 l = len(id) // 2 # grab an even number of digits
779 779 prefix = bin(id[:l * 2])
780 780 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
781 781 nl = [n for n in nl if hex(n).startswith(id)]
782 782 if len(nl) > 0:
783 783 if len(nl) == 1:
784 784 self._pcache[id] = nl[0]
785 785 return nl[0]
786 786 raise LookupError(id, self.indexfile,
787 787 _('ambiguous identifier'))
788 788 return None
789 789 except TypeError:
790 790 pass
791 791
792 792 def lookup(self, id):
793 793 """locate a node based on:
794 794 - revision number or str(revision number)
795 795 - nodeid or subset of hex nodeid
796 796 """
797 797 n = self._match(id)
798 798 if n is not None:
799 799 return n
800 800 n = self._partialmatch(id)
801 801 if n:
802 802 return n
803 803
804 804 raise LookupError(id, self.indexfile, _('no match found'))
805 805
806 806 def cmp(self, node, text):
807 807 """compare text with a given file revision
808 808
809 809 returns True if text is different than what is stored.
810 810 """
811 811 p1, p2 = self.parents(node)
812 812 return hash(text, p1, p2) != node
813 813
814 814 def _addchunk(self, offset, data):
815 815 o, d = self._chunkcache
816 816 # try to add to existing cache
817 817 if o + len(d) == offset and len(d) + len(data) < _chunksize:
818 818 self._chunkcache = o, d + data
819 819 else:
820 820 self._chunkcache = offset, data
821 821
822 822 def _loadchunk(self, offset, length):
823 823 if self._inline:
824 824 df = self.opener(self.indexfile)
825 825 else:
826 826 df = self.opener(self.datafile)
827 827
828 828 readahead = max(65536, length)
829 829 df.seek(offset)
830 830 d = df.read(readahead)
831 831 df.close()
832 832 self._addchunk(offset, d)
833 833 if readahead > length:
834 834 return util.buffer(d, 0, length)
835 835 return d
836 836
837 837 def _getchunk(self, offset, length):
838 838 o, d = self._chunkcache
839 839 l = len(d)
840 840
841 841 # is it in the cache?
842 842 cachestart = offset - o
843 843 cacheend = cachestart + length
844 844 if cachestart >= 0 and cacheend <= l:
845 845 if cachestart == 0 and cacheend == l:
846 846 return d # avoid a copy
847 847 return util.buffer(d, cachestart, cacheend - cachestart)
848 848
849 849 return self._loadchunk(offset, length)
850 850
851 851 def _chunkraw(self, startrev, endrev):
852 852 start = self.start(startrev)
853 853 length = self.end(endrev) - start
854 854 if self._inline:
855 855 start += (startrev + 1) * self._io.size
856 856 return self._getchunk(start, length)
857 857
858 858 def _chunk(self, rev):
859 859 return decompress(self._chunkraw(rev, rev))
860 860
861 861 def _chunkbase(self, rev):
862 862 return self._chunk(rev)
863 863
864 864 def _chunkclear(self):
865 865 self._chunkcache = (0, '')
866 866
867 867 def deltaparent(self, rev):
868 868 """return deltaparent of the given revision"""
869 869 base = self.index[rev][3]
870 870 if base == rev:
871 871 return nullrev
872 872 elif self._generaldelta:
873 873 return base
874 874 else:
875 875 return rev - 1
876 876
877 877 def revdiff(self, rev1, rev2):
878 878 """return or calculate a delta between two revisions"""
879 879 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
880 880 return str(self._chunk(rev2))
881 881
882 882 return mdiff.textdiff(self.revision(rev1),
883 883 self.revision(rev2))
884 884
885 885 def revision(self, nodeorrev):
886 886 """return an uncompressed revision of a given node or revision
887 887 number.
888 888 """
889 889 if isinstance(nodeorrev, int):
890 890 rev = nodeorrev
891 891 node = self.node(rev)
892 892 else:
893 893 node = nodeorrev
894 894 rev = None
895 895
896 896 cachedrev = None
897 897 if node == nullid:
898 898 return ""
899 899 if self._cache:
900 900 if self._cache[0] == node:
901 901 return self._cache[2]
902 902 cachedrev = self._cache[1]
903 903
904 904 # look up what we need to read
905 905 text = None
906 906 if rev is None:
907 907 rev = self.rev(node)
908 908
909 909 # check rev flags
910 910 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
911 911 raise RevlogError(_('incompatible revision flag %x') %
912 912 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
913 913
914 914 # build delta chain
915 915 chain = []
916 916 index = self.index # for performance
917 917 generaldelta = self._generaldelta
918 918 iterrev = rev
919 919 e = index[iterrev]
920 920 while iterrev != e[3] and iterrev != cachedrev:
921 921 chain.append(iterrev)
922 922 if generaldelta:
923 923 iterrev = e[3]
924 924 else:
925 925 iterrev -= 1
926 926 e = index[iterrev]
927 927 chain.reverse()
928 928 base = iterrev
929 929
930 930 if iterrev == cachedrev:
931 931 # cache hit
932 932 text = self._cache[2]
933 933
934 934 # drop cache to save memory
935 935 self._cache = None
936 936
937 937 self._chunkraw(base, rev)
938 938 if text is None:
939 939 text = str(self._chunkbase(base))
940 940
941 941 bins = [self._chunk(r) for r in chain]
942 942 text = mdiff.patches(text, bins)
943 943
944 944 text = self._checkhash(text, node, rev)
945 945
946 946 self._cache = (node, rev, text)
947 947 return text
948 948
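The while loop above is the delta-chain walk: follow each entry's base pointer (or simply rev - 1 without generaldelta) until reaching a revision stored as a full text, then replay the deltas oldest-first. A toy version that mirrors the chain construction, with string appends standing in for binary mdiff patches (purely illustrative data):

    # index: rev -> base rev; a self-referential base marks a full snapshot
    base = {0: 0, 1: 0, 2: 1, 3: 3, 4: 3}
    fulltext = {0: 'A', 3: 'X'}
    delta = {1: '+B', 2: '+C', 4: '+Y'}   # stand-ins for binary patches

    def revision(rev):
        chain = []
        while base[rev] != rev:            # walk back to the snapshot
            chain.append(rev)
            rev = base[rev]
        chain.reverse()                    # replay oldest delta first
        text = fulltext[rev]
        for r in chain:
            text += delta[r]               # mdiff.patches() in the real code
        return text

    assert revision(2) == 'A+B+C'
    assert revision(4) == 'X+Y'
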
949 949 def _checkhash(self, text, node, rev):
950 950 p1, p2 = self.parents(node)
951 951 if node != hash(text, p1, p2):
952 952 raise RevlogError(_("integrity check failed on %s:%d")
953 953 % (self.indexfile, rev))
954 954 return text
955 955
956 956 def checkinlinesize(self, tr, fp=None):
957 957 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
958 958 return
959 959
960 960 trinfo = tr.find(self.indexfile)
961 961 if trinfo is None:
962 962 raise RevlogError(_("%s not found in the transaction")
963 963 % self.indexfile)
964 964
965 965 trindex = trinfo[2]
966 966 dataoff = self.start(trindex)
967 967
968 968 tr.add(self.datafile, dataoff)
969 969
970 970 if fp:
971 971 fp.flush()
972 972 fp.close()
973 973
974 974 df = self.opener(self.datafile, 'w')
975 975 try:
976 976 for r in self:
977 977 df.write(self._chunkraw(r, r))
978 978 finally:
979 979 df.close()
980 980
981 981 fp = self.opener(self.indexfile, 'w', atomictemp=True)
982 982 self.version &= ~(REVLOGNGINLINEDATA)
983 983 self._inline = False
984 984 for i in self:
985 985 e = self._io.packentry(self.index[i], self.node, self.version, i)
986 986 fp.write(e)
987 987
988 988 # if we don't call close, the temp file will never replace the
989 989 # real index
990 990 fp.close()
991 991
992 992 tr.replace(self.indexfile, trindex * self._io.size)
993 993 self._chunkclear()
994 994
995 995 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
996 996 """add a revision to the log
997 997
998 998 text - the revision data to add
999 999 transaction - the transaction object used for rollback
1000 1000 link - the linkrev data to add
1001 1001 p1, p2 - the parent nodeids of the revision
1002 1002 cachedelta - an optional precomputed delta
1003 1003 """
1004 1004 node = hash(text, p1, p2)
1005 1005 if node in self.nodemap:
1006 1006 return node
1007 1007
1008 1008 dfh = None
1009 1009 if not self._inline:
1010 1010 dfh = self.opener(self.datafile, "a")
1011 1011 ifh = self.opener(self.indexfile, "a+")
1012 1012 try:
1013 1013 return self._addrevision(node, text, transaction, link, p1, p2,
1014 1014 cachedelta, ifh, dfh)
1015 1015 finally:
1016 1016 if dfh:
1017 1017 dfh.close()
1018 1018 ifh.close()
1019 1019
1020 1020 def _addrevision(self, node, text, transaction, link, p1, p2,
1021 1021 cachedelta, ifh, dfh):
1022 1022 """internal function to add revisions to the log
1023 1023
1024 1024 see addrevision for argument descriptions.
1025 1025 invariants:
1026 1026 - text is optional (can be None); if not set, cachedelta must be set.
1027 1027 if both are set, they must correspond to each other.
1028 1028 """
1029 1029 btext = [text]
1030 1030 def buildtext():
1031 1031 if btext[0] is not None:
1032 1032 return btext[0]
1033 1033 # flush any pending writes here so we can read it in revision
1034 1034 if dfh:
1035 1035 dfh.flush()
1036 1036 ifh.flush()
1037 1037 basetext = self.revision(self.node(cachedelta[0]))
1038 1038 btext[0] = mdiff.patch(basetext, cachedelta[1])
1039 1039 chk = hash(btext[0], p1, p2)
1040 1040 if chk != node:
1041 1041 raise RevlogError(_("consistency error in delta"))
1042 1042 return btext[0]
1043 1043
1044 1044 def builddelta(rev):
1045 1045 # can we use the cached delta?
1046 1046 if cachedelta and cachedelta[0] == rev:
1047 1047 delta = cachedelta[1]
1048 1048 else:
1049 1049 t = buildtext()
1050 1050 ptext = self.revision(self.node(rev))
1051 1051 delta = mdiff.textdiff(ptext, t)
1052 1052 data = compress(delta)
1053 1053 l = len(data[1]) + len(data[0])
1054 1054 if basecache[0] == rev:
1055 1055 chainbase = basecache[1]
1056 1056 else:
1057 1057 chainbase = self.chainbase(rev)
1058 1058 dist = l + offset - self.start(chainbase)
1059 1059 if self._generaldelta:
1060 1060 base = rev
1061 1061 else:
1062 1062 base = chainbase
1063 1063 return dist, l, data, base, chainbase
1064 1064
1065 1065 curr = len(self)
1066 1066 prev = curr - 1
1067 1067 base = chainbase = curr
1068 1068 offset = self.end(prev)
1069 1069 flags = 0
1070 1070 d = None
1071 1071 basecache = self._basecache
1072 1072 p1r, p2r = self.rev(p1), self.rev(p2)
1073 1073
1074 1074 # should we try to build a delta?
1075 1075 if prev != nullrev:
1076 1076 if self._generaldelta:
1077 1077 if p1r >= basecache[1]:
1078 1078 d = builddelta(p1r)
1079 1079 elif p2r >= basecache[1]:
1080 1080 d = builddelta(p2r)
1081 1081 else:
1082 1082 d = builddelta(prev)
1083 1083 else:
1084 1084 d = builddelta(prev)
1085 1085 dist, l, data, base, chainbase = d
1086 1086
1087 1087 # full versions are inserted when the needed deltas
1088 1088 # become comparable to the uncompressed text
1089 1089 if text is None:
1090 1090 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1091 1091 cachedelta[1])
1092 1092 else:
1093 1093 textlen = len(text)
1094 1094 if d is None or dist > textlen * 2:
1095 1095 text = buildtext()
1096 1096 data = compress(text)
1097 1097 l = len(data[1]) + len(data[0])
1098 1098 base = chainbase = curr
1099 1099
1100 1100 e = (offset_type(offset, flags), l, textlen,
1101 1101 base, link, p1r, p2r, node)
1102 1102 self.index.insert(-1, e)
1103 1103 self.nodemap[node] = curr
1104 1104
1105 1105 entry = self._io.packentry(e, self.node, self.version, curr)
1106 1106 if not self._inline:
1107 1107 transaction.add(self.datafile, offset)
1108 1108 transaction.add(self.indexfile, curr * len(entry))
1109 1109 if data[0]:
1110 1110 dfh.write(data[0])
1111 1111 dfh.write(data[1])
1112 1112 dfh.flush()
1113 1113 ifh.write(entry)
1114 1114 else:
1115 1115 offset += curr * self._io.size
1116 1116 transaction.add(self.indexfile, offset, curr)
1117 1117 ifh.write(entry)
1118 1118 ifh.write(data[0])
1119 1119 ifh.write(data[1])
1120 1120 self.checkinlinesize(transaction, ifh)
1121 1121
1122 1122 if type(text) == str: # only accept immutable objects
1123 1123 self._cache = (node, curr, text)
1124 1124 self._basecache = (curr, chainbase)
1125 1125 return node
1126 1126
1127 1127 def group(self, nodelist, bundler, reorder=None):
1128 1128 """Calculate a delta group, yielding a sequence of changegroup chunks
1129 1129 (strings).
1130 1130
1131 1131 Given a list of changeset revs, return a set of deltas and
1132 1132 metadata corresponding to nodes. The first delta is
1133 1133 first parent(nodelist[0]) -> nodelist[0], the receiver is
1134 1134 guaranteed to have this parent as it has all history before
1135 1135 these changesets. In the case where firstparent is nullrev, the
1136 1136 changegroup starts with a full revision.
1137 1137 """
1138 1138
1139 1139 # if we don't have any revisions touched by these changesets, bail
1140 1140 if len(nodelist) == 0:
1141 1141 yield bundler.close()
1142 1142 return
1143 1143
1144 1144 # for generaldelta revlogs, we linearize the revs; this will both be
1145 1145 # much quicker and generate a much smaller bundle
1146 1146 if (self._generaldelta and reorder is not False) or reorder:
1147 1147 dag = dagutil.revlogdag(self)
1148 1148 revs = set(self.rev(n) for n in nodelist)
1149 1149 revs = dag.linearize(revs)
1150 1150 else:
1151 1151 revs = sorted([self.rev(n) for n in nodelist])
1152 1152
1153 1153 # add the parent of the first rev
1154 1154 p = self.parentrevs(revs[0])[0]
1155 1155 revs.insert(0, p)
1156 1156
1157 1157 # build deltas
1158 1158 for r in xrange(len(revs) - 1):
1159 1159 prev, curr = revs[r], revs[r + 1]
1160 1160 for c in bundler.revchunk(self, curr, prev):
1161 1161 yield c
1162 1162
1163 1163 yield bundler.close()
1164 1164
1165 1165 def addgroup(self, bundle, linkmapper, transaction):
1166 1166 """
1167 1167 add a delta group
1168 1168
1169 1169 given a set of deltas, add them to the revision log. the
1170 1170 first delta is against its parent, which should be in our
1171 1171 log, the rest are against the previous delta.
1172 1172 """
1173 1173
1174 1174 # track the base of the current delta log
1175 1175 content = []
1176 1176 node = None
1177 1177
1178 1178 r = len(self)
1179 1179 end = 0
1180 1180 if r:
1181 1181 end = self.end(r - 1)
1182 1182 ifh = self.opener(self.indexfile, "a+")
1183 1183 isize = r * self._io.size
1184 1184 if self._inline:
1185 1185 transaction.add(self.indexfile, end + isize, r)
1186 1186 dfh = None
1187 1187 else:
1188 1188 transaction.add(self.indexfile, isize, r)
1189 1189 transaction.add(self.datafile, end)
1190 1190 dfh = self.opener(self.datafile, "a")
1191 1191
1192 1192 try:
1193 1193 # loop through our set of deltas
1194 1194 chain = None
1195 1195 while True:
1196 1196 chunkdata = bundle.deltachunk(chain)
1197 1197 if not chunkdata:
1198 1198 break
1199 1199 node = chunkdata['node']
1200 1200 p1 = chunkdata['p1']
1201 1201 p2 = chunkdata['p2']
1202 1202 cs = chunkdata['cs']
1203 1203 deltabase = chunkdata['deltabase']
1204 1204 delta = chunkdata['delta']
1205 1205
1206 1206 content.append(node)
1207 1207
1208 1208 link = linkmapper(cs)
1209 1209 if node in self.nodemap:
1210 1210 # this can happen if two branches make the same change
1211 1211 chain = node
1212 1212 continue
1213 1213
1214 1214 for p in (p1, p2):
1215 1215 if p not in self.nodemap:
1216 1216 raise LookupError(p, self.indexfile,
1217 1217 _('unknown parent'))
1218 1218
1219 1219 if deltabase not in self.nodemap:
1220 1220 raise LookupError(deltabase, self.indexfile,
1221 1221 _('unknown delta base'))
1222 1222
1223 1223 baserev = self.rev(deltabase)
1224 1224 chain = self._addrevision(node, None, transaction, link,
1225 1225 p1, p2, (baserev, delta), ifh, dfh)
1226 1226 if not dfh and not self._inline:
1227 1227 # addrevision switched from inline to conventional
1228 1228 # reopen the index
1229 1229 ifh.close()
1230 1230 dfh = self.opener(self.datafile, "a")
1231 1231 ifh = self.opener(self.indexfile, "a")
1232 1232 finally:
1233 1233 if dfh:
1234 1234 dfh.close()
1235 1235 ifh.close()
1236 1236
1237 1237 return content
1238 1238
1239 1239 def strip(self, minlink, transaction):
1240 1240 """truncate the revlog on the first revision with a linkrev >= minlink
1241 1241
1242 1242 This function is called when we're stripping revision minlink and
1243 1243 its descendants from the repository.
1244 1244
1245 1245 We have to remove all revisions with linkrev >= minlink, because
1246 1246 the equivalent changelog revisions will be renumbered after the
1247 1247 strip.
1248 1248
1249 1249 So we truncate the revlog on the first of these revisions, and
1250 1250 trust that the caller has saved the revisions that shouldn't be
1251 1251 removed and that it'll re-add them after this truncation.
1252 1252 """
1253 1253 if len(self) == 0:
1254 1254 return
1255 1255
1256 1256 for rev in self:
1257 1257 if self.index[rev][4] >= minlink:
1258 1258 break
1259 1259 else:
1260 1260 return
1261 1261
1262 1262 # first truncate the files on disk
1263 1263 end = self.start(rev)
1264 1264 if not self._inline:
1265 1265 transaction.add(self.datafile, end)
1266 1266 end = rev * self._io.size
1267 1267 else:
1268 1268 end += rev * self._io.size
1269 1269
1270 1270 transaction.add(self.indexfile, end)
1271 1271
1272 1272 # then reset internal state in memory to forget those revisions
1273 1273 self._cache = None
1274 1274 self._chunkclear()
1275 1275 for x in xrange(rev, len(self)):
1276 1276 del self.nodemap[self.node(x)]
1277 1277
1278 1278 del self.index[rev:-1]
1279 1279
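# Worked example (illustrative, assuming the 64-byte v1 index entry size):
# stripping at rev == 3 in an inline revlog whose revision 3 starts at data
# offset 1000 truncates the index file at 1000 + 3 * 64 bytes -- the data
# consumed by the surviving revisions plus their three interleaved index
# entries. In the non-inline case the two files are truncated separately,
# at offset 1000 and at 3 * 64 bytes respectively.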
1280 1280 def checksize(self):
1281 1281 expected = 0
1282 1282 if len(self):
1283 1283 expected = max(0, self.end(len(self) - 1))
1284 1284
1285 1285 try:
1286 1286 f = self.opener(self.datafile)
1287 1287 f.seek(0, 2)
1288 1288 actual = f.tell()
1289 1289 f.close()
1290 1290 dd = actual - expected
1291 1291 except IOError, inst:
1292 1292 if inst.errno != errno.ENOENT:
1293 1293 raise
1294 1294 dd = 0
1295 1295
1296 1296 try:
1297 1297 f = self.opener(self.indexfile)
1298 1298 f.seek(0, 2)
1299 1299 actual = f.tell()
1300 1300 f.close()
1301 1301 s = self._io.size
1302 1302 i = max(0, actual // s)
1303 1303 di = actual - (i * s)
1304 1304 if self._inline:
1305 1305 databytes = 0
1306 1306 for r in self:
1307 1307 databytes += max(0, self.length(r))
1308 1308 dd = 0
1309 1309 di = actual - len(self) * s - databytes
1310 1310 except IOError, inst:
1311 1311 if inst.errno != errno.ENOENT:
1312 1312 raise
1313 1313 di = 0
1314 1314
1315 1315 return (dd, di)
1316 1316
1317 1317 def files(self):
1318 1318 res = [self.indexfile]
1319 1319 if not self._inline:
1320 1320 res.append(self.datafile)
1321 1321 return res
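A minimal standalone sketch (not part of this changeset) of the asymptotic
difference this cleanup series targets: list.pop(0) shifts every remaining
element and costs O(n) per call, while collections.deque.popleft() is O(1),
so draining a queue drops from quadratic to linear time.

    import collections, timeit

    def drain_list(n):
        visit = list(range(n))
        while visit:
            visit.pop(0)        # O(n) each: O(n^2) total

    def drain_deque(n):
        visit = collections.deque(range(n))
        while visit:
            visit.popleft()     # O(1) each: O(n) total

    for fn in (drain_list, drain_deque):
        print fn.__name__, timeit.timeit(lambda: fn(10000), number=10)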
@@ -1,1552 +1,1552 b''
1 1 # revset.py - revision set queries for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 import re
8 import re, collections
9 9 import parser, util, error, discovery, hbisect, phases
10 10 import node
11 11 import bookmarks as bookmarksmod
12 12 import match as matchmod
13 13 from i18n import _
14 14 import encoding
15 15
16 16 def _revancestors(repo, revs, followfirst):
17 17 """Like revlog.ancestors(), but supports followfirst."""
18 18 cut = followfirst and 1 or None
19 19 cl = repo.changelog
20 visit = list(revs)
20 visit = collections.deque(revs)
21 21 seen = set([node.nullrev])
22 22 while visit:
23 for parent in cl.parentrevs(visit.pop(0))[:cut]:
23 for parent in cl.parentrevs(visit.popleft())[:cut]:
24 24 if parent not in seen:
25 25 visit.append(parent)
26 26 seen.add(parent)
27 27 yield parent
28 28
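# Standalone sketch (assumed names, not part of this change) of the
# deque-based BFS pattern used by _revancestors() above: popleft() keeps
# each dequeue O(1), so the walk stays linear in the revisions visited.
def _walk(parentsfn, start):
    visit = collections.deque([start])
    seen = set([start])
    while visit:
        for p in parentsfn(visit.popleft()):
            if p not in seen:
                seen.add(p)
                visit.append(p)
                yield p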
29 29 def _revdescendants(repo, revs, followfirst):
30 30 """Like revlog.descendants() but supports followfirst."""
31 31 cut = followfirst and 1 or None
32 32 cl = repo.changelog
33 33 first = min(revs)
34 34 nullrev = node.nullrev
35 35 if first == nullrev:
36 36 # Are there nodes with a null first parent and a non-null
37 37 # second one? Maybe. Do we care? Probably not.
38 38 for i in cl:
39 39 yield i
40 40 return
41 41
42 42 seen = set(revs)
43 43 for i in xrange(first + 1, len(cl)):
44 44 for x in cl.parentrevs(i)[:cut]:
45 45 if x != nullrev and x in seen:
46 46 seen.add(i)
47 47 yield i
48 48 break
49 49
50 50 elements = {
51 51 "(": (20, ("group", 1, ")"), ("func", 1, ")")),
52 52 "~": (18, None, ("ancestor", 18)),
53 53 "^": (18, None, ("parent", 18), ("parentpost", 18)),
54 54 "-": (5, ("negate", 19), ("minus", 5)),
55 55 "::": (17, ("dagrangepre", 17), ("dagrange", 17),
56 56 ("dagrangepost", 17)),
57 57 "..": (17, ("dagrangepre", 17), ("dagrange", 17),
58 58 ("dagrangepost", 17)),
59 59 ":": (15, ("rangepre", 15), ("range", 15), ("rangepost", 15)),
60 60 "not": (10, ("not", 10)),
61 61 "!": (10, ("not", 10)),
62 62 "and": (5, None, ("and", 5)),
63 63 "&": (5, None, ("and", 5)),
64 64 "or": (4, None, ("or", 4)),
65 65 "|": (4, None, ("or", 4)),
66 66 "+": (4, None, ("or", 4)),
67 67 ",": (2, None, ("list", 2)),
68 68 ")": (0, None, None),
69 69 "symbol": (0, ("symbol",), None),
70 70 "string": (0, ("string",), None),
71 71 "end": (0, None, None),
72 72 }
73 73
74 74 keywords = set(['and', 'or', 'not'])
75 75
76 76 def tokenize(program):
77 77 pos, l = 0, len(program)
78 78 while pos < l:
79 79 c = program[pos]
80 80 if c.isspace(): # skip inter-token whitespace
81 81 pass
82 82 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
83 83 yield ('::', None, pos)
84 84 pos += 1 # skip ahead
85 85 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
86 86 yield ('..', None, pos)
87 87 pos += 1 # skip ahead
88 88 elif c in "():,-|&+!~^": # handle simple operators
89 89 yield (c, None, pos)
90 90 elif (c in '"\'' or c == 'r' and
91 91 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
92 92 if c == 'r':
93 93 pos += 1
94 94 c = program[pos]
95 95 decode = lambda x: x
96 96 else:
97 97 decode = lambda x: x.decode('string-escape')
98 98 pos += 1
99 99 s = pos
100 100 while pos < l: # find closing quote
101 101 d = program[pos]
102 102 if d == '\\': # skip over escaped characters
103 103 pos += 2
104 104 continue
105 105 if d == c:
106 106 yield ('string', decode(program[s:pos]), s)
107 107 break
108 108 pos += 1
109 109 else:
110 110 raise error.ParseError(_("unterminated string"), s)
111 111 # gather up a symbol/keyword
112 112 elif c.isalnum() or c in '._' or ord(c) > 127:
113 113 s = pos
114 114 pos += 1
115 115 while pos < l: # find end of symbol
116 116 d = program[pos]
117 117 if not (d.isalnum() or d in "._/" or ord(d) > 127):
118 118 break
119 119 if d == '.' and program[pos - 1] == '.': # special case for ..
120 120 pos -= 1
121 121 break
122 122 pos += 1
123 123 sym = program[s:pos]
124 124 if sym in keywords: # operator keywords
125 125 yield (sym, None, s)
126 126 else:
127 127 yield ('symbol', sym, s)
128 128 pos -= 1
129 129 else:
130 130 raise error.ParseError(_("syntax error"), pos)
131 131 pos += 1
132 132 yield ('end', None, pos)
133 133
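# Worked example (illustrative): tokenize("p1(tip)") yields, in order,
#   ('symbol', 'p1', 0), ('(', None, 2), ('symbol', 'tip', 3),
#   (')', None, 6), ('end', None, 7)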
134 134 # helpers
135 135
136 136 def getstring(x, err):
137 137 if x and (x[0] == 'string' or x[0] == 'symbol'):
138 138 return x[1]
139 139 raise error.ParseError(err)
140 140
141 141 def getlist(x):
142 142 if not x:
143 143 return []
144 144 if x[0] == 'list':
145 145 return getlist(x[1]) + [x[2]]
146 146 return [x]
147 147
148 148 def getargs(x, min, max, err):
149 149 l = getlist(x)
150 150 if len(l) < min or (max >= 0 and len(l) > max):
151 151 raise error.ParseError(err)
152 152 return l
153 153
154 154 def getset(repo, subset, x):
155 155 if not x:
156 156 raise error.ParseError(_("missing argument"))
157 157 return methods[x[0]](repo, subset, *x[1:])
158 158
159 159 # operator methods
160 160
161 161 def stringset(repo, subset, x):
162 162 x = repo[x].rev()
163 163 if x == -1 and len(subset) == len(repo):
164 164 return [-1]
165 165 if len(subset) == len(repo) or x in subset:
166 166 return [x]
167 167 return []
168 168
169 169 def symbolset(repo, subset, x):
170 170 if x in symbols:
171 171 raise error.ParseError(_("can't use %s here") % x)
172 172 return stringset(repo, subset, x)
173 173
174 174 def rangeset(repo, subset, x, y):
175 175 m = getset(repo, subset, x)
176 176 if not m:
177 177 m = getset(repo, range(len(repo)), x)
178 178
179 179 n = getset(repo, subset, y)
180 180 if not n:
181 181 n = getset(repo, range(len(repo)), y)
182 182
183 183 if not m or not n:
184 184 return []
185 185 m, n = m[0], n[-1]
186 186
187 187 if m < n:
188 188 r = range(m, n + 1)
189 189 else:
190 190 r = range(m, n - 1, -1)
191 191 s = set(subset)
192 192 return [x for x in r if x in s]
193 193
194 194 def andset(repo, subset, x, y):
195 195 return getset(repo, getset(repo, subset, x), y)
196 196
197 197 def orset(repo, subset, x, y):
198 198 xl = getset(repo, subset, x)
199 199 s = set(xl)
200 200 yl = getset(repo, [r for r in subset if r not in s], y)
201 201 return xl + yl
202 202
203 203 def notset(repo, subset, x):
204 204 s = set(getset(repo, subset, x))
205 205 return [r for r in subset if r not in s]
206 206
207 207 def listset(repo, subset, a, b):
208 208 raise error.ParseError(_("can't use a list in this context"))
209 209
210 210 def func(repo, subset, a, b):
211 211 if a[0] == 'symbol' and a[1] in symbols:
212 212 return symbols[a[1]](repo, subset, b)
213 213 raise error.ParseError(_("not a function: %s") % a[1])
214 214
215 215 # functions
216 216
217 217 def adds(repo, subset, x):
218 218 """``adds(pattern)``
219 219 Changesets that add a file matching pattern.
220 220 """
221 221 # i18n: "adds" is a keyword
222 222 pat = getstring(x, _("adds requires a pattern"))
223 223 return checkstatus(repo, subset, pat, 1)
224 224
225 225 def ancestor(repo, subset, x):
226 226 """``ancestor(single, single)``
227 227 Greatest common ancestor of the two changesets.
228 228 """
229 229 # i18n: "ancestor" is a keyword
230 230 l = getargs(x, 2, 2, _("ancestor requires two arguments"))
231 231 r = range(len(repo))
232 232 a = getset(repo, r, l[0])
233 233 b = getset(repo, r, l[1])
234 234 if len(a) != 1 or len(b) != 1:
235 235 # i18n: "ancestor" is a keyword
236 236 raise error.ParseError(_("ancestor arguments must be single revisions"))
237 237 an = [repo[a[0]].ancestor(repo[b[0]]).rev()]
238 238
239 239 return [r for r in an if r in subset]
240 240
241 241 def _ancestors(repo, subset, x, followfirst=False):
242 242 args = getset(repo, range(len(repo)), x)
243 243 if not args:
244 244 return []
245 245 s = set(_revancestors(repo, args, followfirst)) | set(args)
246 246 return [r for r in subset if r in s]
247 247
248 248 def ancestors(repo, subset, x):
249 249 """``ancestors(set)``
250 250 Changesets that are ancestors of a changeset in set.
251 251 """
252 252 return _ancestors(repo, subset, x)
253 253
254 254 def _firstancestors(repo, subset, x):
255 255 # ``_firstancestors(set)``
256 256 # Like ``ancestors(set)`` but follows only the first parents.
257 257 return _ancestors(repo, subset, x, followfirst=True)
258 258
259 259 def ancestorspec(repo, subset, x, n):
260 260 """``set~n``
261 261 Changesets that are the Nth ancestor (first parents only) of a changeset
262 262 in set.
263 263 """
264 264 try:
265 265 n = int(n[1])
266 266 except (TypeError, ValueError):
267 267 raise error.ParseError(_("~ expects a number"))
268 268 ps = set()
269 269 cl = repo.changelog
270 270 for r in getset(repo, subset, x):
271 271 for i in range(n):
272 272 r = cl.parentrevs(r)[0]
273 273 ps.add(r)
274 274 return [r for r in subset if r in ps]
275 275
276 276 def author(repo, subset, x):
277 277 """``author(string)``
278 278 Alias for ``user(string)``.
279 279 """
280 280 # i18n: "author" is a keyword
281 281 n = encoding.lower(getstring(x, _("author requires a string")))
282 282 return [r for r in subset if n in encoding.lower(repo[r].user())]
283 283
284 284 def bisect(repo, subset, x):
285 285 """``bisect(string)``
286 286 Changesets marked in the specified bisect status:
287 287
288 288 - ``good``, ``bad``, ``skip``: csets explicitly marked as good/bad/skip
289 289 - ``goods``, ``bads`` : csets topologically good/bad
290 290 - ``range`` : csets taking part in the bisection
291 291 - ``pruned`` : csets that are goods, bads or skipped
292 292 - ``untested`` : csets whose fate is yet unknown
293 293 - ``ignored`` : csets ignored due to DAG topology
294 294 - ``current`` : the cset currently being bisected
295 295 """
296 296 status = getstring(x, _("bisect requires a string")).lower()
297 297 state = set(hbisect.get(repo, status))
298 298 return [r for r in subset if r in state]
299 299
300 300 # Backward-compatibility
301 301 # - no help entry so that we do not advertise it any more
302 302 def bisected(repo, subset, x):
303 303 return bisect(repo, subset, x)
304 304
305 305 def bookmark(repo, subset, x):
306 306 """``bookmark([name])``
307 307 The named bookmark or all bookmarks.
308 308 """
309 309 # i18n: "bookmark" is a keyword
310 310 args = getargs(x, 0, 1, _('bookmark takes one or no arguments'))
311 311 if args:
312 312 bm = getstring(args[0],
313 313 # i18n: "bookmark" is a keyword
314 314 _('the argument to bookmark must be a string'))
315 315 bmrev = bookmarksmod.listbookmarks(repo).get(bm, None)
316 316 if not bmrev:
317 317 raise util.Abort(_("bookmark '%s' does not exist") % bm)
318 318 bmrev = repo[bmrev].rev()
319 319 return [r for r in subset if r == bmrev]
320 320 bms = set([repo[r].rev()
321 321 for r in bookmarksmod.listbookmarks(repo).values()])
322 322 return [r for r in subset if r in bms]
323 323
324 324 def branch(repo, subset, x):
325 325 """``branch(string or set)``
326 326 All changesets belonging to the given branch or the branches of the given
327 327 changesets.
328 328 """
329 329 try:
330 330 b = getstring(x, '')
331 331 if b in repo.branchmap():
332 332 return [r for r in subset if repo[r].branch() == b]
333 333 except error.ParseError:
334 334 # not a string, but another revspec, e.g. tip()
335 335 pass
336 336
337 337 s = getset(repo, range(len(repo)), x)
338 338 b = set()
339 339 for r in s:
340 340 b.add(repo[r].branch())
341 341 s = set(s)
342 342 return [r for r in subset if r in s or repo[r].branch() in b]
343 343
344 344 def checkstatus(repo, subset, pat, field):
345 345 m = None
346 346 s = []
347 347 hasset = matchmod.patkind(pat) == 'set'
348 348 fname = None
349 349 for r in subset:
350 350 c = repo[r]
351 351 if not m or hasset:
352 352 m = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=c)
353 353 if not m.anypats() and len(m.files()) == 1:
354 354 fname = m.files()[0]
355 355 if fname is not None:
356 356 if fname not in c.files():
357 357 continue
358 358 else:
359 359 for f in c.files():
360 360 if m(f):
361 361 break
362 362 else:
363 363 continue
364 364 files = repo.status(c.p1().node(), c.node())[field]
365 365 if fname is not None:
366 366 if fname in files:
367 367 s.append(r)
368 368 else:
369 369 for f in files:
370 370 if m(f):
371 371 s.append(r)
372 372 break
373 373 return s
374 374
375 375 def _children(repo, narrow, parentset):
376 376 cs = set()
377 377 pr = repo.changelog.parentrevs
378 378 for r in narrow:
379 379 for p in pr(r):
380 380 if p in parentset:
381 381 cs.add(r)
382 382 return cs
383 383
384 384 def children(repo, subset, x):
385 385 """``children(set)``
386 386 Child changesets of changesets in set.
387 387 """
388 388 s = set(getset(repo, range(len(repo)), x))
389 389 cs = _children(repo, subset, s)
390 390 return [r for r in subset if r in cs]
391 391
392 392 def closed(repo, subset, x):
393 393 """``closed()``
394 394 Changeset is closed.
395 395 """
396 396 # i18n: "closed" is a keyword
397 397 getargs(x, 0, 0, _("closed takes no arguments"))
398 398 return [r for r in subset if repo[r].closesbranch()]
399 399
400 400 def contains(repo, subset, x):
401 401 """``contains(pattern)``
402 402 Revision contains a file matching pattern. See :hg:`help patterns`
403 403 for information about file patterns.
404 404 """
405 405 # i18n: "contains" is a keyword
406 406 pat = getstring(x, _("contains requires a pattern"))
407 407 m = None
408 408 s = []
409 409 if not matchmod.patkind(pat):
410 410 for r in subset:
411 411 if pat in repo[r]:
412 412 s.append(r)
413 413 else:
414 414 for r in subset:
415 415 c = repo[r]
416 416 if not m or matchmod.patkind(pat) == 'set':
417 417 m = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=c)
418 418 for f in c.manifest():
419 419 if m(f):
420 420 s.append(r)
421 421 break
422 422 return s
423 423
424 424 def date(repo, subset, x):
425 425 """``date(interval)``
426 426 Changesets within the interval, see :hg:`help dates`.
427 427 """
428 428 # i18n: "date" is a keyword
429 429 ds = getstring(x, _("date requires a string"))
430 430 dm = util.matchdate(ds)
431 431 return [r for r in subset if dm(repo[r].date()[0])]
432 432
433 433 def desc(repo, subset, x):
434 434 """``desc(string)``
435 435 Search commit message for string. The match is case-insensitive.
436 436 """
437 437 # i18n: "desc" is a keyword
438 438 ds = encoding.lower(getstring(x, _("desc requires a string")))
439 439 l = []
440 440 for r in subset:
441 441 c = repo[r]
442 442 if ds in encoding.lower(c.description()):
443 443 l.append(r)
444 444 return l
445 445
446 446 def _descendants(repo, subset, x, followfirst=False):
447 447 args = getset(repo, range(len(repo)), x)
448 448 if not args:
449 449 return []
450 450 s = set(_revdescendants(repo, args, followfirst)) | set(args)
451 451 return [r for r in subset if r in s]
452 452
453 453 def descendants(repo, subset, x):
454 454 """``descendants(set)``
455 455 Changesets which are descendants of changesets in set.
456 456 """
457 457 return _descendants(repo, subset, x)
458 458
459 459 def _firstdescendants(repo, subset, x):
460 460 # ``_firstdescendants(set)``
461 461 # Like ``descendants(set)`` but follows only the first parents.
462 462 return _descendants(repo, subset, x, followfirst=True)
463 463
464 464 def draft(repo, subset, x):
465 465 """``draft()``
466 466 Changeset in draft phase."""
467 467 getargs(x, 0, 0, _("draft takes no arguments"))
468 468 pc = repo._phasecache
469 469 return [r for r in subset if pc.phase(repo, r) == phases.draft]
470 470
471 471 def extra(repo, subset, x):
472 472 """``extra(label, [value])``
473 473 Changesets with the given label in the extra metadata, with the given
474 474 optional value."""
475 475
476 476 l = getargs(x, 1, 2, _('extra takes at least 1 and at most 2 arguments'))
477 477 label = getstring(l[0], _('first argument to extra must be a string'))
478 478 value = None
479 479
480 480 if len(l) > 1:
481 481 value = getstring(l[1], _('second argument to extra must be a string'))
482 482
483 483 def _matchvalue(r):
484 484 extra = repo[r].extra()
485 485 return label in extra and (value is None or value == extra[label])
486 486
487 487 return [r for r in subset if _matchvalue(r)]
488 488
489 489 def filelog(repo, subset, x):
490 490 """``filelog(pattern)``
491 491 Changesets connected to the specified filelog.
492 492 """
493 493
494 494 pat = getstring(x, _("filelog requires a pattern"))
495 495 m = matchmod.match(repo.root, repo.getcwd(), [pat], default='relpath',
496 496 ctx=repo[None])
497 497 s = set()
498 498
499 499 if not matchmod.patkind(pat):
500 500 for f in m.files():
501 501 fl = repo.file(f)
502 502 for fr in fl:
503 503 s.add(fl.linkrev(fr))
504 504 else:
505 505 for f in repo[None]:
506 506 if m(f):
507 507 fl = repo.file(f)
508 508 for fr in fl:
509 509 s.add(fl.linkrev(fr))
510 510
511 511 return [r for r in subset if r in s]
512 512
513 513 def first(repo, subset, x):
514 514 """``first(set, [n])``
515 515 An alias for limit().
516 516 """
517 517 return limit(repo, subset, x)
518 518
519 519 def _follow(repo, subset, x, name, followfirst=False):
520 520 l = getargs(x, 0, 1, _("%s takes no arguments or a filename") % name)
521 521 c = repo['.']
522 522 if l:
523 523 x = getstring(l[0], _("%s expected a filename") % name)
524 524 if x in c:
525 525 cx = c[x]
526 526 s = set(ctx.rev() for ctx in cx.ancestors(followfirst=followfirst))
527 527 # include the revision responsible for the most recent version
528 528 s.add(cx.linkrev())
529 529 else:
530 530 return []
531 531 else:
532 532 s = set(_revancestors(repo, [c.rev()], followfirst)) | set([c.rev()])
533 533
534 534 return [r for r in subset if r in s]
535 535
536 536 def follow(repo, subset, x):
537 537 """``follow([file])``
538 538 An alias for ``::.`` (ancestors of the working copy's first parent).
539 539 If a filename is specified, the history of the given file is followed,
540 540 including copies.
541 541 """
542 542 return _follow(repo, subset, x, 'follow')
543 543
544 544 def _followfirst(repo, subset, x):
545 545 # ``followfirst([file])``
546 546 # Like ``follow([file])`` but follows only the first parent of
547 547 # every revision or file revision.
548 548 return _follow(repo, subset, x, '_followfirst', followfirst=True)
549 549
550 550 def getall(repo, subset, x):
551 551 """``all()``
552 552 All changesets, the same as ``0:tip``.
553 553 """
554 554 # i18n: "all" is a keyword
555 555 getargs(x, 0, 0, _("all takes no arguments"))
556 556 return subset
557 557
558 558 def grep(repo, subset, x):
559 559 """``grep(regex)``
560 560 Like ``keyword(string)`` but accepts a regex. Use ``grep(r'...')``
561 561 to ensure special escape characters are handled correctly. Unlike
562 562 ``keyword(string)``, the match is case-sensitive.
563 563 """
564 564 try:
565 565 # i18n: "grep" is a keyword
566 566 gr = re.compile(getstring(x, _("grep requires a string")))
567 567 except re.error, e:
568 568 raise error.ParseError(_('invalid match pattern: %s') % e)
569 569 l = []
570 570 for r in subset:
571 571 c = repo[r]
572 572 for e in c.files() + [c.user(), c.description()]:
573 573 if gr.search(e):
574 574 l.append(r)
575 575 break
576 576 return l
577 577
578 578 def _matchfiles(repo, subset, x):
579 579 # _matchfiles takes a revset list of prefixed arguments:
580 580 #
581 581 # [p:foo, i:bar, x:baz]
582 582 #
583 583 # builds a match object from them and filters subset. Allowed
584 584 # prefixes are 'p:' for regular patterns, 'i:' for include
585 585 # patterns and 'x:' for exclude patterns. Use 'r:' prefix to pass
586 586 # a revision identifier, or the empty string to reference the
587 587 # working directory, from which the match object is
588 588 # initialized. Use 'd:' to set the default matching mode, default
589 589 # to 'glob'. At most one 'r:' and 'd:' argument can be passed.
590 590
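    # Example (illustrative): the revset expression
    #     _matchfiles("p:*.py", "x:tests/**", "d:glob")
    # narrows subset to revisions in which some changed file matches the
    # ``*.py`` glob and is not under ``tests/``, with 'glob' as the
    # default pattern kind.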
591 591 # i18n: "_matchfiles" is a keyword
592 592 l = getargs(x, 1, -1, _("_matchfiles requires at least one argument"))
593 593 pats, inc, exc = [], [], []
594 594 hasset = False
595 595 rev, default = None, None
596 596 for arg in l:
597 597 s = getstring(arg, _("_matchfiles requires string arguments"))
598 598 prefix, value = s[:2], s[2:]
599 599 if prefix == 'p:':
600 600 pats.append(value)
601 601 elif prefix == 'i:':
602 602 inc.append(value)
603 603 elif prefix == 'x:':
604 604 exc.append(value)
605 605 elif prefix == 'r:':
606 606 if rev is not None:
607 607 raise error.ParseError(_('_matchfiles expected at most one '
608 608 'revision'))
609 609 rev = value
610 610 elif prefix == 'd:':
611 611 if default is not None:
612 612 raise error.ParseError(_('_matchfiles expected at most one '
613 613 'default mode'))
614 614 default = value
615 615 else:
616 616 raise error.ParseError(_('invalid _matchfiles prefix: %s') % prefix)
617 617 if not hasset and matchmod.patkind(value) == 'set':
618 618 hasset = True
619 619 if not default:
620 620 default = 'glob'
621 621 m = None
622 622 s = []
623 623 for r in subset:
624 624 c = repo[r]
625 625 if not m or (hasset and rev is None):
626 626 ctx = c
627 627 if rev is not None:
628 628 ctx = repo[rev or None]
629 629 m = matchmod.match(repo.root, repo.getcwd(), pats, include=inc,
630 630 exclude=exc, ctx=ctx, default=default)
631 631 for f in c.files():
632 632 if m(f):
633 633 s.append(r)
634 634 break
635 635 return s
636 636
637 637 def hasfile(repo, subset, x):
638 638 """``file(pattern)``
639 639 Changesets affecting files matched by pattern.
640 640 """
641 641 # i18n: "file" is a keyword
642 642 pat = getstring(x, _("file requires a pattern"))
643 643 return _matchfiles(repo, subset, ('string', 'p:' + pat))
644 644
645 645 def head(repo, subset, x):
646 646 """``head()``
647 647 Changeset is a named branch head.
648 648 """
649 649 # i18n: "head" is a keyword
650 650 getargs(x, 0, 0, _("head takes no arguments"))
651 651 hs = set()
652 652 for b, ls in repo.branchmap().iteritems():
653 653 hs.update(repo[h].rev() for h in ls)
654 654 return [r for r in subset if r in hs]
655 655
656 656 def heads(repo, subset, x):
657 657 """``heads(set)``
658 658 Members of set with no children in set.
659 659 """
660 660 s = getset(repo, subset, x)
661 661 ps = set(parents(repo, subset, x))
662 662 return [r for r in s if r not in ps]
663 663
664 664 def keyword(repo, subset, x):
665 665 """``keyword(string)``
666 666 Search commit message, user name, and names of changed files for
667 667 string. The match is case-insensitive.
668 668 """
669 669 # i18n: "keyword" is a keyword
670 670 kw = encoding.lower(getstring(x, _("keyword requires a string")))
671 671 l = []
672 672 for r in subset:
673 673 c = repo[r]
674 674 t = " ".join(c.files() + [c.user(), c.description()])
675 675 if kw in encoding.lower(t):
676 676 l.append(r)
677 677 return l
678 678
679 679 def limit(repo, subset, x):
680 680 """``limit(set, [n])``
681 681 First n members of set, defaulting to 1.
682 682 """
683 683 # i18n: "limit" is a keyword
684 684 l = getargs(x, 1, 2, _("limit requires one or two arguments"))
685 685 try:
686 686 lim = 1
687 687 if len(l) == 2:
688 688 # i18n: "limit" is a keyword
689 689 lim = int(getstring(l[1], _("limit requires a number")))
690 690 except (TypeError, ValueError):
691 691 # i18n: "limit" is a keyword
692 692 raise error.ParseError(_("limit expects a number"))
693 693 ss = set(subset)
694 694 os = getset(repo, range(len(repo)), l[0])[:lim]
695 695 return [r for r in os if r in ss]
696 696
697 697 def last(repo, subset, x):
698 698 """``last(set, [n])``
699 699 Last n members of set, defaulting to 1.
700 700 """
701 701 # i18n: "last" is a keyword
702 702 l = getargs(x, 1, 2, _("last requires one or two arguments"))
703 703 try:
704 704 lim = 1
705 705 if len(l) == 2:
706 706 # i18n: "last" is a keyword
707 707 lim = int(getstring(l[1], _("last requires a number")))
708 708 except (TypeError, ValueError):
709 709 # i18n: "last" is a keyword
710 710 raise error.ParseError(_("last expects a number"))
711 711 ss = set(subset)
712 712 os = getset(repo, range(len(repo)), l[0])[-lim:]
713 713 return [r for r in os if r in ss]
714 714
715 715 def maxrev(repo, subset, x):
716 716 """``max(set)``
717 717 Changeset with highest revision number in set.
718 718 """
719 719 os = getset(repo, range(len(repo)), x)
720 720 if os:
721 721 m = max(os)
722 722 if m in subset:
723 723 return [m]
724 724 return []
725 725
726 726 def merge(repo, subset, x):
727 727 """``merge()``
728 728 Changeset is a merge changeset.
729 729 """
730 730 # i18n: "merge" is a keyword
731 731 getargs(x, 0, 0, _("merge takes no arguments"))
732 732 cl = repo.changelog
733 733 return [r for r in subset if cl.parentrevs(r)[1] != -1]
734 734
735 735 def minrev(repo, subset, x):
736 736 """``min(set)``
737 737 Changeset with lowest revision number in set.
738 738 """
739 739 os = getset(repo, range(len(repo)), x)
740 740 if os:
741 741 m = min(os)
742 742 if m in subset:
743 743 return [m]
744 744 return []
745 745
746 746 def modifies(repo, subset, x):
747 747 """``modifies(pattern)``
748 748 Changesets modifying files matched by pattern.
749 749 """
750 750 # i18n: "modifies" is a keyword
751 751 pat = getstring(x, _("modifies requires a pattern"))
752 752 return checkstatus(repo, subset, pat, 0)
753 753
754 754 def node_(repo, subset, x):
755 755 """``id(string)``
756 756 Revision unambiguously specified by the given hex string prefix.
757 757 """
758 758 # i18n: "id" is a keyword
759 759 l = getargs(x, 1, 1, _("id requires one argument"))
760 760 # i18n: "id" is a keyword
761 761 n = getstring(l[0], _("id requires a string"))
762 762 if len(n) == 40:
763 763 rn = repo[n].rev()
764 764 else:
765 765 rn = None
766 766 pm = repo.changelog._partialmatch(n)
767 767 if pm is not None:
768 768 rn = repo.changelog.rev(pm)
769 769
770 770 return [r for r in subset if r == rn]
771 771
772 772 def outgoing(repo, subset, x):
773 773 """``outgoing([path])``
774 774 Changesets not found in the specified destination repository, or the
775 775 default push location.
776 776 """
777 777 import hg # avoid start-up nasties
778 778 # i18n: "outgoing" is a keyword
779 779 l = getargs(x, 0, 1, _("outgoing takes one or no arguments"))
780 780 # i18n: "outgoing" is a keyword
781 781 dest = l and getstring(l[0], _("outgoing requires a repository path")) or ''
782 782 dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
783 783 dest, branches = hg.parseurl(dest)
784 784 revs, checkout = hg.addbranchrevs(repo, repo, branches, [])
785 785 if revs:
786 786 revs = [repo.lookup(rev) for rev in revs]
787 787 other = hg.peer(repo, {}, dest)
788 788 repo.ui.pushbuffer()
789 789 outgoing = discovery.findcommonoutgoing(repo, other, onlyheads=revs)
790 790 repo.ui.popbuffer()
791 791 cl = repo.changelog
792 792 o = set([cl.rev(r) for r in outgoing.missing])
793 793 return [r for r in subset if r in o]
794 794
795 795 def p1(repo, subset, x):
796 796 """``p1([set])``
797 797 First parent of changesets in set, or the working directory.
798 798 """
799 799 if x is None:
800 800 p = repo[x].p1().rev()
801 801 return [r for r in subset if r == p]
802 802
803 803 ps = set()
804 804 cl = repo.changelog
805 805 for r in getset(repo, range(len(repo)), x):
806 806 ps.add(cl.parentrevs(r)[0])
807 807 return [r for r in subset if r in ps]
808 808
809 809 def p2(repo, subset, x):
810 810 """``p2([set])``
811 811 Second parent of changesets in set, or the working directory.
812 812 """
813 813 if x is None:
814 814 ps = repo[x].parents()
815 815 try:
816 816 p = ps[1].rev()
817 817 return [r for r in subset if r == p]
818 818 except IndexError:
819 819 return []
820 820
821 821 ps = set()
822 822 cl = repo.changelog
823 823 for r in getset(repo, range(len(repo)), x):
824 824 ps.add(cl.parentrevs(r)[1])
825 825 return [r for r in subset if r in ps]
826 826
827 827 def parents(repo, subset, x):
828 828 """``parents([set])``
829 829 The set of all parents for all changesets in set, or the working directory.
830 830 """
831 831 if x is None:
832 832 ps = tuple(p.rev() for p in repo[x].parents())
833 833 return [r for r in subset if r in ps]
834 834
835 835 ps = set()
836 836 cl = repo.changelog
837 837 for r in getset(repo, range(len(repo)), x):
838 838 ps.update(cl.parentrevs(r))
839 839 return [r for r in subset if r in ps]
840 840
841 841 def parentspec(repo, subset, x, n):
842 842 """``set^0``
843 843 The set.
844 844 ``set^1`` (or ``set^``), ``set^2``
845 845 First or second parent, respectively, of all changesets in set.
846 846 """
847 847 try:
848 848 n = int(n[1])
849 849 if n not in (0, 1, 2):
850 850 raise ValueError
851 851 except (TypeError, ValueError):
852 852 raise error.ParseError(_("^ expects a number 0, 1, or 2"))
853 853 ps = set()
854 854 cl = repo.changelog
855 855 for r in getset(repo, subset, x):
856 856 if n == 0:
857 857 ps.add(r)
858 858 elif n == 1:
859 859 ps.add(cl.parentrevs(r)[0])
860 860 elif n == 2:
861 861 parents = cl.parentrevs(r)
862 862 if len(parents) > 1:
863 863 ps.add(parents[1])
864 864 return [r for r in subset if r in ps]
865 865
866 866 def present(repo, subset, x):
867 867 """``present(set)``
868 868 An empty set, if any revision in set isn't found; otherwise,
869 869 all revisions in set.
870 870
871 871 If any of the specified revisions is not present in the local repository,
872 872 the query is normally aborted. But this predicate allows the query
873 873 to continue even in such cases.
874 874 """
875 875 try:
876 876 return getset(repo, subset, x)
877 877 except error.RepoLookupError:
878 878 return []
879 879
880 880 def public(repo, subset, x):
881 881 """``public()``
882 882 Changeset in public phase."""
883 883 getargs(x, 0, 0, _("public takes no arguments"))
884 884 pc = repo._phasecache
885 885 return [r for r in subset if pc.phase(repo, r) == phases.public]
886 886
887 887 def remote(repo, subset, x):
888 888 """``remote([id [,path]])``
889 889 Local revision that corresponds to the given identifier in a
890 890 remote repository, if present. Here, the '.' identifier is a
891 891 synonym for the current local branch.
892 892 """
893 893
894 894 import hg # avoid start-up nasties
895 895 # i18n: "remote" is a keyword
896 896 l = getargs(x, 0, 2, _("remote takes one, two or no arguments"))
897 897
898 898 q = '.'
899 899 if len(l) > 0:
900 900 # i18n: "remote" is a keyword
901 901 q = getstring(l[0], _("remote requires a string id"))
902 902 if q == '.':
903 903 q = repo['.'].branch()
904 904
905 905 dest = ''
906 906 if len(l) > 1:
907 907 # i18n: "remote" is a keyword
908 908 dest = getstring(l[1], _("remote requires a repository path"))
909 909 dest = repo.ui.expandpath(dest or 'default')
910 910 dest, branches = hg.parseurl(dest)
911 911 revs, checkout = hg.addbranchrevs(repo, repo, branches, [])
912 912 if revs:
913 913 revs = [repo.lookup(rev) for rev in revs]
914 914 other = hg.peer(repo, {}, dest)
915 915 n = other.lookup(q)
916 916 if n in repo:
917 917 r = repo[n].rev()
918 918 if r in subset:
919 919 return [r]
920 920 return []
921 921
922 922 def removes(repo, subset, x):
923 923 """``removes(pattern)``
924 924 Changesets which remove files matching pattern.
925 925 """
926 926 # i18n: "removes" is a keyword
927 927 pat = getstring(x, _("removes requires a pattern"))
928 928 return checkstatus(repo, subset, pat, 2)
929 929
930 930 def rev(repo, subset, x):
931 931 """``rev(number)``
932 932 Revision with the given numeric identifier.
933 933 """
934 934 # i18n: "rev" is a keyword
935 935 l = getargs(x, 1, 1, _("rev requires one argument"))
936 936 try:
937 937 # i18n: "rev" is a keyword
938 938 l = int(getstring(l[0], _("rev requires a number")))
939 939 except (TypeError, ValueError):
940 940 # i18n: "rev" is a keyword
941 941 raise error.ParseError(_("rev expects a number"))
942 942 return [r for r in subset if r == l]
943 943
944 944 def matching(repo, subset, x):
945 945 """``matching(revision [, field])``
946 946 Changesets in which a given set of fields match the set of fields in the
947 947 selected revision or set.
948 948
949 949 To match more than one field, pass the list of fields to match separated
950 950 by spaces (e.g. ``author description``).
951 951
952 952 Valid fields are most regular revision fields and some special fields.
953 953
954 954 Regular revision fields are ``description``, ``author``, ``branch``,
955 955 ``date``, ``files``, ``phase``, ``parents``, ``substate`` and ``user``.
956 956 Note that ``author`` and ``user`` are synonyms.
957 957
958 958 Special fields are ``summary`` and ``metadata``:
959 959 ``summary`` matches the first line of the description.
960 960 ``metadata`` is equivalent to matching ``description user date``
961 961 (i.e. it matches the main metadata fields).
962 962
963 963 ``metadata`` is the default field which is used when no fields are
964 964 specified. You can match more than one field at a time.
965 965 """
966 966 l = getargs(x, 1, 2, _("matching takes 1 or 2 arguments"))
967 967
968 968 revs = getset(repo, xrange(len(repo)), l[0])
969 969
970 970 fieldlist = ['metadata']
971 971 if len(l) > 1:
972 972 fieldlist = getstring(l[1],
973 973 _("matching requires a string "
974 974 "as its second argument")).split()
975 975
976 976 # Make sure that there are no repeated fields, and expand the
977 977 # 'special' 'metadata' field type
978 978 fields = []
979 979 for field in fieldlist:
980 980 if field == 'metadata':
981 981 fields += ['user', 'description', 'date']
982 982 else:
983 983 if field == 'author':
984 984 field = 'user'
985 985 fields.append(field)
986 986 fields = set(fields)
987 987 if 'summary' in fields and 'description' in fields:
988 988 # If a revision matches its description it also matches its summary
989 989 fields.discard('summary')
990 990
991 991 # We may want to match more than one field
992 992 # Not all fields take the same amount of time to be matched
993 993 # Sort the selected fields in order of increasing matching cost
994 994 fieldorder = ['phase', 'parents', 'user', 'date', 'branch', 'summary',
995 995 'files', 'description', 'substate']
996 996 def fieldkeyfunc(f):
997 997 try:
998 998 return fieldorder.index(f)
999 999 except ValueError:
1000 1000 # assume an unknown field is very costly
1001 1001 return len(fieldorder)
1002 1002 fields = list(fields)
1003 1003 fields.sort(key=fieldkeyfunc)
1004 1004
1005 1005 # Each field will be matched with its own "getfield" function
1006 1006 # which will be added to the getfieldfuncs array of functions
1007 1007 getfieldfuncs = []
1008 1008 _funcs = {
1009 1009 'user': lambda r: repo[r].user(),
1010 1010 'branch': lambda r: repo[r].branch(),
1011 1011 'date': lambda r: repo[r].date(),
1012 1012 'description': lambda r: repo[r].description(),
1013 1013 'files': lambda r: repo[r].files(),
1014 1014 'parents': lambda r: repo[r].parents(),
1015 1015 'phase': lambda r: repo[r].phase(),
1016 1016 'substate': lambda r: repo[r].substate,
1017 1017 'summary': lambda r: repo[r].description().splitlines()[0],
1018 1018 }
1019 1019 for info in fields:
1020 1020 getfield = _funcs.get(info, None)
1021 1021 if getfield is None:
1022 1022 raise error.ParseError(
1023 1023 _("unexpected field name passed to matching: %s") % info)
1024 1024 getfieldfuncs.append(getfield)
1025 1025 # convert the getfield array of functions into a "getinfo" function
1026 1026 # which returns an array of field values (or a single value if there
1027 1027 # is only one field to match)
1028 1028 getinfo = lambda r: [f(r) for f in getfieldfuncs]
1029 1029
1030 1030 matches = set()
1031 1031 for rev in revs:
1032 1032 target = getinfo(rev)
1033 1033 for r in subset:
1034 1034 match = True
1035 1035 for n, f in enumerate(getfieldfuncs):
1036 1036 if target[n] != f(r):
1037 1037 match = False
1038 1038 break
1039 1039 if match:
1040 1040 matches.add(r)
1041 1041 return [r for r in subset if r in matches]
1042 1042
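# Example (illustrative): ``matching(tip, "author date")`` keeps exactly
# those revisions in subset whose user and date fields equal tip's; with no
# second argument the default 'metadata' compares user, description and date.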
1043 1043 def reverse(repo, subset, x):
1044 1044 """``reverse(set)``
1045 1045 Reverse order of set.
1046 1046 """
1047 1047 l = getset(repo, subset, x)
1048 1048 l.reverse()
1049 1049 return l
1050 1050
1051 1051 def roots(repo, subset, x):
1052 1052 """``roots(set)``
1053 1053 Changesets in set with no parent changeset in set.
1054 1054 """
1055 1055 s = set(getset(repo, xrange(len(repo)), x))
1056 1056 subset = [r for r in subset if r in s]
1057 1057 cs = _children(repo, subset, s)
1058 1058 return [r for r in subset if r not in cs]
1059 1059
1060 1060 def secret(repo, subset, x):
1061 1061 """``secret()``
1062 1062 Changeset in secret phase."""
1063 1063 getargs(x, 0, 0, _("secret takes no arguments"))
1064 1064 pc = repo._phasecache
1065 1065 return [r for r in subset if pc.phase(repo, r) == phases.secret]
1066 1066
1067 1067 def sort(repo, subset, x):
1068 1068 """``sort(set[, [-]key...])``
1069 1069 Sort set by keys. The default sort order is ascending, specify a key
1070 1070 as ``-key`` to sort in descending order.
1071 1071
1072 1072 The keys can be:
1073 1073
1074 1074 - ``rev`` for the revision number,
1075 1075 - ``branch`` for the branch name,
1076 1076 - ``desc`` for the commit message (description),
1077 1077 - ``user`` for user name (``author`` can be used as an alias),
1078 1078 - ``date`` for the commit date
1079 1079 """
1080 1080 # i18n: "sort" is a keyword
1081 1081 l = getargs(x, 1, 2, _("sort requires one or two arguments"))
1082 1082 keys = "rev"
1083 1083 if len(l) == 2:
1084 1084 keys = getstring(l[1], _("sort spec must be a string"))
1085 1085
1086 1086 s = l[0]
1087 1087 keys = keys.split()
1088 1088 l = []
1089 1089 def invert(s):
1090 1090 return "".join(chr(255 - ord(c)) for c in s)
1091 1091 for r in getset(repo, subset, s):
1092 1092 c = repo[r]
1093 1093 e = []
1094 1094 for k in keys:
1095 1095 if k == 'rev':
1096 1096 e.append(r)
1097 1097 elif k == '-rev':
1098 1098 e.append(-r)
1099 1099 elif k == 'branch':
1100 1100 e.append(c.branch())
1101 1101 elif k == '-branch':
1102 1102 e.append(invert(c.branch()))
1103 1103 elif k == 'desc':
1104 1104 e.append(c.description())
1105 1105 elif k == '-desc':
1106 1106 e.append(invert(c.description()))
1107 1107 elif k in 'user author':
1108 1108 e.append(c.user())
1109 1109 elif k in '-user -author':
1110 1110 e.append(invert(c.user()))
1111 1111 elif k == 'date':
1112 1112 e.append(c.date()[0])
1113 1113 elif k == '-date':
1114 1114 e.append(-c.date()[0])
1115 1115 else:
1116 1116 raise error.ParseError(_("unknown sort key %r") % k)
1117 1117 e.append(r)
1118 1118 l.append(e)
1119 1119 l.sort()
1120 1120 return [e[-1] for e in l]
1121 1121
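# Example (illustrative): ``sort(all(), '-date user')`` orders primarily by
# commit date, newest first, then by ascending user name; the revision
# number appended as the last element of each key keeps the sort
# deterministic and is what the final list comprehension extracts.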
1122 1122 def tag(repo, subset, x):
1123 1123 """``tag([name])``
1124 1124 The specified tag by name, or all tagged revisions if no name is given.
1125 1125 """
1126 1126 # i18n: "tag" is a keyword
1127 1127 args = getargs(x, 0, 1, _("tag takes one or no arguments"))
1128 1128 cl = repo.changelog
1129 1129 if args:
1130 1130 tn = getstring(args[0],
1131 1131 # i18n: "tag" is a keyword
1132 1132 _('the argument to tag must be a string'))
1133 1133 if not repo.tags().get(tn, None):
1134 1134 raise util.Abort(_("tag '%s' does not exist") % tn)
1135 1135 s = set([cl.rev(n) for t, n in repo.tagslist() if t == tn])
1136 1136 else:
1137 1137 s = set([cl.rev(n) for t, n in repo.tagslist() if t != 'tip'])
1138 1138 return [r for r in subset if r in s]
1139 1139
1140 1140 def tagged(repo, subset, x):
1141 1141 return tag(repo, subset, x)
1142 1142
1143 1143 def user(repo, subset, x):
1144 1144 """``user(string)``
1145 1145 User name contains string. The match is case-insensitive.
1146 1146 """
1147 1147 return author(repo, subset, x)
1148 1148
1149 1149 # for internal use
1150 1150 def _list(repo, subset, x):
1151 1151 s = getstring(x, "internal error")
1152 1152 if not s:
1153 1153 return []
1154 1154 if not isinstance(subset, set):
1155 1155 subset = set(subset)
1156 1156 ls = [repo[r].rev() for r in s.split('\0')]
1157 1157 return [r for r in ls if r in subset]
1158 1158
1159 1159 symbols = {
1160 1160 "adds": adds,
1161 1161 "all": getall,
1162 1162 "ancestor": ancestor,
1163 1163 "ancestors": ancestors,
1164 1164 "_firstancestors": _firstancestors,
1165 1165 "author": author,
1166 1166 "bisect": bisect,
1167 1167 "bisected": bisected,
1168 1168 "bookmark": bookmark,
1169 1169 "branch": branch,
1170 1170 "children": children,
1171 1171 "closed": closed,
1172 1172 "contains": contains,
1173 1173 "date": date,
1174 1174 "desc": desc,
1175 1175 "descendants": descendants,
1176 1176 "_firstdescendants": _firstdescendants,
1177 1177 "draft": draft,
1178 1178 "extra": extra,
1179 1179 "file": hasfile,
1180 1180 "filelog": filelog,
1181 1181 "first": first,
1182 1182 "follow": follow,
1183 1183 "_followfirst": _followfirst,
1184 1184 "grep": grep,
1185 1185 "head": head,
1186 1186 "heads": heads,
1187 1187 "id": node_,
1188 1188 "keyword": keyword,
1189 1189 "last": last,
1190 1190 "limit": limit,
1191 1191 "_matchfiles": _matchfiles,
1192 1192 "max": maxrev,
1193 1193 "merge": merge,
1194 1194 "min": minrev,
1195 1195 "modifies": modifies,
1196 1196 "outgoing": outgoing,
1197 1197 "p1": p1,
1198 1198 "p2": p2,
1199 1199 "parents": parents,
1200 1200 "present": present,
1201 1201 "public": public,
1202 1202 "remote": remote,
1203 1203 "removes": removes,
1204 1204 "rev": rev,
1205 1205 "reverse": reverse,
1206 1206 "roots": roots,
1207 1207 "sort": sort,
1208 1208 "secret": secret,
1209 1209 "matching": matching,
1210 1210 "tag": tag,
1211 1211 "tagged": tagged,
1212 1212 "user": user,
1213 1213 "_list": _list,
1214 1214 }
1215 1215
1216 1216 methods = {
1217 1217 "range": rangeset,
1218 1218 "string": stringset,
1219 1219 "symbol": symbolset,
1220 1220 "and": andset,
1221 1221 "or": orset,
1222 1222 "not": notset,
1223 1223 "list": listset,
1224 1224 "func": func,
1225 1225 "ancestor": ancestorspec,
1226 1226 "parent": parentspec,
1227 1227 "parentpost": p1,
1228 1228 }
1229 1229
1230 1230 def optimize(x, small):
1231 1231 if x is None:
1232 1232 return 0, x
1233 1233
1234 1234 smallbonus = 1
1235 1235 if small:
1236 1236 smallbonus = .5
1237 1237
1238 1238 op = x[0]
1239 1239 if op == 'minus':
1240 1240 return optimize(('and', x[1], ('not', x[2])), small)
1241 1241 elif op == 'dagrange':
1242 1242 return optimize(('and', ('func', ('symbol', 'descendants'), x[1]),
1243 1243 ('func', ('symbol', 'ancestors'), x[2])), small)
1244 1244 elif op == 'dagrangepre':
1245 1245 return optimize(('func', ('symbol', 'ancestors'), x[1]), small)
1246 1246 elif op == 'dagrangepost':
1247 1247 return optimize(('func', ('symbol', 'descendants'), x[1]), small)
1248 1248 elif op == 'rangepre':
1249 1249 return optimize(('range', ('string', '0'), x[1]), small)
1250 1250 elif op == 'rangepost':
1251 1251 return optimize(('range', x[1], ('string', 'tip')), small)
1252 1252 elif op == 'negate':
1253 1253 return optimize(('string',
1254 1254 '-' + getstring(x[1], _("can't negate that"))), small)
1255 1255 elif op in 'string symbol negate':
1256 1256 return smallbonus, x # single revisions are small
1257 1257 elif op == 'and' or op == 'dagrange':
1258 1258 wa, ta = optimize(x[1], True)
1259 1259 wb, tb = optimize(x[2], True)
1260 1260 w = min(wa, wb)
1261 1261 if wa > wb:
1262 1262 return w, (op, tb, ta)
1263 1263 return w, (op, ta, tb)
1264 1264 elif op == 'or':
1265 1265 wa, ta = optimize(x[1], False)
1266 1266 wb, tb = optimize(x[2], False)
1267 1267 if wb < wa:
1268 1268 wb, wa = wa, wb
1269 1269 return max(wa, wb), (op, ta, tb)
1270 1270 elif op == 'not':
1271 1271 o = optimize(x[1], not small)
1272 1272 return o[0], (op, o[1])
1273 1273 elif op == 'parentpost':
1274 1274 o = optimize(x[1], small)
1275 1275 return o[0], (op, o[1])
1276 1276 elif op == 'group':
1277 1277 return optimize(x[1], small)
1278 1278 elif op in 'range list parent ancestorspec':
1279 1279 if op == 'parent':
1280 1280 # x^:y means (x^) : y, not x ^ (:y)
1281 1281 post = ('parentpost', x[1])
1282 1282 if x[2][0] == 'dagrangepre':
1283 1283 return optimize(('dagrange', post, x[2][1]), small)
1284 1284 elif x[2][0] == 'rangepre':
1285 1285 return optimize(('range', post, x[2][1]), small)
1286 1286
1287 1287 wa, ta = optimize(x[1], small)
1288 1288 wb, tb = optimize(x[2], small)
1289 1289 return wa + wb, (op, ta, tb)
1290 1290 elif op == 'func':
1291 1291 f = getstring(x[1], _("not a symbol"))
1292 1292 wa, ta = optimize(x[2], small)
1293 1293 if f in ("author branch closed date desc file grep keyword "
1294 1294 "outgoing user"):
1295 1295 w = 10 # slow
1296 1296 elif f in "modifies adds removes":
1297 1297 w = 30 # slower
1298 1298 elif f == "contains":
1299 1299 w = 100 # very slow
1300 1300 elif f == "ancestor":
1301 1301 w = 1 * smallbonus
1302 1302 elif f in "reverse limit first":
1303 1303 w = 0
1304 1304 elif f in "sort":
1305 1305 w = 10 # assume most sorts look at changelog
1306 1306 else:
1307 1307 w = 1
1308 1308 return w + wa, (op, x[1], ta)
1309 1309 return 1, x
1310 1310
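# Example (illustrative): optimizing ``contains('x') and author('bob')``
# reorders the 'and' into ``author('bob') and contains('x')``, so the
# cheaper author() filter (weight 10) shrinks the subset before the very
# slow contains() manifest scan (weight 100) runs.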
1311 1311 _aliasarg = ('func', ('symbol', '_aliasarg'))
1312 1312 def _getaliasarg(tree):
1313 1313 """If tree matches ('func', ('symbol', '_aliasarg'), ('string', X))
1314 1314 return X, None otherwise.
1315 1315 """
1316 1316 if (len(tree) == 3 and tree[:2] == _aliasarg
1317 1317 and tree[2][0] == 'string'):
1318 1318 return tree[2][1]
1319 1319 return None
1320 1320
1321 1321 def _checkaliasarg(tree, known=None):
1322 1322 """Check tree contains no _aliasarg construct or only ones which
1323 1323 value is in known. Used to avoid alias placeholders injection.
1324 1324 """
1325 1325 if isinstance(tree, tuple):
1326 1326 arg = _getaliasarg(tree)
1327 1327 if arg is not None and (not known or arg not in known):
1328 1328 raise error.ParseError(_("not a function: %s") % '_aliasarg')
1329 1329 for t in tree:
1330 1330 _checkaliasarg(t, known)
1331 1331
1332 1332 class revsetalias(object):
1333 1333 funcre = re.compile('^([^(]+)\(([^)]+)\)$')
1334 1334 args = None
1335 1335
1336 1336 def __init__(self, name, value):
1337 1337 '''Aliases like:
1338 1338
1339 1339 h = heads(default)
1340 1340 b($1) = ancestors($1) - ancestors(default)
1341 1341 '''
1342 1342 m = self.funcre.search(name)
1343 1343 if m:
1344 1344 self.name = m.group(1)
1345 1345 self.tree = ('func', ('symbol', m.group(1)))
1346 1346 self.args = [x.strip() for x in m.group(2).split(',')]
1347 1347 for arg in self.args:
1348 1348 # _aliasarg() is an unknown symbol only used to separate
1349 1349 # alias argument placeholders from regular strings.
1350 1350 value = value.replace(arg, '_aliasarg(%r)' % (arg,))
1351 1351 else:
1352 1352 self.name = name
1353 1353 self.tree = ('symbol', name)
1354 1354
1355 1355 self.replacement, pos = parse(value)
1356 1356 if pos != len(value):
1357 1357 raise error.ParseError(_('invalid token'), pos)
1358 1358 # Check for placeholder injection
1359 1359 _checkaliasarg(self.replacement, self.args)
1360 1360
1361 1361 def _getalias(aliases, tree):
1362 1362 """If tree looks like an unexpanded alias, return it. Return None
1363 1363 otherwise.
1364 1364 """
1365 1365 if isinstance(tree, tuple) and tree:
1366 1366 if tree[0] == 'symbol' and len(tree) == 2:
1367 1367 name = tree[1]
1368 1368 alias = aliases.get(name)
1369 1369 if alias and alias.args is None and alias.tree == tree:
1370 1370 return alias
1371 1371 if tree[0] == 'func' and len(tree) > 1:
1372 1372 if tree[1][0] == 'symbol' and len(tree[1]) == 2:
1373 1373 name = tree[1][1]
1374 1374 alias = aliases.get(name)
1375 1375 if alias and alias.args is not None and alias.tree == tree[:2]:
1376 1376 return alias
1377 1377 return None
1378 1378
1379 1379 def _expandargs(tree, args):
1380 1380 """Replace _aliasarg instances with the substitution value of the
1381 1381 same name in args, recursively.
1382 1382 """
1383 1383 if not tree or not isinstance(tree, tuple):
1384 1384 return tree
1385 1385 arg = _getaliasarg(tree)
1386 1386 if arg is not None:
1387 1387 return args[arg]
1388 1388 return tuple(_expandargs(t, args) for t in tree)
1389 1389
1390 1390 def _expandaliases(aliases, tree, expanding):
1391 1391 """Expand aliases in tree, recursively.
1392 1392
1393 1393 'aliases' is a dictionary mapping user defined aliases to
1394 1394 revsetalias objects.
1395 1395 """
1396 1396 if not isinstance(tree, tuple):
1397 1397 # Do not expand raw strings
1398 1398 return tree
1399 1399 alias = _getalias(aliases, tree)
1400 1400 if alias is not None:
1401 1401 if alias in expanding:
1402 1402 raise error.ParseError(_('infinite expansion of revset alias "%s" '
1403 1403 'detected') % alias.name)
1404 1404 expanding.append(alias)
1405 1405 result = _expandaliases(aliases, alias.replacement, expanding)
1406 1406 expanding.pop()
1407 1407 if alias.args is not None:
1408 1408 l = getlist(tree[2])
1409 1409 if len(l) != len(alias.args):
1410 1410 raise error.ParseError(
1411 1411 _('invalid number of arguments: %s') % len(l))
1412 1412 l = [_expandaliases(aliases, a, []) for a in l]
1413 1413 result = _expandargs(result, dict(zip(alias.args, l)))
1414 1414 else:
1415 1415 result = tuple(_expandaliases(aliases, t, expanding)
1416 1416 for t in tree)
1417 1417 return result
1418 1418
1419 1419 def findaliases(ui, tree):
1420 1420 _checkaliasarg(tree)
1421 1421 aliases = {}
1422 1422 for k, v in ui.configitems('revsetalias'):
1423 1423 alias = revsetalias(k, v)
1424 1424 aliases[alias.name] = alias
1425 1425 return _expandaliases(aliases, tree, [])
1426 1426
1427 1427 parse = parser.parser(tokenize, elements).parse
1428 1428
1429 1429 def match(ui, spec):
1430 1430 if not spec:
1431 1431 raise error.ParseError(_("empty query"))
1432 1432 tree, pos = parse(spec)
1433 1433 if (pos != len(spec)):
1434 1434 raise error.ParseError(_("invalid token"), pos)
1435 1435 if ui:
1436 1436 tree = findaliases(ui, tree)
1437 1437 weight, tree = optimize(tree, True)
1438 1438 def mfunc(repo, subset):
1439 1439 return getset(repo, subset, tree)
1440 1440 return mfunc
1441 1441
1442 1442 def formatspec(expr, *args):
1443 1443 '''
1444 1444 This is a convenience function for using revsets internally, and
1445 1445 escapes arguments appropriately. Aliases are intentionally ignored
1446 1446 so that intended expression behavior isn't accidentally subverted.
1447 1447
1448 1448 Supported arguments:
1449 1449
1450 1450 %r = revset expression, parenthesized
1451 1451 %d = int(arg), no quoting
1452 1452 %s = string(arg), escaped and single-quoted
1453 1453 %b = arg.branch(), escaped and single-quoted
1454 1454 %n = hex(arg), single-quoted
1455 1455 %% = a literal '%'
1456 1456
1457 1457 Prefixing the type with 'l' specifies a parenthesized list of that type.
1458 1458
1459 1459 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
1460 1460 '(10 or 11):: and ((this()) or (that()))'
1461 1461 >>> formatspec('%d:: and not %d::', 10, 20)
1462 1462 '10:: and not 20::'
1463 1463 >>> formatspec('%ld or %ld', [], [1])
1464 1464 "_list('') or 1"
1465 1465 >>> formatspec('keyword(%s)', 'foo\\xe9')
1466 1466 "keyword('foo\\\\xe9')"
1467 1467 >>> b = lambda: 'default'
1468 1468 >>> b.branch = b
1469 1469 >>> formatspec('branch(%b)', b)
1470 1470 "branch('default')"
1471 1471 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
1472 1472 "root(_list('a\\x00b\\x00c\\x00d'))"
1473 1473 '''
1474 1474
1475 1475 def quote(s):
1476 1476 return repr(str(s))
1477 1477
1478 1478 def argtype(c, arg):
1479 1479 if c == 'd':
1480 1480 return str(int(arg))
1481 1481 elif c == 's':
1482 1482 return quote(arg)
1483 1483 elif c == 'r':
1484 1484 parse(arg) # make sure syntax errors are confined
1485 1485 return '(%s)' % arg
1486 1486 elif c == 'n':
1487 1487 return quote(node.hex(arg))
1488 1488 elif c == 'b':
1489 1489 return quote(arg.branch())
1490 1490
1491 1491 def listexp(s, t):
1492 1492 l = len(s)
1493 1493 if l == 0:
1494 1494 return "_list('')"
1495 1495 elif l == 1:
1496 1496 return argtype(t, s[0])
1497 1497 elif t == 'd':
1498 1498 return "_list('%s')" % "\0".join(str(int(a)) for a in s)
1499 1499 elif t == 's':
1500 1500 return "_list('%s')" % "\0".join(s)
1501 1501 elif t == 'n':
1502 1502 return "_list('%s')" % "\0".join(node.hex(a) for a in s)
1503 1503 elif t == 'b':
1504 1504 return "_list('%s')" % "\0".join(a.branch() for a in s)
1505 1505
1506 1506 m = l // 2
1507 1507 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
1508 1508
1509 1509 ret = ''
1510 1510 pos = 0
1511 1511 arg = 0
1512 1512 while pos < len(expr):
1513 1513 c = expr[pos]
1514 1514 if c == '%':
1515 1515 pos += 1
1516 1516 d = expr[pos]
1517 1517 if d == '%':
1518 1518 ret += d
1519 1519 elif d in 'dsnbr':
1520 1520 ret += argtype(d, args[arg])
1521 1521 arg += 1
1522 1522 elif d == 'l':
1523 1523 # a list of some type
1524 1524 pos += 1
1525 1525 d = expr[pos]
1526 1526 ret += listexp(list(args[arg]), d)
1527 1527 arg += 1
1528 1528 else:
1529 1529 raise util.Abort('unexpected revspec format character %s' % d)
1530 1530 else:
1531 1531 ret += c
1532 1532 pos += 1
1533 1533
1534 1534 return ret
1535 1535
1536 1536 def prettyformat(tree):
1537 1537 def _prettyformat(tree, level, lines):
1538 1538 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
1539 1539 lines.append((level, str(tree)))
1540 1540 else:
1541 1541 lines.append((level, '(%s' % tree[0]))
1542 1542 for s in tree[1:]:
1543 1543 _prettyformat(s, level + 1, lines)
1544 1544 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
1545 1545
1546 1546 lines = []
1547 1547 _prettyformat(tree, 0, lines)
1548 1548 output = '\n'.join((' '*l + s) for l, s in lines)
1549 1549 return output
1550 1550
1551 1551 # tell hggettext to extract docstrings from these functions:
1552 1552 i18nfunctions = symbols.values()
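A small usage sketch (illustrative, assuming ``ui`` and ``repo`` objects
are at hand): match() compiles a revset expression into a filter over a
subset of revision numbers.

    m = match(ui, "head() and not closed()")
    revs = m(repo, range(len(repo)))  # list of matching revision numbers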
@@ -1,150 +1,150 b''
1 1 # discovery.py - protocol changeset discovery functions
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid, short
9 9 from i18n import _
10 import util, error
10 import util, error, collections
11 11
12 12 def findcommonincoming(repo, remote, heads=None, force=False):
13 13 """Return a tuple (common, fetch, heads) used to identify the common
14 14 subset of nodes between repo and remote.
15 15
16 16 "common" is a list of (at least) the heads of the common subset.
17 17 "fetch" is a list of roots of the nodes that would be incoming, to be
18 18 supplied to changegroupsubset.
19 19 "heads" is either the supplied heads, or else the remote's heads.
20 20 """
21 21
22 22 m = repo.changelog.nodemap
23 23 search = []
24 24 fetch = set()
25 25 seen = set()
26 26 seenbranch = set()
27 27 base = set()
28 28
29 29 if not heads:
30 30 heads = remote.heads()
31 31
32 32 if repo.changelog.tip() == nullid:
33 33 base.add(nullid)
34 34 if heads != [nullid]:
35 35 return [nullid], [nullid], list(heads)
36 36 return [nullid], [], heads
37 37
38 38 # assume we're closer to the tip than the root
39 39 # and start by examining the heads
40 40 repo.ui.status(_("searching for changes\n"))
41 41
42 42 unknown = []
43 43 for h in heads:
44 44 if h not in m:
45 45 unknown.append(h)
46 46 else:
47 47 base.add(h)
48 48
49 49 if not unknown:
50 50 return list(base), [], list(heads)
51 51
52 52 req = set(unknown)
53 53 reqcnt = 0
54 54
55 55 # search through remote branches
56 56 # a 'branch' here is a linear segment of history, with four parts:
57 57 # head, root, first parent, second parent
58 58 # (a branch always has two parents (or none) by definition)
59 unknown = remote.branches(unknown)
59 unknown = collections.deque(remote.branches(unknown))
60 60 while unknown:
61 61 r = []
62 62 while unknown:
63 n = unknown.pop(0)
63 n = unknown.popleft()
64 64 if n[0] in seen:
65 65 continue
66 66
67 67 repo.ui.debug("examining %s:%s\n"
68 68 % (short(n[0]), short(n[1])))
69 69 if n[0] == nullid: # found the end of the branch
70 70 pass
71 71 elif n in seenbranch:
72 72 repo.ui.debug("branch already found\n")
73 73 continue
74 74 elif n[1] and n[1] in m: # do we know the base?
75 75 repo.ui.debug("found incomplete branch %s:%s\n"
76 76 % (short(n[0]), short(n[1])))
77 77 search.append(n[0:2]) # schedule branch range for scanning
78 78 seenbranch.add(n)
79 79 else:
80 80 if n[1] not in seen and n[1] not in fetch:
81 81 if n[2] in m and n[3] in m:
82 82 repo.ui.debug("found new changeset %s\n" %
83 83 short(n[1]))
84 84 fetch.add(n[1]) # earliest unknown
85 85 for p in n[2:4]:
86 86 if p in m:
87 87 base.add(p) # latest known
88 88
89 89 for p in n[2:4]:
90 90 if p not in req and p not in m:
91 91 r.append(p)
92 92 req.add(p)
93 93 seen.add(n[0])
94 94
95 95 if r:
96 96 reqcnt += 1
97 97 repo.ui.progress(_('searching'), reqcnt, unit=_('queries'))
98 98 repo.ui.debug("request %d: %s\n" %
99 99 (reqcnt, " ".join(map(short, r))))
100 100 for p in xrange(0, len(r), 10):
101 101 for b in remote.branches(r[p:p + 10]):
102 102 repo.ui.debug("received %s:%s\n" %
103 103 (short(b[0]), short(b[1])))
104 104 unknown.append(b)
105 105
106 106 # do binary search on the branches we found
107 107 while search:
108 108 newsearch = []
109 109 reqcnt += 1
110 110 repo.ui.progress(_('searching'), reqcnt, unit=_('queries'))
111 111 for n, l in zip(search, remote.between(search)):
112 112 l.append(n[1])
113 113 p = n[0]
114 114 f = 1
115 115 for i in l:
116 116 repo.ui.debug("narrowing %d:%d %s\n" % (f, len(l), short(i)))
117 117 if i in m:
118 118 if f <= 2:
119 119 repo.ui.debug("found new branch changeset %s\n" %
120 120 short(p))
121 121 fetch.add(p)
122 122 base.add(i)
123 123 else:
124 124 repo.ui.debug("narrowed branch search to %s:%s\n"
125 125 % (short(p), short(i)))
126 126 newsearch.append((p, i))
127 127 break
128 128 p, f = i, f * 2
129 129 search = newsearch
130 130
131 131 # sanity check our fetch list
132 132 for f in fetch:
133 133 if f in m:
134 134 raise error.RepoError(_("already have changeset ")
135 135 + short(f[:4]))
136 136
137 137 base = list(base)
138 138 if base == [nullid]:
139 139 if force:
140 140 repo.ui.warn(_("warning: repository is unrelated\n"))
141 141 else:
142 142 raise util.Abort(_("repository is unrelated"))
143 143
144 144 repo.ui.debug("found new changesets starting at " +
145 145 " ".join([short(f) for f in fetch]) + "\n")
146 146
147 147 repo.ui.progress(_('searching'), None)
148 148 repo.ui.debug("%d total queries\n" % reqcnt)
149 149
150 150 return base, list(fetch), heads
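
Every hunk in this changeset targets the same pattern: draining a work queue from the left. list.pop(0) shifts all remaining elements, so a full drain is quadratic, while deque.popleft() is constant time per item. A hypothetical micro-comparison (the queue size is illustrative):

    import collections, timeit

    # O(n^2) overall: each pop(0) shifts everything that remains
    timeit.timeit('while q: q.pop(0)',
                  'q = list(range(100000))', number=1)
    # O(n) overall: popleft() is O(1)
    timeit.timeit('while q: q.popleft()',
                  'import collections; q = collections.deque(range(100000))',
                  number=1)
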
@@ -1,1766 +1,1767 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from i18n import _
17 import error, osutil, encoding
17 import error, osutil, encoding, collections
18 18 import errno, re, shutil, sys, tempfile, traceback
19 19 import os, time, datetime, calendar, textwrap, signal
20 20 import imp, socket, urllib
21 21
22 22 if os.name == 'nt':
23 23 import windows as platform
24 24 else:
25 25 import posix as platform
26 26
27 27 platform.encodinglower = encoding.lower
28 28 platform.encodingupper = encoding.upper
29 29
30 30 cachestat = platform.cachestat
31 31 checkexec = platform.checkexec
32 32 checklink = platform.checklink
33 33 copymode = platform.copymode
34 34 executablepath = platform.executablepath
35 35 expandglobs = platform.expandglobs
36 36 explainexit = platform.explainexit
37 37 findexe = platform.findexe
38 38 gethgcmd = platform.gethgcmd
39 39 getuser = platform.getuser
40 40 groupmembers = platform.groupmembers
41 41 groupname = platform.groupname
42 42 hidewindow = platform.hidewindow
43 43 isexec = platform.isexec
44 44 isowner = platform.isowner
45 45 localpath = platform.localpath
46 46 lookupreg = platform.lookupreg
47 47 makedir = platform.makedir
48 48 nlinks = platform.nlinks
49 49 normpath = platform.normpath
50 50 normcase = platform.normcase
51 51 nulldev = platform.nulldev
52 52 openhardlinks = platform.openhardlinks
53 53 oslink = platform.oslink
54 54 parsepatchoutput = platform.parsepatchoutput
55 55 pconvert = platform.pconvert
56 56 popen = platform.popen
57 57 posixfile = platform.posixfile
58 58 quotecommand = platform.quotecommand
59 59 realpath = platform.realpath
60 60 rename = platform.rename
61 61 samedevice = platform.samedevice
62 62 samefile = platform.samefile
63 63 samestat = platform.samestat
64 64 setbinary = platform.setbinary
65 65 setflags = platform.setflags
66 66 setsignalhandler = platform.setsignalhandler
67 67 shellquote = platform.shellquote
68 68 spawndetached = platform.spawndetached
69 69 sshargs = platform.sshargs
70 70 statfiles = platform.statfiles
71 71 termwidth = platform.termwidth
72 72 testpid = platform.testpid
73 73 umask = platform.umask
74 74 unlink = platform.unlink
75 75 unlinkpath = platform.unlinkpath
76 76 username = platform.username
77 77
78 78 # Python compatibility
79 79
80 80 _notset = object()
81 81
82 82 def safehasattr(thing, attr):
83 83 return getattr(thing, attr, _notset) is not _notset
84 84
85 85 def sha1(s=''):
86 86 '''
87 87 Low-overhead wrapper around Python's SHA support
88 88
89 89 >>> f = _fastsha1
90 90 >>> a = sha1()
91 91 >>> a = f()
92 92 >>> a.hexdigest()
93 93 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
94 94 '''
95 95
96 96 return _fastsha1(s)
97 97
98 98 def _fastsha1(s=''):
99 99 # This function will import sha1 from hashlib or sha (whichever is
100 100 # available) and overwrite itself with it on the first call.
101 101 # Subsequent calls will go directly to the imported function.
102 102 if sys.version_info >= (2, 5):
103 103 from hashlib import sha1 as _sha1
104 104 else:
105 105 from sha import sha as _sha1
106 106 global _fastsha1, sha1
107 107 _fastsha1 = sha1 = _sha1
108 108 return _sha1(s)
109 109
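
_fastsha1 is a self-replacing function: the first call pays the import cost, then rebinds its own global name so later calls skip the version check entirely. A minimal sketch of the idiom (names are hypothetical):

    def fastpath():
        # first call only: do the one-time setup, then rebind our own
        # global name so every later call hits the cheap version
        def _real():
            return 42
        global fastpath
        fastpath = _real
        return _real()

    fastpath()   # runs the setup and rebinds
    fastpath()   # now calls _real() directly
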
110 110 try:
111 111 buffer = buffer
112 112 except NameError:
113 113 if sys.version_info[0] < 3:
114 114 def buffer(sliceable, offset=0):
115 115 return sliceable[offset:]
116 116 else:
117 117 def buffer(sliceable, offset=0):
118 118 return memoryview(sliceable)[offset:]
119 119
120 120 import subprocess
121 121 closefds = os.name == 'posix'
122 122
123 123 def popen2(cmd, env=None, newlines=False):
124 124 # Setting bufsize to -1 lets the system decide the buffer size.
125 125 # The default for bufsize is 0, meaning unbuffered. This leads to
126 126 # poor performance on Mac OS X: http://bugs.python.org/issue4194
127 127 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
128 128 close_fds=closefds,
129 129 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
130 130 universal_newlines=newlines,
131 131 env=env)
132 132 return p.stdin, p.stdout
133 133
134 134 def popen3(cmd, env=None, newlines=False):
135 135 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
136 136 close_fds=closefds,
137 137 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
138 138 stderr=subprocess.PIPE,
139 139 universal_newlines=newlines,
140 140 env=env)
141 141 return p.stdin, p.stdout, p.stderr
142 142
143 143 def version():
144 144 """Return version information if available."""
145 145 try:
146 146 import __version__
147 147 return __version__.version
148 148 except ImportError:
149 149 return 'unknown'
150 150
151 151 # used by parsedate
152 152 defaultdateformats = (
153 153 '%Y-%m-%d %H:%M:%S',
154 154 '%Y-%m-%d %I:%M:%S%p',
155 155 '%Y-%m-%d %H:%M',
156 156 '%Y-%m-%d %I:%M%p',
157 157 '%Y-%m-%d',
158 158 '%m-%d',
159 159 '%m/%d',
160 160 '%m/%d/%y',
161 161 '%m/%d/%Y',
162 162 '%a %b %d %H:%M:%S %Y',
163 163 '%a %b %d %I:%M:%S%p %Y',
164 164 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
165 165 '%b %d %H:%M:%S %Y',
166 166 '%b %d %I:%M:%S%p %Y',
167 167 '%b %d %H:%M:%S',
168 168 '%b %d %I:%M:%S%p',
169 169 '%b %d %H:%M',
170 170 '%b %d %I:%M%p',
171 171 '%b %d %Y',
172 172 '%b %d',
173 173 '%H:%M:%S',
174 174 '%I:%M:%S%p',
175 175 '%H:%M',
176 176 '%I:%M%p',
177 177 )
178 178
179 179 extendeddateformats = defaultdateformats + (
180 180 "%Y",
181 181 "%Y-%m",
182 182 "%b",
183 183 "%b %Y",
184 184 )
185 185
186 186 def cachefunc(func):
187 187 '''cache the result of function calls'''
188 188 # XXX doesn't handle keywords args
189 189 cache = {}
190 190 if func.func_code.co_argcount == 1:
191 191 # we gain a small amount of time because
192 192 # we don't need to pack/unpack the list
193 193 def f(arg):
194 194 if arg not in cache:
195 195 cache[arg] = func(arg)
196 196 return cache[arg]
197 197 else:
198 198 def f(*args):
199 199 if args not in cache:
200 200 cache[args] = func(*args)
201 201 return cache[args]
202 202
203 203 return f
204 204
205 205 def lrucachefunc(func):
206 206 '''cache most recent results of function calls'''
207 207 cache = {}
208 order = []
208 order = collections.deque()
209 209 if func.func_code.co_argcount == 1:
210 210 def f(arg):
211 211 if arg not in cache:
212 212 if len(cache) > 20:
213 del cache[order.pop(0)]
213 del cache[order.popleft()]
214 214 cache[arg] = func(arg)
215 215 else:
216 216 order.remove(arg)
217 217 order.append(arg)
218 218 return cache[arg]
219 219 else:
220 220 def f(*args):
221 221 if args not in cache:
222 222 if len(cache) > 20:
223 del cache[order.pop(0)]
223 del cache[order.popleft()]
224 224 cache[args] = func(*args)
225 225 else:
226 226 order.remove(args)
227 227 order.append(args)
228 228 return cache[args]
229 229
230 230 return f
231 231
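
The deque here keeps cache keys in least-recently-used order, so evicting the oldest key is now O(1) where list.pop(0) was O(n). A minimal sketch of the same pattern in isolation (the size limit of 3 and the squaring function are illustrative):

    import collections

    cache = {}
    order = collections.deque()

    def cached_square(x):
        if x not in cache:
            if len(cache) > 3:
                del cache[order.popleft()]   # evict the oldest key in O(1)
            cache[x] = x * x
        else:
            order.remove(x)                  # move a cache hit to the fresh end
        order.append(x)
        return cache[x]
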
232 232 class propertycache(object):
233 233 def __init__(self, func):
234 234 self.func = func
235 235 self.name = func.__name__
236 236 def __get__(self, obj, type=None):
237 237 result = self.func(obj)
238 238 setattr(obj, self.name, result)
239 239 return result
240 240
241 241 def pipefilter(s, cmd):
242 242 '''filter string S through command CMD, returning its output'''
243 243 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
244 244 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
245 245 pout, perr = p.communicate(s)
246 246 return pout
247 247
248 248 def tempfilter(s, cmd):
249 249 '''filter string S through a pair of temporary files with CMD.
250 250 CMD is used as a template to create the real command to be run,
251 251 with the strings INFILE and OUTFILE replaced by the real names of
252 252 the temporary files generated.'''
253 253 inname, outname = None, None
254 254 try:
255 255 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
256 256 fp = os.fdopen(infd, 'wb')
257 257 fp.write(s)
258 258 fp.close()
259 259 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
260 260 os.close(outfd)
261 261 cmd = cmd.replace('INFILE', inname)
262 262 cmd = cmd.replace('OUTFILE', outname)
263 263 code = os.system(cmd)
264 264 if sys.platform == 'OpenVMS' and code & 1:
265 265 code = 0
266 266 if code:
267 267 raise Abort(_("command '%s' failed: %s") %
268 268 (cmd, explainexit(code)))
269 269 fp = open(outname, 'rb')
270 270 r = fp.read()
271 271 fp.close()
272 272 return r
273 273 finally:
274 274 try:
275 275 if inname:
276 276 os.unlink(inname)
277 277 except OSError:
278 278 pass
279 279 try:
280 280 if outname:
281 281 os.unlink(outname)
282 282 except OSError:
283 283 pass
284 284
285 285 filtertable = {
286 286 'tempfile:': tempfilter,
287 287 'pipe:': pipefilter,
288 288 }
289 289
290 290 def filter(s, cmd):
291 291 "filter a string through a command that transforms its input to its output"
292 292 for name, fn in filtertable.iteritems():
293 293 if cmd.startswith(name):
294 294 return fn(s, cmd[len(name):].lstrip())
295 295 return pipefilter(s, cmd)
296 296
297 297 def binary(s):
298 298 """return true if a string is binary data"""
299 299 return bool(s and '\0' in s)
300 300
301 301 def increasingchunks(source, min=1024, max=65536):
302 302 '''return no less than min bytes per chunk while data remains,
303 303 doubling min after each chunk until it reaches max'''
304 304 def log2(x):
305 305 if not x:
306 306 return 0
307 307 i = 0
308 308 while x:
309 309 x >>= 1
310 310 i += 1
311 311 return i - 1
312 312
313 313 buf = []
314 314 blen = 0
315 315 for chunk in source:
316 316 buf.append(chunk)
317 317 blen += len(chunk)
318 318 if blen >= min:
319 319 if min < max:
320 320 min = min << 1
321 321 nmin = 1 << log2(blen)
322 322 if nmin > min:
323 323 min = nmin
324 324 if min > max:
325 325 min = max
326 326 yield ''.join(buf)
327 327 blen = 0
328 328 buf = []
329 329 if buf:
330 330 yield ''.join(buf)
331 331
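
A quick, hypothetical way to watch the doubling (assuming the generator above is in scope and feeding it uniform 512-byte pieces): each emitted chunk is twice the size of the last until the 64k ceiling is reached.

    pieces = ('x' * 512 for _ in range(1000))
    sizes = [len(c) for c in increasingchunks(pieces)]
    sizes[:6]   # [1024, 2048, 4096, 8192, 16384, 32768]
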
332 332 Abort = error.Abort
333 333
334 334 def always(fn):
335 335 return True
336 336
337 337 def never(fn):
338 338 return False
339 339
340 340 def pathto(root, n1, n2):
341 341 '''return the relative path from one place to another.
342 342 root should use os.sep to separate directories
343 343 n1 should use os.sep to separate directories
344 344 n2 should use "/" to separate directories
345 345 returns an os.sep-separated path.
346 346
347 347 If n1 is a relative path, it's assumed it's
348 348 relative to root.
349 349 n2 should always be relative to root.
350 350 '''
351 351 if not n1:
352 352 return localpath(n2)
353 353 if os.path.isabs(n1):
354 354 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
355 355 return os.path.join(root, localpath(n2))
356 356 n2 = '/'.join((pconvert(root), n2))
357 357 a, b = splitpath(n1), n2.split('/')
358 358 a.reverse()
359 359 b.reverse()
360 360 while a and b and a[-1] == b[-1]:
361 361 a.pop()
362 362 b.pop()
363 363 b.reverse()
364 364 return os.sep.join((['..'] * len(a)) + b) or '.'
365 365
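
For instance (a hypothetical POSIX layout, so os.sep is '/'):

    # from root/src/lib up to the root and down into doc
    pathto('/repo', 'src/lib', 'doc/readme')   # -> '../../doc/readme'
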
366 366 _hgexecutable = None
367 367
368 368 def mainfrozen():
369 369 """return True if we are a frozen executable.
370 370
371 371 The code supports py2exe (most common, Windows only) and tools/freeze
372 372 (portable, not much used).
373 373 """
374 374 return (safehasattr(sys, "frozen") or # new py2exe
375 375 safehasattr(sys, "importers") or # old py2exe
376 376 imp.is_frozen("__main__")) # tools/freeze
377 377
378 378 def hgexecutable():
379 379 """return location of the 'hg' executable.
380 380
381 381 Defaults to $HG or 'hg' in the search path.
382 382 """
383 383 if _hgexecutable is None:
384 384 hg = os.environ.get('HG')
385 385 mainmod = sys.modules['__main__']
386 386 if hg:
387 387 _sethgexecutable(hg)
388 388 elif mainfrozen():
389 389 _sethgexecutable(sys.executable)
390 390 elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
391 391 _sethgexecutable(mainmod.__file__)
392 392 else:
393 393 exe = findexe('hg') or os.path.basename(sys.argv[0])
394 394 _sethgexecutable(exe)
395 395 return _hgexecutable
396 396
397 397 def _sethgexecutable(path):
398 398 """set location of the 'hg' executable"""
399 399 global _hgexecutable
400 400 _hgexecutable = path
401 401
402 402 def system(cmd, environ={}, cwd=None, onerr=None, errprefix=None, out=None):
403 403 '''enhanced shell command execution.
404 404 run with environment maybe modified, maybe in different dir.
405 405
406 406 if command fails and onerr is None, return status. if ui object,
407 407 print error message and return status, else raise onerr object as
408 408 exception.
409 409
410 410 if out is specified, it is assumed to be a file-like object that has a
411 411 write() method. stdout and stderr will be redirected to out.'''
412 412 try:
413 413 sys.stdout.flush()
414 414 except Exception:
415 415 pass
416 416 def py2shell(val):
417 417 'convert python object into string that is useful to shell'
418 418 if val is None or val is False:
419 419 return '0'
420 420 if val is True:
421 421 return '1'
422 422 return str(val)
423 423 origcmd = cmd
424 424 cmd = quotecommand(cmd)
425 425 if sys.platform == 'plan9':
426 426 # subprocess kludge to work around issues in half-baked Python
427 427 # ports, notably bichued/python:
428 428 if not cwd is None:
429 429 os.chdir(cwd)
430 430 rc = os.system(cmd)
431 431 else:
432 432 env = dict(os.environ)
433 433 env.update((k, py2shell(v)) for k, v in environ.iteritems())
434 434 env['HG'] = hgexecutable()
435 435 if out is None or out == sys.__stdout__:
436 436 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
437 437 env=env, cwd=cwd)
438 438 else:
439 439 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
440 440 env=env, cwd=cwd, stdout=subprocess.PIPE,
441 441 stderr=subprocess.STDOUT)
442 442 for line in proc.stdout:
443 443 out.write(line)
444 444 proc.wait()
445 445 rc = proc.returncode
446 446 if sys.platform == 'OpenVMS' and rc & 1:
447 447 rc = 0
448 448 if rc and onerr:
449 449 errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
450 450 explainexit(rc)[0])
451 451 if errprefix:
452 452 errmsg = '%s: %s' % (errprefix, errmsg)
453 453 try:
454 454 onerr.warn(errmsg + '\n')
455 455 except AttributeError:
456 456 raise onerr(errmsg)
457 457 return rc
458 458
459 459 def checksignature(func):
460 460 '''wrap a function with code to check for calling errors'''
461 461 def check(*args, **kwargs):
462 462 try:
463 463 return func(*args, **kwargs)
464 464 except TypeError:
465 465 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
466 466 raise error.SignatureError
467 467 raise
468 468
469 469 return check
470 470
471 471 def copyfile(src, dest):
472 472 "copy a file, preserving mode and atime/mtime"
473 473 if os.path.islink(src):
474 474 try:
475 475 os.unlink(dest)
476 476 except OSError:
477 477 pass
478 478 os.symlink(os.readlink(src), dest)
479 479 else:
480 480 try:
481 481 shutil.copyfile(src, dest)
482 482 shutil.copymode(src, dest)
483 483 except shutil.Error, inst:
484 484 raise Abort(str(inst))
485 485
486 486 def copyfiles(src, dst, hardlink=None):
487 487 """Copy a directory tree using hardlinks if possible"""
488 488
489 489 if hardlink is None:
490 490 hardlink = (os.stat(src).st_dev ==
491 491 os.stat(os.path.dirname(dst)).st_dev)
492 492
493 493 num = 0
494 494 if os.path.isdir(src):
495 495 os.mkdir(dst)
496 496 for name, kind in osutil.listdir(src):
497 497 srcname = os.path.join(src, name)
498 498 dstname = os.path.join(dst, name)
499 499 hardlink, n = copyfiles(srcname, dstname, hardlink)
500 500 num += n
501 501 else:
502 502 if hardlink:
503 503 try:
504 504 oslink(src, dst)
505 505 except (IOError, OSError):
506 506 hardlink = False
507 507 shutil.copy(src, dst)
508 508 else:
509 509 shutil.copy(src, dst)
510 510 num += 1
511 511
512 512 return hardlink, num
513 513
514 514 _winreservednames = '''con prn aux nul
515 515 com1 com2 com3 com4 com5 com6 com7 com8 com9
516 516 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
517 517 _winreservedchars = ':*?"<>|'
518 518 def checkwinfilename(path):
519 519 '''Check that the base-relative path is a valid filename on Windows.
520 520 Returns None if the path is ok, or a UI string describing the problem.
521 521
522 522 >>> checkwinfilename("just/a/normal/path")
523 523 >>> checkwinfilename("foo/bar/con.xml")
524 524 "filename contains 'con', which is reserved on Windows"
525 525 >>> checkwinfilename("foo/con.xml/bar")
526 526 "filename contains 'con', which is reserved on Windows"
527 527 >>> checkwinfilename("foo/bar/xml.con")
528 528 >>> checkwinfilename("foo/bar/AUX/bla.txt")
529 529 "filename contains 'AUX', which is reserved on Windows"
530 530 >>> checkwinfilename("foo/bar/bla:.txt")
531 531 "filename contains ':', which is reserved on Windows"
532 532 >>> checkwinfilename("foo/bar/b\07la.txt")
533 533 "filename contains '\\\\x07', which is invalid on Windows"
534 534 >>> checkwinfilename("foo/bar/bla ")
535 535 "filename ends with ' ', which is not allowed on Windows"
536 536 >>> checkwinfilename("../bar")
537 537 '''
538 538 for n in path.replace('\\', '/').split('/'):
539 539 if not n:
540 540 continue
541 541 for c in n:
542 542 if c in _winreservedchars:
543 543 return _("filename contains '%s', which is reserved "
544 544 "on Windows") % c
545 545 if ord(c) <= 31:
546 546 return _("filename contains %r, which is invalid "
547 547 "on Windows") % c
548 548 base = n.split('.')[0]
549 549 if base and base.lower() in _winreservednames:
550 550 return _("filename contains '%s', which is reserved "
551 551 "on Windows") % base
552 552 t = n[-1]
553 553 if t in '. ' and n not in '..':
554 554 return _("filename ends with '%s', which is not allowed "
555 555 "on Windows") % t
556 556
557 557 if os.name == 'nt':
558 558 checkosfilename = checkwinfilename
559 559 else:
560 560 checkosfilename = platform.checkosfilename
561 561
562 562 def makelock(info, pathname):
563 563 try:
564 564 return os.symlink(info, pathname)
565 565 except OSError, why:
566 566 if why.errno == errno.EEXIST:
567 567 raise
568 568 except AttributeError: # no symlink in os
569 569 pass
570 570
571 571 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
572 572 os.write(ld, info)
573 573 os.close(ld)
574 574
575 575 def readlock(pathname):
576 576 try:
577 577 return os.readlink(pathname)
578 578 except OSError, why:
579 579 if why.errno not in (errno.EINVAL, errno.ENOSYS):
580 580 raise
581 581 except AttributeError: # no symlink in os
582 582 pass
583 583 fp = posixfile(pathname)
584 584 r = fp.read()
585 585 fp.close()
586 586 return r
587 587
588 588 def fstat(fp):
589 589 '''stat file object that may not have fileno method.'''
590 590 try:
591 591 return os.fstat(fp.fileno())
592 592 except AttributeError:
593 593 return os.stat(fp.name)
594 594
595 595 # File system features
596 596
597 597 def checkcase(path):
598 598 """
599 599 Check whether the given path is on a case-sensitive filesystem
600 600
601 601 Requires a path (like /foo/.hg) ending with a foldable final
602 602 directory component.
603 603 """
604 604 s1 = os.stat(path)
605 605 d, b = os.path.split(path)
606 606 b2 = b.upper()
607 607 if b == b2:
608 608 b2 = b.lower()
609 609 if b == b2:
610 610 return True # no evidence against case sensitivity
611 611 p2 = os.path.join(d, b2)
612 612 try:
613 613 s2 = os.stat(p2)
614 614 if s2 == s1:
615 615 return False
616 616 return True
617 617 except OSError:
618 618 return True
619 619
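
In practice the answer depends on the volume the path lives on, e.g. (hypothetical paths):

    checkcase('/Users/me/repo/.hg')   # False on a case-insensitive macOS volume
    checkcase('/home/me/repo/.hg')    # True on a typical Linux filesystem
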
620 620 _fspathcache = {}
621 621 def fspath(name, root):
622 622 '''Get name in the case stored in the filesystem
623 623
624 624 The name should be relative to root, and be normcase-ed for efficiency.
625 625
626 626 Note that this function is unnecessary, and should not be
627 627 called, for case-sensitive filesystems (simply because it's expensive).
628 628
629 629 The root should be normcase-ed, too.
630 630 '''
631 631 def find(p, contents):
632 632 for n in contents:
633 633 if normcase(n) == p:
634 634 return n
635 635 return None
636 636
637 637 seps = os.sep
638 638 if os.altsep:
639 639 seps = seps + os.altsep
640 640 # Protect backslashes. This gets silly very quickly.
641 641 seps = seps.replace('\\', '\\\\')
642 642 pattern = re.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
643 643 dir = os.path.normpath(root)
644 644 result = []
645 645 for part, sep in pattern.findall(name):
646 646 if sep:
647 647 result.append(sep)
648 648 continue
649 649
650 650 if dir not in _fspathcache:
651 651 _fspathcache[dir] = os.listdir(dir)
652 652 contents = _fspathcache[dir]
653 653
654 654 found = find(part, contents)
655 655 if not found:
656 656 # retry "once per directory" per "dirstate.walk" which
657 657 # may take place for each patch of "hg qpush", for example
658 658 contents = os.listdir(dir)
659 659 _fspathcache[dir] = contents
660 660 found = find(part, contents)
661 661
662 662 result.append(found or part)
663 663 dir = os.path.join(dir, part)
664 664
665 665 return ''.join(result)
666 666
667 667 def checknlink(testfile):
668 668 '''check whether hardlink count reporting works properly'''
669 669
670 670 # testfile may be open, so we need a separate file for checking to
671 671 # work around issue2543 (or testfile may get lost on Samba shares)
672 672 f1 = testfile + ".hgtmp1"
673 673 if os.path.lexists(f1):
674 674 return False
675 675 try:
676 676 posixfile(f1, 'w').close()
677 677 except IOError:
678 678 return False
679 679
680 680 f2 = testfile + ".hgtmp2"
681 681 fd = None
682 682 try:
683 683 try:
684 684 oslink(f1, f2)
685 685 except OSError:
686 686 return False
687 687
688 688 # nlinks() may behave differently for files on Windows shares if
689 689 # the file is open.
690 690 fd = posixfile(f2)
691 691 return nlinks(f2) > 1
692 692 finally:
693 693 if fd is not None:
694 694 fd.close()
695 695 for f in (f1, f2):
696 696 try:
697 697 os.unlink(f)
698 698 except OSError:
699 699 pass
700 700
701 701 return False
702 702
703 703 def endswithsep(path):
704 704 '''Check path ends with os.sep or os.altsep.'''
705 705 return path.endswith(os.sep) or os.altsep and path.endswith(os.altsep)
706 706
707 707 def splitpath(path):
708 708 '''Split path by os.sep.
709 709 Note that this function does not use os.altsep because it is meant
710 710 as a simple alternative to "xxx.split(os.sep)".
711 711 It is recommended to use os.path.normpath() before using this
712 712 function if needed.'''
713 713 return path.split(os.sep)
714 714
715 715 def gui():
716 716 '''Are we running in a GUI?'''
717 717 if sys.platform == 'darwin':
718 718 if 'SSH_CONNECTION' in os.environ:
719 719 # handle SSH access to a box where the user is logged in
720 720 return False
721 721 elif getattr(osutil, 'isgui', None):
722 722 # check if a CoreGraphics session is available
723 723 return osutil.isgui()
724 724 else:
725 725 # pure build; use a safe default
726 726 return True
727 727 else:
728 728 return os.name == "nt" or os.environ.get("DISPLAY")
729 729
730 730 def mktempcopy(name, emptyok=False, createmode=None):
731 731 """Create a temporary file with the same contents from name
732 732
733 733 The permission bits are copied from the original file.
734 734
735 735 If the temporary file is going to be truncated immediately, you
736 736 can use emptyok=True as an optimization.
737 737
738 738 Returns the name of the temporary file.
739 739 """
740 740 d, fn = os.path.split(name)
741 741 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
742 742 os.close(fd)
743 743 # Temporary files are created with mode 0600, which is usually not
744 744 # what we want. If the original file already exists, just copy
745 745 # its mode. Otherwise, manually obey umask.
746 746 copymode(name, temp, createmode)
747 747 if emptyok:
748 748 return temp
749 749 try:
750 750 try:
751 751 ifp = posixfile(name, "rb")
752 752 except IOError, inst:
753 753 if inst.errno == errno.ENOENT:
754 754 return temp
755 755 if not getattr(inst, 'filename', None):
756 756 inst.filename = name
757 757 raise
758 758 ofp = posixfile(temp, "wb")
759 759 for chunk in filechunkiter(ifp):
760 760 ofp.write(chunk)
761 761 ifp.close()
762 762 ofp.close()
763 763 except: # re-raises
764 764 try: os.unlink(temp)
765 765 except OSError: pass
766 766 raise
767 767 return temp
768 768
769 769 class atomictempfile(object):
770 770 '''writeable file object that atomically updates a file
771 771
772 772 All writes will go to a temporary copy of the original file. Call
773 773 close() when you are done writing, and atomictempfile will rename
774 774 the temporary copy to the original name, making the changes
775 775 visible. If the object is destroyed without being closed, all your
776 776 writes are discarded.
777 777 '''
778 778 def __init__(self, name, mode='w+b', createmode=None):
779 779 self.__name = name # permanent name
780 780 self._tempname = mktempcopy(name, emptyok=('w' in mode),
781 781 createmode=createmode)
782 782 self._fp = posixfile(self._tempname, mode)
783 783
784 784 # delegated methods
785 785 self.write = self._fp.write
786 786 self.fileno = self._fp.fileno
787 787
788 788 def close(self):
789 789 if not self._fp.closed:
790 790 self._fp.close()
791 791 rename(self._tempname, localpath(self.__name))
792 792
793 793 def discard(self):
794 794 if not self._fp.closed:
795 795 try:
796 796 os.unlink(self._tempname)
797 797 except OSError:
798 798 pass
799 799 self._fp.close()
800 800
801 801 def __del__(self):
802 802 if safehasattr(self, '_fp'): # constructor actually did something
803 803 self.discard()
804 804
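
A typical use of the class (the file name and contents are illustrative): nothing touches the target until close() renames the temp copy over it.

    f = atomictempfile('config')
    try:
        f.write('[ui]\nusername = test\n')
        f.close()        # atomically replaces 'config'
    except Exception:
        f.discard()      # on any error, the original is left untouched
        raise
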
805 805 def makedirs(name, mode=None):
806 806 """recursive directory creation with parent mode inheritance"""
807 807 try:
808 808 os.mkdir(name)
809 809 except OSError, err:
810 810 if err.errno == errno.EEXIST:
811 811 return
812 812 if err.errno != errno.ENOENT or not name:
813 813 raise
814 814 parent = os.path.dirname(os.path.abspath(name))
815 815 if parent == name:
816 816 raise
817 817 makedirs(parent, mode)
818 818 os.mkdir(name)
819 819 if mode is not None:
820 820 os.chmod(name, mode)
821 821
822 822 def readfile(path):
823 823 fp = open(path, 'rb')
824 824 try:
825 825 return fp.read()
826 826 finally:
827 827 fp.close()
828 828
829 829 def writefile(path, text):
830 830 fp = open(path, 'wb')
831 831 try:
832 832 fp.write(text)
833 833 finally:
834 834 fp.close()
835 835
836 836 def appendfile(path, text):
837 837 fp = open(path, 'ab')
838 838 try:
839 839 fp.write(text)
840 840 finally:
841 841 fp.close()
842 842
843 843 class chunkbuffer(object):
844 844 """Allow arbitrary sized chunks of data to be efficiently read from an
845 845 iterator over chunks of arbitrary size."""
846 846
847 847 def __init__(self, in_iter):
848 848 """in_iter is the iterator that's iterating over the input chunks."""
850 850 def splitbig(chunks):
851 851 for chunk in chunks:
852 852 if len(chunk) > 2**20:
853 853 pos = 0
854 854 while pos < len(chunk):
855 855 end = pos + 2 ** 18
856 856 yield chunk[pos:end]
857 857 pos = end
858 858 else:
859 859 yield chunk
860 860 self.iter = splitbig(in_iter)
861 861 self._queue = []
862 862
863 863 def read(self, l):
864 864 """Read L bytes of data from the iterator of chunks of data.
865 865 Returns less than L bytes if the iterator runs dry."""
866 866 left = l
867 867 buf = ''
868 queue = self._queue
868 queue = collections.deque(self._queue)
869 869 while left > 0:
870 870 # refill the queue
871 871 if not queue:
872 872 target = 2**18
873 873 for chunk in self.iter:
874 874 queue.append(chunk)
875 875 target -= len(chunk)
876 876 if target <= 0:
877 877 break
878 878 if not queue:
879 879 break
880 880
881 chunk = queue.pop(0)
881 chunk = queue.popleft()
882 882 left -= len(chunk)
883 883 if left < 0:
884 queue.insert(0, chunk[left:])
884 queue.appendleft(chunk[left:])
885 885 buf += chunk[:left]
886 886 else:
887 887 buf += chunk
888 self._queue = list(queue)
888 889
889 890 return buf
890 891
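
A small, hypothetical illustration of the re-chunking (assuming the class above is in scope): the deque lets read() take chunks off the front and push a remainder back without the O(n) shuffling of list.insert(0, ...).

    cb = chunkbuffer(iter(['abc', 'defgh', 'ij']))
    cb.read(4)   # 'abcd' -- consumes 'abc', splits 'defgh'
    cb.read(4)   # 'efgh' -- the pushed-back remainder
    cb.read(4)   # 'ij'   -- the iterator ran dry, so the read is short
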
891 892 def filechunkiter(f, size=65536, limit=None):
892 893 """Create a generator that produces the data in the file size
893 894 (default 65536) bytes at a time, up to optional limit (default is
894 895 to read all data). Chunks may be less than size bytes if the
895 896 chunk is the last chunk in the file, or the file is a socket or
896 897 some other type of file that sometimes reads less data than is
897 898 requested."""
898 899 assert size >= 0
899 900 assert limit is None or limit >= 0
900 901 while True:
901 902 if limit is None:
902 903 nbytes = size
903 904 else:
904 905 nbytes = min(limit, size)
905 906 s = nbytes and f.read(nbytes)
906 907 if not s:
907 908 break
908 909 if limit:
909 910 limit -= len(s)
910 911 yield s
911 912
912 913 def makedate():
913 914 ct = time.time()
914 915 if ct < 0:
915 916 hint = _("check your clock")
916 917 raise Abort(_("negative timestamp: %d") % ct, hint=hint)
917 918 delta = (datetime.datetime.utcfromtimestamp(ct) -
918 919 datetime.datetime.fromtimestamp(ct))
919 920 tz = delta.days * 86400 + delta.seconds
920 921 return ct, tz
921 922
922 923 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
923 924 """represent a (unixtime, offset) tuple as a localized time.
924 925 unixtime is seconds since the epoch, and offset is the time zone's
925 926 number of seconds away from UTC. "%1" and "%2" in the format expand
926 927 to the offset's signed hours and its minutes, respectively."""
927 928 t, tz = date or makedate()
928 929 if t < 0:
929 930 t = 0 # time.gmtime(lt) fails on Windows for lt < -43200
930 931 tz = 0
931 932 if "%1" in format or "%2" in format:
932 933 sign = (tz > 0) and "-" or "+"
933 934 minutes = abs(tz) // 60
934 935 format = format.replace("%1", "%c%02d" % (sign, minutes // 60))
935 936 format = format.replace("%2", "%02d" % (minutes % 60))
936 937 try:
937 938 t = time.gmtime(float(t) - tz)
938 939 except ValueError:
939 940 # time was out of range
940 941 t = time.gmtime(sys.maxint)
941 942 s = time.strftime(format, t)
942 943 return s
943 944
944 945 def shortdate(date=None):
945 946 """turn (timestamp, tzoff) tuple into iso 8601 date."""
946 947 return datestr(date, format='%Y-%m-%d')
947 948
948 949 def strdate(string, format, defaults=[]):
949 950 """parse a localized time string and return a (unixtime, offset) tuple.
950 951 if the string cannot be parsed, ValueError is raised."""
951 952 def timezone(string):
952 953 tz = string.split()[-1]
953 954 if tz[0] in "+-" and len(tz) == 5 and tz[1:].isdigit():
954 955 sign = (tz[0] == "+") and 1 or -1
955 956 hours = int(tz[1:3])
956 957 minutes = int(tz[3:5])
957 958 return -sign * (hours * 60 + minutes) * 60
958 959 if tz == "GMT" or tz == "UTC":
959 960 return 0
960 961 return None
961 962
962 963 # NOTE: unixtime = localunixtime + offset
963 964 offset, date = timezone(string), string
964 965 if offset is not None:
965 966 date = " ".join(string.split()[:-1])
966 967
967 968 # add missing elements from defaults
968 969 usenow = False # default to using biased defaults
969 970 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
970 971 found = [True for p in part if ("%"+p) in format]
971 972 if not found:
972 973 date += "@" + defaults[part][usenow]
973 974 format += "@%" + part[0]
974 975 else:
975 976 # We've found a specific time element, less specific time
976 977 # elements are relative to today
977 978 usenow = True
978 979
979 980 timetuple = time.strptime(date, format)
980 981 localunixtime = int(calendar.timegm(timetuple))
981 982 if offset is None:
982 983 # local timezone
983 984 unixtime = int(time.mktime(timetuple))
984 985 offset = unixtime - localunixtime
985 986 else:
986 987 unixtime = localunixtime + offset
987 988 return unixtime, offset
988 989
989 990 def parsedate(date, formats=None, bias={}):
990 991 """parse a localized date/time and return a (unixtime, offset) tuple.
991 992
992 993 The date may be a "unixtime offset" string or in one of the specified
993 994 formats. If the date already is a (unixtime, offset) tuple, it is returned.
994 995 """
995 996 if not date:
996 997 return 0, 0
997 998 if isinstance(date, tuple) and len(date) == 2:
998 999 return date
999 1000 if not formats:
1000 1001 formats = defaultdateformats
1001 1002 date = date.strip()
1002 1003 try:
1003 1004 when, offset = map(int, date.split(' '))
1004 1005 except ValueError:
1005 1006 # fill out defaults
1006 1007 now = makedate()
1007 1008 defaults = {}
1008 1009 for part in ("d", "mb", "yY", "HI", "M", "S"):
1009 1010 # this piece is for rounding the specific end of unknowns
1010 1011 b = bias.get(part)
1011 1012 if b is None:
1012 1013 if part[0] in "HMS":
1013 1014 b = "00"
1014 1015 else:
1015 1016 b = "0"
1016 1017
1017 1018 # this piece is for matching the generic end to today's date
1018 1019 n = datestr(now, "%" + part[0])
1019 1020
1020 1021 defaults[part] = (b, n)
1021 1022
1022 1023 for format in formats:
1023 1024 try:
1024 1025 when, offset = strdate(date, format, defaults)
1025 1026 except (ValueError, OverflowError):
1026 1027 pass
1027 1028 else:
1028 1029 break
1029 1030 else:
1030 1031 raise Abort(_('invalid date: %r') % date)
1031 1032 # validate explicit (probably user-specified) date and
1032 1033 # time zone offset. values must fit in signed 32 bits for
1033 1034 # current 32-bit linux runtimes. timezones go from UTC-12
1034 1035 # to UTC+14
1035 1036 if abs(when) > 0x7fffffff:
1036 1037 raise Abort(_('date exceeds 32 bits: %d') % when)
1037 1038 if when < 0:
1038 1039 raise Abort(_('negative date value: %d') % when)
1039 1040 if offset < -50400 or offset > 43200:
1040 1041 raise Abort(_('impossible time zone offset: %d') % offset)
1041 1042 return when, offset
1042 1043
1043 1044 def matchdate(date):
1044 1045 """Return a function that matches a given date match specifier
1045 1046
1046 1047 Formats include:
1047 1048
1048 1049 '{date}' match a given date to the accuracy provided
1049 1050
1050 1051 '<{date}' on or before a given date
1051 1052
1052 1053 '>{date}' on or after a given date
1053 1054
1054 1055 >>> p1 = parsedate("10:29:59")
1055 1056 >>> p2 = parsedate("10:30:00")
1056 1057 >>> p3 = parsedate("10:30:59")
1057 1058 >>> p4 = parsedate("10:31:00")
1058 1059 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1059 1060 >>> f = matchdate("10:30")
1060 1061 >>> f(p1[0])
1061 1062 False
1062 1063 >>> f(p2[0])
1063 1064 True
1064 1065 >>> f(p3[0])
1065 1066 True
1066 1067 >>> f(p4[0])
1067 1068 False
1068 1069 >>> f(p5[0])
1069 1070 False
1070 1071 """
1071 1072
1072 1073 def lower(date):
1073 1074 d = dict(mb="1", d="1")
1074 1075 return parsedate(date, extendeddateformats, d)[0]
1075 1076
1076 1077 def upper(date):
1077 1078 d = dict(mb="12", HI="23", M="59", S="59")
1078 1079 for days in ("31", "30", "29"):
1079 1080 try:
1080 1081 d["d"] = days
1081 1082 return parsedate(date, extendeddateformats, d)[0]
1082 1083 except Abort:
1083 1084 pass
1084 1085 d["d"] = "28"
1085 1086 return parsedate(date, extendeddateformats, d)[0]
1086 1087
1087 1088 date = date.strip()
1088 1089
1089 1090 if not date:
1090 1091 raise Abort(_("dates cannot consist entirely of whitespace"))
1091 1092 elif date[0] == "<":
1092 1093 if not date[1:]:
1093 1094 raise Abort(_("invalid day spec, use '<DATE'"))
1094 1095 when = upper(date[1:])
1095 1096 return lambda x: x <= when
1096 1097 elif date[0] == ">":
1097 1098 if not date[1:]:
1098 1099 raise Abort(_("invalid day spec, use '>DATE'"))
1099 1100 when = lower(date[1:])
1100 1101 return lambda x: x >= when
1101 1102 elif date[0] == "-":
1102 1103 try:
1103 1104 days = int(date[1:])
1104 1105 except ValueError:
1105 1106 raise Abort(_("invalid day spec: %s") % date[1:])
1106 1107 if days < 0:
1107 1108 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
1108 1109 % date[1:])
1109 1110 when = makedate()[0] - days * 3600 * 24
1110 1111 return lambda x: x >= when
1111 1112 elif " to " in date:
1112 1113 a, b = date.split(" to ")
1113 1114 start, stop = lower(a), upper(b)
1114 1115 return lambda x: x >= start and x <= stop
1115 1116 else:
1116 1117 start, stop = lower(date), upper(date)
1117 1118 return lambda x: x >= start and x <= stop
1118 1119
1119 1120 def shortuser(user):
1120 1121 """Return a short representation of a user name or email address."""
1121 1122 f = user.find('@')
1122 1123 if f >= 0:
1123 1124 user = user[:f]
1124 1125 f = user.find('<')
1125 1126 if f >= 0:
1126 1127 user = user[f + 1:]
1127 1128 f = user.find(' ')
1128 1129 if f >= 0:
1129 1130 user = user[:f]
1130 1131 f = user.find('.')
1131 1132 if f >= 0:
1132 1133 user = user[:f]
1133 1134 return user
1134 1135
1135 1136 def emailuser(user):
1136 1137 """Return the user portion of an email address."""
1137 1138 f = user.find('@')
1138 1139 if f >= 0:
1139 1140 user = user[:f]
1140 1141 f = user.find('<')
1141 1142 if f >= 0:
1142 1143 user = user[f + 1:]
1143 1144 return user
1144 1145
1145 1146 def email(author):
1146 1147 '''get email of author.'''
1147 1148 r = author.find('>')
1148 1149 if r == -1:
1149 1150 r = None
1150 1151 return author[author.find('<') + 1:r]
1151 1152
1152 1153 def _ellipsis(text, maxlength):
1153 1154 if len(text) <= maxlength:
1154 1155 return text, False
1155 1156 else:
1156 1157 return "%s..." % (text[:maxlength - 3]), True
1157 1158
1158 1159 def ellipsis(text, maxlength=400):
1159 1160 """Trim string to at most maxlength (default: 400) characters."""
1160 1161 try:
1161 1162 # use unicode not to split at intermediate multi-byte sequence
1162 1163 utext, truncated = _ellipsis(text.decode(encoding.encoding),
1163 1164 maxlength)
1164 1165 if not truncated:
1165 1166 return text
1166 1167 return utext.encode(encoding.encoding)
1167 1168 except (UnicodeDecodeError, UnicodeEncodeError):
1168 1169 return _ellipsis(text, maxlength)[0]
1169 1170
1170 1171 _byteunits = (
1171 1172 (100, 1 << 30, _('%.0f GB')),
1172 1173 (10, 1 << 30, _('%.1f GB')),
1173 1174 (1, 1 << 30, _('%.2f GB')),
1174 1175 (100, 1 << 20, _('%.0f MB')),
1175 1176 (10, 1 << 20, _('%.1f MB')),
1176 1177 (1, 1 << 20, _('%.2f MB')),
1177 1178 (100, 1 << 10, _('%.0f KB')),
1178 1179 (10, 1 << 10, _('%.1f KB')),
1179 1180 (1, 1 << 10, _('%.2f KB')),
1180 1181 (1, 1, _('%.0f bytes')),
1181 1182 )
1182 1183
1183 1184 def bytecount(nbytes):
1184 1185 '''return byte count formatted as readable string, with units'''
1185 1186
1186 1187 for multiplier, divisor, format in _byteunits:
1187 1188 if nbytes >= divisor * multiplier:
1188 1189 return format % (nbytes / float(divisor))
1189 1190 return _byteunits[-1][2] % nbytes
1190 1191
1191 1192 def uirepr(s):
1192 1193 # Avoid double backslash in Windows path repr()
1193 1194 return repr(s).replace('\\\\', '\\')
1194 1195
1195 1196 # delay import of textwrap
1196 1197 def MBTextWrapper(**kwargs):
1197 1198 class tw(textwrap.TextWrapper):
1198 1199 """
1199 1200 Extend TextWrapper for width-awareness.
1200 1201
1201 1202 Neither the number of 'bytes' in any encoding nor the number of
1202 1203 'characters' is appropriate for calculating terminal columns of a string.
1203 1204 
1204 1205 The original TextWrapper implementation uses the built-in 'len()' directly,
1205 1206 so overriding is needed to use the width information of each character.
1206 1207 
1207 1208 In addition, characters classified as having 'ambiguous' width are
1208 1209 treated as wide in East Asian locales, but as narrow elsewhere.
1209 1210 
1210 1211 This requires a user decision to determine the width of such characters.
1211 1212 """
1212 1213 def __init__(self, **kwargs):
1213 1214 textwrap.TextWrapper.__init__(self, **kwargs)
1214 1215
1215 1216 # for compatibility between 2.4 and 2.6
1216 1217 if getattr(self, 'drop_whitespace', None) is None:
1217 1218 self.drop_whitespace = kwargs.get('drop_whitespace', True)
1218 1219
1219 1220 def _cutdown(self, ucstr, space_left):
1220 1221 l = 0
1221 1222 colwidth = encoding.ucolwidth
1222 1223 for i in xrange(len(ucstr)):
1223 1224 l += colwidth(ucstr[i])
1224 1225 if space_left < l:
1225 1226 return (ucstr[:i], ucstr[i:])
1226 1227 return ucstr, ''
1227 1228
1228 1229 # overriding of base class
1229 1230 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
1230 1231 space_left = max(width - cur_len, 1)
1231 1232
1232 1233 if self.break_long_words:
1233 1234 cut, res = self._cutdown(reversed_chunks[-1], space_left)
1234 1235 cur_line.append(cut)
1235 1236 reversed_chunks[-1] = res
1236 1237 elif not cur_line:
1237 1238 cur_line.append(reversed_chunks.pop())
1238 1239
1239 1240 # this overriding code is imported from TextWrapper of python 2.6
1240 1241 # to calculate columns of string by 'encoding.ucolwidth()'
1241 1242 def _wrap_chunks(self, chunks):
1242 1243 colwidth = encoding.ucolwidth
1243 1244
1244 1245 lines = []
1245 1246 if self.width <= 0:
1246 1247 raise ValueError("invalid width %r (must be > 0)" % self.width)
1247 1248
1248 1249 # Arrange in reverse order so items can be efficiently popped
1249 1250 # from a stack of chunks.
1250 1251 chunks.reverse()
1251 1252
1252 1253 while chunks:
1253 1254
1254 1255 # Start the list of chunks that will make up the current line.
1255 1256 # cur_len is just the length of all the chunks in cur_line.
1256 1257 cur_line = []
1257 1258 cur_len = 0
1258 1259
1259 1260 # Figure out which static string will prefix this line.
1260 1261 if lines:
1261 1262 indent = self.subsequent_indent
1262 1263 else:
1263 1264 indent = self.initial_indent
1264 1265
1265 1266 # Maximum width for this line.
1266 1267 width = self.width - len(indent)
1267 1268
1268 1269 # First chunk on line is whitespace -- drop it, unless this
1269 1270 # is the very beginning of the text (i.e. no lines started yet).
1270 1271 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
1271 1272 del chunks[-1]
1272 1273
1273 1274 while chunks:
1274 1275 l = colwidth(chunks[-1])
1275 1276
1276 1277 # Can at least squeeze this chunk onto the current line.
1277 1278 if cur_len + l <= width:
1278 1279 cur_line.append(chunks.pop())
1279 1280 cur_len += l
1280 1281
1281 1282 # Nope, this line is full.
1282 1283 else:
1283 1284 break
1284 1285
1285 1286 # The current line is full, and the next chunk is too big to
1286 1287 # fit on *any* line (not just this one).
1287 1288 if chunks and colwidth(chunks[-1]) > width:
1288 1289 self._handle_long_word(chunks, cur_line, cur_len, width)
1289 1290
1290 1291 # If the last chunk on this line is all whitespace, drop it.
1291 1292 if (self.drop_whitespace and
1292 1293 cur_line and cur_line[-1].strip() == ''):
1293 1294 del cur_line[-1]
1294 1295
1295 1296 # Convert current line back to a string and store it in list
1296 1297 # of all lines (return value).
1297 1298 if cur_line:
1298 1299 lines.append(indent + ''.join(cur_line))
1299 1300
1300 1301 return lines
1301 1302
1302 1303 global MBTextWrapper
1303 1304 MBTextWrapper = tw
1304 1305 return tw(**kwargs)
1305 1306
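
The essential trick is replacing len() with a column-width function. A standalone sketch using only the standard library, with unicodedata standing in for Mercurial's encoding.ucolwidth:

    import unicodedata

    def colwidth(ustr):
        # 'W'ide and 'F'ullwidth characters take two terminal columns;
        # counting 'A'mbiguous ones is the user decision mentioned above
        return sum(2 if unicodedata.east_asian_width(c) in 'WF' else 1
                   for c in ustr)

    colwidth(u'abc')             # 3
    colwidth(u'\u6f22\u5b57')    # 4: two wide CJK characters
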
1306 1307 def wrap(line, width, initindent='', hangindent=''):
1307 1308 maxindent = max(len(hangindent), len(initindent))
1308 1309 if width <= maxindent:
1309 1310 # adjust for weird terminal size
1310 1311 width = max(78, maxindent + 1)
1311 1312 line = line.decode(encoding.encoding, encoding.encodingmode)
1312 1313 initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
1313 1314 hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
1314 1315 wrapper = MBTextWrapper(width=width,
1315 1316 initial_indent=initindent,
1316 1317 subsequent_indent=hangindent)
1317 1318 return wrapper.fill(line).encode(encoding.encoding)
1318 1319
1319 1320 def iterlines(iterator):
1320 1321 for chunk in iterator:
1321 1322 for line in chunk.splitlines():
1322 1323 yield line
1323 1324
1324 1325 def expandpath(path):
1325 1326 return os.path.expanduser(os.path.expandvars(path))
1326 1327
1327 1328 def hgcmd():
1328 1329 """Return the command used to execute current hg
1329 1330
1330 1331 This is different from hgexecutable() because on Windows we want
1331 1332 to avoid things that open new shell windows, like batch files, so we
1332 1333 return either the python call or the current executable.
1333 1334 """
1334 1335 if mainfrozen():
1335 1336 return [sys.executable]
1336 1337 return gethgcmd()
1337 1338
1338 1339 def rundetached(args, condfn):
1339 1340 """Execute the argument list in a detached process.
1340 1341
1341 1342 condfn is a callable which is called repeatedly and should return
1342 1343 True once the child process is known to have started successfully.
1343 1344 At this point, the child process PID is returned. If the child
1344 1345 process fails to start or finishes before condfn() evaluates to
1345 1346 True, return -1.
1346 1347 """
1347 1348 # Windows case is easier because the child process is either
1348 1349 # successfully starting and validating the condition or exiting
1349 1350 # on failure. We just poll on its PID. On Unix, if the child
1350 1351 # process fails to start, it will be left in a zombie state until
1351 1352 the parent waits on it, which we cannot do since we expect a long
1352 1353 # running process on success. Instead we listen for SIGCHLD telling
1353 1354 # us our child process terminated.
1354 1355 terminated = set()
1355 1356 def handler(signum, frame):
1356 1357 terminated.add(os.wait())
1357 1358 prevhandler = None
1358 1359 SIGCHLD = getattr(signal, 'SIGCHLD', None)
1359 1360 if SIGCHLD is not None:
1360 1361 prevhandler = signal.signal(SIGCHLD, handler)
1361 1362 try:
1362 1363 pid = spawndetached(args)
1363 1364 while not condfn():
1364 1365 if ((pid in terminated or not testpid(pid))
1365 1366 and not condfn()):
1366 1367 return -1
1367 1368 time.sleep(0.1)
1368 1369 return pid
1369 1370 finally:
1370 1371 if prevhandler is not None:
1371 1372 signal.signal(signal.SIGCHLD, prevhandler)
1372 1373
1373 1374 try:
1374 1375 any, all = any, all
1375 1376 except NameError:
1376 1377 def any(iterable):
1377 1378 for i in iterable:
1378 1379 if i:
1379 1380 return True
1380 1381 return False
1381 1382
1382 1383 def all(iterable):
1383 1384 for i in iterable:
1384 1385 if not i:
1385 1386 return False
1386 1387 return True
1387 1388
1388 1389 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
1389 1390 """Return the result of interpolating items in the mapping into string s.
1390 1391
1391 1392 prefix is a single character string, or a two character string with
1392 1393 a backslash as the first character if the prefix needs to be escaped in
1393 1394 a regular expression.
1394 1395
1395 1396 fn is an optional function that will be applied to the replacement text
1396 1397 just before replacement.
1397 1398
1398 1399 escape_prefix is an optional flag that allows a doubled prefix to be
1399 1400 used as an escape for the prefix itself.
1400 1401 """
1401 1402 fn = fn or (lambda s: s)
1402 1403 patterns = '|'.join(mapping.keys())
1403 1404 if escape_prefix:
1404 1405 patterns += '|' + prefix
1405 1406 if len(prefix) > 1:
1406 1407 prefix_char = prefix[1:]
1407 1408 else:
1408 1409 prefix_char = prefix
1409 1410 mapping[prefix_char] = prefix_char
1410 1411 r = re.compile(r'%s(%s)' % (prefix, patterns))
1411 1412 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
1412 1413
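
For example (a hypothetical mapping; note the keys are spliced into a regular expression, so they must not contain unescaped regex metacharacters):

    mapping = {'a': 'apple', 'b': 'banana'}
    interpolate('%', mapping, 'eat a %a or a %b')
    # -> 'eat a apple or a banana'
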
1413 1414 def getport(port):
1414 1415 """Return the port for a given network service.
1415 1416
1416 1417 If port is an integer, it's returned as is. If it's a string, it's
1417 1418 looked up using socket.getservbyname(). If there's no matching
1418 1419 service, util.Abort is raised.
1419 1420 """
1420 1421 try:
1421 1422 return int(port)
1422 1423 except ValueError:
1423 1424 pass
1424 1425
1425 1426 try:
1426 1427 return socket.getservbyname(port)
1427 1428 except socket.error:
1428 1429 raise Abort(_("no port number associated with service '%s'") % port)
1429 1430
1430 1431 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
1431 1432 '0': False, 'no': False, 'false': False, 'off': False,
1432 1433 'never': False}
1433 1434
1434 1435 def parsebool(s):
1435 1436 """Parse s into a boolean.
1436 1437
1437 1438 If s is not a valid boolean, returns None.
1438 1439 """
1439 1440 return _booleans.get(s.lower(), None)
1440 1441
1441 1442 _hexdig = '0123456789ABCDEFabcdef'
1442 1443 _hextochr = dict((a + b, chr(int(a + b, 16)))
1443 1444 for a in _hexdig for b in _hexdig)
1444 1445
1445 1446 def _urlunquote(s):
1446 1447 """unquote('abc%20def') -> 'abc def'."""
1447 1448 res = s.split('%')
1448 1449 # fastpath
1449 1450 if len(res) == 1:
1450 1451 return s
1451 1452 s = res[0]
1452 1453 for item in res[1:]:
1453 1454 try:
1454 1455 s += _hextochr[item[:2]] + item[2:]
1455 1456 except KeyError:
1456 1457 s += '%' + item
1457 1458 except UnicodeDecodeError:
1458 1459 s += unichr(int(item[:2], 16)) + item[2:]
1459 1460 return s
1460 1461
1461 1462 class url(object):
1462 1463 r"""Reliable URL parser.
1463 1464
1464 1465 This parses URLs and provides attributes for the following
1465 1466 components:
1466 1467
1467 1468 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
1468 1469
1469 1470 Missing components are set to None. The only exception is
1470 1471 fragment, which is set to '' if present but empty.
1471 1472
1472 1473 If parsefragment is False, fragment is included in query. If
1473 1474 parsequery is False, query is included in path. If both are
1474 1475 False, both fragment and query are included in path.
1475 1476
1476 1477 See http://www.ietf.org/rfc/rfc2396.txt for more information.
1477 1478
1478 1479 Note that for backward compatibility reasons, bundle URLs do not
1479 1480 take host names. That means 'bundle://../' has a path of '../'.
1480 1481
1481 1482 Examples:
1482 1483
1483 1484 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
1484 1485 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
1485 1486 >>> url('ssh://[::1]:2200//home/joe/repo')
1486 1487 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
1487 1488 >>> url('file:///home/joe/repo')
1488 1489 <url scheme: 'file', path: '/home/joe/repo'>
1489 1490 >>> url('file:///c:/temp/foo/')
1490 1491 <url scheme: 'file', path: 'c:/temp/foo/'>
1491 1492 >>> url('bundle:foo')
1492 1493 <url scheme: 'bundle', path: 'foo'>
1493 1494 >>> url('bundle://../foo')
1494 1495 <url scheme: 'bundle', path: '../foo'>
1495 1496 >>> url(r'c:\foo\bar')
1496 1497 <url path: 'c:\\foo\\bar'>
1497 1498 >>> url(r'\\blah\blah\blah')
1498 1499 <url path: '\\\\blah\\blah\\blah'>
1499 1500 >>> url(r'\\blah\blah\blah#baz')
1500 1501 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
1501 1502
1502 1503 Authentication credentials:
1503 1504
1504 1505 >>> url('ssh://joe:xyz@x/repo')
1505 1506 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
1506 1507 >>> url('ssh://joe@x/repo')
1507 1508 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
1508 1509
1509 1510 Query strings and fragments:
1510 1511
1511 1512 >>> url('http://host/a?b#c')
1512 1513 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
1513 1514 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
1514 1515 <url scheme: 'http', host: 'host', path: 'a?b#c'>
1515 1516 """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:"
    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)
            if not path:
                path = None

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith(r'\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLs
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, _urlunquote(v))

    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it; the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))
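
    # A sketch of what authinfo() returns (illustrative values, not a
    # doctest): the first element is the URL with credentials stripped,
    # the second is either None or a tuple shaped for urllib2's password
    # manager (realm, uris, user, passwd).
    #
    #   u = url('http://joe:xyz@example.com/repo')
    #   u.authinfo()
    #   -> ('http://example.com/repo',
    #       (None, ('http://example.com/repo', 'example.com'), 'joe', 'xyz'))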

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(r'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False
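
    # isabs() in practice (an illustrative sketch, not a doctest):
    #
    #   url('ssh://host/repo').isabs()  -> True   # remote URL
    #   url(r'c:\repo').isabs()         -> True   # Windows drive letter
    #   url('/home/joe/repo').isabs()   -> True   # POSIX absolute path
    #   url('relative/path').isabs()    -> False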

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath
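
    # localpath() sketch: file: and bundle: URLs yield a usable
    # filesystem path; everything else falls back to the original
    # string (values below are illustrative, not a doctest):
    #
    #   url('file:///tmp/repo').localpath()    -> '/tmp/repo'
    #   url('file:///c:/tmp/repo').localpath() -> 'c:/tmp/repo'
    #   url('http://host/repo').localpath()    -> 'http://host/repo'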

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()
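
# Quick checks with the two helpers above (illustrative, not a doctest):
#
#   hasscheme('http://example.com') -> True
#   hasscheme('/home/joe/repo')     -> False
#   hasdriveletter('c:/temp')       -> True
#   hasdriveletter('/tmp')          -> False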

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def hidepassword(u):
    '''hide user credentials in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return str(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return str(u)
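
# Contrast of the two scrubbing helpers above (illustrative values,
# not a doctest):
#
#   hidepassword('http://joe:xyz@host/repo') -> 'http://joe:***@host/repo'
#   removeauth('http://joe:xyz@host/repo')   -> 'http://host/repo'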

def isatty(fd):
    try:
        return fd.isatty()
    except AttributeError:
        return False
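
# isatty() sketch (illustrative): file-like objects report their terminal
# status, and anything without an isatty() method is treated as not a tty.
#
#   import sys
#   isatty(sys.stdout)   -> True under a terminal, False when piped
#   isatty('not a file') -> False  (strings lack isatty())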