##// END OF EJS Templates
py3: use email.generator.BytesGenerator in patch.split()...
Denis Laxalde -
r43426:0e6a7ce8 default
parent child Browse files
Show More
@@ -1,465 +1,470 b''
1 1 # mail.py - mail sending bits for mercurial
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import email
11 11 import email.charset
12 import email.generator
12 13 import email.header
13 14 import email.message
14 15 import email.parser
15 16 import io
16 17 import os
17 18 import smtplib
18 19 import socket
19 20 import time
20 21
21 22 from .i18n import _
22 23 from .pycompat import (
23 24 getattr,
24 25 open,
25 26 )
26 27 from . import (
27 28 encoding,
28 29 error,
29 30 pycompat,
30 31 sslutil,
31 32 util,
32 33 )
33 34 from .utils import (
34 35 procutil,
35 36 stringutil,
36 37 )
37 38
38 39
class STARTTLS(smtplib.SMTP):
    '''Derived class to verify the peer certificate for STARTTLS.

    This class allows to pass any keyword arguments to SSL socket creation.
    '''

    def __init__(self, ui, host=None, **kwargs):
        smtplib.SMTP.__init__(self, **kwargs)
        # kept for certificate validation when the socket is wrapped
        self._ui = ui
        self._host = host

    def starttls(self, keyfile=None, certfile=None):
        # refuse to continue if the server does not advertise STARTTLS
        if not self.has_extn(b"starttls"):
            msg = b"STARTTLS extension not supported by server"
            raise smtplib.SMTPException(msg)
        (resp, reply) = self.docmd(b"STARTTLS")
        if resp == 220:
            # 220 means the server is ready to negotiate TLS: wrap the
            # existing plaintext socket via sslutil so the ui/hostname are
            # available for later certificate validation
            self.sock = sslutil.wrapsocket(
                self.sock,
                keyfile,
                certfile,
                ui=self._ui,
                serverhostname=self._host,
            )
            # NOTE(review): smtplib.SSLFakeFile exists on Python 2 only; this
            # branch presumably predates py3 support -- confirm before reuse
            self.file = smtplib.SSLFakeFile(self.sock)
            # after STARTTLS the SMTP session state must be reset so a
            # fresh EHLO is issued over the encrypted channel
            self.helo_resp = None
            self.ehlo_resp = None
            self.esmtp_features = {}
            self.does_esmtp = 0
        return (resp, reply)
69 70
70 71
class SMTPS(smtplib.SMTP):
    '''Derived class to verify the peer certificate for SMTPS.

    This class allows to pass any keyword arguments to SSL socket creation.
    '''

    def __init__(self, ui, keyfile=None, certfile=None, host=None, **kwargs):
        self.keyfile = keyfile
        self.certfile = certfile
        smtplib.SMTP.__init__(self, **kwargs)
        self._host = host
        # SMTPS is TLS from the first byte, so it uses the dedicated port
        self.default_port = smtplib.SMTP_SSL_PORT
        self._ui = ui

    def _get_socket(self, host, port, timeout):
        # override of smtplib.SMTP._get_socket: wrap the connection in TLS
        # immediately (unlike STARTTLS, which upgrades a plaintext session)
        if self.debuglevel > 0:
            self._ui.debug(b'connect: %r\n' % ((host, port),))
        new_socket = socket.create_connection((host, port), timeout)
        new_socket = sslutil.wrapsocket(
            new_socket,
            self.keyfile,
            self.certfile,
            ui=self._ui,
            serverhostname=self._host,
        )
        self.file = new_socket.makefile(r'rb')
        return new_socket
98 99
99 100
100 101 def _pyhastls():
101 102 """Returns true iff Python has TLS support, false otherwise."""
102 103 try:
103 104 import ssl
104 105
105 106 getattr(ssl, 'HAS_TLS', False)
106 107 return True
107 108 except ImportError:
108 109 return False
109 110
110 111
def _smtp(ui):
    '''build an smtp connection and return a function to send mail'''
    local_hostname = ui.config(b'smtp', b'local_hostname')
    tls = ui.config(b'smtp', b'tls')
    # backward compatible: when tls = true, we use starttls.
    starttls = tls == b'starttls' or stringutil.parsebool(tls)
    smtps = tls == b'smtps'
    if (starttls or smtps) and not _pyhastls():
        raise error.Abort(_(b"can't use TLS: Python SSL support not installed"))
    mailhost = ui.config(b'smtp', b'host')
    if not mailhost:
        raise error.Abort(_(b'smtp.host not configured - cannot send mail'))
    # choose the transport class matching the configured TLS mode
    if smtps:
        ui.note(_(b'(using smtps)\n'))
        s = SMTPS(ui, local_hostname=local_hostname, host=mailhost)
    elif starttls:
        s = STARTTLS(ui, local_hostname=local_hostname, host=mailhost)
    else:
        s = smtplib.SMTP(local_hostname=local_hostname)
    # default ports: 465 for implicit-TLS SMTPS, 25 otherwise
    if smtps:
        defaultport = 465
    else:
        defaultport = 25
    mailport = util.getport(ui.config(b'smtp', b'port', defaultport))
    ui.note(_(b'sending mail: smtp host %s, port %d\n') % (mailhost, mailport))
    s.connect(host=mailhost, port=mailport)
    if starttls:
        ui.note(_(b'(using starttls)\n'))
        # EHLO is issued before STARTTLS (to learn the extension) and again
        # after it (session state is reset by the TLS upgrade)
        s.ehlo()
        s.starttls()
        s.ehlo()
    if starttls or smtps:
        ui.note(_(b'(verifying remote certificate)\n'))
        sslutil.validatesocket(s.sock)
    username = ui.config(b'smtp', b'username')
    password = ui.config(b'smtp', b'password')
    if username and not password:
        # only the username is configured: prompt for the password
        password = ui.getpass()
    if username and password:
        ui.note(_(b'(authenticating to mail server as %s)\n') % username)
        try:
            s.login(username, password)
        except smtplib.SMTPException as inst:
            raise error.Abort(inst)

    def send(sender, recipients, msg):
        # closure over the live connection; converts SMTP failures into
        # user-facing Abort errors instead of tracebacks
        try:
            return s.sendmail(sender, recipients, msg)
        except smtplib.SMTPRecipientsRefused as inst:
            recipients = [r[1] for r in inst.recipients.values()]
            raise error.Abort(b'\n' + b'\n'.join(recipients))
        except smtplib.SMTPException as inst:
            raise error.Abort(inst)

    return send
166 167
167 168
def _sendmail(ui, sender, recipients, msg):
    '''send mail using sendmail.'''
    program = ui.config(b'email', b'method')

    def stremail(x):
        # extract the bare address and shell-quote it for the command line
        return procutil.shellquote(stringutil.email(encoding.strtolocal(x)))

    cmdline = b'%s -f %s %s' % (
        program,
        stremail(sender),
        b' '.join(map(stremail, recipients)),
    )
    ui.note(_(b'sending mail: %s\n') % cmdline)
    fp = procutil.popen(cmdline, b'wb')
    # the sendmail program expects platform-native line endings on stdin
    fp.write(util.tonativeeol(msg))
    ret = fp.close()
    if ret:
        # non-zero exit: report the program name and a human-readable
        # explanation of the exit status
        raise error.Abort(
            b'%s %s'
            % (
                os.path.basename(program.split(None, 1)[0]),
                procutil.explainexit(ret),
            )
        )
192 193
193 194
def _mbox(mbox, sender, recipients, msg):
    '''write mails to mbox'''
    # Should be time.asctime(), but Windows prints 2-characters day
    # of month instead of one. Make them print the same thing.
    stamp = time.strftime(r'%a %b %d %H:%M:%S %Y', time.localtime())
    fromline = b'From %s %s\n' % (
        encoding.strtolocal(sender),
        encoding.strtolocal(stamp),
    )
    fp = open(mbox, b'ab+')
    fp.write(fromline)
    fp.write(msg)
    fp.write(b'\n\n')
    fp.close()
207 208
208 209
def connect(ui, mbox=None):
    '''make a mail connection. return a function to send mail.
    call as sendmail(sender, list-of-recipients, msg).'''
    if mbox:
        # truncate the mbox so each run starts from an empty file
        open(mbox, b'wb').close()

        def sendtombox(s, r, m):
            return _mbox(mbox, s, r, m)

        return sendtombox
    if ui.config(b'email', b'method') == b'smtp':
        return _smtp(ui)

    def sendviaprogram(s, r, m):
        return _sendmail(ui, s, r, m)

    return sendviaprogram
218 219
219 220
def sendmail(ui, sender, recipients, msg, mbox=None):
    '''One-shot helper: open a mail transport and send a single message.'''
    return connect(ui, mbox=mbox)(sender, recipients, msg)
223 224
224 225
def validateconfig(ui):
    '''determine if we have enough config data to try sending email.'''
    method = ui.config(b'email', b'method')
    if method == b'smtp':
        # SMTP transport needs a host; nothing else is mandatory here
        if ui.config(b'smtp', b'host'):
            return
        raise error.Abort(
            _(
                b'smtp specified as email transport, '
                b'but no smtp host configured'
            )
        )
    # any other method names an external program that must be on PATH
    if not procutil.findexe(method):
        raise error.Abort(
            _(b'%r specified as email transport, but not in PATH') % method
        )
241 242
242 243
def codec2iana(cs):
    '''Map a Python codec name to the corresponding IANA charset name.

    Normalization is delegated to email.charset.Charset; the only manual
    fixup is re-inserting the dash in "isoXXXX" names, since the IANA
    registry spells them "iso-XXXX" while Python's codec aliases drop it
    (e.g. "latin1" -> "iso8859-1" -> "iso-8859-1").  Returns bytes.
    '''
    cs = pycompat.sysbytes(email.charset.Charset(cs).input_charset.lower())

    # "latin1" normalizes to "iso8859-1", standard calls for "iso-8859-1"
    if cs.startswith(b"iso") and not cs.startswith(b"iso-"):
        return b"iso-" + cs[3:]
    return cs
251 252
252 253
def mimetextpatch(s, subtype=b'plain', display=False):
    '''Return MIME message suitable for a patch.
    Charset will be detected by first trying to decode as us-ascii, then utf-8,
    and finally the global encodings. If all those fail, fall back to
    ISO-8859-1, an encoding with that allows all byte sequences.
    Transfer encodings will be used if necessary.'''

    if display:
        candidates = [b'us-ascii']
    else:
        candidates = [
            b'us-ascii',
            b'utf-8',
            encoding.encoding,
            encoding.fallbackencoding,
        ]
    for charset in candidates:
        try:
            s.decode(pycompat.sysstr(charset))
        except UnicodeDecodeError:
            continue
        return mimetextqp(s, subtype, codec2iana(charset))

    # nothing decoded cleanly; ISO-8859-1 accepts every byte sequence
    return mimetextqp(s, subtype, b"iso-8859-1")
271 272
272 273
def mimetextqp(body, subtype, charset):
    '''Return MIME message.
    Quoted-printable transfer encoding will be used if necessary.
    '''
    cs = email.charset.Charset(charset)
    msg = email.message.Message()
    msg.set_type(pycompat.sysstr(b'text/' + subtype))

    for line in body.splitlines():
        if len(line) > 950:
            # lines this long would exceed SMTP limits; switch the whole
            # body to quoted-printable so they get soft-wrapped
            cs.body_encoding = email.charset.QP
            break

    # On Python 2, this simply assigns a value. Python 3 inspects
    # body and does different things depending on whether it has
    # encode() or decode() attributes. We can get the old behavior
    # if we pass a str and charset is None and we call set_charset().
    # But we may get into trouble later due to Python attempting to
    # encode/decode using the registered charset (or attempting to
    # use ascii in the absence of a charset).
    msg.set_payload(body, cs)

    return msg
296 297
297 298
def _charsets(ui):
    '''Obtains charsets to send mail parts not containing patches.'''
    charsets = [cs.lower() for cs in ui.configlist(b'email', b'charsets')]
    # append fallbacks, keeping order and skipping duplicates
    for fallback in (
        encoding.fallbackencoding.lower(),
        encoding.encoding.lower(),
        b'utf-8',
    ):
        if fallback not in charsets:
            charsets.append(fallback)
    # ascii variants are dropped: ascii-clean text needs no declaration
    return [cs for cs in charsets if not cs.endswith(b'ascii')]
310 311
311 312
def _encode(ui, s, charsets):
    '''Returns (converted) string, charset tuple.
    Finds out best charset by cycling through sendcharsets in descending
    order. Tries both encoding and fallbackencoding for input. Only as
    last resort send as is in fake ascii.
    Caveat: Do not use for mail parts containing patches!'''
    sendcharsets = charsets or _charsets(ui)
    if not isinstance(s, bytes):
        # We have unicode data, which we need to try and encode to
        # some reasonable-ish encoding. Try the encodings the user
        # wants, and fall back to garbage-in-ascii.
        for ocs in sendcharsets:
            try:
                return s.encode(pycompat.sysstr(ocs)), ocs
            except UnicodeEncodeError:
                pass
            except LookupError:
                ui.warn(_(b'ignoring invalid sendcharset: %s\n') % ocs)
        # Everything failed, ascii-armor what we've got and send it.
        # Fix: return a (bytes, charset) pair like every other path --
        # callers unpack the result as "s, cs = _encode(...)", so the
        # previous bare-bytes return would blow up at the call site.
        return s.encode('ascii', 'backslashreplace'), b'us-ascii'
    # We have a bytes of unknown encoding. We'll try and guess a valid
    # encoding, falling back to pretending we had ascii even though we
    # know that's wrong.
    try:
        s.decode('ascii')
    except UnicodeDecodeError:
        for ics in (encoding.encoding, encoding.fallbackencoding):
            try:
                u = s.decode(ics)
            except UnicodeDecodeError:
                continue
            for ocs in sendcharsets:
                try:
                    return u.encode(pycompat.sysstr(ocs)), ocs
                except UnicodeEncodeError:
                    pass
                except LookupError:
                    ui.warn(_(b'ignoring invalid sendcharset: %s\n') % ocs)
    # if ascii, or all conversion attempts fail, send (broken) ascii
    return s, b'us-ascii'
353 354
354 355
def headencode(ui, s, charsets=None, display=False):
    '''Returns RFC-2047 compliant header from given string.'''
    if display:
        # display mode: hand the raw value back untouched
        return s
    # split into words?
    s, cs = _encode(ui, s, charsets)
    return str(email.header.Header(s, cs))
362 363
363 364
def _addressencode(ui, name, addr, charsets=None):
    # Encode a (display-name, address) pair into an RFC-2047/IDNA-safe
    # header value (bytes). The display name goes through headencode();
    # the domain part is IDNA-encoded when it is not pure ascii.
    assert isinstance(addr, bytes)
    name = headencode(ui, name, charsets)
    try:
        acc, dom = addr.split(b'@')
        # local part must be ascii; raises UnicodeDecodeError otherwise
        acc.decode('ascii')
        # internationalized domains are converted to their idna form
        dom = dom.decode(pycompat.sysstr(encoding.encoding)).encode('idna')
        addr = b'%s@%s' % (acc, dom)
    except UnicodeDecodeError:
        raise error.Abort(_(b'invalid email address: %s') % addr)
    except ValueError:
        # no '@' present: treat as a bare local address
        try:
            # too strict?
            addr.decode('ascii')
        except UnicodeDecodeError:
            raise error.Abort(_(b'invalid local address: %s') % addr)
    return pycompat.bytesurl(
        email.utils.formataddr((name, encoding.strfromlocal(addr)))
    )
383 384
384 385
def addressencode(ui, address, charsets=None, display=False):
    '''Turns address into RFC-2047 compliant header.'''
    if not address or display:
        # nothing to encode, or caller wants the raw value for display
        return address or b''
    name, addr = email.utils.parseaddr(encoding.strfromlocal(address))
    return _addressencode(ui, name, encoding.strtolocal(addr), charsets)
391 392
392 393
def addrlistencode(ui, addrs, charsets=None, display=False):
    '''Turns a list of addresses into a list of RFC-2047 compliant headers.
    A single element of input list may contain multiple addresses, but output
    always has one address per item'''
    for a in addrs:
        assert isinstance(a, bytes), r'%r unexpectedly not a bytestr' % a
    if display:
        # display mode: just strip whitespace, dropping empty entries
        return [a.strip() for a in addrs if a.strip()]

    straddrs = [encoding.strfromlocal(a) for a in addrs]
    # getaddresses() splits multi-address entries into individual pairs
    return [
        _addressencode(ui, name, encoding.strtolocal(addr), charsets)
        for name, addr in email.utils.getaddresses(straddrs)
        if name or addr
    ]
410 411
411 412
def mimeencode(ui, s, charsets=None, display=False):
    '''creates mime text object, encodes it if needed, and sets
    charset and transfer-encoding accordingly.'''
    if display:
        # display mode skips charset detection and labels the text ascii
        return mimetextqp(s, b'plain', b'us-ascii')
    s, cs = _encode(ui, s, charsets)
    return mimetextqp(s, b'plain', cs)
419 420
420 421
if pycompat.ispy3:

    # On Python 3 messages must be flattened with BytesGenerator so patch
    # payloads are written out as bytes, unmodified.
    Generator = email.generator.BytesGenerator

    def parse(fp):
        # Parse an email message from binary file object fp.
        ep = email.parser.Parser()
        # disable the "universal newlines" mode, which isn't binary safe.
        # I have no idea if ascii/surrogateescape is correct, but that's
        # what the standard Python email parser does.
        fp = io.TextIOWrapper(
            fp, encoding=r'ascii', errors=r'surrogateescape', newline=chr(10)
        )
        try:
            return ep.parse(fp)
        finally:
            # detach() so closing the wrapper does not close the caller's fp
            fp.detach()


else:

    # Python 2 strings are already bytes, so the plain Generator suffices.
    Generator = email.generator.Generator

    def parse(fp):
        # Parse an email message from file object fp (py2: bytes-native).
        ep = email.parser.Parser()
        return ep.parse(fp)
442 447
443 448
def headdecode(s):
    '''Decodes RFC-2047 header'''
    uparts = []
    for part, charset in email.header.decode_header(s):
        if charset is not None:
            # an explicit =?charset?...?= chunk: try its declared charset
            try:
                uparts.append(part.decode(charset))
                continue
            except UnicodeDecodeError:
                pass
        # On Python 3, decode_header() may return either bytes or unicode
        # depending on whether the header has =?<charset>? or not
        if isinstance(part, type(u'')):
            uparts.append(part)
            continue
        try:
            uparts.append(part.decode('UTF-8'))
            continue
        except UnicodeDecodeError:
            pass
        # last resort: ISO-8859-1 maps every byte, so this cannot fail
        uparts.append(part.decode('ISO-8859-1'))
    return encoding.unitolocal(u' '.join(uparts))
@@ -1,3219 +1,3218 b''
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import collections
12 12 import contextlib
13 13 import copy
14 import email
15 14 import errno
16 15 import hashlib
17 16 import os
18 17 import re
19 18 import shutil
20 19 import zlib
21 20
22 21 from .i18n import _
23 22 from .node import (
24 23 hex,
25 24 short,
26 25 )
27 26 from .pycompat import open
28 27 from . import (
29 28 copies,
30 29 diffhelper,
31 30 diffutil,
32 31 encoding,
33 32 error,
34 33 mail,
35 34 mdiff,
36 35 pathutil,
37 36 pycompat,
38 37 scmutil,
39 38 similar,
40 39 util,
41 40 vfs as vfsmod,
42 41 )
43 42 from .utils import (
44 43 dateutil,
45 44 procutil,
46 45 stringutil,
47 46 )
48 47
49 48 stringio = util.stringio
50 49
51 50 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
52 51 tabsplitter = re.compile(br'(\t+|[^\t]+)')
53 52 wordsplitter = re.compile(
54 53 br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|[^ \ta-zA-Z0-9_\x80-\xff])'
55 54 )
56 55
57 56 PatchError = error.PatchError
58 57
59 58 # public functions
60 59
61 60
def split(stream):
    '''return an iterator of individual patches from a stream'''

    def isheader(line, inheader):
        # does this line look like an RFC-822 "Key: value" header (or a
        # continuation of one)?
        if inheader and line.startswith((b' ', b'\t')):
            # continuation
            return True
        if line.startswith((b' ', b'-', b'+')):
            # diff line - don't check for header pattern in there
            return False
        l = line.split(b': ', 1)
        return len(l) == 2 and b' ' not in l[0]

    def chunk(lines):
        # materialize accumulated lines as a file-like object
        return stringio(b''.join(lines))

    def hgsplit(stream, cur):
        # split concatenated "# HG changeset patch" style patches
        inheader = True

        for line in stream:
            if not line.strip():
                inheader = False
            if not inheader and line.startswith(b'# HG changeset patch'):
                yield chunk(cur)
                cur = []
                inheader = True

            cur.append(line)

        if cur:
            yield chunk(cur)

    def mboxsplit(stream, cur):
        # mbox format: messages are delimited by "From " lines; each
        # message body (minus the delimiter) is recursively re-split
        for line in stream:
            if line.startswith(b'From '):
                for c in split(chunk(cur[1:])):
                    yield c
                cur = []

            cur.append(line)

        if cur:
            for c in split(chunk(cur[1:])):
                yield c

    def mimesplit(stream, cur):
        def msgfp(m):
            # flatten a (sub)message back into a file-like object
            fp = stringio()
            g = mail.Generator(fp, mangle_from_=False)
            g.flatten(m)
            fp.seek(0)
            return fp

        # read the rest of the stream; MIME structure requires the whole
        # message before parts can be walked
        for line in stream:
            cur.append(line)
        c = chunk(cur)

        m = mail.parse(c)
        if not m.is_multipart():
            yield msgfp(m)
        else:
            # only yield parts that plausibly contain patch text
            ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
            for part in m.walk():
                ct = part.get_content_type()
                if ct not in ok_types:
                    continue
                yield msgfp(part)

    def headersplit(stream, cur):
        # plain patches pasted together: a new run of header lines after
        # non-header content starts a new patch
        inheader = False

        for line in stream:
            if not inheader and isheader(line, inheader):
                yield chunk(cur)
                cur = []
                inheader = True
            if inheader and not isheader(line, inheader):
                inheader = False

            cur.append(line)

        if cur:
            yield chunk(cur)

    def remainder(cur):
        # fallback: the whole input is one patch
        yield chunk(cur)

    class fiter(object):
        # adapt objects that have readline() but no iterator protocol
        def __init__(self, fp):
            self.fp = fp

        def __iter__(self):
            return self

        def next(self):
            l = self.fp.readline()
            if not l:
                raise StopIteration
            return l

        __next__ = next

    inheader = False
    cur = []

    mimeheaders = [b'content-type']

    if not util.safehasattr(stream, b'next'):
        # http responses, for example, have readline but not next
        stream = fiter(stream)

    # sniff the start of the input to pick the right splitting strategy;
    # lines consumed while sniffing are carried along in 'cur'
    for line in stream:
        cur.append(line)
        if line.startswith(b'# HG changeset patch'):
            return hgsplit(stream, cur)
        elif line.startswith(b'From '):
            return mboxsplit(stream, cur)
        elif isheader(line, inheader):
            inheader = True
            if line.split(b':', 1)[0].lower() in mimeheaders:
                # let email parser handle this
                return mimesplit(stream, cur)
        elif line.startswith(b'--- ') and inheader:
            # No evil headers seen by diff start, split by hand
            return headersplit(stream, cur)
        # Not enough info, keep reading

    # if we are here, we have a very plain patch
    return remainder(cur)
191 190
192 191
## Some facility for extensible patch parsing:
# list of pairs ("header to match", "data key")
# _extract() uses this to map "# <Header> <value>" lines of hg export
# patches onto keys of the metadata dictionary it returns.
patchheadermap = [
    (b'Date', b'date'),
    (b'Branch', b'branch'),
    (b'Node ID', b'nodeid'),
]
200 199
201 200
@contextlib.contextmanager
def extract(ui, fileobj):
    '''extract patch from data read from fileobj.

    patch can be a normal patch or contained in an email message.

    return a dictionary. Standard keys are:
    - filename,
    - message,
    - user,
    - date,
    - branch,
    - node,
    - p1,
    - p2.
    Any item can be missing from the dictionary. If filename is missing,
    fileobj did not contain a patch. Caller must unlink filename when done.'''

    fd, tmpname = pycompat.mkstemp(prefix=b'hg-patch-')
    tmpfp = os.fdopen(fd, r'wb')
    try:
        yield _extract(ui, fileobj, tmpname, tmpfp)
    finally:
        # the context manager owns the temporary file: close and remove it
        # once the caller is done with the extracted data
        tmpfp.close()
        os.unlink(tmpname)
227 226
228 227
def _extract(ui, fileobj, tmpname, tmpfp):
    # Parse fileobj (a plain patch or an email containing one), writing the
    # diff payload to tmpfp and returning the metadata dict documented in
    # extract().

    # attempt to detect the start of a patch
    # (this heuristic is borrowed from quilt)
    diffre = re.compile(
        br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
        br'retrieving revision [0-9]+(\.[0-9]+)*$|'
        br'---[ \t].*?^\+\+\+[ \t]|'
        br'\*\*\*[ \t].*?^---[ \t])',
        re.MULTILINE | re.DOTALL,
    )

    data = {}

    msg = mail.parse(fileobj)

    subject = msg[r'Subject'] and mail.headdecode(msg[r'Subject'])
    data[b'user'] = msg[r'From'] and mail.headdecode(msg[r'From'])
    if not subject and not data[b'user']:
        # Not an email, restore parsed headers if any
        subject = (
            b'\n'.join(
                b': '.join(map(encoding.strtolocal, h)) for h in msg.items()
            )
            + b'\n'
        )

    # should try to parse msg['Date']
    parents = []

    nodeid = msg[r'X-Mercurial-Node']
    if nodeid:
        data[b'nodeid'] = nodeid = mail.headdecode(nodeid)
        ui.debug(b'Node ID: %s\n' % nodeid)

    if subject:
        if subject.startswith(b'[PATCH'):
            # strip the "[PATCH n/m]"-style prefix from the subject
            pend = subject.find(b']')
            if pend >= 0:
                subject = subject[pend + 1 :].lstrip()
        # collapse folded (multi-line) subjects onto one line
        subject = re.sub(br'\n[ \t]+', b' ', subject)
        ui.debug(b'Subject: %s\n' % subject)
    if data[b'user']:
        ui.debug(b'From: %s\n' % data[b'user'])
    diffs_seen = 0
    ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
    message = b''
    for part in msg.walk():
        content_type = pycompat.bytestr(part.get_content_type())
        ui.debug(b'Content-Type: %s\n' % content_type)
        if content_type not in ok_types:
            continue
        payload = part.get_payload(decode=True)
        m = diffre.search(payload)
        if m:
            hgpatch = False
            hgpatchheader = False
            ignoretext = False

            ui.debug(b'found patch at byte %d\n' % m.start(0))
            diffs_seen += 1
            cfp = stringio()
            # everything before the diff is candidate commit message text
            for line in payload[: m.start(0)].splitlines():
                if line.startswith(b'# HG changeset patch') and not hgpatch:
                    ui.debug(b'patch generated by hg export\n')
                    hgpatch = True
                    hgpatchheader = True
                    # drop earlier commit message content
                    cfp.seek(0)
                    cfp.truncate()
                    subject = None
                elif hgpatchheader:
                    if line.startswith(b'# User '):
                        data[b'user'] = line[7:]
                        ui.debug(b'From: %s\n' % data[b'user'])
                    elif line.startswith(b"# Parent "):
                        parents.append(line[9:].lstrip())
                    elif line.startswith(b"# "):
                        # other "# Header value" lines map to metadata keys
                        for header, key in patchheadermap:
                            prefix = b'# %s ' % header
                            if line.startswith(prefix):
                                data[key] = line[len(prefix) :]
                                ui.debug(b'%s: %s\n' % (header, data[key]))
                    else:
                        hgpatchheader = False
                elif line == b'---':
                    # "---" conventionally ends the commit message part
                    ignoretext = True
                if not hgpatchheader and not ignoretext:
                    cfp.write(line)
                    cfp.write(b'\n')
            message = cfp.getvalue()
            if tmpfp:
                tmpfp.write(payload)
                if not payload.endswith(b'\n'):
                    tmpfp.write(b'\n')
        elif not diffs_seen and message and content_type == b'text/plain':
            # plain-text parts before any diff accumulate into the message
            message += b'\n' + payload

    if subject and not message.startswith(subject):
        message = b'%s\n%s' % (subject, message)
    data[b'message'] = message
    tmpfp.close()
    if parents:
        data[b'p1'] = parents.pop(0)
    if parents:
        data[b'p2'] = parents.pop(0)

    if diffs_seen:
        data[b'filename'] = tmpname

    return data
340 339
341 340
class patchmeta(object):
    """Patched file metadata

    'op' is the performed operation within ADD, DELETE, RENAME, MODIFY
    or COPY. 'path' is patched file path. 'oldpath' is set to the
    origin file when 'op' is either COPY or RENAME, None otherwise. If
    file mode is changed, 'mode' is a tuple (islink, isexec) where
    'islink' is True if the file is a symlink and 'isexec' is True if
    the file is executable. Otherwise, 'mode' is None.
    """

    def __init__(self, path):
        self.path = path
        self.oldpath = None
        self.mode = None
        self.op = b'MODIFY'
        self.binary = False

    def setmode(self, mode):
        # store the raw masked bits, as always: callers only rely on
        # their truthiness, not on strict booleans
        self.mode = (mode & 0o20000, mode & 0o100)

    def copy(self):
        clone = patchmeta(self.path)
        clone.oldpath = self.oldpath
        clone.mode = self.mode
        clone.op = self.op
        clone.binary = self.binary
        return clone

    def _ispatchinga(self, afile):
        if afile != b'/dev/null':
            return afile == b'a/' + (self.oldpath or self.path)
        # a missing source side is only consistent with an addition
        return self.op == b'ADD'

    def _ispatchingb(self, bfile):
        if bfile != b'/dev/null':
            return bfile == b'b/' + self.path
        # a missing target side is only consistent with a deletion
        return self.op == b'DELETE'

    def ispatching(self, afile, bfile):
        return self._ispatchinga(afile) and self._ispatchingb(bfile)

    def __repr__(self):
        return r"<patchmeta %s %r>" % (self.op, self.path)
388 387
389 388
def readgitpatch(lr):
    """extract git-style metadata about patches from <patchname>"""

    # Filter patch for git information
    gp = None
    gitpatches = []
    for line in lr:
        line = line.rstrip(b' \r\n')
        if line.startswith(b'diff --git a/'):
            m = gitre.match(line)
            if m:
                if gp:
                    # a new "diff --git" header closes the previous file
                    gitpatches.append(gp)
                dst = m.group(2)
                gp = patchmeta(dst)
        elif gp:
            if line.startswith(b'--- '):
                # "---" marks the start of hunks: this file's metadata
                # is complete
                gitpatches.append(gp)
                gp = None
                continue
            if line.startswith(b'rename from '):
                gp.op = b'RENAME'
                gp.oldpath = line[12:]
            elif line.startswith(b'rename to '):
                gp.path = line[10:]
            elif line.startswith(b'copy from '):
                gp.op = b'COPY'
                gp.oldpath = line[10:]
            elif line.startswith(b'copy to '):
                gp.path = line[8:]
            elif line.startswith(b'deleted file'):
                gp.op = b'DELETE'
            elif line.startswith(b'new file mode '):
                gp.op = b'ADD'
                # the last six characters hold the octal mode, e.g. 100755
                gp.setmode(int(line[-6:], 8))
            elif line.startswith(b'new mode '):
                gp.setmode(int(line[-6:], 8))
            elif line.startswith(b'GIT binary patch'):
                gp.binary = True
    if gp:
        gitpatches.append(gp)

    return gitpatches
433 432
434 433
class linereader(object):
    """File-like line reader that allows pushing lines back into the
    input stream (a simple lookahead buffer)."""

    def __init__(self, fp):
        self.fp = fp
        self.buf = []

    def push(self, line):
        # callers pass None to mean "nothing to push back"
        if line is not None:
            self.buf.append(line)

    def readline(self):
        if not self.buf:
            return self.fp.readline()
        return self.buf.pop(0)

    def __iter__(self):
        # iterate until readline() yields b'' (end of stream)
        return iter(self.readline, b'')
454 453
455 454
class abstractbackend(object):
    """Interface for targets a patch can be applied to (filesystem,
    working directory, ...). Subclasses implement the file operations."""

    def __init__(self, ui):
        self.ui = ui

    def getfile(self, fname):
        """Return target file data and flags as a (data, (islink,
        isexec)) tuple. Data is None if file is missing/deleted.
        """
        raise NotImplementedError

    def setfile(self, fname, data, mode, copysource):
        """Write data to target file fname and set its mode. mode is a
        (islink, isexec) tuple. If data is None, the file content should
        be left unchanged. If the file is modified after being copied,
        copysource is set to the original file name.
        """
        raise NotImplementedError

    def unlink(self, fname):
        """Unlink target file."""
        raise NotImplementedError

    def writerej(self, fname, failed, total, lines):
        """Write rejected lines for fname. total is the number of hunks
        which failed to apply and total the total number of hunks for this
        files.
        """
        # default implementation discards rejects; subclasses may override

    def exists(self, fname):
        raise NotImplementedError

    def close(self):
        raise NotImplementedError
489 488
490 489
class fsbackend(abstractbackend):
    """Patch backend applying changes directly to the filesystem rooted
    at basedir (through a vfs)."""

    def __init__(self, ui, basedir):
        super(fsbackend, self).__init__(ui)
        self.opener = vfsmod.vfs(basedir)

    def getfile(self, fname):
        if self.opener.islink(fname):
            # symlinks: the link target is the "data", flags say islink
            return (self.opener.readlink(fname), (True, False))

        isexec = False
        try:
            isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
        except OSError as e:
            # a missing file simply means isexec stays False
            if e.errno != errno.ENOENT:
                raise
        try:
            return (self.opener.read(fname), (False, isexec))
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
        # missing/deleted file, per the abstractbackend.getfile contract
        return None, None

    def setfile(self, fname, data, mode, copysource):
        islink, isexec = mode
        if data is None:
            # content unchanged: only update the flags
            self.opener.setflags(fname, islink, isexec)
            return
        if islink:
            self.opener.symlink(data, fname)
        else:
            self.opener.write(fname, data)
            if isexec:
                self.opener.setflags(fname, False, True)

    def unlink(self, fname):
        rmdir = self.ui.configbool(b'experimental', b'removeemptydirs')
        self.opener.unlinkpath(fname, ignoremissing=True, rmdir=rmdir)

    def writerej(self, fname, failed, total, lines):
        # rejects are saved next to the target file as "<fname>.rej"
        fname = fname + b".rej"
        self.ui.warn(
            _(b"%d out of %d hunks FAILED -- saving rejects to file %s\n")
            % (failed, total, fname)
        )
        fp = self.opener(fname, b'w')
        fp.writelines(lines)
        fp.close()

    def exists(self, fname):
        return self.opener.lexists(fname)
541 540
542 541
class workingbackend(fsbackend):
    """Backend applying patches to the working directory of a repository.

    Tracks copies, removals and changed files so the dirstate can be
    updated accordingly when close() is called.
    """

    def __init__(self, ui, repo, similarity):
        super(workingbackend, self).__init__(ui, repo.root)
        self.repo = repo
        # similarity threshold forwarded to scmutil.marktouched() for
        # copy/rename detection
        self.similarity = similarity
        self.removed = set()
        self.changed = set()
        self.copied = []

    def _checkknown(self, fname):
        # Refuse to patch a file that exists on disk but is unknown ('?')
        # to the dirstate.
        if self.repo.dirstate[fname] == b'?' and self.exists(fname):
            raise PatchError(_(b'cannot patch %s: file is not tracked') % fname)

    def setfile(self, fname, data, mode, copysource):
        self._checkknown(fname)
        super(workingbackend, self).setfile(fname, data, mode, copysource)
        if copysource is not None:
            self.copied.append((copysource, fname))
        self.changed.add(fname)

    def unlink(self, fname):
        self._checkknown(fname)
        super(workingbackend, self).unlink(fname)
        self.removed.add(fname)
        self.changed.add(fname)

    def close(self):
        """Record copies and removals in the dirstate, then return the
        sorted list of all touched files."""
        wctx = self.repo[None]
        changed = set(self.changed)
        for src, dst in self.copied:
            scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
        if self.removed:
            wctx.forget(sorted(self.removed))
        for f in self.removed:
            if f not in self.repo.dirstate:
                # File was deleted and no longer belongs to the
                # dirstate, it was probably marked added then
                # deleted, and should not be considered by
                # marktouched().
                changed.discard(f)
        if changed:
            scmutil.marktouched(self.repo, changed, self.similarity)
        return sorted(self.changed)
586 585
587 586
class filestore(object):
    """Store patched file contents, in memory up to ``maxsize`` bytes and
    spilling larger data to files in a temporary directory.
    """

    def __init__(self, maxsize=None):
        self.opener = None
        self.files = {}
        self.created = 0
        # Default in-memory budget is 4MiB.
        self.maxsize = 4 * (2 ** 20) if maxsize is None else maxsize
        self.size = 0
        self.data = {}

    def setfile(self, fname, data, mode, copied=None):
        if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
            self.data[fname] = (data, mode, copied)
            self.size += len(data)
            return
        # Spill to disk, lazily creating the temporary directory.
        if self.opener is None:
            root = pycompat.mkdtemp(prefix=b'hg-patch-')
            self.opener = vfsmod.vfs(root)
        # Simple sequential names avoid any filename issue.
        fn = b'%d' % self.created
        self.opener.write(fn, data)
        self.created += 1
        self.files[fname] = (fn, mode, copied)

    def getfile(self, fname):
        """Return (data, mode, copied), or (None, None, None) if unknown."""
        try:
            return self.data[fname]
        except KeyError:
            pass
        if self.opener and fname in self.files:
            fn, mode, copied = self.files[fname]
            return self.opener.read(fn), mode, copied
        return None, None, None

    def close(self):
        if self.opener:
            shutil.rmtree(self.opener.base)
624 623
625 624
class repobackend(abstractbackend):
    """Backend reading file contents from a changectx and writing patched
    results into a filestore instead of the filesystem.
    """

    def __init__(self, ui, repo, ctx, store):
        super(repobackend, self).__init__(ui)
        self.repo = repo
        # base context file data is read from
        self.ctx = ctx
        # filestore receiving patched contents
        self.store = store
        self.changed = set()
        self.removed = set()
        self.copied = {}

    def _checkknown(self, fname):
        # Files must exist in the base context to be patched.
        if fname not in self.ctx:
            raise PatchError(_(b'cannot patch %s: file is not tracked') % fname)

    def getfile(self, fname):
        try:
            fctx = self.ctx[fname]
        except error.LookupError:
            return None, None
        flags = fctx.flags()
        return fctx.data(), (b'l' in flags, b'x' in flags)

    def setfile(self, fname, data, mode, copysource):
        if copysource:
            self._checkknown(copysource)
        if data is None:
            # Flags-only change: keep the existing content.
            data = self.ctx[fname].data()
        self.store.setfile(fname, data, mode, copysource)
        self.changed.add(fname)
        if copysource:
            self.copied[fname] = copysource

    def unlink(self, fname):
        self._checkknown(fname)
        self.removed.add(fname)

    def exists(self, fname):
        return fname in self.ctx

    def close(self):
        """Return all files touched by this patching session."""
        return self.changed | self.removed
667 666
668 667
# @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
# "--- start,end ----" / "*** start,end ****" range lines of a context diff
contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
# recognized end-of-line normalization modes
eolmodes = [b'strict', b'crlf', b'lf', b'auto']
673 672
674 673
class patchfile(object):
    """State of one target file while hunks are applied to it.

    Reads the current content through a backend (or a store for copy
    sources), applies hunks with offset/fuzz handling, and writes back
    the result and any rejects on close().
    """

    def __init__(self, ui, gp, backend, store, eolmode=b'strict'):
        self.fname = gp.path
        self.eolmode = eolmode
        # end-of-line marker detected on the first line of the original
        # file (b'\n', b'\r\n', or None when unknown)
        self.eol = None
        self.backend = backend
        self.ui = ui
        self.lines = []
        self.exists = False
        self.missing = True
        self.mode = gp.mode
        self.copysource = gp.oldpath
        self.create = gp.op in (b'ADD', b'COPY', b'RENAME')
        self.remove = gp.op == b'DELETE'
        if self.copysource is None:
            data, mode = backend.getfile(self.fname)
        else:
            data, mode = store.getfile(self.copysource)[:2]
        if data is not None:
            self.exists = self.copysource is None or backend.exists(self.fname)
            self.missing = False
            if data:
                self.lines = mdiff.splitnewlines(data)
            if self.mode is None:
                self.mode = mode
            if self.lines:
                # Normalize line endings
                if self.lines[0].endswith(b'\r\n'):
                    self.eol = b'\r\n'
                elif self.lines[0].endswith(b'\n'):
                    self.eol = b'\n'
                if eolmode != b'strict':
                    nlines = []
                    for l in self.lines:
                        if l.endswith(b'\r\n'):
                            l = l[:-2] + b'\n'
                        nlines.append(l)
                    self.lines = nlines
        else:
            if self.create:
                self.missing = False
            if self.mode is None:
                self.mode = (False, False)
        if self.missing:
            self.ui.warn(_(b"unable to find '%s' for patching\n") % self.fname)
            self.ui.warn(
                _(
                    b"(use '--prefix' to apply patch relative to the "
                    b"current directory)\n"
                )
            )

        self.hash = {}
        self.dirty = 0
        # cumulative line offset introduced by previously applied hunks
        self.offset = 0
        # offset at which the previous hunk matched, reused to place
        # subsequent hunks
        self.skew = 0
        self.rej = []
        self.fileprinted = False
        self.printfile(False)
        self.hunks = 0

    def writelines(self, fname, lines, mode):
        """Write lines through the backend, applying eolmode conversion."""
        if self.eolmode == b'auto':
            eol = self.eol
        elif self.eolmode == b'crlf':
            eol = b'\r\n'
        else:
            eol = b'\n'

        if self.eolmode != b'strict' and eol and eol != b'\n':
            rawlines = []
            for l in lines:
                if l and l.endswith(b'\n'):
                    l = l[:-1] + eol
                rawlines.append(l)
            lines = rawlines

        self.backend.setfile(fname, b''.join(lines), mode, self.copysource)

    def printfile(self, warn):
        """Print the file name once; as a warning when warn is set, as a
        note otherwise."""
        if self.fileprinted:
            return
        if warn or self.ui.verbose:
            self.fileprinted = True
        s = _(b"patching file %s\n") % self.fname
        if warn:
            self.ui.warn(s)
        else:
            self.ui.note(s)

    def findlines(self, l, linenum):
        # looks through the hash and finds candidate lines. The
        # result is a list of line numbers sorted based on distance
        # from linenum

        cand = self.hash.get(l, [])
        if len(cand) > 1:
            # resort our list of potentials forward then back.
            cand.sort(key=lambda x: abs(x - linenum))
        return cand

    def write_rej(self):
        # our rejects are a little different from patch(1). This always
        # creates rejects in the same form as the original patch. A file
        # header is inserted so that you can run the reject through patch again
        # without having to type the filename.
        if not self.rej:
            return
        base = os.path.basename(self.fname)
        lines = [b"--- %s\n+++ %s\n" % (base, base)]
        for x in self.rej:
            for l in x.hunk:
                lines.append(l)
                if l[-1:] != b'\n':
                    lines.append(b"\n\\ No newline at end of file\n")
        self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)

    def apply(self, h):
        """Apply hunk h to this file.

        Returns 0 on a clean application, the fuzz amount on a fuzzy
        application, and -1 when the hunk was rejected.
        """
        if not h.complete():
            raise PatchError(
                _(b"bad hunk #%d %s (%d %d %d %d)")
                % (h.number, h.desc, len(h.a), h.lena, len(h.b), h.lenb)
            )

        self.hunks += 1

        if self.missing:
            self.rej.append(h)
            return -1

        if self.exists and self.create:
            if self.copysource:
                self.ui.warn(
                    _(b"cannot create %s: destination already exists\n")
                    % self.fname
                )
            else:
                self.ui.warn(_(b"file %s already exists\n") % self.fname)
            self.rej.append(h)
            return -1

        if isinstance(h, binhunk):
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                l = h.new(self.lines)
                self.lines[:] = l
                self.offset += len(l)
                self.dirty = True
            return 0

        horig = h
        if (
            self.eolmode in (b'crlf', b'lf')
            or self.eolmode == b'auto'
            and self.eol
        ):
            # If new eols are going to be normalized, then normalize
            # hunk data before patching. Otherwise, preserve input
            # line-endings.
            h = h.getnormalized()

        # fast case first, no offsets, no fuzz
        old, oldstart, new, newstart = h.fuzzit(0, False)
        oldstart += self.offset
        orig_start = oldstart
        # if there's skew we want to emit the "(offset %d lines)" even
        # when the hunk cleanly applies at start + skew, so skip the
        # fast case code
        if self.skew == 0 and diffhelper.testhunk(old, self.lines, oldstart):
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                self.lines[oldstart : oldstart + len(old)] = new
                self.offset += len(new) - len(old)
                self.dirty = True
            return 0

        # ok, we couldn't match the hunk. Lets look for offsets and fuzz it
        self.hash = {}
        for x, s in enumerate(self.lines):
            self.hash.setdefault(s, []).append(x)

        for fuzzlen in pycompat.xrange(
            self.ui.configint(b"patch", b"fuzz") + 1
        ):
            for toponly in [True, False]:
                old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
                oldstart = oldstart + self.offset + self.skew
                oldstart = min(oldstart, len(self.lines))
                if old:
                    cand = self.findlines(old[0][1:], oldstart)
                else:
                    # Only adding lines with no or fuzzed context, just
                    # take the skew in account
                    cand = [oldstart]

                for l in cand:
                    if not old or diffhelper.testhunk(old, self.lines, l):
                        self.lines[l : l + len(old)] = new
                        self.offset += len(new) - len(old)
                        self.skew = l - orig_start
                        self.dirty = True
                        offset = l - orig_start - fuzzlen
                        if fuzzlen:
                            msg = _(
                                b"Hunk #%d succeeded at %d "
                                b"with fuzz %d "
                                b"(offset %d lines).\n"
                            )
                            self.printfile(True)
                            self.ui.warn(
                                msg % (h.number, l + 1, fuzzlen, offset)
                            )
                        else:
                            msg = _(
                                b"Hunk #%d succeeded at %d "
                                b"(offset %d lines).\n"
                            )
                            self.ui.note(msg % (h.number, l + 1, offset))
                        return fuzzlen
        self.printfile(True)
        self.ui.warn(_(b"Hunk #%d FAILED at %d\n") % (h.number, orig_start))
        self.rej.append(horig)
        return -1

    def close(self):
        """Write pending changes and rejects; return the reject count."""
        if self.dirty:
            self.writelines(self.fname, self.lines, self.mode)
        self.write_rej()
        return len(self.rej)
906 905
907 906
class header(object):
    """A patch header for one file, plus the hunks that follow it."""

    diffgit_re = re.compile(b'diff --git a/(.*) b/(.*)$')
    diff_re = re.compile(b'diff -r .* (.*)$')
    allhunks_re = re.compile(b'(?:index|deleted file) ')
    pretty_re = re.compile(b'(?:new file|deleted file) ')
    special_re = re.compile(b'(?:index|deleted|copy|rename|new mode) ')
    newfile_re = re.compile(b'(?:new file|copy to|rename to)')

    def __init__(self, header):
        self.header = header
        self.hunks = []

    def binary(self):
        for line in self.header:
            if line.startswith(b'index '):
                return True
        return False

    def pretty(self, fp):
        for line in self.header:
            if line.startswith(b'index '):
                fp.write(_(b'this modifies a binary file (all or nothing)\n'))
                break
            if self.pretty_re.match(line):
                fp.write(line)
                if self.binary():
                    fp.write(_(b'this is a binary file\n'))
                break
            if line.startswith(b'---'):
                fp.write(
                    _(b'%d hunks, %d lines changed\n')
                    % (
                        len(self.hunks),
                        sum(max(h.added, h.removed) for h in self.hunks),
                    )
                )
                break
            fp.write(line)

    def write(self, fp):
        fp.write(b''.join(self.header))

    def allhunks(self):
        for line in self.header:
            if self.allhunks_re.match(line):
                return True
        return False

    def files(self):
        m = self.diffgit_re.match(self.header[0])
        if not m:
            return self.diff_re.match(self.header[0]).groups()
        fromfile, tofile = m.groups()
        if fromfile == tofile:
            return [fromfile]
        return [fromfile, tofile]

    def filename(self):
        return self.files()[-1]

    def __repr__(self):
        return b'<header %s>' % (b' '.join(map(repr, self.files())))

    def isnewfile(self):
        for line in self.header:
            if self.newfile_re.match(line):
                return True
        return False

    def special(self):
        # A "special" change cannot be split at hunk level: for instance a
        # deleted file must be taken or left as a whole. A newly added file
        # is only special while it is empty; once it has content the
        # content can be edited.
        nocontent = len(self.header) == 2
        if self.isnewfile() and nocontent:
            return True
        return any(self.special_re.match(line) for line in self.header)
985 984
986 985
class recordhunk(object):
    """A selectable patch hunk, with surrounding context.

    XXX shouldn't we merge this with the other hunk class?
    """

    def __init__(
        self,
        header,
        fromline,
        toline,
        proc,
        before,
        hunk,
        after,
        maxcontext=None,
    ):
        def trimcontext(lines, reverse=False):
            # Clamp context to maxcontext lines and report how many were
            # dropped, so line numbers can be adjusted accordingly.
            if maxcontext is None:
                return 0, lines
            delta = len(lines) - maxcontext
            if delta <= 0:
                return 0, lines
            if reverse:
                return delta, lines[delta:]
            return delta, lines[:maxcontext]

        self.header = header
        trimmedbefore, self.before = trimcontext(before, True)
        self.fromline = fromline + trimmedbefore
        self.toline = toline + trimmedbefore
        _trimmedafter, self.after = trimcontext(after, False)
        self.proc = proc
        self.hunk = hunk
        self.added, self.removed = self.countchanges(self.hunk)

    def __eq__(self, v):
        if not isinstance(v, recordhunk):
            return False
        return (
            v.hunk == self.hunk
            and v.proc == self.proc
            and self.fromline == v.fromline
            and self.header.files() == v.header.files()
        )

    def __hash__(self):
        return hash(
            (
                tuple(self.hunk),
                tuple(self.header.files()),
                self.fromline,
                self.proc,
            )
        )

    def countchanges(self, hunk):
        """hunk -> (n+,n-)"""
        add = sum(1 for h in hunk if h.startswith(b'+'))
        rem = sum(1 for h in hunk if h.startswith(b'-'))
        return add, rem

    def reversehunk(self):
        """Return another recordhunk which is the reverse of this hunk.

        If this hunk is diff(A, B), the returned hunk is diff(B, A): swap
        fromline/toline and flip +/- signs, keep everything else.
        """
        flip = {b'+': b'-', b'-': b'+', b'\\': b'\\'}
        reversedhunk = [flip[l[0:1]] + l[1:] for l in self.hunk]
        return recordhunk(
            self.header,
            self.toline,
            self.fromline,
            self.proc,
            self.before,
            reversedhunk,
            self.after,
        )

    def write(self, fp):
        delta = len(self.before) + len(self.after)
        if self.after and self.after[-1] == b'\\ No newline at end of file\n':
            # The no-eol marker does not count as a context line.
            delta -= 1
        fromlen = delta + self.removed
        tolen = delta + self.added
        hunkline = b'@@ -%d,%d +%d,%d @@%s\n' % (
            self.fromline,
            fromlen,
            self.toline,
            tolen,
            self.proc and (b' ' + self.proc),
        )
        fp.write(hunkline)
        fp.write(b''.join(self.before + self.hunk + self.after))

    pretty = write

    def filename(self):
        return self.header.filename()

    def __repr__(self):
        return b'<hunk %r@%d>' % (self.filename(), self.fromline)
1094 1093
1095 1094
def getmessages():
    """Return the prompt message table used by filterpatch().

    b'multiple'/b'single' map an operation name to the per-hunk question;
    b'help' maps it to the promptchoice() string listing the responses.
    """
    return {
        b'multiple': {
            b'apply': _(b"apply change %d/%d to '%s'?"),
            b'discard': _(b"discard change %d/%d to '%s'?"),
            b'keep': _(b"keep change %d/%d to '%s'?"),
            b'record': _(b"record change %d/%d to '%s'?"),
        },
        b'single': {
            b'apply': _(b"apply this change to '%s'?"),
            b'discard': _(b"discard this change to '%s'?"),
            b'keep': _(b"keep this change to '%s'?"),
            b'record': _(b"record this change to '%s'?"),
        },
        b'help': {
            b'apply': _(
                b'[Ynesfdaq?]'
                b'$$ &Yes, apply this change'
                b'$$ &No, skip this change'
                b'$$ &Edit this change manually'
                b'$$ &Skip remaining changes to this file'
                b'$$ Apply remaining changes to this &file'
                b'$$ &Done, skip remaining changes and files'
                b'$$ Apply &all changes to all remaining files'
                b'$$ &Quit, applying no changes'
                b'$$ &? (display help)'
            ),
            b'discard': _(
                b'[Ynesfdaq?]'
                b'$$ &Yes, discard this change'
                b'$$ &No, skip this change'
                b'$$ &Edit this change manually'
                b'$$ &Skip remaining changes to this file'
                b'$$ Discard remaining changes to this &file'
                b'$$ &Done, skip remaining changes and files'
                b'$$ Discard &all changes to all remaining files'
                b'$$ &Quit, discarding no changes'
                b'$$ &? (display help)'
            ),
            b'keep': _(
                b'[Ynesfdaq?]'
                b'$$ &Yes, keep this change'
                b'$$ &No, skip this change'
                b'$$ &Edit this change manually'
                b'$$ &Skip remaining changes to this file'
                b'$$ Keep remaining changes to this &file'
                b'$$ &Done, skip remaining changes and files'
                b'$$ Keep &all changes to all remaining files'
                b'$$ &Quit, keeping all changes'
                b'$$ &? (display help)'
            ),
            b'record': _(
                b'[Ynesfdaq?]'
                b'$$ &Yes, record this change'
                b'$$ &No, skip this change'
                b'$$ &Edit this change manually'
                b'$$ &Skip remaining changes to this file'
                b'$$ Record remaining changes to this &file'
                b'$$ &Done, skip remaining changes and files'
                b'$$ Record &all changes to all remaining files'
                b'$$ &Quit, recording no changes'
                b'$$ &? (display help)'
            ),
        },
    }
1161 1160
1162 1161
def filterpatch(ui, headers, match, operation=None):
    """Interactively filter patch chunks into applied-only chunks"""
    messages = getmessages()

    if operation is None:
        operation = b'record'

    def prompt(skipfile, skipall, query, chunk):
        """prompt query, and process base inputs

        - y/n for the rest of file
        - y/n for the rest
        - ? (help)
        - q (quit)

        Return True/False and possibly updated skipfile and skipall.
        """
        newpatches = None
        if skipall is not None:
            return skipall, skipfile, skipall, newpatches
        if skipfile is not None:
            return skipfile, skipfile, skipall, newpatches
        while True:
            ui.flush()
            resps = messages[b'help'][operation]
            # IMPORTANT: keep the last line of this prompt short (<40 english
            # chars is a good target) because of issue6158.
            r = ui.promptchoice(b"%s\n(enter ? for help) %s" % (query, resps))
            ui.write(b"\n")
            if r == 8:  # ?
                for c, t in ui.extractchoices(resps)[1]:
                    ui.write(b'%s - %s\n' % (c, encoding.lower(t)))
                continue
            elif r == 0:  # yes
                ret = True
            elif r == 1:  # no
                ret = False
            elif r == 2:  # Edit patch
                if chunk is None:
                    ui.write(_(b'cannot edit patch for whole file'))
                    ui.write(b"\n")
                    continue
                if chunk.header.binary():
                    ui.write(_(b'cannot edit patch for binary file'))
                    ui.write(b"\n")
                    continue
                # Patch comment based on the Git one (based on comment at end of
                # https://mercurial-scm.org/wiki/RecordExtension)
                phelp = b'---' + _(
                    """
To remove '-' lines, make them ' ' lines (context).
To remove '+' lines, delete them.
Lines starting with # will be removed from the patch.

If the patch applies cleanly, the edited hunk will immediately be
added to the record list. If it does not apply cleanly, a rejects
file will be generated: you can use that when you try again. If
all lines of the hunk are removed, then the edit is aborted and
the hunk is left unchanged.
"""
                )
                (patchfd, patchfn) = pycompat.mkstemp(
                    prefix=b"hg-editor-", suffix=b".diff"
                )
                ncpatchfp = None
                try:
                    # Write the initial patch
                    f = util.nativeeolwriter(os.fdopen(patchfd, r'wb'))
                    chunk.header.write(f)
                    chunk.write(f)
                    f.write(
                        b''.join(
                            [b'# ' + i + b'\n' for i in phelp.splitlines()]
                        )
                    )
                    f.close()
                    # Start the editor and wait for it to complete
                    editor = ui.geteditor()
                    ret = ui.system(
                        b"%s \"%s\"" % (editor, patchfn),
                        environ={b'HGUSER': ui.username()},
                        blockedtag=b'filterpatch',
                    )
                    if ret != 0:
                        ui.warn(_(b"editor exited with exit code %d\n") % ret)
                        continue
                    # Remove comment lines
                    patchfp = open(patchfn, r'rb')
                    ncpatchfp = stringio()
                    for line in util.iterfile(patchfp):
                        line = util.fromnativeeol(line)
                        if not line.startswith(b'#'):
                            ncpatchfp.write(line)
                    patchfp.close()
                    ncpatchfp.seek(0)
                    newpatches = parsepatch(ncpatchfp)
                finally:
                    os.unlink(patchfn)
                    del ncpatchfp
                # Signal that the chunk shouldn't be applied as-is, but
                # provide the new patch to be used instead.
                ret = False
            elif r == 3:  # Skip
                ret = skipfile = False
            elif r == 4:  # file (Record remaining)
                ret = skipfile = True
            elif r == 5:  # done, skip remaining
                ret = skipall = False
            elif r == 6:  # all
                ret = skipall = True
            elif r == 7:  # quit
                raise error.Abort(_(b'user quit'))
            return ret, skipfile, skipall, newpatches

    seen = set()
    applied = {}  # 'filename' -> [] of chunks
    skipfile, skipall = None, None
    pos, total = 1, sum(len(h.hunks) for h in headers)
    for h in headers:
        pos += len(h.hunks)
        skipfile = None
        # line offset introduced by hunks skipped so far in this file
        fixoffset = 0
        hdr = b''.join(h.header)
        if hdr in seen:
            continue
        seen.add(hdr)
        if skipall is None:
            h.pretty(ui)
        files = h.files()
        msg = _(b'examine changes to %s?') % _(b' and ').join(
            b"'%s'" % f for f in files
        )
        if all(match.exact(f) for f in files):
            # Explicitly requested files are examined without prompting.
            r, skipall, np = True, None, None
        else:
            r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
        if not r:
            continue
        applied[h.filename()] = [h]
        if h.allhunks():
            applied[h.filename()] += h.hunks
            continue
        for i, chunk in enumerate(h.hunks):
            if skipfile is None and skipall is None:
                chunk.pretty(ui)
            if total == 1:
                msg = messages[b'single'][operation] % chunk.filename()
            else:
                idx = pos - len(h.hunks) + i
                msg = messages[b'multiple'][operation] % (
                    idx,
                    total,
                    chunk.filename(),
                )
            r, skipfile, skipall, newpatches = prompt(
                skipfile, skipall, msg, chunk
            )
            if r:
                if fixoffset:
                    chunk = copy.copy(chunk)
                    chunk.toline += fixoffset
                applied[chunk.filename()].append(chunk)
            elif newpatches is not None:
                for newpatch in newpatches:
                    for newhunk in newpatch.hunks:
                        if fixoffset:
                            newhunk.toline += fixoffset
                        applied[newhunk.filename()].append(newhunk)
            else:
                fixoffset += chunk.removed - chunk.added
    return (
        sum(
            [
                h
                for h in pycompat.itervalues(applied)
                if h[0].special() or len(h) > 1
            ],
            [],
        ),
        {},
    )
1344 1343
1345 1344
1346 1345 class hunk(object):
1347 1346 def __init__(self, desc, num, lr, context):
1348 1347 self.number = num
1349 1348 self.desc = desc
1350 1349 self.hunk = [desc]
1351 1350 self.a = []
1352 1351 self.b = []
1353 1352 self.starta = self.lena = None
1354 1353 self.startb = self.lenb = None
1355 1354 if lr is not None:
1356 1355 if context:
1357 1356 self.read_context_hunk(lr)
1358 1357 else:
1359 1358 self.read_unified_hunk(lr)
1360 1359
1361 1360 def getnormalized(self):
1362 1361 """Return a copy with line endings normalized to LF."""
1363 1362
1364 1363 def normalize(lines):
1365 1364 nlines = []
1366 1365 for line in lines:
1367 1366 if line.endswith(b'\r\n'):
1368 1367 line = line[:-2] + b'\n'
1369 1368 nlines.append(line)
1370 1369 return nlines
1371 1370
1372 1371 # Dummy object, it is rebuilt manually
1373 1372 nh = hunk(self.desc, self.number, None, None)
1374 1373 nh.number = self.number
1375 1374 nh.desc = self.desc
1376 1375 nh.hunk = self.hunk
1377 1376 nh.a = normalize(self.a)
1378 1377 nh.b = normalize(self.b)
1379 1378 nh.starta = self.starta
1380 1379 nh.startb = self.startb
1381 1380 nh.lena = self.lena
1382 1381 nh.lenb = self.lenb
1383 1382 return nh
1384 1383
    def read_unified_hunk(self, lr):
        """Parse a unified-format hunk body from line reader lr."""
        m = unidesc.match(self.desc)
        if not m:
            raise PatchError(_(b"bad hunk #%d") % self.number)
        self.starta, self.lena, self.startb, self.lenb = m.groups()
        # a missing length in "@@ -start +start @@" means 1
        if self.lena is None:
            self.lena = 1
        else:
            self.lena = int(self.lena)
        if self.lenb is None:
            self.lenb = 1
        else:
            self.lenb = int(self.lenb)
        self.starta = int(self.starta)
        self.startb = int(self.startb)
        try:
            diffhelper.addlines(
                lr, self.hunk, self.lena, self.lenb, self.a, self.b
            )
        except error.ParseError as e:
            raise PatchError(_(b"bad hunk #%d: %s") % (self.number, e))
        # if we hit eof before finishing out the hunk, the last line will
        # be zero length. Lets try to fix it up.
        while len(self.hunk[-1]) == 0:
            del self.hunk[-1]
            del self.a[-1]
            del self.b[-1]
            self.lena -= 1
            self.lenb -= 1
        self._fixnewline(lr)
1415 1414
    def read_context_hunk(self, lr):
        """Parse a context-format hunk body from line reader lr.

        The result is stored in unified form: self.a/self.b hold the old
        and new lines, and self.desc is rewritten as a unified
        "@@ -start,len +start,len @@" header.
        """
        self.desc = lr.readline()
        m = contextdesc.match(self.desc)
        if not m:
            raise PatchError(_(b"bad hunk #%d") % self.number)
        self.starta, aend = m.groups()
        self.starta = int(self.starta)
        if aend is None:
            aend = self.starta
        self.lena = int(aend) - self.starta
        if self.starta:
            self.lena += 1
        # Read the old side of the hunk.
        for x in pycompat.xrange(self.lena):
            l = lr.readline()
            if l.startswith(b'---'):
                # lines addition, old block is empty
                lr.push(l)
                break
            s = l[2:]
            if l.startswith(b'- ') or l.startswith(b'! '):
                u = b'-' + s
            elif l.startswith(b'  '):
                u = b' ' + s
            else:
                raise PatchError(
                    _(b"bad hunk #%d old text line %d") % (self.number, x)
                )
            self.a.append(u)
            self.hunk.append(u)

        l = lr.readline()
        if l.startswith(br'\ '):
            # no-eol marker: drop the newline recorded on the last line
            s = self.a[-1][:-1]
            self.a[-1] = s
            self.hunk[-1] = s
            l = lr.readline()
        m = contextdesc.match(l)
        if not m:
            raise PatchError(_(b"bad hunk #%d") % self.number)
        self.startb, bend = m.groups()
        self.startb = int(self.startb)
        if bend is None:
            bend = self.startb
        self.lenb = int(bend) - self.startb
        if self.startb:
            self.lenb += 1
        hunki = 1
        # Read the new side, merging it into self.hunk at the right spots.
        for x in pycompat.xrange(self.lenb):
            l = lr.readline()
            if l.startswith(br'\ '):
                # XXX: the only way to hit this is with an invalid line range.
                # The no-eol marker is not counted in the line range, but I
                # guess there are diff(1) out there which behave differently.
                s = self.b[-1][:-1]
                self.b[-1] = s
                self.hunk[hunki - 1] = s
                continue
            if not l:
                # line deletions, new block is empty and we hit EOF
                lr.push(l)
                break
            s = l[2:]
            if l.startswith(b'+ ') or l.startswith(b'! '):
                u = b'+' + s
            elif l.startswith(b'  '):
                u = b' ' + s
            elif len(self.b) == 0:
                # line deletions, new block is empty
                lr.push(l)
                break
            else:
                raise PatchError(
                    _(b"bad hunk #%d old text line %d") % (self.number, x)
                )
            self.b.append(s)
            while True:
                if hunki >= len(self.hunk):
                    h = b""
                else:
                    h = self.hunk[hunki]
                hunki += 1
                if h == u:
                    break
                elif h.startswith(b'-'):
                    continue
                else:
                    self.hunk.insert(hunki - 1, u)
                    break

        if not self.a:
            # this happens when lines were only added to the hunk
            for x in self.hunk:
                if x.startswith(b'-') or x.startswith(b' '):
                    self.a.append(x)
        if not self.b:
            # this happens when lines were only deleted from the hunk
            for x in self.hunk:
                if x.startswith(b'+') or x.startswith(b' '):
                    self.b.append(x[1:])
        # @@ -start,len +start,len @@
        self.desc = b"@@ -%d,%d +%d,%d @@\n" % (
            self.starta,
            self.lena,
            self.startb,
            self.lenb,
        )
        self.hunk[0] = self.desc
        self._fixnewline(lr)
1524 1523
1525 1524 def _fixnewline(self, lr):
1526 1525 l = lr.readline()
1527 1526 if l.startswith(br'\ '):
1528 1527 diffhelper.fixnewline(self.hunk, self.a, self.b)
1529 1528 else:
1530 1529 lr.push(l)
1531 1530
1532 1531 def complete(self):
1533 1532 return len(self.a) == self.lena and len(self.b) == self.lenb
1534 1533
1535 1534 def _fuzzit(self, old, new, fuzz, toponly):
1536 1535 # this removes context lines from the top and bottom of list 'l'. It
1537 1536 # checks the hunk to make sure only context lines are removed, and then
1538 1537 # returns a new shortened list of lines.
1539 1538 fuzz = min(fuzz, len(old))
1540 1539 if fuzz:
1541 1540 top = 0
1542 1541 bot = 0
1543 1542 hlen = len(self.hunk)
1544 1543 for x in pycompat.xrange(hlen - 1):
1545 1544 # the hunk starts with the @@ line, so use x+1
1546 1545 if self.hunk[x + 1].startswith(b' '):
1547 1546 top += 1
1548 1547 else:
1549 1548 break
1550 1549 if not toponly:
1551 1550 for x in pycompat.xrange(hlen - 1):
1552 1551 if self.hunk[hlen - bot - 1].startswith(b' '):
1553 1552 bot += 1
1554 1553 else:
1555 1554 break
1556 1555
1557 1556 bot = min(fuzz, bot)
1558 1557 top = min(fuzz, top)
1559 1558 return old[top : len(old) - bot], new[top : len(new) - bot], top
1560 1559 return old, new, 0
1561 1560
1562 1561 def fuzzit(self, fuzz, toponly):
1563 1562 old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
1564 1563 oldstart = self.starta + top
1565 1564 newstart = self.startb + top
1566 1565 # zero length hunk ranges already have their start decremented
1567 1566 if self.lena and oldstart > 0:
1568 1567 oldstart -= 1
1569 1568 if self.lenb and newstart > 0:
1570 1569 newstart -= 1
1571 1570 return old, oldstart, new, newstart
1572 1571
1573 1572
class binhunk(object):
    """A binary patch file.

    Parses a 'GIT binary patch' section: either a full 'literal'
    payload or a 'delta' against previous content, base85-decoded and
    zlib-decompressed.
    """

    # NOTE: the class docstring used to be the bytes literal
    # b'A binary patch file.', which is not a docstring on Python 3
    # (it left __doc__ as None); it is now a real string.

    def __init__(self, lr, fname):
        self.text = None  # decoded payload, None until _read() succeeds
        self.delta = False  # True when the payload is a delta, not a literal
        self.hunk = [b'GIT binary patch\n']  # raw lines, for reject output
        self._fname = fname
        self._read(lr)

    def complete(self):
        # parsing succeeded iff _read() managed to decode the payload
        return self.text is not None

    def new(self, lines):
        if self.delta:
            # 'lines' holds the base text the delta applies to
            return [applybindelta(self.text, b''.join(lines))]
        return [self.text]

    def _read(self, lr):
        def getline(lr, hunk):
            l = lr.readline()
            hunk.append(l)
            return l.rstrip(b'\r\n')

        # scan for the 'literal <size>' or 'delta <size>' header
        while True:
            line = getline(lr, self.hunk)
            if not line:
                raise PatchError(
                    _(b'could not extract "%s" binary data') % self._fname
                )
            if line.startswith(b'literal '):
                size = int(line[8:].rstrip())
                break
            if line.startswith(b'delta '):
                size = int(line[6:].rstrip())
                self.delta = True
                break
        dec = []
        line = getline(lr, self.hunk)
        while len(line) > 1:
            # the first character encodes the decoded length of the
            # line: 'A'-'Z' -> 1-26, 'a'-'z' -> 27-52
            l = line[0:1]
            if l <= b'Z' and l >= b'A':
                l = ord(l) - ord(b'A') + 1
            else:
                l = ord(l) - ord(b'a') + 27
            try:
                dec.append(util.b85decode(line[1:])[:l])
            except ValueError as e:
                raise PatchError(
                    _(b'could not decode "%s" binary patch: %s')
                    % (self._fname, stringutil.forcebytestr(e))
                )
            line = getline(lr, self.hunk)
        text = zlib.decompress(b''.join(dec))
        # the header's declared size doubles as an integrity check
        if len(text) != size:
            raise PatchError(
                _(b'"%s" length is %d bytes, should be %d')
                % (self._fname, len(text), size)
            )
        self.text = text
1634 1633
1635 1634
def parsefilename(str):
    """Pull the file name out of a '--- '/'+++ ' patch header line.

    The leading four characters and trailing EOL are dropped; anything
    after the first tab (or, failing that, the first space) is ignored.
    """
    name = str[4:].rstrip(b'\r\n')
    for sep in (b'\t', b' '):
        idx = name.find(sep)
        if idx >= 0:
            return name[:idx]
    return name
1645 1644
1646 1645
def reversehunks(hunks):
    '''reverse the signs in the hunks given as argument

    This function operates on hunks coming out of patch.filterpatch, that is
    a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:

    >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,7 +1,7 @@
    ... +firstline
    ...  c
    ...  1
    ...  2
    ... + 3
    ... -4
    ...  5
    ...  d
    ... +lastline"""
    >>> hunks = parsepatch([rawpatch])
    >>> hunkscomingfromfilterpatch = []
    >>> for h in hunks:
    ...     hunkscomingfromfilterpatch.append(h)
    ...     hunkscomingfromfilterpatch.extend(h.hunks)

    >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
    >>> from . import util
    >>> fp = util.stringio()
    >>> for c in reversedhunks:
    ...     c.write(fp)
    >>> fp.seek(0) or None
    >>> reversedpatch = fp.read()
    >>> print(pycompat.sysstr(reversedpatch))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -1,4 +1,3 @@
    -firstline
     c
     1
     2
    @@ -2,6 +1,6 @@
     c
     1
     2
    - 3
    +4
     5
     d
    @@ -6,3 +5,2 @@
     5
     d
    -lastline

    '''

    # headers pass through unchanged; anything exposing reversehunk()
    # (i.e. the hunks themselves) is replaced by its reversed form
    return [
        c.reversehunk() if util.safehasattr(c, b'reversehunk') else c
        for c in hunks
    ]
1709 1708
1710 1709
def parsepatch(originalchunks, maxcontext=None):
    """patch -> [] of headers -> [] of hunks

    If maxcontext is not None, trim context lines if necessary.

    >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,8 +1,10 @@
    ...  1
    ...  2
    ... -3
    ...  4
    ...  5
    ...  6
    ... +6.1
    ... +6.2
    ...  7
    ...  8
    ... +9'''
    >>> out = util.stringio()
    >>> headers = parsepatch([rawpatch], maxcontext=1)
    >>> for header in headers:
    ...     header.write(out)
    ...     for hunk in header.hunks:
    ...         hunk.write(out)
    >>> print(pycompat.sysstr(out.getvalue()))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -2,3 +2,2 @@
     2
    -3
     4
    @@ -6,2 +5,4 @@
     6
    +6.1
    +6.2
     7
    @@ -8,1 +9,2 @@
     8
    +9
    """

    class parser(object):
        """patch parsing state machine"""

        def __init__(self):
            # line cursors into the original and patched file
            self.fromline = 0
            self.toline = 0
            # text following the @@ range (function name or similar)
            self.proc = b''
            # header currently being filled with hunks
            self.header = None
            # pending context lines (may become 'before' of next hunk)
            self.context = []
            # context lines preceding the hunk being built
            self.before = []
            # +/- lines of the hunk being built
            self.hunk = []
            # completed header objects, returned by finished()
            self.headers = []

        def addrange(self, limits):
            # begin a new @@ range: flush pending state, reset cursors
            self.addcontext([])
            fromstart, fromend, tostart, toend, proc = limits
            self.fromline = int(fromstart)
            self.toline = int(tostart)
            self.proc = proc

        def addcontext(self, context):
            # record trailing context, completing any hunk in progress
            if self.hunk:
                h = recordhunk(
                    self.header,
                    self.fromline,
                    self.toline,
                    self.proc,
                    self.before,
                    self.hunk,
                    context,
                    maxcontext,
                )
                self.header.hunks.append(h)
                # advance the cursors past the lines just consumed
                self.fromline += len(self.before) + h.removed
                self.toline += len(self.before) + h.added
                self.before = []
                self.hunk = []
            self.context = context

        def addhunk(self, hunk):
            # start collecting +/- lines; prior context becomes 'before'
            if self.context:
                self.before = self.context
                self.context = []
            if self.hunk:
                self.addcontext([])
            self.hunk = hunk

        def newfile(self, hdr):
            # complete the current file and start a new header
            self.addcontext([])
            h = header(hdr)
            self.headers.append(h)
            self.header = h

        def addother(self, line):
            pass  # 'other' lines are ignored

        def finished(self):
            self.addcontext([])
            return self.headers

        # state -> {event -> action}; the actions are unbound methods,
        # invoked below as transitions[state][event](p, data)
        transitions = {
            b'file': {
                b'context': addcontext,
                b'file': newfile,
                b'hunk': addhunk,
                b'range': addrange,
            },
            b'context': {
                b'file': newfile,
                b'hunk': addhunk,
                b'range': addrange,
                b'other': addother,
            },
            b'hunk': {
                b'context': addcontext,
                b'file': newfile,
                b'range': addrange,
            },
            b'range': {b'context': addcontext, b'hunk': addhunk},
            b'other': {b'other': addother},
        }

    p = parser()
    fp = stringio()
    fp.write(b''.join(originalchunks))
    fp.seek(0)

    state = b'context'
    for newstate, data in scanpatch(fp):
        try:
            p.transitions[state][newstate](p, data)
        except KeyError:
            raise PatchError(
                b'unhandled transition: %s -> %s' % (state, newstate)
            )
        state = newstate
    del fp
    return p.finished()
1853 1852
1854 1853
def pathtransform(path, strip, prefix):
    '''turn a path from a patch into a path suitable for the repository

    'strip' leading path components (counted by '/' separators, runs of
    '/' count as one) are removed, then 'prefix' is prepended. prefix,
    if not empty, is expected to be normalized with a / at the end.
    Surrounding whitespace is trimmed from both returned parts.

    Returns (stripped components, path in repository); for example
    (b' a/b/c ', 2, b'd/e/') gives (b'a/b/', b'd/e/c').

    Raises PatchError if there are fewer than 'strip' components.
    '''
    if strip == 0:
        return b'', prefix + path.rstrip()
    end = len(path)
    pos = 0
    remaining = strip
    while remaining > 0:
        pos = path.find(b'/', pos)
        if pos == -1:
            raise PatchError(
                _(b"unable to strip away %d of %d dirs from %s")
                % (remaining, strip, path)
            )
        pos += 1
        # consume '//' in the path
        while pos < end - 1 and path[pos : pos + 1] == b'/':
            pos += 1
        remaining -= 1
    return path[:pos].lstrip(), prefix + path[pos:].rstrip()
1894 1893
1895 1894
def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
    """Build a patchmeta for a plain (non-git) patch hunk.

    Decides which file the hunk targets and whether it is a creation or
    a deletion, based on /dev/null markers, the hunk's declared ranges
    and which of the stripped paths exist in 'backend'.
    """
    nulla = afile_orig == b"/dev/null"
    nullb = bfile_orig == b"/dev/null"
    # a zero-length range against /dev/null marks creation/removal
    create = nulla and hunk.starta == 0 and hunk.lena == 0
    remove = nullb and hunk.startb == 0 and hunk.lenb == 0
    abase, afile = pathtransform(afile_orig, strip, prefix)
    gooda = not nulla and backend.exists(afile)
    bbase, bfile = pathtransform(bfile_orig, strip, prefix)
    if afile == bfile:
        goodb = gooda
    else:
        goodb = not nullb and backend.exists(bfile)
    # neither side exists and this is not a creation: likely a rename
    # target or an error; resolved below
    missing = not goodb and not gooda and not create

    # some diff programs apparently produce patches where the afile is
    # not /dev/null, but afile starts with bfile
    abasedir = afile[: afile.rfind(b'/') + 1]
    bbasedir = bfile[: bfile.rfind(b'/') + 1]
    if (
        missing
        and abasedir == bbasedir
        and afile.startswith(bfile)
        and hunk.starta == 0
        and hunk.lena == 0
    ):
        create = True
        missing = False

    # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
    # diff is between a file and its backup. In this case, the original
    # file should be patched (see original mpatch code).
    isbackup = abase == bbase and bfile.startswith(afile)
    fname = None
    if not missing:
        if gooda and goodb:
            if isbackup:
                fname = afile
            else:
                fname = bfile
        elif gooda:
            fname = afile

    if not fname:
        # fall back on the non-/dev/null side
        if not nullb:
            if isbackup:
                fname = afile
            else:
                fname = bfile
        elif not nulla:
            fname = afile
        else:
            raise PatchError(_(b"undefined source and destination files"))

    gp = patchmeta(fname)
    if create:
        gp.op = b'ADD'
    elif remove:
        gp.op = b'DELETE'
    return gp
1955 1954
1956 1955
def scanpatch(fp):
    """like patch.iterhunks, but yield different events

    - ('file', [header_lines + fromfile + tofile])
    - ('context', [context_lines])
    - ('hunk', [hunk_lines])
    - ('range', (-start,len, +start,len, proc))
    """
    lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
    lr = linereader(fp)

    def collect(first, keep):
        """gather lines from lr, starting with 'first', while keep() holds"""
        gathered = [first]
        for ln in iter(lr.readline, b''):
            if not keep(ln):
                # not ours: hand the line back for the next reader
                lr.push(ln)
                break
            gathered.append(ln)
        return gathered

    for line in iter(lr.readline, b''):
        if line.startswith(b'diff --git a/') or line.startswith(b'diff -r '):

            def notheader(ln):
                parts = ln.split(None, 1)
                return not parts or parts[0] not in (b'---', b'diff')

            hdr = collect(line, notheader)
            fromfile = lr.readline()
            if fromfile.startswith(b'---'):
                hdr += [fromfile, lr.readline()]
            else:
                lr.push(fromfile)
            yield b'file', hdr
        elif line.startswith(b' '):
            yield b'context', collect(
                line, lambda ln: ln.startswith((b' ', b'\\'))
            )
        elif line.startswith((b'-', b'+')):
            yield b'hunk', collect(
                line, lambda ln: ln.startswith((b'-', b'+', b'\\'))
            )
        else:
            m = lines_re.match(line)
            if m:
                yield b'range', m.groups()
            else:
                yield b'other', line
2006 2005
2007 2006
def scangitpatch(lr, firstline):
    """
    Git patches can emit:
    - rename a to b
    - change b
    - copy a to c
    - change c

    We cannot apply this sequence as-is, the renamed 'a' could not be
    found for it would have been renamed already. And we cannot copy
    from 'b' instead because 'b' would have been changed already. So
    we scan the git patch for copy and rename commands so we can
    perform the copies ahead of time.
    """
    try:
        pos = lr.fp.tell()
        fp = lr.fp
    except IOError:
        # unseekable input: slurp it into a rewindable buffer instead
        pos = 0
        fp = stringio(lr.fp.read())
    sublr = linereader(fp)
    sublr.push(firstline)
    patches = readgitpatch(sublr)
    # rewind so the main parsing pass sees the input from the start
    fp.seek(pos)
    return patches
2033 2032
2034 2033
def iterhunks(fp):
    """Read a patch and yield the following events:
    - ("file", afile, bfile, firsthunk): select a new target file.
    - ("hunk", hunk): a new hunk is ready to be applied, follows a
    "file" event.
    - ("git", gitchanges): current diff is in git format, gitchanges
    maps filenames to gitpatch records. Unique event.
    """
    afile = b""
    bfile = b""
    state = None
    hunknum = 0
    emitfile = newfile = False
    gitpatches = None

    # our states
    BFILE = 1
    # context is None until we know the flavour of the patch:
    # True for context diffs, False for unified diffs
    context = None
    lr = linereader(fp)

    for x in iter(lr.readline, b''):
        if state == BFILE and (
            (not context and x.startswith(b'@'))
            or (context is not False and x.startswith(b'***************'))
            or x.startswith(b'GIT binary patch')
        ):
            gp = None
            if gitpatches and gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
            if x.startswith(b'GIT binary patch'):
                h = binhunk(lr, gp.path)
            else:
                if context is None and x.startswith(b'***************'):
                    context = True
                h = hunk(x, hunknum + 1, lr, context)
            hunknum += 1
            if emitfile:
                # first hunk of a file: announce the file itself
                emitfile = False
                yield b'file', (afile, bfile, h, gp and gp.copy() or None)
            yield b'hunk', h
        elif x.startswith(b'diff --git a/'):
            m = gitre.match(x.rstrip(b' \r\n'))
            if not m:
                continue
            if gitpatches is None:
                # scan whole input for git metadata
                gitpatches = scangitpatch(lr, x)
                yield b'git', [
                    g.copy() for g in gitpatches if g.op in (b'COPY', b'RENAME')
                ]
                gitpatches.reverse()
            afile = b'a/' + m.group(1)
            bfile = b'b/' + m.group(2)
            # emit 'file' events (with no hunk) for the git patches that
            # come before the one matching the current file
            while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
                yield b'file', (
                    b'a/' + gp.path,
                    b'b/' + gp.path,
                    None,
                    gp.copy(),
                )
            if not gitpatches:
                raise PatchError(
                    _(b'failed to synchronize metadata for "%s"') % afile[2:]
                )
            newfile = True
        elif x.startswith(b'---'):
            # check for a unified diff
            l2 = lr.readline()
            if not l2.startswith(b'+++'):
                lr.push(l2)
                continue
            newfile = True
            context = False
            afile = parsefilename(x)
            bfile = parsefilename(l2)
        elif x.startswith(b'***'):
            # check for a context diff
            l2 = lr.readline()
            if not l2.startswith(b'---'):
                lr.push(l2)
                continue
            l3 = lr.readline()
            lr.push(l3)
            if not l3.startswith(b"***************"):
                lr.push(l2)
                continue
            newfile = True
            context = True
            afile = parsefilename(x)
            bfile = parsefilename(l2)

        if newfile:
            newfile = False
            emitfile = True
            state = BFILE
            hunknum = 0

    # flush any remaining git patches that had no hunks at all
    while gitpatches:
        gp = gitpatches.pop()
        yield b'file', (b'a/' + gp.path, b'b/' + gp.path, None, gp.copy())
2136 2135
2137 2136
def applybindelta(binchunk, data):
    """Apply a binary delta hunk
    The algorithm used is the algorithm from git's patch-delta.c
    """

    def deltahead(chunk):
        # skip a variable-length size header: bytes are consumed up to
        # and including the first one with the high bit clear
        consumed = 0
        for byte in bytearray(chunk):
            consumed += 1
            if not byte & 0x80:
                break
        return consumed

    # drop the source and destination size headers
    binchunk = binchunk[deltahead(binchunk) :]
    binchunk = binchunk[deltahead(binchunk) :]
    out = b""
    pos = 0
    end = len(binchunk)
    while pos < end:
        cmd = ord(binchunk[pos : pos + 1])
        pos += 1
        if cmd & 0x80:
            # copy command: the low seven bits of cmd signal which
            # little-endian offset/size bytes follow
            offset = 0
            size = 0
            for bit, shift in ((0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24)):
                if cmd & bit:
                    offset |= ord(binchunk[pos : pos + 1]) << shift
                    pos += 1
            for bit, shift in ((0x10, 0), (0x20, 8), (0x40, 16)):
                if cmd & bit:
                    size |= ord(binchunk[pos : pos + 1]) << shift
                    pos += 1
            if size == 0:
                # a zero size encodes 0x10000, as in git's patch-delta.c
                size = 0x10000
            out += data[offset : offset + size]
        elif cmd != 0:
            # literal insert of the next 'cmd' bytes from the delta
            out += binchunk[pos : pos + cmd]
            pos += cmd
        else:
            raise PatchError(_(b'unexpected delta opcode 0'))
    return out
2195 2194
2196 2195
def applydiff(ui, fp, backend, store, strip=1, prefix=b'', eolmode=b'strict'):
    """Reads a patch from fp and tries to apply it.

    Returns 0 for a clean patch, -1 if any rejects were found and 1 if
    there was any fuzz.

    If 'eolmode' is 'strict', the patch content and patched file are
    read in binary mode. Otherwise, line endings are ignored when
    patching then normalized according to 'eolmode'.

    'fp' is left open; closing it is the caller's responsibility.
    """
    # delegate to _applydiff with the default patchfile factory
    return _applydiff(
        ui,
        fp,
        patchfile,
        backend,
        store,
        strip=strip,
        prefix=prefix,
        eolmode=eolmode,
    )
2217 2216
2218 2217
def _canonprefix(repo, prefix):
    # Canonicalize a user-supplied prefix against the repository root
    # and normalize it to end with '/'; a falsy prefix passes through.
    if not prefix:
        return prefix
    canon = pathutil.canonpath(repo.root, repo.getcwd(), prefix)
    if canon != b'':
        canon += b'/'
    return canon
2225 2224
2226 2225
def _applydiff(
    ui, fp, patcher, backend, store, strip=1, prefix=b'', eolmode=b'strict'
):
    """Worker for applydiff(): drive iterhunks() over 'fp' and apply the
    events through 'patcher'/'backend'.

    Returns 0 on clean apply, 1 if there was fuzz, -1 if any hunks were
    rejected.
    """
    prefix = _canonprefix(backend.repo, prefix)

    def pstrip(p):
        # strip leading path components and prepend the prefix
        return pathtransform(p, strip - 1, prefix)[1]

    rejects = 0
    err = 0
    current_file = None

    for state, values in iterhunks(fp):
        if state == b'hunk':
            if not current_file:
                continue
            ret = current_file.apply(values)
            if ret > 0:
                err = 1
        elif state == b'file':
            if current_file:
                rejects += current_file.close()
                current_file = None
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pstrip(gp.path)
                if gp.oldpath:
                    gp.oldpath = pstrip(gp.oldpath)
            else:
                gp = makepatchmeta(
                    backend, afile, bfile, first_hunk, strip, prefix
                )
            if gp.op == b'RENAME':
                backend.unlink(gp.oldpath)
            if not first_hunk:
                # hunkless git metadata change (create/delete/copy/mode)
                if gp.op == b'DELETE':
                    backend.unlink(gp.path)
                    continue
                data, mode = None, None
                if gp.op in (b'RENAME', b'COPY'):
                    data, mode = store.getfile(gp.oldpath)[:2]
                    if data is None:
                        # This means that the old path does not exist
                        raise PatchError(
                            _(b"source file '%s' does not exist") % gp.oldpath
                        )
                if gp.mode:
                    mode = gp.mode
                    if gp.op == b'ADD':
                        # Added files without content have no hunk and
                        # must be created
                        data = b''
                if data or mode:
                    if gp.op in (b'ADD', b'RENAME', b'COPY') and backend.exists(
                        gp.path
                    ):
                        raise PatchError(
                            _(
                                b"cannot create %s: destination "
                                b"already exists"
                            )
                            % gp.path
                        )
                    backend.setfile(gp.path, data, mode, gp.oldpath)
                continue
            try:
                current_file = patcher(ui, gp, backend, store, eolmode=eolmode)
            except PatchError as inst:
                # py3 fix: str(inst) is unicode, which cannot be
                # concatenated with b'\n'; force it to bytes first
                ui.warn(stringutil.forcebytestr(inst) + b'\n')
                current_file = None
                rejects += 1
                continue
        elif state == b'git':
            # stash pristine copies of the copy/rename sources, since
            # they may be modified by a later file event
            for gp in values:
                path = pstrip(gp.oldpath)
                data, mode = backend.getfile(path)
                if data is None:
                    # The error ignored here will trigger a getfile()
                    # error in a place more appropriate for error
                    # handling, and will not interrupt the patching
                    # process.
                    pass
                else:
                    store.setfile(path, data, mode)
        else:
            raise error.Abort(_(b'unsupported parser state: %s') % state)

    if current_file:
        rejects += current_file.close()

    if rejects:
        return -1
    return err
2320 2319
2321 2320
def _externalpatch(ui, repo, patcher, patchname, strip, files, similarity):
    """use <patcher> to apply <patchname> to the working directory.
    returns whether patch was applied with fuzz factor."""

    fuzz = False
    args = []
    cwd = repo.root
    if cwd:
        args.append(b'-d %s' % procutil.shellquote(cwd))
    cmd = b'%s %s -p%d < %s' % (
        patcher,
        b' '.join(args),
        strip,
        procutil.shellquote(patchname),
    )
    ui.debug(b'Using external patch tool: %s\n' % cmd)
    fp = procutil.popen(cmd, b'rb')
    # initialize before the loop: the external tool may report fuzz or
    # failure before any 'patching file' line, which would otherwise
    # raise UnboundLocalError on 'pf'/'printed_file'
    pf = b''
    printed_file = False
    try:
        for line in util.iterfile(fp):
            line = line.rstrip()
            ui.note(line + b'\n')
            if line.startswith(b'patching file '):
                pf = util.parsepatchoutput(line)
                printed_file = False
                files.add(pf)
            elif line.find(b'with fuzz') >= 0:
                fuzz = True
                if not printed_file:
                    ui.warn(pf + b'\n')
                    printed_file = True
                ui.warn(line + b'\n')
            elif line.find(b'saving rejects to file') >= 0:
                ui.warn(line + b'\n')
            elif line.find(b'FAILED') >= 0:
                if not printed_file:
                    ui.warn(pf + b'\n')
                    printed_file = True
                ui.warn(line + b'\n')
    finally:
        if files:
            scmutil.marktouched(repo, files, similarity)
        # fp.close() reaps the child; a nonzero status means failure
        code = fp.close()
        if code:
            raise PatchError(
                _(b"patch command failed: %s") % procutil.explainexit(code)
            )
    return fuzz
2369 2368
2370 2369
def patchbackend(
    ui, backend, patchobj, strip, prefix, files=None, eolmode=b'strict'
):
    # Apply 'patchobj' (a file name, or an already-open file object)
    # through 'backend'. Touched file names are accumulated in 'files'.
    # Returns True if the patch applied with fuzz, False if it applied
    # cleanly; raises PatchError if it failed to apply.
    if files is None:
        files = set()
    if eolmode is None:
        eolmode = ui.config(b'patch', b'eol')
    if eolmode.lower() not in eolmodes:
        raise error.Abort(_(b'unsupported line endings type: %s') % eolmode)
    eolmode = eolmode.lower()

    store = filestore()
    try:
        fp = open(patchobj, b'rb')
    except TypeError:
        # not a path: assume an already-open file object
        fp = patchobj
    try:
        ret = applydiff(
            ui, fp, backend, store, strip=strip, prefix=prefix, eolmode=eolmode
        )
    finally:
        # only close the file if we opened it ourselves
        if fp != patchobj:
            fp.close()
        files.update(backend.close())
        store.close()
    if ret < 0:
        raise PatchError(_(b'patch failed to apply'))
    return ret > 0
2399 2398
2400 2399
def internalpatch(
    ui,
    repo,
    patchobj,
    strip,
    prefix=b'',
    files=None,
    eolmode=b'strict',
    similarity=0,
):
    """Apply <patchobj> to the working directory with the builtin patch
    code. Returns whether the patch was applied with fuzz factor."""
    return patchbackend(
        ui,
        workingbackend(ui, repo, similarity),
        patchobj,
        strip,
        prefix,
        files,
        eolmode,
    )
2415 2414
2416 2415
def patchrepo(
    ui, repo, ctx, store, patchobj, strip, prefix, files=None, eolmode=b'strict'
):
    """Apply <patchobj> through a repobackend built from repo/ctx/store."""
    return patchbackend(
        ui,
        repobackend(ui, repo, ctx, store),
        patchobj,
        strip,
        prefix,
        files,
        eolmode,
    )
2422 2421
2423 2422
def patch(
    ui,
    repo,
    patchname,
    strip=1,
    prefix=b'',
    files=None,
    eolmode=b'strict',
    similarity=0,
):
    """Apply <patchname> to the working directory.

    'eolmode' specifies how end of lines should be handled. It can be:
    - 'strict': inputs are read in binary mode, EOLs are preserved
    - 'crlf': EOLs are ignored when patching and reset to CRLF
    - 'lf': EOLs are ignored when patching and reset to LF
    - None: get it from user settings, default to 'strict'
    'eolmode' is ignored when using an external patcher program.

    Returns whether patch was applied with fuzz factor.
    """
    # an explicitly configured external patch tool takes precedence
    externalpatcher = ui.config(b'ui', b'patch')
    if files is None:
        files = set()
    if externalpatcher:
        return _externalpatch(
            ui, repo, externalpatcher, patchname, strip, files, similarity
        )
    return internalpatch(
        ui, repo, patchname, strip, prefix, files, eolmode, similarity
    )
2455 2454
2456 2455
def changedfiles(ui, repo, patchpath, strip=1, prefix=b''):
    """Return the set of repository paths touched by the patch at
    'patchpath' (including rename sources), without applying it."""
    backend = fsbackend(ui, repo.root)
    prefix = _canonprefix(repo, prefix)
    changed = set()
    with open(patchpath, b'rb') as fp:
        for state, values in iterhunks(fp):
            if state == b'file':
                afile, bfile, first_hunk, gp = values
                if not gp:
                    gp = makepatchmeta(
                        backend, afile, bfile, first_hunk, strip, prefix
                    )
                else:
                    gp.path = pathtransform(gp.path, strip - 1, prefix)[1]
                    if gp.oldpath:
                        gp.oldpath = pathtransform(
                            gp.oldpath, strip - 1, prefix
                        )[1]
                changed.add(gp.path)
                if gp.op == b'RENAME':
                    changed.add(gp.oldpath)
            elif state not in (b'hunk', b'git'):
                raise error.Abort(_(b'unsupported parser state: %s') % state)
    return changed
2481 2480
2482 2481
class GitDiffRequired(Exception):
    # Signals that the git diff format is required to represent a
    # change (name-based; the raising sites are outside this chunk --
    # confirm at callers).
    pass
2485 2484
2486 2485
# re-export the diff options constructors from diffutil; note that
# 'diffopts' is deliberately bound to diffallopts, not a distinct helper
diffopts = diffutil.diffallopts
diffallopts = diffutil.diffallopts
difffeatureopts = diffutil.difffeatureopts
2490 2489
2491 2490
def diff(
    repo,
    node1=None,
    node2=None,
    match=None,
    changes=None,
    opts=None,
    losedatafn=None,
    pathfn=None,
    copy=None,
    copysourcematch=None,
    hunksfilterfn=None,
):
    '''yields diff of changes to files between two nodes, or node and
    working directory.

    if node1 is None, use first dirstate parent instead.
    if node2 is None, compare node1 with working directory.

    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
    every time some change cannot be represented with the current
    patch format. Return False to upgrade to git patch format, True to
    accept the loss or raise an exception to abort the diff. It is
    called with the name of current file being diffed as 'fn'. If set
    to None, patches will always be upgraded to git format when
    necessary.

    pathfn is forwarded to diffhunks(); presumably it maps repository
    file names for display purposes (it replaced the former
    'prefix'/'relroot' parameters this docstring used to describe) --
    confirm in diffhunks.

    copy, if not empty, should contain mappings {dst@y: src@x} of copy
    information.

    if copysourcematch is not None, then copy sources will be filtered by this
    matcher

    hunksfilterfn, if not None, should be a function taking a filectx and
    hunks generator that may yield filtered hunks.
    '''
    if not node1 and not node2:
        node1 = repo.dirstate.p1()

    ctx1 = repo[node1]
    ctx2 = repo[node2]

    for fctx1, fctx2, hdr, hunks in diffhunks(
        repo,
        ctx1=ctx1,
        ctx2=ctx2,
        match=match,
        changes=changes,
        opts=opts,
        losedatafn=losedatafn,
        pathfn=pathfn,
        copy=copy,
        copysourcematch=copysourcematch,
    ):
        if hunksfilterfn is not None:
            # If the file has been removed, fctx2 is None; but this should
            # not occur here since we catch removed files early in
            # logcmdutil.getlinerangerevs() for 'hg log -L'.
            assert (
                fctx2 is not None
            ), b'fctx2 unexpectly None in diff hunks filtering'
            hunks = hunksfilterfn(fctx2, hunks)
        # flatten [(range, lines), ...] into the raw patch text
        text = b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
        if hdr and (text or len(hdr) > 1):
            yield b'\n'.join(hdr) + b'\n'
        if text:
            yield text
2565 2564
2566 2565
def diffhunks(
    repo,
    ctx1,
    ctx2,
    match=None,
    changes=None,
    opts=None,
    losedatafn=None,
    pathfn=None,
    copy=None,
    copysourcematch=None,
):
    """Yield diff of changes to files in the form of (`header`, `hunks`) tuples
    where `header` is a list of diff headers and `hunks` is an iterable of
    (`hunkrange`, `hunklines`) tuples.

    See diff() for the meaning of parameters.
    """

    if opts is None:
        opts = mdiff.defaultopts

    def lrugetfilectx():
        # Build a getfilectx callable backed by a small LRU cache of
        # filelogs (at most ~20 entries), so repeated lookups of the same
        # file reuse its filelog instead of re-opening it.
        cache = {}
        order = collections.deque()

        def getfilectx(f, ctx):
            fctx = ctx.filectx(f, filelog=cache.get(f))
            if f not in cache:
                if len(cache) > 20:
                    # evict the least recently used entry
                    del cache[order.popleft()]
                cache[f] = fctx.filelog()
            else:
                # move f to the most-recently-used end
                order.remove(f)
            order.append(f)
            return fctx

        return getfilectx

    getfilectx = lrugetfilectx()

    if not changes:
        changes = ctx1.status(ctx2, match=match)
    # only the first three status fields matter for diffing
    modified, added, removed = changes[:3]

    if not modified and not added and not removed:
        return []

    if repo.ui.debugflag:
        hexfunc = hex
    else:
        hexfunc = short
    # revision identifiers used in non-git 'diff -r ... -r ...' header lines
    revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]

    if copy is None:
        copy = {}
        # copy detection is only needed when the output can express it
        # (git format) or when we may upgrade to git format
        if opts.git or opts.upgrade:
            copy = copies.pathcopies(ctx1, ctx2, match=match)

    if copysourcematch:
        # filter out copies where source side isn't inside the matcher
        # (copies.pathcopies() already filtered out the destination)
        copy = {
            dst: src
            for dst, src in pycompat.iteritems(copy)
            if copysourcematch(src)
        }

    modifiedset = set(modified)
    addedset = set(added)
    removedset = set(removed)
    for f in modified:
        if f not in ctx1:
            # Fix up added, since merged-in additions appear as
            # modifications during merges
            modifiedset.remove(f)
            addedset.add(f)
    for f in removed:
        if f not in ctx1:
            # Merged-in additions that are then removed are reported as removed.
            # They are not in ctx1, so We don't want to show them in the diff.
            removedset.remove(f)
    modified = sorted(modifiedset)
    added = sorted(addedset)
    removed = sorted(removedset)
    for dst, src in list(copy.items()):
        if src not in ctx1:
            # Files merged in during a merge and then copied/renamed are
            # reported as copies. We want to show them in the diff as additions.
            del copy[dst]

    # warm any remote/lazy file storage for everything we are about to read
    prefetchmatch = scmutil.matchfiles(
        repo, list(modifiedset | addedset | removedset)
    )
    scmutil.prefetchfiles(repo, [ctx1.rev(), ctx2.rev()], prefetchmatch)

    def difffn(opts, losedata):
        # run the actual diff generation with the given (possibly
        # git-upgraded) options and losedata callback
        return trydiff(
            repo,
            revs,
            ctx1,
            ctx2,
            modified,
            added,
            removed,
            copy,
            getfilectx,
            opts,
            losedata,
            pathfn,
        )

    if opts.upgrade and not opts.git:
        try:

            def losedata(fn):
                # caller's losedatafn decides; absent or falsy means
                # "do not accept the loss" -> switch to git format
                if not losedatafn or not losedatafn(fn=fn):
                    raise GitDiffRequired

            # Buffer the whole output until we are sure it can be generated
            return list(difffn(opts.copy(git=False), losedata))
        except GitDiffRequired:
            # redo everything in git format; no losedata callback needed
            return difffn(opts.copy(git=True), None)
    else:
        return difffn(opts, None)
2692 2691
2693 2692
def diffsinglehunk(hunklines):
    """yield (text, label) tokens for the -/+ lines of a single hunk

    Tab runs get the diff.tab label, trailing whitespace gets
    diff.trailingwhitespace, and the line terminator is unlabelled.
    """
    for rawline in hunklines:
        # strip the end-of-line bytes, then the trailing whitespace
        withouteol = rawline.rstrip(b'\r\n')
        content = withouteol.rstrip()
        marker = rawline[0:1]
        if marker == b'-':
            bodylabel = b'diff.deleted'
        elif marker == b'+':
            bodylabel = b'diff.inserted'
        else:
            raise error.ProgrammingError(b'unexpected hunk line: %s' % rawline)
        for piece in tabsplitter.findall(content):
            if piece.startswith(b'\t'):
                yield (piece, b'diff.tab')
            else:
                yield (piece, bodylabel)
        trailing = withouteol[len(content) :]
        if trailing:
            yield (trailing, b'diff.trailingwhitespace')
        eol = rawline[len(withouteol) :]
        if eol:
            yield (eol, b'')
2717 2716
2718 2717
def diffsinglehunkinline(hunklines):
    """yield tokens for a list of lines in a single hunk, with inline colors"""
    # prepare deleted, and inserted content
    a = b''
    b = b''
    for line in hunklines:
        if line[0:1] == b'-':
            a += line[1:]
        elif line[0:1] == b'+':
            b += line[1:]
        else:
            raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
    # fast path: if either side is empty, use diffsinglehunk
    if not a or not b:
        for t in diffsinglehunk(hunklines):
            yield t
        return
    # re-split the content into words
    al = wordsplitter.findall(a)
    bl = wordsplitter.findall(b)
    # re-arrange the words to lines since the diff algorithm is line-based
    aln = [s if s == b'\n' else s + b'\n' for s in al]
    bln = [s if s == b'\n' else s + b'\n' for s in bl]
    an = b''.join(aln)
    bn = b''.join(bln)
    # run the diff algorithm, prepare atokens and btokens
    atokens = []
    btokens = []
    blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
    for (a1, a2, b1, b2), btype in blocks:
        # '!' marks a changed block; anything else is unchanged context
        changed = btype == b'!'
        for token in mdiff.splitnewlines(b''.join(al[a1:a2])):
            atokens.append((changed, token))
        for token in mdiff.splitnewlines(b''.join(bl[b1:b2])):
            btokens.append((changed, token))

    # yield deleted tokens, then inserted ones
    for prefix, label, tokens in [
        (b'-', b'diff.deleted', atokens),
        (b'+', b'diff.inserted', btokens),
    ]:
        # emit the '-'/'+' prefix token at the start of each output line
        nextisnewline = True
        for changed, token in tokens:
            if nextisnewline:
                yield (prefix, label)
                nextisnewline = False
            # special handling line end
            isendofline = token.endswith(b'\n')
            if isendofline:
                chomp = token[:-1]  # chomp
                if chomp.endswith(b'\r'):
                    chomp = chomp[:-1]
                endofline = token[len(chomp) :]
                token = chomp.rstrip()  # detect spaces at the end
                endspaces = chomp[len(token) :]
            # scan tabs
            for maybetab in tabsplitter.findall(token):
                if b'\t' == maybetab[0:1]:
                    currentlabel = b'diff.tab'
                else:
                    # word-level change state picks the .changed/.unchanged
                    # sub-label for inline highlighting
                    if changed:
                        currentlabel = label + b'.changed'
                    else:
                        currentlabel = label + b'.unchanged'
                yield (maybetab, currentlabel)
            if isendofline:
                if endspaces:
                    yield (endspaces, b'diff.trailingwhitespace')
                yield (endofline, b'')
                nextisnewline = True
2789 2788
2790 2789
def difflabel(func, *args, **kw):
    '''yields 2-tuples of (output, label) based on the output of func()'''
    # kwargs keys are native str on both py2 and py3; r'' keeps the
    # literal native on both
    if kw.get(r'opts') and kw[r'opts'].worddiff:
        dodiffhunk = diffsinglehunkinline
    else:
        dodiffhunk = diffsinglehunk
    # (prefix, label) pairs for lines inside a file header block
    headprefixes = [
        (b'diff', b'diff.diffline'),
        (b'copy', b'diff.extended'),
        (b'rename', b'diff.extended'),
        (b'old', b'diff.extended'),
        (b'new', b'diff.extended'),
        (b'deleted', b'diff.extended'),
        (b'index', b'diff.extended'),
        (b'similarity', b'diff.extended'),
        (b'---', b'diff.file_a'),
        (b'+++', b'diff.file_b'),
    ]
    # (prefix, label) pairs for lines inside hunk text
    textprefixes = [
        (b'@', b'diff.hunk'),
        # - and + are handled by diffsinglehunk
    ]
    head = False

    # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
    hunkbuffer = []

    def consumehunkbuffer():
        # flush the buffered -/+ lines through the hunk tokenizer
        if hunkbuffer:
            for token in dodiffhunk(hunkbuffer):
                yield token
            hunkbuffer[:] = []

    for chunk in func(*args, **kw):
        lines = chunk.split(b'\n')
        linecount = len(lines)
        for i, line in enumerate(lines):
            # track whether we are in a header block: a header starts at
            # any line not beginning with hunk-body characters and ends
            # at the next '@@' line
            if head:
                if line.startswith(b'@'):
                    head = False
            else:
                if line and not line.startswith(
                    (b' ', b'+', b'-', b'@', b'\\')
                ):
                    head = True
            diffline = False
            if not head and line and line.startswith((b'+', b'-')):
                diffline = True

            prefixes = textprefixes
            if head:
                prefixes = headprefixes
            if diffline:
                # buffered
                bufferedline = line
                if i + 1 < linecount:
                    # not the last line of the chunk: restore the newline
                    # removed by split()
                    bufferedline += b"\n"
                hunkbuffer.append(bufferedline)
            else:
                # unbuffered
                for token in consumehunkbuffer():
                    yield token
                stripline = line.rstrip()
                for prefix, label in prefixes:
                    if stripline.startswith(prefix):
                        yield (stripline, label)
                        if line != stripline:
                            yield (
                                line[len(stripline) :],
                                b'diff.trailingwhitespace',
                            )
                        break
                else:
                    # no known prefix: emit unlabelled
                    yield (line, b'')
                if i + 1 < linecount:
                    yield (b'\n', b'')
    # flush any hunk still buffered at end of input
    for token in consumehunkbuffer():
        yield token
2869 2868
2870 2869
def diffui(*args, **kw):
    '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
    # feed diff() through difflabel() so every chunk comes back labelled
    labelled = difflabel(diff, *args, **kw)
    return labelled
2874 2873
2875 2874
2876 2875 def _filepairs(modified, added, removed, copy, opts):
2877 2876 '''generates tuples (f1, f2, copyop), where f1 is the name of the file
2878 2877 before and f2 is the the name after. For added files, f1 will be None,
2879 2878 and for removed files, f2 will be None. copyop may be set to None, 'copy'
2880 2879 or 'rename' (the latter two only if opts.git is set).'''
2881 2880 gone = set()
2882 2881
2883 2882 copyto = dict([(v, k) for k, v in copy.items()])
2884 2883
2885 2884 addedset, removedset = set(added), set(removed)
2886 2885
2887 2886 for f in sorted(modified + added + removed):
2888 2887 copyop = None
2889 2888 f1, f2 = f, f
2890 2889 if f in addedset:
2891 2890 f1 = None
2892 2891 if f in copy:
2893 2892 if opts.git:
2894 2893 f1 = copy[f]
2895 2894 if f1 in removedset and f1 not in gone:
2896 2895 copyop = b'rename'
2897 2896 gone.add(f1)
2898 2897 else:
2899 2898 copyop = b'copy'
2900 2899 elif f in removedset:
2901 2900 f2 = None
2902 2901 if opts.git:
2903 2902 # have we already reported a copy above?
2904 2903 if (
2905 2904 f in copyto
2906 2905 and copyto[f] in addedset
2907 2906 and copy[copyto[f]] == f
2908 2907 ):
2909 2908 continue
2910 2909 yield f1, f2, copyop
2911 2910
2912 2911
def trydiff(
    repo,
    revs,
    ctx1,
    ctx2,
    modified,
    added,
    removed,
    copy,
    getfilectx,
    opts,
    losedatafn,
    pathfn,
):
    '''given input data, generate a diff and yield it in blocks

    If generating a diff would lose data like flags or binary data and
    losedatafn is not None, it will be called.

    pathfn is applied to every path in the diff output.

    Yields (fctx1, fctx2, header, hunks) tuples, one per file pair.
    '''

    def gitindex(text):
        # git-style blob id: sha1 of 'blob <len>\0<content>'
        if not text:
            text = b""
        l = len(text)
        s = hashlib.sha1(b'blob %d\0' % l)
        s.update(text)
        return hex(s.digest())

    if opts.noprefix:
        aprefix = bprefix = b''
    else:
        aprefix = b'a/'
        bprefix = b'b/'

    def diffline(f, revs):
        # non-git header line: 'diff -r REV1 -r REV2 file'
        revinfo = b' '.join([b"-r %s" % rev for rev in revs])
        return b'diff %s %s' % (revinfo, f)

    def isempty(fctx):
        return fctx is None or fctx.size() == 0

    date1 = dateutil.datestr(ctx1.date())
    date2 = dateutil.datestr(ctx2.date())

    # flag character -> git file mode
    gitmode = {b'l': b'120000', b'x': b'100755', b'': b'100644'}

    if not pathfn:
        pathfn = lambda f: f

    for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
        content1 = None
        content2 = None
        fctx1 = None
        fctx2 = None
        flag1 = None
        flag2 = None
        if f1:
            fctx1 = getfilectx(f1, ctx1)
            if opts.git or losedatafn:
                flag1 = ctx1.flags(f1)
        if f2:
            fctx2 = getfilectx(f2, ctx2)
            if opts.git or losedatafn:
                flag2 = ctx2.flags(f2)
        # if binary is True, output "summary" or "base85", but not "text diff"
        if opts.text:
            binary = False
        else:
            binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)

        # plain (non-git) format cannot express any of the following;
        # give the caller a chance to upgrade or accept the loss
        if losedatafn and not opts.git:
            if (
                binary
                or
                # copy/rename
                f2 in copy
                or
                # empty file creation
                (not f1 and isempty(fctx2))
                or
                # empty file deletion
                (isempty(fctx1) and not f2)
                or
                # create with flags
                (not f1 and flag2)
                or
                # change flags
                (f1 and f2 and flag1 != flag2)
            ):
                losedatafn(f2 or f1)

        path1 = pathfn(f1 or f2)
        path2 = pathfn(f2 or f1)
        header = []
        if opts.git:
            header.append(
                b'diff --git %s%s %s%s' % (aprefix, path1, bprefix, path2)
            )
            if not f1:  # added
                header.append(b'new file mode %s' % gitmode[flag2])
            elif not f2:  # removed
                header.append(b'deleted file mode %s' % gitmode[flag1])
            else:  # modified/copied/renamed
                mode1, mode2 = gitmode[flag1], gitmode[flag2]
                if mode1 != mode2:
                    header.append(b'old mode %s' % mode1)
                    header.append(b'new mode %s' % mode2)
                if copyop is not None:
                    if opts.showsimilarity:
                        sim = similar.score(ctx1[path1], ctx2[path2]) * 100
                        header.append(b'similarity index %d%%' % sim)
                    header.append(b'%s from %s' % (copyop, path1))
                    header.append(b'%s to %s' % (copyop, path2))
        elif revs:
            header.append(diffline(path1, revs))

        #  fctx.is  | diffopts                | what to   | is fctx.data()
        #  binary() | text nobinary git index | output?   | outputted?
        # ------------------------------------|----------------------------
        #  yes      | no   no       no  *     | summary   | no
        #  yes      | no   no       yes *     | base85    | yes
        #  yes      | no   yes      no  *     | summary   | no
        #  yes      | no   yes      yes 0     | summary   | no
        #  yes      | no   yes      yes >0    | summary   | semi [1]
        #  yes      | yes  *        *   *     | text diff | yes
        #  no       | *    *        *   *     | text diff | yes
        # [1]: hash(fctx.data()) is outputted. so fctx.data() cannot be faked
        if binary and (
            not opts.git or (opts.git and opts.nobinary and not opts.index)
        ):
            # fast path: no binary content will be displayed, content1 and
            # content2 are only used for equivalent test. cmp() could have a
            # fast path.
            if fctx1 is not None:
                content1 = b'\0'
            if fctx2 is not None:
                if fctx1 is not None and not fctx1.cmp(fctx2):
                    content2 = b'\0'  # not different
                else:
                    content2 = b'\0\0'
        else:
            # normal path: load contents
            if fctx1 is not None:
                content1 = fctx1.data()
            if fctx2 is not None:
                content2 = fctx2.data()

        if binary and opts.git and not opts.nobinary:
            text = mdiff.b85diff(content1, content2)
            if text:
                header.append(
                    b'index %s..%s' % (gitindex(content1), gitindex(content2))
                )
            hunks = ((None, [text]),)
        else:
            if opts.git and opts.index > 0:
                # truncated blob ids, as 'git diff' does with diff.indexlen
                flag = flag1
                if flag is None:
                    flag = flag2
                header.append(
                    b'index %s..%s %s'
                    % (
                        gitindex(content1)[0 : opts.index],
                        gitindex(content2)[0 : opts.index],
                        gitmode[flag],
                    )
                )

            uheaders, hunks = mdiff.unidiff(
                content1,
                date1,
                content2,
                date2,
                path1,
                path2,
                binary=binary,
                opts=opts,
            )
            header.extend(uheaders)
        yield fctx1, fctx2, header, hunks
3095 3094
3096 3095
def diffstatsum(stats):
    """aggregate (filename, adds, removes, isbinary) diffstat tuples

    Returns (maxfilewidth, maxtotal, addtotal, removetotal, hasbinary).
    """
    maxfile = 0
    maxtotal = 0
    addtotal = 0
    removetotal = 0
    hasbinary = False
    for filename, adds, removes, isbinary in stats:
        # widths use display columns, not byte length
        width = encoding.colwidth(filename)
        if width > maxfile:
            maxfile = width
        total = adds + removes
        if total > maxtotal:
            maxtotal = total
        addtotal += adds
        removetotal += removes
        if isbinary:
            hasbinary = True

    return maxfile, maxtotal, addtotal, removetotal, hasbinary
3107 3106
3108 3107
def diffstatdata(lines):
    """parse diff output lines into [(filename, adds, removes, isbinary)]"""
    diffre = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')

    results = []
    filename, adds, removes, isbinary = None, 0, 0, False

    def flush():
        # record the file we have been accumulating stats for, if any
        if filename:
            results.append((filename, adds, removes, isbinary))

    # inheader is True while inside the header portion of a file diff;
    # this keeps '---'/'+++' header lines out of the add/remove counts
    inheader = False

    for line in lines:
        if line.startswith(b'diff'):
            flush()
            # starting a new file diff: reset counters and header state
            inheader = True
            adds = removes = 0
            isbinary = False
            if line.startswith(b'diff --git a/'):
                filename = gitre.search(line).group(2)
            elif line.startswith(b'diff -r'):
                # format: "diff -r ... -r ... filename"
                filename = diffre.search(line).group(1)
        elif line.startswith(b'@@'):
            inheader = False
        elif not inheader and line.startswith(b'+'):
            adds += 1
        elif not inheader and line.startswith(b'-'):
            removes += 1
        elif line.startswith((b'GIT binary patch', b'Binary file')):
            isbinary = True
        elif line.startswith(b'rename from'):
            filename = line[12:]
        elif line.startswith(b'rename to'):
            filename += b' => %s' % line[10:]
    flush()
    return results
3152 3151
3153 3152
def diffstat(lines, width=80):
    """render diff output lines as a diffstat summary within *width* columns"""
    stats = diffstatdata(lines)
    maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)

    # reserve enough columns for the biggest count; 'Bin' needs three
    countwidth = len(str(maxtotal))
    if hasbinary and countwidth < 3:
        countwidth = 3
    # at least 10 columns of graph, even on narrow terminals
    graphwidth = max(width - countwidth - maxname - 6, 10)

    def scale(i):
        if maxtotal <= graphwidth:
            return i
        # If diffstat runs out of room it doesn't print anything,
        # which isn't very useful, so always print at least one + or -
        # if there were at least some changes.
        return max(i * graphwidth // maxtotal, int(bool(i)))

    output = []
    for filename, adds, removes, isbinary in stats:
        count = b'Bin' if isbinary else b'%d' % (adds + removes)
        padding = b' ' * (maxname - encoding.colwidth(filename))
        output.append(
            b' %s%s | %*s %s%s\n'
            % (
                filename,
                padding,
                countwidth,
                count,
                b'+' * scale(adds),
                b'-' * scale(removes),
            )
        )

    if stats:
        output.append(
            _(b' %d files changed, %d insertions(+), %d deletions(-)\n')
            % (len(stats), totaladds, totalremoves)
        )

    return b''.join(output)
3200 3199
3201 3200
def diffstatui(*args, **kw):
    '''like diffstat(), but yields 2-tuples of (output, label) for
    ui.write()
    '''

    for line in diffstat(*args, **kw).splitlines():
        # graph lines end in '+'/'-' markers; split the graph off so the
        # inserted and deleted runs can be labelled independently
        if line and line[-1] in b'+-':
            name, graph = line.rsplit(b' ', 1)
            yield (name + b' ', b'')
            for pattern, label in (
                (br'\++', b'diffstat.inserted'),
                (br'-+', b'diffstat.deleted'),
            ):
                m = re.search(pattern, graph)
                if m:
                    yield (m.group(0), label)
        else:
            yield (line, b'')
        yield (b'\n', b'')
General Comments 0
You need to be logged in to leave comments. Login now