##// END OF EJS Templates
convert: stop using the `pycompat.open()` shim
Matt Harbison -
r53272:bb07b1ca default
parent child Browse files
Show More
@@ -1,620 +1,619
1 1 # common.py - common code for the convert extension
2 2 #
3 3 # Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 import base64
11 11 import os
12 12 import pickle
13 13 import re
14 14 import shlex
15 15 import subprocess
16 16 import typing
17 17
18 18 from typing import (
19 19 Any,
20 20 AnyStr,
21 21 Optional,
22 22 )
23 23
24 24 from mercurial.i18n import _
25 from mercurial.pycompat import open
26 25 from mercurial import (
27 26 encoding,
28 27 error,
29 28 phases,
30 29 pycompat,
31 30 util,
32 31 )
33 32 from mercurial.utils import (
34 33 dateutil,
35 34 procutil,
36 35 )
37 36
38 37 if typing.TYPE_CHECKING:
39 38 from typing import (
40 39 overload,
41 40 )
42 41 from mercurial import (
43 42 ui as uimod,
44 43 )
45 44
# Module-level alias so classes below can simply write ``@propertycache``.
propertycache = util.propertycache
47 46
48 47
49 48 if typing.TYPE_CHECKING:
50 49
51 50 @overload
52 51 def _encodeornone(d: str) -> bytes:
53 52 pass
54 53
55 54 @overload
56 55 def _encodeornone(d: None) -> None:
57 56 pass
58 57
59 58
60 59 def _encodeornone(d):
61 60 if d is None:
62 61 return
63 62 return d.encode('latin1')
64 63
65 64
66 65 class _shlexpy3proxy:
67 66 def __init__(self, l: shlex.shlex) -> None:
68 67 self._l = l
69 68
70 69 def __iter__(self):
71 70 return (_encodeornone(v) for v in self._l)
72 71
73 72 def get_token(self):
74 73 return _encodeornone(self._l.get_token())
75 74
76 75 @property
77 76 def infile(self) -> bytes:
78 77 if self._l.infile is not None:
79 78 return encoding.strtolocal(self._l.infile)
80 79 return b'<unknown>'
81 80
82 81 @property
83 82 def lineno(self) -> int:
84 83 return self._l.lineno
85 84
86 85
def shlexer(
    data=None,
    filepath: Optional[bytes] = None,
    wordchars: Optional[bytes] = None,
    whitespace: Optional[bytes] = None,
):
    """Return a bytes-yielding shell-style lexer over ``data`` or a file.

    Exactly one input is expected: ``data`` (bytes, decoded as latin-1) or
    ``filepath`` (a path whose content is read as latin-1 text).

    ``whitespace``, when given, turns on ``whitespace_split`` and adds the
    given characters to the lexer's whitespace set.  ``wordchars``, when
    given, adds the given characters to the lexer's word characters.

    Raises ``error.ProgrammingError`` if both ``data`` and ``filepath`` are
    supplied.
    """
    if data is None:
        # Read the file up front so the handle is closed here.  Passing the
        # open file object to shlex would leave it open for as long as the
        # lexer is alive, because nothing ever closes it.
        with open(filepath, 'r', encoding='latin1') as fp:
            data = fp.read()
    else:
        if filepath is not None:
            raise error.ProgrammingError(
                b'shlexer only accepts data or filepath, not both'
            )
        data = data.decode('latin1')
    infile = encoding.strfromlocal(filepath) if filepath is not None else None
    l = shlex.shlex(data, infile=infile, posix=True)
    if whitespace is not None:
        l.whitespace_split = True
        l.whitespace += whitespace.decode('latin1')
    if wordchars is not None:
        l.wordchars += wordchars.decode('latin1')
    return _shlexpy3proxy(l)
109 108
110 109
def encodeargs(args: Any) -> bytes:
    """Pickle ``args`` and return the result as a single line of base64.

    ``base64.b64encode`` produces the same bytes as ``base64.encodebytes``
    with its line breaks removed, so the output contains no newline and no
    per-byte post-processing is needed.
    """
    return base64.b64encode(pickle.dumps(args))
119 118
120 119
def decodeargs(s: bytes) -> Any:
    """Base64-decode ``s`` and return the object unpickled from it.

    NOTE(review): ``pickle.loads`` can execute arbitrary code on crafted
    input, so ``s`` must only come from a trusted producer -- confirm callers.
    """
    raw = base64.decodebytes(s)
    return pickle.loads(raw)
124 123
125 124
class MissingTool(Exception):
    """Raised by checktool() for a missing tool when ``abort`` is false."""
128 127
129 128
def checktool(
    exe: bytes, name: Optional[bytes] = None, abort: bool = True
) -> None:
    """Verify that the executable ``exe`` can be found.

    ``name`` is the label used in the error message and defaults to ``exe``.
    When the tool is missing, ``error.Abort`` is raised if ``abort`` is true,
    otherwise ``MissingTool``.
    """
    if procutil.findexe(exe):
        return
    failure = error.Abort if abort else MissingTool
    raise failure(_(b'cannot find required "%s" tool') % (name or exe))
140 139
141 140
class NoRepo(Exception):
    """Raised by sources and sinks when a path is not a valid repository."""
144 143
145 144
# Sentinel value; presumably stands in for a revision that should be
# skipped -- confirm against the callers that compare with it.
SKIPREV: bytes = b'SKIP'
147 146
148 147
class commit:
    """Plain record holding the metadata of one changeset to convert."""

    def __init__(
        self,
        author: bytes,
        date: bytes,
        desc: bytes,
        parents,
        branch: Optional[bytes] = None,
        rev=None,
        extra=None,
        sortkey=None,
        saverev=True,
        phase: int = phases.draft,
        optparents=None,
        ctx=None,
    ) -> None:
        """Store the given fields, substituting defaults for falsy values.

        A falsy ``author`` becomes ``b'unknown'``, a falsy ``date`` becomes
        ``b'0 0'``, a falsy ``extra`` becomes a new dict and a falsy
        ``optparents`` becomes a new list.
        """
        # Identity and description.
        self.author = author if author else b'unknown'
        self.date = date if date else b'0 0'
        self.desc = desc
        self.branch = branch
        self.rev = rev

        # Ancestry.
        self.parents = parents  # will be converted and used as parents
        self.optparents = optparents or []  # will be used if already converted

        # Everything else.
        self.extra = extra or {}
        self.sortkey = sortkey
        self.saverev = saverev
        self.phase = phase
        self.ctx = ctx  # for hg to hg conversions
177 176
178 177
class converter_source:
    """Interface implemented by every conversion source."""

    def __init__(
        self,
        ui: "uimod.ui",
        repotype: bytes,
        path: Optional[bytes] = None,
        revs=None,
    ) -> None:
        """Set up the conversion source.

        Implementations raise NoRepo("message") when ``path`` is not a
        valid repository.
        """
        self.ui, self.path = ui, path
        self.revs, self.repotype = revs, repotype

        # Encoding recode() falls back to when the caller passes none.
        self.encoding = b'utf-8'

    def checkhexformat(
        self, revstr: bytes, mapname: bytes = b'splicemap'
    ) -> None:
        """Abort unless ``revstr`` is a 40-digit hexadecimal identifier.

        Mercurial and Git both use this format for revision identifiers.
        ``mapname`` only appears in the error message.
        """
        if re.match(br'[0-9a-fA-F]{40,40}$', revstr):
            return
        raise error.Abort(
            _(b'%s entry %s is not a valid revision identifier')
            % (mapname, revstr)
        )

    def before(self) -> None:
        """Hook with no default behavior."""

    def after(self) -> None:
        """Hook with no default behavior."""

    def targetfilebelongstosource(self, targetfilename):
        """Tell whether ``targetfilename`` belongs to the source repository.

        This matters when only a subdirectory of the target belongs to the
        source repository.
        """
        # A normal full-repository conversion owns every target file.
        return True

    def setrevmap(self, revmap):
        """Set the map of already-converted revisions."""

    def getheads(self):
        """Return a list of this repository's heads."""
        raise NotImplementedError

    def getfile(self, name, rev):
        """Return a pair (data, mode) for file ``name`` at ``rev``.

        data is the file content as a string and mode is one of '', 'x' or
        'l'.  rev is the identifier returned by a previous call to
        getchanges().  data is None if the file is missing/deleted in rev.
        """
        raise NotImplementedError

    def getchanges(self, version, full):
        """Return a tuple (files, copies, cleanp2).

        files is a sorted list of (filename, id) tuples for all files
        changed between version and its first parent returned by
        getcommit().  If full, all files in that revision are returned.
        id is the source revision id of the file.

        copies is a dictionary of dest: source.

        cleanp2 is the set of filenames that are clean against p2.
        (Files that are clean against p1 are already not in files (unless
        full).  This makes it possible to handle p2 clean files similarly.)
        """
        raise NotImplementedError

    def getcommit(self, version):
        """Return the commit object for ``version``."""
        raise NotImplementedError

    def numcommits(self):
        """Return the number of commits in this source, or None if unknown."""
        return None

    def gettags(self):
        """Return the tags as a dictionary of name: revision.

        Tag names must be UTF-8 strings.
        """
        raise NotImplementedError

    def recode(self, s: AnyStr, encoding: Optional[bytes] = None) -> bytes:
        """Return ``s`` as UTF-8 encoded bytes.

        A ``str`` is encoded directly.  Bytes are decoded with ``encoding``
        (default: ``self.encoding``, else utf-8); on UnicodeError latin-1 is
        tried, and finally a lossy ``"replace"`` decode with ``encoding``.

        Note that the ``encoding`` parameter shadows the module-level
        ``encoding`` import inside this method.
        """
        if not encoding:
            encoding = self.encoding or b'utf-8'

        if isinstance(s, str):
            return s.encode("utf-8")

        try:
            decoded = s.decode(pycompat.sysstr(encoding))
            return decoded.encode("utf-8")
        except UnicodeError:
            try:
                decoded = s.decode("latin-1")
                return decoded.encode("utf-8")
            except UnicodeError:
                decoded = s.decode(pycompat.sysstr(encoding), "replace")
                return decoded.encode("utf-8")

    def getchangedfiles(self, rev, i):
        """Return the files changed by ``rev`` compared to parent[i].

        i is an index selecting one of the parents of rev.  The return
        value should be the list of files that differ between rev and
        that parent.

        If rev has no parents, i is None.

        This function is only needed to support --filemap.
        """
        raise NotImplementedError

    def converted(self, rev, sinkrev) -> None:
        """Notify the source that a revision has been converted."""

    def hasnativeorder(self) -> bool:
        """Tell whether this source has a meaningful, native revision order.

        For instance, Mercurial revisions are stored sequentially while
        there is no such global ordering with Darcs.
        """
        return False

    def hasnativeclose(self) -> bool:
        """Tell whether this source has the ability to close a branch."""
        return False

    def lookuprev(self, rev):
        """Resolve ``rev`` to the identifier format used by getcommit().

        Return None when rev is not a meaningful revision reference in
        this source.
        """
        return None

    def getbookmarks(self):
        """Return the bookmarks as a dictionary of name: revision.

        Bookmark names are to be UTF-8 strings.
        """
        return {}

    def checkrevformat(self, revstr, mapname: bytes = b'splicemap') -> bool:
        """Tell whether ``revstr`` is a well-formed revision for this source.

        revstr is a string that describes a revision in the given source
        control system.
        """
        return True
335 334
336 335
class converter_sink:
    """Interface implemented by every conversion sink (target)."""

    def __init__(self, ui: "uimod.ui", repotype: bytes, path: bytes) -> None:
        """Set up the conversion sink.

        Implementations raise NoRepo("message") when ``path`` is not a
        valid repository.

        ``created`` is a list of paths to remove if a fatal error occurs
        later.
        """
        self.ui, self.path = ui, path
        self.repotype = repotype
        self.created = []

    def revmapfile(self):
        """Return the path to the revision map file.

        The file contains lines of the form
            source_rev_id sink_rev_id
        mapping equivalent revision identifiers for each system.
        """
        raise NotImplementedError

    def authorfile(self):
        """Return the path to the author map file, or None.

        The file contains lines of the form
            srcauthor=dstauthor
        mapping equivalent author identifiers for each system.
        """
        return None

    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Create a revision containing ``files`` with the given parents.

        'commit' is a commit object containing at a minimum the author,
        date, and message for this changeset.  'files' is a list of
        (path, version) tuples, 'copies' is a dictionary mapping
        destinations to sources, 'source' is the source repository, and
        'revmap' is a mapfile of source revisions to converted revisions.
        Only getfile() and lookuprev() should be called on 'source'.
        'full' means that 'files' is complete and all other files should
        be removed.  'cleanp2' is a set of the filenames that are unchanged
        from p2 (only in the common merge case where there are two
        parents).

        Note that the sink repository is not told to update itself to
        a particular revision (or even what that revision would be)
        before it receives the file data.
        """
        raise NotImplementedError

    def puttags(self, tags):
        """Put tags into the sink.

        tags: {tagname: sink_rev_id, ...} where tagname is a UTF-8 string.
        Return a pair (tag_revision, tag_parent_revision), or (None, None)
        if nothing was changed.
        """
        raise NotImplementedError

    def setbranch(self, branch, pbranches):
        """Set the current branch name.

        Called before the first putcommit on the branch.
        branch: branch name for subsequent commits
        pbranches: (converted parent revision, parent branch) tuples
        """

    def setfilemapmode(self, active):
        """Tell the destination that a filemap is in use.

        Some converter_sources (svn in particular) can claim that a file
        was changed in a revision, even if there was no change.  This method
        tells the destination that we're using a filemap and that it should
        filter empty revisions.
        """

    def before(self) -> None:
        """Hook with no default behavior."""

    def after(self) -> None:
        """Hook with no default behavior."""

    def putbookmarks(self, bookmarks):
        """Put bookmarks into the sink.

        bookmarks: {bookmarkname: sink_rev_id, ...}
        where bookmarkname is a UTF-8 string.
        """

    def hascommitfrommap(self, rev):
        """Return False if a rev mentioned in a filemap is known to be absent."""
        raise NotImplementedError

    def hascommitforsplicemap(self, rev):
        """Tell whether the sink contains ``rev``, for splicemap handling only.

        Not for general use.  Returns True if the sink contains rev, and
        aborts on some special cases.
        """
        raise NotImplementedError
431 430
432 431
class commandline:
    """Build and run invocations of one external command through a shell."""

    def __init__(self, ui: "uimod.ui", command: bytes) -> None:
        self.ui = ui
        self.command = command

    def prerun(self) -> None:
        """Hook called by _dorun() right before the process is started."""

    def postrun(self) -> None:
        """Hook called by _dorun() once the start call returned or raised."""

    def _cmdline(self, cmd: bytes, *args: bytes, **kwargs) -> bytes:
        """Return the shell-quoted command line as one bytes string.

        Keyword arguments become options: one-letter keys give ``-k value``,
        longer keys give ``--key=value`` with underscores turned into
        dashes.  A value that cannot be concatenated with bytes yields a
        bare flag.  Unless ``ui.debugflag`` is set, stderr is redirected to
        ``os.devnull``.
        """
        options = pycompat.byteskwargs(kwargs)
        argv = [self.command, cmd]
        argv.extend(args)
        for key, value in options.items():
            short = len(key) == 1
            if short:
                argv.append(b'-' + key)
            else:
                argv.append(b'--' + key.replace(b'_', b'-'))
            try:
                if short:
                    argv.append(b'' + value)
                else:
                    argv[-1] += b'=' + value
            except TypeError:
                # Non-bytes value: keep the option as a bare flag.
                pass
        quoted = [procutil.shellquote(arg) for arg in argv]
        if not self.ui.debugflag:
            quoted += [b'2>', pycompat.bytestr(os.devnull)]
        return b' '.join(quoted)

    def _run(self, cmd: bytes, *args: bytes, **kwargs):
        """Start the command with stdout piped and return the Popen object."""

        def spawn(cmdline):
            return subprocess.Popen(
                procutil.tonativestr(cmdline),
                shell=True,
                bufsize=-1,
                close_fds=procutil.closefds,
                stdout=subprocess.PIPE,
            )

        return self._dorun(spawn, cmd, *args, **kwargs)

    def _run2(self, cmd: bytes, *args: bytes, **kwargs):
        """Start the command through ``procutil.popen2``."""
        return self._dorun(procutil.popen2, cmd, *args, **kwargs)

    def _run3(self, cmd: bytes, *args: bytes, **kwargs):
        """Start the command through ``procutil.popen3``."""
        return self._dorun(procutil.popen3, cmd, *args, **kwargs)

    def _dorun(self, openfunc, cmd: bytes, *args: bytes, **kwargs):
        """Build the command line and hand it to ``openfunc``.

        prerun() is called first; postrun() is always called afterwards,
        even if ``openfunc`` raises.
        """
        line = self._cmdline(cmd, *args, **kwargs)
        self.ui.debug(b'running: %s\n' % (line,))
        self.prerun()
        try:
            return openfunc(line)
        finally:
            self.postrun()

    def run(self, cmd: bytes, *args: bytes, **kwargs):
        """Run the command; return (stdout bytes, return code)."""
        proc = self._run(cmd, *args, **kwargs)
        output = proc.communicate()[0]
        self.ui.debug(output)
        return output, proc.returncode

    def runlines(self, cmd: bytes, *args: bytes, **kwargs):
        """Run the command; return (list of stdout lines, return code)."""
        proc = self._run(cmd, *args, **kwargs)
        lines = proc.stdout.readlines()
        proc.wait()
        self.ui.debug(b''.join(lines))
        return lines, proc.returncode

    def checkexit(self, status, output: bytes = b'') -> None:
        """Raise ``error.Abort`` when ``status`` is truthy.

        Any ``output`` is first reported through ``ui.warn``.
        """
        if not status:
            return
        if output:
            self.ui.warn(_(b'%s error:\n') % self.command)
            self.ui.warn(output)
        explanation = procutil.explainexit(status)
        raise error.Abort(b'%s %s' % (self.command, explanation))

    def run0(self, cmd: bytes, *args: bytes, **kwargs):
        """Like run(), but abort on failure and return only the output."""
        output, status = self.run(cmd, *args, **kwargs)
        self.checkexit(status, output)
        return output

    def runlines0(self, cmd: bytes, *args: bytes, **kwargs):
        """Like runlines(), but abort on failure and return only the lines."""
        lines, status = self.runlines(cmd, *args, **kwargs)
        self.checkexit(status, b''.join(lines))
        return lines

    @propertycache
    def argmax(self):
        """Command-line length budget derived from ``SC_ARG_MAX``."""
        try:
            argmax = os.sysconf("SC_ARG_MAX")
        except (AttributeError, ValueError):
            # POSIX requires at least 4096 bytes for ARG_MAX
            argmax = 4096

        # Windows shells impose their own limits on command line length,
        # down to 2047 bytes for cmd.exe under Windows NT/2k and 2500 bytes
        # for older 4nt.exe. See http://support.microsoft.com/kb/830473 for
        # details about cmd.exe limitations.

        # Since ARG_MAX is for command line _and_ environment, lower our limit
        # (and make happy Windows shells while doing this).
        return argmax // 2 - 1

    def _limit_arglist(self, arglist, cmd: bytes, *args: bytes, **kwargs):
        """Yield ``arglist`` in batches that fit within the argmax budget.

        Each entry is costed at its length plus 3.  A batch always receives
        at least one entry, even if that entry alone exceeds the budget.
        """
        budget = self.argmax - len(self._cmdline(cmd, *args, **kwargs))
        batch = []
        used = 0
        for fn in arglist:
            cost = len(fn) + 3
            if batch and used + cost >= budget:
                yield batch
                batch, used = [fn], cost
            else:
                batch.append(fn)
                used += cost
        if batch:
            yield batch

    def xargs(self, arglist, cmd: bytes, *args: bytes, **kwargs):
        """Run the command once per batch of ``arglist``, aborting on failure."""
        for batch in self._limit_arglist(arglist, cmd, *args, **kwargs):
            self.run0(cmd, *args, *batch, **kwargs)
562 561
563 562
class mapfile(dict):
    """Dictionary backed by a text file of ``key value`` lines.

    Existing entries are loaded on construction; every later assignment is
    appended to the file and flushed immediately.
    """

    def __init__(self, ui: "uimod.ui", path: bytes) -> None:
        super().__init__()
        self.ui = ui
        self.path = path
        self.fp = None  # append handle, opened on first assignment
        self.order = []  # keys in the order first seen while reading
        self._read()

    def _read(self) -> None:
        """Load entries from ``self.path``; a missing file is not an error.

        Blank lines are ignored.  Each remaining line is split on its last
        space into key and value; a line with no space raises
        ``error.Abort``.
        """
        if not self.path:
            return
        try:
            fp = open(self.path, 'rb')
        except FileNotFoundError:
            return

        with fp:
            for lineno, raw in enumerate(fp, 1):
                line = raw.splitlines()[0].rstrip()
                if not line:
                    # Ignore blank lines
                    continue
                try:
                    key, value = line.rsplit(b' ', 1)
                except ValueError:
                    raise error.Abort(
                        _(b'syntax error in %s(%d): key/value pair expected')
                        % (self.path, lineno)
                    )
                if key not in self:
                    self.order.append(key)
                super().__setitem__(key, value)

    def __setitem__(self, key, value) -> None:
        """Append ``key value`` to the file, then store it in the dict.

        The file is opened for appending on first use; failure to open it
        raises ``error.Abort``.
        """
        if self.fp is None:
            try:
                self.fp = open(self.path, 'ab')
            except IOError as err:
                raise error.Abort(
                    _(b'could not open map file %r: %s')
                    % (self.path, encoding.strtolocal(err.strerror))
                )
        entry = b'%s %s\n' % (key, value)
        self.fp.write(util.tonativeeol(entry))
        self.fp.flush()
        super().__setitem__(key, value)

    def close(self) -> None:
        """Close the append handle, if one is open."""
        if not self.fp:
            return
        self.fp.close()
        self.fp = None
617 616
618 617
def makedatetimestamp(t: float) -> dateutil.hgdate:
    """Return ``dateutil.makedate(t)`` for the timestamp ``t``."""
    hgdate = dateutil.makedate(t)
    return hgdate
General Comments 0
You need to be logged in to leave comments. Login now