Show More
@@ -1,597 +1,597 b'' | |||
|
1 | 1 | # common.py - common code for the convert extension |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | import base64 |
|
9 | 9 | import datetime |
|
10 | 10 | import errno |
|
11 | 11 | import os |
|
12 | 12 | import pickle |
|
13 | 13 | import re |
|
14 | 14 | import shlex |
|
15 | 15 | import subprocess |
|
16 | 16 | |
|
17 | 17 | from mercurial.i18n import _ |
|
18 | 18 | from mercurial.pycompat import open |
|
19 | 19 | from mercurial import ( |
|
20 | 20 | encoding, |
|
21 | 21 | error, |
|
22 | 22 | phases, |
|
23 | 23 | pycompat, |
|
24 | 24 | util, |
|
25 | 25 | ) |
|
26 | 26 | from mercurial.utils import procutil |
|
27 | 27 | |
|
28 | 28 | propertycache = util.propertycache |
|
29 | 29 | |
|
30 | 30 | |
|
def _encodeornone(d):
    """Encode the text ``d`` as latin1 bytes; ``None`` is passed through."""
    return None if d is None else d.encode('latin1')
|
35 | 35 | |
|
36 | 36 | |
|
class _shlexpy3proxy(object):
    """Adapter around a lexer object that hands tokens back as bytes.

    Every token obtained from the wrapped object goes through
    ``_encodeornone`` (latin1 encoding, ``None`` left untouched).
    """

    def __init__(self, l):
        # the wrapped lexer; all other members delegate to it
        self._l = l

    def __iter__(self):
        """Iterate over the wrapped lexer's tokens, encoded."""
        encoded = (_encodeornone(token) for token in self._l)
        return encoded

    def get_token(self):
        """Return the next token, encoded (``None`` stays ``None``)."""
        token = self._l.get_token()
        return _encodeornone(token)

    @property
    def infile(self):
        """The wrapped lexer's ``infile``, or ``b'<unknown>'`` if falsy."""
        name = self._l.infile
        if name:
            return name
        return b'<unknown>'

    @property
    def lineno(self):
        """The wrapped lexer's current ``lineno``."""
        return self._l.lineno
|
54 | 54 | |
|
55 | 55 | |
|
def shlexer(data=None, filepath=None, wordchars=None, whitespace=None):
    """Build a POSIX-mode ``shlex`` lexer over ``data`` or over a file.

    Exactly one input is used: when ``data`` is None the file at
    ``filepath`` is opened and lexed, otherwise ``data`` is lexed and
    passing ``filepath`` as well raises ``error.ProgrammingError``.

    ``whitespace``, when given, is appended to the lexer's whitespace set
    and switches on ``whitespace_split``; ``wordchars``, when given, is
    appended to the lexer's word characters.

    On Python 3 the inputs are decoded as latin1 and the lexer is wrapped
    in ``_shlexpy3proxy``; otherwise the bare lexer is returned.
    """
    py3 = pycompat.ispy3

    if data is not None:
        if filepath is not None:
            raise error.ProgrammingError(
                b'shlexer only accepts data or filepath, not both'
            )
        if py3:
            data = data.decode('latin1')
    elif py3:
        data = open(filepath, b'r', encoding='latin1')
    else:
        data = open(filepath, b'r')

    lexer = shlex.shlex(data, infile=filepath, posix=True)

    if whitespace is not None:
        lexer.whitespace_split = True
        extra = whitespace.decode('latin1') if py3 else whitespace
        lexer.whitespace += extra

    if wordchars is not None:
        extra = wordchars.decode('latin1') if py3 else wordchars
        lexer.wordchars += extra

    if py3:
        return _shlexpy3proxy(lexer)
    return lexer
|
84 | 84 | |
|
85 | 85 | |
|
# Bind the line-wrapping base64 codecs under one pair of names: the
# encodebytes/decodebytes spelling when pycompat reports Python 3, the
# encodestring/decodestring spelling otherwise.
base64_encodebytes = (
    base64.encodebytes if pycompat.ispy3 else base64.encodestring
)
base64_decodebytes = (
    base64.decodebytes if pycompat.ispy3 else base64.decodestring
)
|
92 | 92 | |
|
93 | 93 | |
|
def encodeargs(args):
    """Serialize ``args`` into a single line of base64-encoded bytes.

    ``args`` is pickled and the pickle is base64-encoded with no embedded
    newlines. The result can be turned back into the object by
    base64-decoding and unpickling it.
    """
    # b64encode emits unwrapped output, which is byte-for-byte what the
    # line-wrapping encoder produces once every newline is removed; using it
    # avoids walking the encoded data one byte at a time in Python.
    return base64.b64encode(pickle.dumps(args))
|
102 | 102 | |
|
103 | 103 | |
|
def decodeargs(s):
    """Base64-decode ``s`` and return the object unpickled from it.

    NOTE(review): this unpickles its input, so ``s`` must only come from a
    trusted producer -- presumably encodeargs(); confirm against callers.
    """
    return pickle.loads(base64_decodebytes(s))
|
107 | 107 | |
|
108 | 108 | |
|
class MissingTool(Exception):
    """Signals that a required external tool could not be found."""
|
111 | 111 | |
|
112 | 112 | |
|
def checktool(exe, name=None, abort=True):
    """Verify that the executable ``exe`` can be located.

    Does nothing when ``procutil.findexe`` finds it. Otherwise raises
    ``error.Abort`` (``abort`` true) or ``MissingTool`` (``abort`` false);
    the message names ``name``, falling back to ``exe`` when ``name`` is
    empty.
    """
    if procutil.findexe(exe):
        return
    exc = error.Abort if abort else MissingTool
    raise exc(_(b'cannot find required "%s" tool') % (name or exe))
|
121 | 121 | |
|
122 | 122 | |
|
class NoRepo(Exception):
    """Signals that a path is not a valid repository for a converter."""
|
125 | 125 | |
|
126 | 126 | |
|
127 | 127 | SKIPREV = b'SKIP' |
|
128 | 128 | |
|
129 | 129 | |
|
class commit(object):
    """Plain record holding the metadata of one changeset being converted."""

    def __init__(
        self,
        author,
        date,
        desc,
        parents,
        branch=None,
        rev=None,
        extra=None,
        sortkey=None,
        saverev=True,
        phase=phases.draft,
        optparents=None,
        ctx=None,
    ):
        # Who/when/what, with placeholders for missing author and date.
        self.author = author if author else b'unknown'
        self.date = date if date else b'0 0'
        self.desc = desc

        # Ancestry: 'parents' will be converted and used as parents,
        # 'optparents' will be used if already converted.
        self.parents = parents
        self.optparents = optparents if optparents else []

        # Remaining metadata; a falsy 'extra' becomes a fresh dict.
        self.branch = branch
        self.rev = rev
        self.extra = extra if extra else {}
        self.sortkey = sortkey
        self.saverev = saverev
        self.phase = phase

        # for hg to hg conversions
        self.ctx = ctx
|
158 | 158 | |
|
159 | 159 | |
|
class converter_source(object):
    """Conversion source interface"""

    def __init__(self, ui, repotype, path=None, revs=None):
        """Set up the conversion source.

        Implementations raise NoRepo("message") when path is not a valid
        repository."""
        self.ui = ui
        self.repotype = repotype
        self.path = path
        self.revs = revs

        # default source encoding used by recode()
        self.encoding = b'utf-8'

    def checkhexformat(self, revstr, mapname=b'splicemap'):
        """Abort unless revstr is a 40 byte hex string.

        Mercurial and git both use this format for their revision
        identifiers. mapname only appears in the error message.
        """
        if re.match(br'[0-9a-fA-F]{40,40}$', revstr):
            return
        raise error.Abort(
            _(b'%s entry %s is not a valid revision identifier')
            % (mapname, revstr)
        )

    def before(self):
        """Hook with no default behavior."""

    def after(self):
        """Hook with no default behavior."""

    def targetfilebelongstosource(self, targetfilename):
        """Tell whether the given target file belongs to the source repo.

        This matters when only a subdirectory of the target belongs to the
        source repo."""
        # For normal full repo converts, this is always True.
        return True

    def setrevmap(self, revmap):
        """set the map of already-converted revisions"""

    def getheads(self):
        """Return a list of this repository's heads"""
        raise NotImplementedError

    def getfile(self, name, rev):
        """Return a pair (data, mode).

        data is the file content as a string and mode is one of '', 'x'
        or 'l'. rev is the identifier returned by a previous call to
        getchanges(). data is None if the file is missing/deleted in rev.
        """
        raise NotImplementedError

    def getchanges(self, version, full):
        """Return a tuple of (files, copies, cleanp2).

        files is a sorted list of (filename, id) tuples for all files
        changed between version and its first parent returned by
        getcommit(). If full, all files in that revision are returned.
        id is the source revision id of the file.

        copies is a dictionary of dest: source

        cleanp2 is the set of filenames that are clean against p2.
        (Files that are clean against p1 are already not in files (unless
        full). This makes it possible to handle p2 clean files similarly.)
        """
        raise NotImplementedError

    def getcommit(self, version):
        """Return the commit object for version"""
        raise NotImplementedError

    def numcommits(self):
        """Return the number of commits in this source, or None if unknown."""
        return None

    def gettags(self):
        """Return the tags as a dictionary of name: revision

        Tag names must be UTF-8 strings.
        """
        raise NotImplementedError

    def recode(self, s, encoding=None):
        """Return s re-encoded as UTF-8 bytes.

        A str is encoded directly. Anything else is decoded with encoding
        (falling back to self.encoding, then utf-8); on UnicodeError
        latin-1 is tried, and finally the requested encoding again with
        the "replace" error handler.
        """
        encoding = encoding or self.encoding or b'utf-8'

        if isinstance(s, str):
            return s.encode("utf-8")

        try:
            text = s.decode(pycompat.sysstr(encoding))
            return text.encode("utf-8")
        except UnicodeError:
            try:
                text = s.decode("latin-1")
                return text.encode("utf-8")
            except UnicodeError:
                text = s.decode(pycompat.sysstr(encoding), "replace")
                return text.encode("utf-8")

    def getchangedfiles(self, rev, i):
        """Return the files changed by rev compared to parent[i].

        i is an index selecting one of the parents of rev. The return
        value should be the list of files that are different in rev and
        this parent.

        If rev has no parents, i is None.

        This function is only needed to support --filemap
        """
        raise NotImplementedError

    def converted(self, rev, sinkrev):
        '''Notify the source that a revision has been converted.'''

    def hasnativeorder(self):
        """Return true if this source has a meaningful, native revision
        order. For instance, Mercurial revisions are stored sequentially
        while there is no such global ordering with Darcs.
        """
        return False

    def hasnativeclose(self):
        """Return true if this source has the ability to close a branch."""
        return False

    def lookuprev(self, rev):
        """Resolve rev to an identifier in the format used by getcommit().

        Returns None when rev is not a meaningful revision reference in
        the source.
        """
        return None

    def getbookmarks(self):
        """Return the bookmarks as a dictionary of name: revision

        Bookmark names are to be UTF-8 strings.
        """
        return {}

    def checkrevformat(self, revstr, mapname=b'splicemap'):
        """Return true if revstr, a string describing a revision in the
        given source control system, has the correct format.
        """
        return True
|
308 | 308 | |
|
309 | 309 | |
|
class converter_sink(object):
    """Conversion sink (target) interface"""

    def __init__(self, ui, repotype, path):
        """Set up the conversion sink.

        Implementations raise NoRepo("message") when path is not a valid
        repository.

        created is a list of paths to remove if a fatal error occurs
        later"""
        self.ui = ui
        self.repotype = repotype
        self.path = path
        self.created = []

    def revmapfile(self):
        """Path to a file that will contain lines
        source_rev_id sink_rev_id
        mapping equivalent revision identifiers for each system."""
        raise NotImplementedError

    def authorfile(self):
        """Path to a file that will contain lines
        srcauthor=dstauthor
        mapping equivalent author identifiers for each system.

        The default is None."""
        return None

    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Create a revision with all changed files listed in 'files'
        and having the listed parents.

        'commit' is a commit object containing at a minimum the author,
        date, and message for this changeset. 'files' is a list of
        (path, version) tuples, 'copies' is a dictionary mapping
        destinations to sources, 'source' is the source repository, and
        'revmap' is a mapfile of source revisions to converted revisions.
        Only getfile() and lookuprev() should be called on 'source'.
        'full' means that 'files' is complete and all other files should
        be removed. 'cleanp2' is a set of the filenames that are unchanged
        from p2 (only in the common merge case where there are two
        parents).

        Note that the sink repository is not told to update itself to
        a particular revision (or even what that revision would be)
        before it receives the file data.
        """
        raise NotImplementedError

    def puttags(self, tags):
        """Put tags into sink.

        tags: {tagname: sink_rev_id, ...} where tagname is an UTF-8 string.
        Return a pair (tag_revision, tag_parent_revision), or (None, None)
        if nothing was changed.
        """
        raise NotImplementedError

    def setbranch(self, branch, pbranches):
        """Set the current branch name. Called before the first putcommit
        on the branch.
        branch: branch name for subsequent commits
        pbranches: (converted parent revision, parent branch) tuples"""

    def setfilemapmode(self, active):
        """Tell the destination that we're using a filemap

        Some converter_sources (svn in particular) can claim that a file
        was changed in a revision, even if there was no change. This method
        tells the destination that we're using a filemap and that it should
        filter empty revisions.
        """

    def before(self):
        """Hook with no default behavior."""

    def after(self):
        """Hook with no default behavior."""

    def putbookmarks(self, bookmarks):
        """Put bookmarks into sink.

        bookmarks: {bookmarkname: sink_rev_id, ...}
        where bookmarkname is an UTF-8 string.
        """

    def hascommitfrommap(self, rev):
        """Return False if a rev mentioned in a filemap is known to not be
        present."""
        raise NotImplementedError

    def hascommitforsplicemap(self, rev):
        """This method is for the special needs of splicemap handling and
        not for general use. Returns True if the sink contains rev, aborts
        on some special cases."""
        raise NotImplementedError
|
404 | 404 | |
|
405 | 405 | |
|
class commandline(object):
    """Helper that builds shell command lines for one executable and runs
    them, with hooks around each launch."""

    def __init__(self, ui, command):
        self.ui = ui
        self.command = command

    def prerun(self):
        """Hook called right before the process is started; no-op here."""

    def postrun(self):
        """Hook called right after the open function returns or raises;
        no-op here."""

    def _cmdline(self, cmd, *args, **kwargs):
        """Assemble the shell command line (bytes) for ``cmd``.

        Keyword arguments become options: one-letter keys turn into
        ``-k value``, longer keys into ``--some-key=value`` (underscores
        replaced by dashes). Values that cannot be concatenated to bytes
        yield the bare option. Unless ui.debugflag is set, stderr is
        redirected to os.devnull.
        """
        argv = [self.command, cmd]
        argv.extend(args)
        for key, value in pycompat.byteskwargs(kwargs).items():
            short = len(key) == 1
            argv.append(
                b'-' + key if short else b'--' + key.replace(b'_', b'-')
            )
            try:
                if short:
                    argv.append(b'' + value)
                else:
                    argv[-1] += b'=' + value
            except TypeError:
                # value is not bytes: leave the option without an argument
                pass
        quoted = [procutil.shellquote(arg) for arg in argv]
        if not self.ui.debugflag:
            quoted.extend([b'2>', pycompat.bytestr(os.devnull)])
        return b' '.join(quoted)

    def _run(self, cmd, *args, **kwargs):
        """Start the command with its stdout piped; return the Popen."""

        def popen(cmdline):
            return subprocess.Popen(
                procutil.tonativestr(cmdline),
                shell=True,
                bufsize=-1,
                close_fds=procutil.closefds,
                stdout=subprocess.PIPE,
            )

        return self._dorun(popen, cmd, *args, **kwargs)

    def _run2(self, cmd, *args, **kwargs):
        """Start the command through procutil.popen2."""
        return self._dorun(procutil.popen2, cmd, *args, **kwargs)

    def _run3(self, cmd, *args, **kwargs):
        """Start the command through procutil.popen3."""
        return self._dorun(procutil.popen3, cmd, *args, **kwargs)

    def _dorun(self, openfunc, cmd, *args, **kwargs):
        """Build the command line, log it, and hand it to ``openfunc``
        between the prerun() and postrun() hooks."""
        line = self._cmdline(cmd, *args, **kwargs)
        self.ui.debug(b'running: %s\n' % (line,))
        self.prerun()
        try:
            return openfunc(line)
        finally:
            # runs whether openfunc returned or raised
            self.postrun()

    def run(self, cmd, *args, **kwargs):
        """Run the command; return (stdout bytes, exit status)."""
        proc = self._run(cmd, *args, **kwargs)
        out, _err = proc.communicate()
        self.ui.debug(out)
        return out, proc.returncode

    def runlines(self, cmd, *args, **kwargs):
        """Run the command; return (list of stdout lines, exit status)."""
        proc = self._run(cmd, *args, **kwargs)
        lines = proc.stdout.readlines()
        proc.wait()
        self.ui.debug(b''.join(lines))
        return lines, proc.returncode

    def checkexit(self, status, output=b''):
        """Abort when ``status`` is non-zero, warning with ``output``
        first if there is any."""
        if not status:
            return
        if output:
            self.ui.warn(_(b'%s error:\n') % self.command)
            self.ui.warn(output)
        reason = procutil.explainexit(status)
        raise error.Abort(b'%s %s' % (self.command, reason))

    def run0(self, cmd, *args, **kwargs):
        """Like run(), but abort on failure and return only the output."""
        out, status = self.run(cmd, *args, **kwargs)
        self.checkexit(status, out)
        return out

    def runlines0(self, cmd, *args, **kwargs):
        """Like runlines(), but abort on failure and return only the
        lines."""
        lines, status = self.runlines(cmd, *args, **kwargs)
        self.checkexit(status, b''.join(lines))
        return lines

    @propertycache
    def argmax(self):
        """Command line length budget derived from SC_ARG_MAX."""
        try:
            argmax = os.sysconf("SC_ARG_MAX")
        except (AttributeError, ValueError):
            # POSIX requires at least 4096 bytes for ARG_MAX
            argmax = 4096

        # Windows shells impose their own limits on command line length,
        # down to 2047 bytes for cmd.exe under Windows NT/2k and 2500 bytes
        # for older 4nt.exe. See http://support.microsoft.com/kb/830473 for
        # details about cmd.exe limitations.

        # Since ARG_MAX is for command line _and_ environment, lower our limit
        # (and make happy Windows shells while doing this).
        return argmax // 2 - 1

    def _limit_arglist(self, arglist, cmd, *args, **kwargs):
        """Yield successive chunks of ``arglist`` sized to fit, together
        with the base command line, within self.argmax.

        Each entry is costed at its length plus 3 bytes. A chunk always
        holds at least one entry, even one that exceeds the budget.
        """
        budget = self.argmax - len(self._cmdline(cmd, *args, **kwargs))
        used = 0
        chunk = []
        for fn in arglist:
            cost = len(fn) + 3
            if chunk and used + cost >= budget:
                yield chunk
                chunk = []
                used = 0
            chunk.append(fn)
            used += cost
        if chunk:
            yield chunk

    def xargs(self, arglist, cmd, *args, **kwargs):
        """Run ``cmd`` once per chunk of ``arglist``, each chunk appended
        to ``args``; abort on the first failure."""
        for chunk in self._limit_arglist(arglist, cmd, *args, **kwargs):
            self.run0(cmd, *(list(args) + chunk), **kwargs)
|
535 | 535 | |
|
536 | 536 | |
|
class mapfile(dict):
    """Dictionary mirrored in a text file of ``key value`` lines.

    Existing entries are loaded from ``path`` at construction time and
    every later assignment is appended to the file immediately.
    ``order`` records keys in the order they were first read from the
    file. A falsy ``path`` gives an empty map with nothing loaded.
    """

    def __init__(self, ui, path):
        super(mapfile, self).__init__()
        self.ui = ui
        self.path = path
        # append handle, opened lazily by the first assignment
        self.fp = None
        self.order = []
        self._read()

    def _read(self):
        """Load the entries stored in ``self.path``, if it exists.

        A missing file is not an error. Raises error.Abort for a
        non-blank line that has no space-separated value; other IOErrors
        from opening the file propagate.
        """
        if not self.path:
            return
        try:
            fp = open(self.path, b'rb')
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            return
        # The with block releases the handle even when a malformed line
        # aborts the load part-way through.
        with fp:
            for i, line in enumerate(fp):
                line = line.splitlines()[0].rstrip()
                if not line:
                    # Ignore blank lines
                    continue
                try:
                    # split on the last space so keys may contain spaces
                    key, value = line.rsplit(b' ', 1)
                except ValueError:
                    raise error.Abort(
                        _(b'syntax error in %s(%d): key/value pair expected')
                        % (self.path, i + 1)
                    )
                if key not in self:
                    self.order.append(key)
                # bypass our own __setitem__: loading must not write back
                super(mapfile, self).__setitem__(key, value)

    def __setitem__(self, key, value):
        """Append ``key value`` to the file, then store it in the dict.

        Raises error.Abort if the file cannot be opened for appending.
        """
        if self.fp is None:
            try:
                self.fp = open(self.path, b'ab')
            except IOError as err:
                raise error.Abort(
                    _(b'could not open map file %r: %s')
                    % (self.path, encoding.strtolocal(err.strerror))
                )
        self.fp.write(util.tonativeeol(b'%s %s\n' % (key, value)))
        # flush on every write so the file always reflects the dict
        self.fp.flush()
        super(mapfile, self).__setitem__(key, value)

    def close(self):
        """Close the append handle if one was opened."""
        if self.fp:
            self.fp.close()
            self.fp = None
|
589 | 589 | |
|
590 | 590 | |
|
def makedatetimestamp(t):
    """Like dateutil.makedate() but for time t instead of current time

    Returns the pair (t, tz) where tz is the number of seconds obtained by
    subtracting the local-time reading of t from its UTC reading.
    """
    as_utc = datetime.datetime.utcfromtimestamp(t)
    as_local = datetime.datetime.fromtimestamp(t)
    offset = as_utc - as_local
    return t, offset.days * 86400 + offset.seconds
@@ -1,667 +1,667 b'' | |||
|
1 | 1 | # convcmd - convert extension commands definition |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | import collections |
|
9 | 9 | import os |
|
10 | 10 | import shutil |
|
11 | 11 | |
|
12 | 12 | from mercurial.i18n import _ |
|
13 | 13 | from mercurial.pycompat import open |
|
14 | 14 | from mercurial import ( |
|
15 | 15 | encoding, |
|
16 | 16 | error, |
|
17 | 17 | hg, |
|
18 | 18 | pycompat, |
|
19 | 19 | scmutil, |
|
20 | 20 | util, |
|
21 | 21 | ) |
|
22 | 22 | from mercurial.utils import dateutil |
|
23 | 23 | |
|
24 | 24 | from . import ( |
|
25 | 25 | bzr, |
|
26 | 26 | common, |
|
27 | 27 | cvs, |
|
28 | 28 | darcs, |
|
29 | 29 | filemap, |
|
30 | 30 | git, |
|
31 | 31 | gnuarch, |
|
32 | 32 | hg as hgconvert, |
|
33 | 33 | monotone, |
|
34 | 34 | p4, |
|
35 | 35 | subversion, |
|
36 | 36 | ) |
|
37 | 37 | |
|
38 | 38 | mapfile = common.mapfile |
|
39 | 39 | MissingTool = common.MissingTool |
|
40 | 40 | NoRepo = common.NoRepo |
|
41 | 41 | SKIPREV = common.SKIPREV |
|
42 | 42 | |
|
43 | 43 | bzr_source = bzr.bzr_source |
|
44 | 44 | convert_cvs = cvs.convert_cvs |
|
45 | 45 | convert_git = git.convert_git |
|
46 | 46 | darcs_source = darcs.darcs_source |
|
47 | 47 | gnuarch_source = gnuarch.gnuarch_source |
|
48 | 48 | mercurial_sink = hgconvert.mercurial_sink |
|
49 | 49 | mercurial_source = hgconvert.mercurial_source |
|
50 | 50 | monotone_source = monotone.monotone_source |
|
51 | 51 | p4_source = p4.p4_source |
|
52 | 52 | svn_sink = subversion.svn_sink |
|
53 | 53 | svn_source = subversion.svn_source |
|
54 | 54 | |
|
55 | 55 | orig_encoding = b'ascii' |
|
56 | 56 | |
|
57 | 57 | |
|
def readauthormap(ui, authorfile, authors=None):
    """Read ``srcauthor=dstauthor`` lines from authorfile into a dict.

    Blank lines and lines starting with ``#`` are skipped; lines without
    an ``=`` are reported with ui.warn and ignored. Both names are
    stripped of surrounding whitespace. Entries are merged into
    ``authors`` (a new dict when None), which is also returned. An entry
    that conflicts with an existing mapping replaces it and is announced
    with ui.status.
    """
    if authors is None:
        authors = {}
    with open(authorfile, b'rb') as afile:
        for line in afile:

            line = line.strip()
            if not line or line.startswith(b'#'):
                continue

            try:
                srcauthor, dstauthor = line.split(b'=', 1)
            except ValueError:
                msg = _(b'ignoring bad line in author map file %s: %s\n')
                ui.warn(msg % (authorfile, line.rstrip()))
                continue

            srcauthor = srcauthor.strip()
            dstauthor = dstauthor.strip()
            previous = authors.get(srcauthor)
            if previous in (None, dstauthor):
                msg = _(b'mapping author %s to %s\n')
                ui.debug(msg % (srcauthor, dstauthor))
            else:
                m = _(b'overriding mapping for author %s, was %s, will be %s\n')
                ui.status(m % (srcauthor, previous, dstauthor))
            # Store the mapping in both cases: previously the override was
            # announced but the old value was silently kept.
            authors[srcauthor] = dstauthor
    return authors
|
86 | 86 | |
|
87 | 87 | |
|
def recode(s):
    """Return s encoded in orig_encoding, substituting characters that
    cannot be represented.

    A str is encoded as is; anything else is first decoded as UTF-8.
    """
    text = s if isinstance(s, str) else s.decode('utf-8')
    return text.encode(pycompat.sysstr(orig_encoding), 'replace')
|
95 | 95 | |
|
96 | 96 | |
|
def mapbranch(branch, branchmap):
    """
    >>> bmap = {b'default': b'branch1'}
    >>> for i in [b'', None]:
    ...     mapbranch(i, bmap)
    'branch1'
    'branch1'
    >>> bmap = {b'None': b'branch2'}
    >>> for i in [b'', None]:
    ...     mapbranch(i, bmap)
    'branch2'
    'branch2'
    >>> bmap = {b'None': b'branch3', b'default': b'branch4'}
    >>> for i in [b'None', b'', None, b'default', b'branch5']:
    ...     mapbranch(i, bmap)
    'branch3'
    'branch4'
    'branch4'
    'branch4'
    'branch5'
    """
    # A branch of None or empty means the commit comes from the source
    # repository's default branch and goes to the default branch of the
    # destination. Looking such commits up under the literal "default"
    # lets the user redirect them to another branch in the destination.
    mapped = branchmap.get(branch or b'default', branch)
    if mapped:
        return mapped
    # The literal "None" was once used to denote the default branch; fall
    # back to it for backward compatibility.
    return branchmap.get(b'None', mapped)
|
129 | 129 | |
|
130 | 130 | |
|
131 | 131 | source_converters = [ |
|
132 | 132 | (b'cvs', convert_cvs, b'branchsort'), |
|
133 | 133 | (b'git', convert_git, b'branchsort'), |
|
134 | 134 | (b'svn', svn_source, b'branchsort'), |
|
135 | 135 | (b'hg', mercurial_source, b'sourcesort'), |
|
136 | 136 | (b'darcs', darcs_source, b'branchsort'), |
|
137 | 137 | (b'mtn', monotone_source, b'branchsort'), |
|
138 | 138 | (b'gnuarch', gnuarch_source, b'branchsort'), |
|
139 | 139 | (b'bzr', bzr_source, b'branchsort'), |
|
140 | 140 | (b'p4', p4_source, b'branchsort'), |
|
141 | 141 | ] |
|
142 | 142 | |
|
143 | 143 | sink_converters = [ |
|
144 | 144 | (b'hg', mercurial_sink), |
|
145 | 145 | (b'svn', svn_sink), |
|
146 | 146 | ] |
|
147 | 147 | |
|
148 | 148 | |
|
def convertsource(ui, path, type, revs):
    """Instantiate the first source converter that accepts ``path``.

    With ``type`` set only the converter of that name is tried (an
    unknown name aborts); otherwise every entry of source_converters is
    tried in order. Returns ``(source, sortmode)``. When no converter
    succeeds, the collected NoRepo/MissingTool messages are written out
    (unless ui.quiet) and error.Abort is raised.
    """
    if type and type not in [entry[0] for entry in source_converters]:
        raise error.Abort(_(b'%s: invalid source repository type') % type)

    failures = []
    for name, factory, sortmode in source_converters:
        if type and name != type:
            continue
        try:
            return factory(ui, name, path, revs), sortmode
        except (NoRepo, MissingTool) as inst:
            failures.append(inst)

    if not ui.quiet:
        for failure in failures:
            ui.write(b"%s\n" % pycompat.bytestr(failure.args[0]))
    raise error.Abort(_(b'%s: missing or unsupported repository') % path)
|
163 | 163 | |
|
164 | 164 | |
|
def convertsink(ui, path, type):
    """Instantiate the first sink converter that accepts ``path``.

    With ``type`` set only the converter of that name is tried (an
    unknown name aborts); otherwise every entry of sink_converters is
    tried in order. A NoRepo from a converter is reported with ui.note
    and the next one is tried; a MissingTool aborts immediately. Raises
    error.Abort when no converter succeeds.
    """
    if type and type not in [s[0] for s in sink_converters]:
        raise error.Abort(_(b'%s: invalid destination repository type') % type)
    for name, sink in sink_converters:
        try:
            if not type or name == type:
                return sink(ui, name, path)
        except NoRepo as inst:
            # Format the exception's message rather than the exception
            # object: bytes %-formatting on Python 3 rejects objects
            # without __bytes__. Same approach as convertsource().
            ui.note(_(b"convert: %s\n") % pycompat.bytestr(inst.args[0]))
        except MissingTool as inst:
            raise error.Abort(b'%s\n' % pycompat.bytestr(inst.args[0]))
    raise error.Abort(_(b'%s: unknown repository type') % path)
|
177 | 177 | |
|
178 | 178 | |
|
class progresssource(object):
    """Wrapper around a conversion source that advances a 'getting files'
    progress bar on every getfile() call."""

    def __init__(self, ui, source, filecount):
        self.ui = ui
        self.source = source
        topic = _(b'getting files')
        self.progress = ui.makeprogress(
            topic, unit=_(b'files'), total=filecount
        )

    def getfile(self, file, rev):
        """Advance the progress bar for ``file``, then delegate to the
        wrapped source."""
        self.progress.increment(item=file)
        wrapped = self.source
        return wrapped.getfile(file, rev)

    def targetfilebelongstosource(self, targetfilename):
        """Delegate to the wrapped source."""
        wrapped = self.source
        return wrapped.targetfilebelongstosource(targetfilename)

    def lookuprev(self, rev):
        """Delegate to the wrapped source."""
        wrapped = self.source
        return wrapped.lookuprev(rev)

    def close(self):
        """Mark the progress bar as complete."""
        self.progress.complete()
|
199 | 199 | |
|
200 | 200 | |
|
class converter(object):
    """Drive the conversion of revisions from ``source`` into ``dest``.

    State kept on the instance:

    - ``map``: persistent source revision -> destination revision mapping
    - ``commitcache``: source revision -> commit object (see cachecommit())
    - ``authors`` / ``authorfile``: author mapping and where to write it
    - ``splicemap``: child -> replacement parents, from the splicemap file
    - ``branchmap``: branch renaming map
    """

    def __init__(self, ui, source, dest, revmapfile, opts):
        """Load the revision map, author map(s), splicemap and branchmap."""
        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings).  (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get(b'authormap'):
            self.readauthormap(opts.get(b'authormap'))
            # Only when an explicit author map was supplied is the merged
            # map written back to the destination (see writeauthormap()).
            self.authorfile = self.dest.authorfile()

        self.splicemap = self.parsesplicemap(opts.get(b'splicemap'))
        self.branchmap = mapfile(ui, opts.get(b'branchmap'))

    def parsesplicemap(self, path):
        """check and validate the splicemap format and
        return a child/parents dictionary.
        Format checking has two parts.
        1. generic format which is same across all source types
        2. specific format checking which may be different for
           different source type.  This logic is implemented in
           checkrevformat function in source files like
           hg.py, subversion.py etc.

        An empty ``path`` yields an empty dictionary.  Raises error.Abort
        on a malformed line or when the file cannot be read.
        """

        if not path:
            return {}
        m = {}
        try:
            # Use a context manager so the file is closed on every exit
            # path, including the error.Abort raised for a malformed line.
            with open(path, b'rb') as fp:
                for i, line in enumerate(fp):
                    line = line.splitlines()[0].rstrip()
                    if not line:
                        # Ignore blank lines
                        continue
                    # split line
                    lex = common.shlexer(data=line, whitespace=b',')
                    line = list(lex)
                    # check number of parents
                    if not (2 <= len(line) <= 3):
                        raise error.Abort(
                            _(
                                b'syntax error in %s(%d): child parent1'
                                b'[,parent2] expected'
                            )
                            % (path, i + 1)
                        )
                    for part in line:
                        self.source.checkrevformat(part)
                    # p1 and p2 are kept as (possibly empty) lists so they
                    # can simply be concatenated below.
                    child, p1, p2 = line[0], line[1:2], line[2:]
                    if p1 == p2:
                        m[child] = p1
                    else:
                        m[child] = p1 + p2
        # if file does not exist or error reading, exit
        except IOError:
            raise error.Abort(
                _(b'splicemap file not found or error reading %s:') % path
            )
        return m

    def walktree(self, heads):
        """Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset."""
        # A deque gives O(1) pops from the left; the visiting order is the
        # same FIFO order a list with pop(0) would produce.
        visit = collections.deque(heads)
        known = set()
        parents = {}
        numcommits = self.source.numcommits()
        progress = self.ui.makeprogress(
            _(b'scanning'), unit=_(b'revisions'), total=numcommits
        )
        while visit:
            n = visit.popleft()
            if n in known:
                continue
            if n in self.map:
                m = self.map[n]
                # Already handled: either skipped or present in dest.
                if m == SKIPREV or self.dest.hascommitfrommap(m):
                    continue
            known.add(n)
            progress.update(len(known))
            commit = self.cachecommit(n)
            parents[n] = []
            for p in commit.parents:
                parents[n].append(p)
                visit.append(p)
        progress.complete()

        return parents

    def mergesplicemap(self, parents, splicemap):
        """A splicemap redefines child/parent relationships. Check the
        map contains valid revision identifiers and merge the new
        links in the source graph.

        ``parents`` is updated in place.  Raises error.Abort when a spliced
        parent is neither in the destination nor being converted.
        """
        for c in sorted(splicemap):
            if c not in parents:
                if not self.dest.hascommitforsplicemap(self.map.get(c, c)):
                    # Could be in source but not converted during this run
                    self.ui.warn(
                        _(
                            b'splice map revision %s is not being '
                            b'converted, ignoring\n'
                        )
                        % c
                    )
                continue
            pc = []
            for p in splicemap[c]:
                # We do not have to wait for nodes already in dest.
                if self.dest.hascommitforsplicemap(self.map.get(p, p)):
                    continue
                # Parent is not in dest and not being converted, not good
                if p not in parents:
                    raise error.Abort(_(b'unknown splice map parent: %s') % p)
                pc.append(p)
            parents[c] = pc

    def toposort(self, parents, sortmode):
        """Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        ``sortmode`` selects how ties between eligible revisions are broken
        (b'branchsort', b'datesort', b'sourcesort' or b'closesort'); any
        other value raises error.Abort.
        """

        def mapchildren(parents):
            """Return a (children, roots) tuple where 'children' maps parent
            revision identifiers to children ones, and 'roots' is the list of
            revisions without parents. 'parents' must be a mapping of revision
            identifier to its parents ones.
            """
            visit = collections.deque(sorted(parents))
            seen = set()
            children = {}
            roots = []

            while visit:
                n = visit.popleft()
                if n in seen:
                    continue
                seen.add(n)
                # Ensure that nodes without parents are present in the
                # 'children' mapping.
                children.setdefault(n, [])
                hasparent = False
                for p in parents[n]:
                    if p not in self.map:
                        visit.append(p)
                        hasparent = True
                    children.setdefault(p, []).append(n)
                if not hasparent:
                    roots.append(n)

            return children, roots

        # Sort functions are supposed to take a list of revisions which
        # can be converted immediately and pick one

        def makebranchsorter():
            """If the previously converted revision has a child in the
            eligible revisions list, pick it. Return the list head
            otherwise. Branch sort attempts to minimize branch
            switching, which is harmful for Mercurial backend
            compression.
            """
            # One-element list used as a mutable cell shared with picknext.
            prev = [None]

            def picknext(nodes):
                next = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        next = n
                        break
                prev[0] = next
                return next

            return picknext

        def makesourcesorter():
            """Source specific sort."""
            keyfn = lambda n: self.commitcache[n].sortkey

            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]

            return picknext

        def makeclosesorter():
            """Close order sort."""
            # False sorts before True, so commits carrying b'close' in
            # their extra come first.
            keyfn = lambda n: (
                b'close' not in self.commitcache[n].extra,
                self.commitcache[n].sortkey,
            )

            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]

            return picknext

        def makedatesorter():
            """Sort revisions by date."""
            dates = {}

            def getdate(n):
                # Parsed dates are memoized per revision.
                if n not in dates:
                    dates[n] = dateutil.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext

        if sortmode == b'branchsort':
            picknext = makebranchsorter()
        elif sortmode == b'datesort':
            picknext = makedatesorter()
        elif sortmode == b'sourcesort':
            picknext = makesourcesorter()
        elif sortmode == b'closesort':
            picknext = makeclosesorter()
        else:
            raise error.Abort(_(b'unknown sort mode: %s') % sortmode)

        children, actives = mapchildren(parents)

        s = []
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise error.Abort(
                        _(b'cycle detected between %s and %s')
                        % (recode(c), recode(n))
                    )
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise error.Abort(_(b"not all revisions were sorted"))

        return s

    def writeauthormap(self):
        """Write the author map to ``self.authorfile``, if one is set."""
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_(b'writing author map file %s\n') % authorfile)
            # The context manager closes the file even if a write fails.
            with open(authorfile, b'wb+') as ofile:
                for author in self.authors:
                    ofile.write(
                        util.tonativeeol(
                            b"%s=%s\n" % (author, self.authors[author])
                        )
                    )

    def readauthormap(self, authorfile):
        """Merge the entries of ``authorfile`` into ``self.authors``."""
        self.authors = readauthormap(self.ui, authorfile, self.authors)

    def cachecommit(self, rev):
        """Fetch ``rev`` from the source, apply the author and branch
        mappings, store the commit in ``self.commitcache`` and return it."""
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = mapbranch(commit.branch, self.branchmap)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Convert the single source revision ``rev`` into the destination
        and record the result in ``self.map``.

        ``rev`` must already be present in ``self.commitcache``.
        """
        commit = self.commitcache[rev]
        full = self.opts.get(b'full')
        changes = self.source.getchanges(rev, full)
        if isinstance(changes, bytes):
            # The source returned a revision identifier instead of a
            # change tuple: either SKIPREV or a revision whose mapping is
            # reused for ``rev``.
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies, cleanp2 = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append(
                    (self.map[prev], self.commitcache[prev].branch)
                )
        self.dest.setbranch(commit.branch, pbranches)
        try:
            parents = self.splicemap[rev]
            self.ui.status(
                _(b'spliced in %s as parents of %s\n')
                % (_(b' and ').join(parents), rev)
            )
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            # No splicemap entry: use the converted natural parents plus
            # any optional parents that have already been converted.
            parents = [b[0] for b in pbranches]
            parents.extend(
                self.map[x] for x in commit.optparents if x in self.map
            )
        if len(pbranches) != 2:
            cleanp2 = set()
        if len(parents) < 3:
            source = progresssource(self.ui, self.source, len(files))
        else:
            # For an octopus merge, we end up traversing the list of
            # changed files N-1 times. This tweak to the number of
            # files makes it so the progress bar doesn't overflow
            # itself.
            source = progresssource(
                self.ui, self.source, len(files) * (len(parents) - 1)
            )
        newnode = self.dest.putcommit(
            files, copies, parents, commit, source, self.map, full, cleanp2
        )
        source.close()
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self, sortmode):
        """Run the whole conversion: scan, sort, copy every revision, then
        transfer tags and bookmarks and write the author map.

        cleanup() is always invoked, whether or not the conversion
        succeeded.
        """
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_(b"scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.mergesplicemap(parents, self.splicemap)
            self.ui.status(_(b"sorting...\n"))
            t = self.toposort(parents, sortmode)
            num = len(t)
            # Stays None when there is nothing to convert; tags and
            # bookmarks are only pushed when at least one revision was.
            c = None

            self.ui.status(_(b"converting...\n"))
            progress = self.ui.makeprogress(
                _(b'converting'), unit=_(b'revisions'), total=len(t)
            )
            for i, c in enumerate(t):
                num -= 1
                desc = self.commitcache[c].desc
                if b"\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because the encoding.encoding convert()
                # uses is 'utf-8'
                self.ui.status(b"%d %s\n" % (num, recode(desc)))
                self.ui.note(_(b"source: %s\n") % recode(c))
                progress.update(i)
                self.copy(c)
            progress.complete()

            if not self.ui.configbool(b'convert', b'skiptags'):
                tags = self.source.gettags()
                ctags = {}
                for k in tags:
                    v = tags[k]
                    if self.map.get(v, SKIPREV) != SKIPREV:
                        ctags[k] = self.map[v]

                if c and ctags:
                    nrev, tagsparent = self.dest.puttags(ctags)
                    if nrev and tagsparent:
                        # write another hash correspondence to override the
                        # previous one so we don't end up with extra tag heads
                        tagsparents = [
                            e for e in self.map.items() if e[1] == tagsparent
                        ]
                        if tagsparents:
                            self.map[tagsparents[0][0]] = nrev

            bookmarks = self.source.getbookmarks()
            cbookmarks = {}
            for k in bookmarks:
                v = bookmarks[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    cbookmarks[k] = self.map[v]

            if c and cbookmarks:
                self.dest.putbookmarks(cbookmarks)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Run the after() hooks of dest and source and close the map.

        Each step runs even if an earlier one raised, so the revision map
        is always closed; the exception still propagates.
        """
        try:
            try:
                self.dest.after()
            finally:
                self.source.after()
        finally:
            self.map.close()
|
612 | 612 | |
|
613 | 613 | |
|
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Entry point of the conversion: build sink and source, pick the sort
    mode and run a ``converter``.

    The previous ``encoding.encoding`` is stored in the module global
    ``orig_encoding`` and the encoding is switched to UTF-8; it is not
    restored by this function.

    Raises error.Abort when several sort modes are requested or when the
    chosen mode is not supported by the source.
    """
    global orig_encoding
    opts = pycompat.byteskwargs(opts)
    orig_encoding = encoding.encoding
    encoding.encoding = b'UTF-8'

    # support --authors as an alias for --authormap
    if not opts.get(b'authormap'):
        opts[b'authormap'] = opts.get(b'authors')

    if not dest:
        dest = hg.defaultdest(src) + b"-hg"
        ui.status(_(b"assuming destination %s\n") % dest)

    sink = scmutil.wrapconvertsink(
        convertsink(ui, dest, opts.get(b'dest_type'))
    )

    try:
        source, defaultsort = convertsource(
            ui, src, opts.get(b'source_type'), opts.get(b'rev')
        )
    except Exception:
        # The source could not be opened: remove whatever the sink
        # created, then let the error propagate.
        for created in sink.created:
            shutil.rmtree(created, True)
        raise

    requested = [
        mode
        for mode in (b'branchsort', b'datesort', b'sourcesort', b'closesort')
        if opts.get(mode)
    ]
    if len(requested) > 1:
        raise error.Abort(_(b'more than one sort mode specified'))
    sortmode = requested[0] if requested else defaultsort

    if sortmode == b'sourcesort' and not source.hasnativeorder():
        raise error.Abort(
            _(b'--sourcesort is not supported by this data source')
        )
    if sortmode == b'closesort' and not source.hasnativeclose():
        raise error.Abort(
            _(b'--closesort is not supported by this data source')
        )

    filemappath = opts.get(b'filemap')
    if filemappath:
        source = filemap.filemap_source(ui, source, filemappath)
        sink.setfilemapmode(True)

    if not revmapfile:
        revmapfile = sink.revmapfile()

    converter(ui, source, sink, revmapfile, opts).convert(sortmode)
@@ -1,1655 +1,1655 b'' | |||
|
1 | 1 | # match.py - filename matching |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | |
|
9 | 9 | import bisect |
|
10 | 10 | import copy |
|
11 | 11 | import itertools |
|
12 | 12 | import os |
|
13 | 13 | import re |
|
14 | 14 | |
|
15 | 15 | from .i18n import _ |
|
16 | 16 | from .pycompat import open |
|
17 | 17 | from . import ( |
|
18 | 18 | encoding, |
|
19 | 19 | error, |
|
20 | 20 | pathutil, |
|
21 | 21 | policy, |
|
22 | 22 | pycompat, |
|
23 | 23 | util, |
|
24 | 24 | ) |
|
25 | 25 | from .utils import stringutil |
|
26 | 26 | |
|
# Rust implementation of the 'dirstate' module, looked up through the
# module policy.  NOTE(review): presumably None when the Rust extensions
# are unavailable - confirm against policy.importrust().
rustmod = policy.importrust('dirstate')

# Pattern kinds, i.e. the "kind" half of a b'kind:pattern' string.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose patterns are canonicalized against cwd in _donormalize().
cwdrelativepatternkinds = (b'relpath', b'glob')

# Local alias for util.propertycache.
propertycache = util.propertycache
|
47 | 47 | |
|
48 | 48 | |
|
def _rematcher(regex):
    """Compile ``regex`` with util.re and return its matching function.

    ``test_match`` is returned when the compiled object provides it,
    ``match`` otherwise.
    """
    compiled = util.re.compile(regex)
    # slightly faster, provided by facebook's re2 bindings
    if hasattr(compiled, 'test_match'):
        return compiled.test_match
    return compiled.match
|
58 | 58 | |
|
59 | 59 | |
|
60 | 60 | def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None): |
|
61 | 61 | '''Returns the kindpats list with the 'set' patterns expanded to matchers''' |
|
62 | 62 | matchers = [] |
|
63 | 63 | other = [] |
|
64 | 64 | |
|
65 | 65 | for kind, pat, source in kindpats: |
|
66 | 66 | if kind == b'set': |
|
67 | 67 | if ctx is None: |
|
68 | 68 | raise error.ProgrammingError( |
|
69 | 69 | b"fileset expression with no context" |
|
70 | 70 | ) |
|
71 | 71 | matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn)) |
|
72 | 72 | |
|
73 | 73 | if listsubrepos: |
|
74 | 74 | for subpath in ctx.substate: |
|
75 | 75 | sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn) |
|
76 | 76 | pm = prefixdirmatcher(subpath, sm, badfn=badfn) |
|
77 | 77 | matchers.append(pm) |
|
78 | 78 | |
|
79 | 79 | continue |
|
80 | 80 | other.append((kind, pat, source)) |
|
81 | 81 | return matchers, other |
|
82 | 82 | |
|
83 | 83 | |
|
84 | 84 | def _expandsubinclude(kindpats, root): |
|
85 | 85 | """Returns the list of subinclude matcher args and the kindpats without the |
|
86 | 86 | subincludes in it.""" |
|
87 | 87 | relmatchers = [] |
|
88 | 88 | other = [] |
|
89 | 89 | |
|
90 | 90 | for kind, pat, source in kindpats: |
|
91 | 91 | if kind == b'subinclude': |
|
92 | 92 | sourceroot = pathutil.dirname(util.normpath(source)) |
|
93 | 93 | pat = util.pconvert(pat) |
|
94 | 94 | path = pathutil.join(sourceroot, pat) |
|
95 | 95 | |
|
96 | 96 | newroot = pathutil.dirname(path) |
|
97 | 97 | matcherargs = (newroot, b'', [], [b'include:%s' % path]) |
|
98 | 98 | |
|
99 | 99 | prefix = pathutil.canonpath(root, root, newroot) |
|
100 | 100 | if prefix: |
|
101 | 101 | prefix += b'/' |
|
102 | 102 | relmatchers.append((prefix, matcherargs)) |
|
103 | 103 | else: |
|
104 | 104 | other.append((kind, pat, source)) |
|
105 | 105 | |
|
106 | 106 | return relmatchers, other |
|
107 | 107 | |
|
108 | 108 | |
|
109 | 109 | def _kindpatsalwaysmatch(kindpats): |
|
110 | 110 | """Checks whether the kindspats match everything, as e.g. |
|
111 | 111 | 'relpath:.' does. |
|
112 | 112 | """ |
|
113 | 113 | for kind, pat, source in kindpats: |
|
114 | 114 | if pat != b'' or kind not in [b'relpath', b'glob']: |
|
115 | 115 | return False |
|
116 | 116 | return True |
|
117 | 117 | |
|
118 | 118 | |
|
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build one matcher for ``kindpats``.

    Non-fileset patterns are handed to ``matchercls``; b'set' patterns are
    expanded by _expandsets().  Returns a nevermatcher when nothing is
    left, the sole matcher when there is exactly one, and a unionmatcher
    otherwise.
    """
    filesetmatchers, remaining = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    combined = []
    if remaining:
        combined.append(matchercls(root, remaining, badfn=badfn))
    combined.extend(filesetmatchers)

    count = len(combined)
    if count == 0:
        return nevermatcher(badfn=badfn)
    if count == 1:
        return combined[0]
    return unionmatcher(combined)
|
146 | 146 | |
|
147 | 147 | |
|
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
       matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
       of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    # From here on cwd is expressed relative to root's location on disk.
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        # Note: this branch dereferences ctx, so icasefs requires a ctx.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        # Replacement for _donormalize that additionally runs every
        # non-regex pattern through dirstate.normalize.
        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    # include/exclude patterns always default to the b'glob' kind, whatever
    # 'default' is, and their matchers are built without the badfn callback.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
|
310 | 310 | |
|
311 | 311 | |
|
def exact(files, badfn=None):
    """Return an exactmatcher built from ``files``."""
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
|
314 | 314 | |
|
315 | 315 | |
|
def always(badfn=None):
    """Return an alwaysmatcher constructed with ``badfn``."""
    matcher = alwaysmatcher(badfn)
    return matcher
|
318 | 318 | |
|
319 | 319 | |
|
def never(badfn=None):
    """Return a nevermatcher constructed with ``badfn``."""
    matcher = nevermatcher(badfn)
    return matcher
|
322 | 322 | |
|
323 | 323 | |
|
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is
    ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
|
331 | 331 | |
|
332 | 332 | |
|
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Each entry of ``patterns`` is split into (kind, pat) by ``_patsplit``,
    using ``default`` as the fallback kind. The result is a list of
    ``(kind, pat, source)`` tuples, where ``source`` is the list file or
    include file the pattern came from, or ``b''`` for patterns given
    directly.

    Raises ``error.Abort`` when a list file cannot be read, or when expanding
    an ``include:`` file aborts (the message is prefixed with the include
    path). An unreadable include file is skipped, with a message sent to
    ``warn`` if one was provided.
    """
    kindpats = []
    # All patterns are split up front, before any of them is processed.
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                # listfile0 entries are NUL-separated, listfile entries are
                # one per line; empty entries are dropped in both cases.
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # Recurse on the file's entries. The source reported by the
            # recursive call is ignored: the list file itself is recorded as
            # the source of every pattern it contributed.
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                # Keep the innermost source when the recursion reported one,
                # otherwise attribute the pattern to this include file.
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Re-raise with the include path prepended to the message.
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )  # pytype: disable=unsupported-operands
                )
            except IOError as inst:
                # Unreadable include files are skipped rather than fatal.
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
|
383 | 383 | |
|
384 | 384 | |
|
class basematcher(object):
    """Base class providing the default behaviour of every matcher.

    Out of the box it has no explicit files, its ``matchfn`` rejects every
    path, and it asks callers to visit every directory. Subclasses override
    the pieces they need.
    """

    def __init__(self, badfn=None):
        # Only shadow the class-level ``bad`` when a callback was supplied.
        if badfn is None:
            return
        self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        """Returns True if f is in .files()."""
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not (self.always() or self.isexact() or self.prefix())
|
499 | 499 | |
|
500 | 500 | |
|
class alwaysmatcher(basematcher):
    """Matcher that accepts every path and visits every directory."""

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def matchfn(self, f):
        return True

    def always(self):
        return True

    # Both traversal hints report that the whole subtree is of interest.
    def visitchildrenset(self, dir):
        return b'all'

    def visitdir(self, dir):
        return b'all'
|
521 | 521 | |
|
522 | 522 | |
|
class nevermatcher(basematcher):
    """Matcher that rejects every path and visits no directory."""

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # Claiming to be both an exact matcher and a prefix matcher looks odd for
    # a matcher that matches nothing, but it lets callers pick either fast
    # path. files() returns [], so there are no exact matches and no prefixes
    # to iterate over, which keeps those fast paths cheap and correct.
    def prefix(self):
        return True

    def isexact(self):
        return True

    # Nothing can match, so no directory is worth visiting.
    def visitchildrenset(self, dir):
        return set()

    def visitdir(self, dir):
        return False
|
548 | 548 | |
|
549 | 549 | |
|
class predicatematcher(basematcher):
    """Matcher whose match function is a caller-supplied boolean predicate.

    ``predrepr`` optionally describes the predicate for ``repr()``; when it
    yields nothing, the byte repr of the predicate itself is used instead.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): "predicatenmatcher" looks like a typo, but the text is
        # kept byte-for-byte in case existing output depends on it.
        return b'<predicatenmatcher pred=%s>' % described
|
564 | 564 | |
|
565 | 565 | |
|
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`.

    Two strategies are used depending on the size of `prefix_set`: for more
    than 5 entries the parents of `path` are looked up one by one, otherwise
    every entry is tested as a directory prefix of `path`.
    """
    size = len(prefix_set)
    if not size:
        return False
    if path in prefix_set:
        return True

    if size > 5:
        # With many prefixes it is *probably* cheaper to walk up the
        # directory hierarchy: hierarchies tend to be shallow and hash
        # lookups are cheap.
        for parentdir in pathutil.finddirs(path):
            if parentdir in prefix_set:
                return True
        return False

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    slash = ord(b'/')
    for prefix in prefix_set:
        # `path` itself is known not to be in prefix_set, so whenever
        # startswith() succeeds `path` is strictly longer than `prefix` and
        # the index below cannot raise IndexError.
        if path.startswith(prefix) and path[len(prefix)] == slash:
            return True
    return False
|
591 | 591 | |
|
592 | 592 | |
|
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        pats, matchfn = _buildmatch(kindpats, b'$', root)
        self._pats = pats
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # Directories derived from the explicit files, computed on first use.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        # A prefix matcher covers everything below an explicitly listed dir.
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        # Translate the visitdir() answer: True -> 'this', falsy -> set(),
        # 'all' -> 'all'.
        visit = self.visitdir(dir)
        if visit is True:
            return b'this'
        if visit:
            assert visit == b'all'
            return b'all'
        return set()

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
|
657 | 657 | |
|
658 | 658 | |
|
659 | 659 | # This is basically a reimplementation of pathutil.dirs that stores the |
|
660 | 660 | # children instead of just a count of them, plus a small optional optimization |
|
661 | 661 | # to avoid some directories we don't need. |
|
class _dirchildren(object):
    """Map each directory to the set of names of its immediate children.

    Every path in ``paths`` contributes one (directory, child) pair per
    ancestor directory, with the root directory spelled ``b''``.

    ``onlyinclude`` optionally restricts which directories are recorded:
    when it is None every directory is kept, otherwise only directories
    contained in it are (so an empty collection records nothing).
    """

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None distinct from an empty collection: None means "no
        # filtering". Collapsing None to [] made the default argument drop
        # every directory, leaving the instance permanently empty.
        self._onlyinclude = onlyinclude
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        """Record ``path`` under each of its ancestor directories.

        The empty path (the root itself) has no parent and is ignored.
        """
        if path == b'':
            return
        dirs = self._dirs
        onlyinclude = self._onlyinclude
        for d, b in _dirchildren._findsplitdirs(path):
            if onlyinclude is not None and d not in onlyinclude:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to pathutil.finddirs, except:
        #  - produces a (dirname, basename) tuple, not just 'dirname'
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash.
        oldpos = len(path)
        pos = path.rfind(b'/')
        while pos != -1:
            yield path[:pos], path[pos + 1 : oldpos]
            oldpos = pos
            pos = path.rfind(b'/', 0, pos)
        yield b'', path[:oldpos]

    def get(self, path):
        """Return the recorded children of ``path`` (an empty set if none)."""
        return self._dirs.get(path, set())
|
697 | 697 | |
|
698 | 698 | |
|
class includematcher(basematcher):
    """Matcher built from include-style (kind, pat, source) patterns.

    Besides the match function, it keeps three directory collections used to
    answer traversal questions: recursively included roots, non-recursively
    included dirs, and the parents needed to reach either of them.
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        pats, matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # Feeding dirs, roots and parents and then keeping only parents may
        # look odd, but it covers this case:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # Asked for the children of 'foo' with only self._parents added, we
        # could not answer ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        included = (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        )
        if included:
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
|
758 | 758 | |
|
759 | 759 | |
|
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as-is (not copied); any other iterable is
        # materialized into a new list.
        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)

    # Matching a path is the same as testing membership in .files().
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Directories derived from the listed files, computed on first use.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        # Never returns 'all': only directories leading to a listed file are
        # worth visiting.
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # Note on precedence: `-` binds tighter than `|`, so this evaluates
        # as `self._fileset | (self._dirs - {b''})`.
        return self._fileset | self._dirs - {b''}

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        """Return the names directly under ``dir`` that need to be visited.

        Returns an empty set when there are no files or ``dir`` does not lead
        to any listed file; otherwise a non-empty set of immediate child
        names (files or directories).
        """
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            # At the root every candidate is relevant; the slash filter below
            # keeps only the top-level names.
            candidates = self._visitchildrenset_candidates
        else:
            candidates = self._sorted_visitchildrenset_candidates
            d = dir + b'/'
            # Use bisect to find the first element potentially starting with d
            # (i.e. >= d). This should always find at least one element (we'll
            # assert later if this is not the case).
            first = bisect.bisect_left(candidates, d)
            # We need a representation of the first element that is > d that
            # does not start with d, so since we added a `/` on the end of dir,
            # we'll add whatever comes after slash (we could probably assume
            # that `0` is after `/`, but let's not) to the end of dir instead.
            dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # Use bisect to find the first element >= dnext
            last = bisect.bisect_left(candidates, dnext, lo=first)
            dlen = len(d)
            # Strip the `dir/` prefix so the candidates are relative to dir.
            candidates = {c[dlen:] for c in candidates[first:last]}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        ret = {c for c in candidates if b'/' not in c}
        # We really do not expect ret to be empty, since that would imply that
        # there's something in _dirs that didn't have a file in _fileset.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
|
847 | 847 | |
|
848 | 848 | |
|
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Callbacks always come from the first matcher.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Decide whether ``dir`` needs visiting for "m1 minus m2".

        Returns False when m2 covers the whole subtree, m1's own answer
        (possibly 'all') when m2 has nothing under ``dir``, and otherwise a
        plain bool, since m2 may still exclude things further down.
        """
        # Ask m2 only once; its answer drives both branches below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            return False
        m1_visit = self._m1.visitdir(dir)
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1_visit
        return bool(m1_visit)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
|
916 | 916 | |
|
917 | 917 | |
|
def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    Shortcuts are taken where possible: a missing (None) side yields the other
    side unchanged, and an always-matching side yields a shallow copy of the
    other one instead of a full intersection matcher.
    """
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        # m1 already carries its own callbacks, so a plain copy is enough.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
|
936 | 936 | |
|
937 | 937 | |
|
class intersectionmatcher(basematcher):
    """Matcher accepting only the paths accepted by both wrapped matchers.

    The callbacks (bad, traversedir) are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not always
            # files. For example, if intersecting a matcher "-I glob:foo.txt"
            # with matcher of "path:dir2", we don't want to remove "dir2" from
            # the set.
            return self._m1.files() + self._m2.files()
        # Filter the exact side's files through the other matcher.
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because first=True + second='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        left = self._m1.visitchildrenset(dir)
        if not left:
            return set()
        right = self._m2.visitchildrenset(dir)
        if not right:
            return set()

        # 'all' on one side defers entirely to the other side.
        if left == b'all':
            return right
        if right == b'all':
            return left

        if b'this' in (left, right):
            return b'this'

        assert isinstance(left, set) and isinstance(right, set)
        return left.intersection(right)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
|
997 | 997 | |
|
998 | 998 | |
|
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below `path`, with
        # the "path/" prefix stripped off.
        dirprefix = path + b"/"
        skip = len(path) + 1
        self._files = [
            f[skip:] for f in matcher._files if f.startswith(dirprefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # The subdirectory's own root maps to `path` itself.
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        # Same path remapping as visitdir().
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
|
1079 | 1079 | |
|
1080 | 1080 | |
|
class prefixdirmatcher(basematcher):
    """Expose a matcher rooted in a subdirectory as a matcher of its parent.

    Only the wrapped matcher's matching behaviour is used; its
    non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        # every path handled by the wrapped matcher lives below this prefix
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            # hand the wrapped matcher the path relative to its own root
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        inner = self._matcher
        if dir == self._path:
            return inner.visitdir(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return inner.visitdir(dir[len(prefix) :])
        # outside the prefix only the ancestors of the prefix are visited
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        inner = self._matcher
        if dir == self._path:
            return inner.visitchildrenset(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return inner.visitchildrenset(dir[len(prefix) :])
        return b'this' if dir in self._pathdirs else set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % details
|
1162 | 1162 | |
|
1163 | 1163 | |
|
class unionmatcher(basematcher):
    """Match any file matched by at least one of the given matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                # one matcher wants everything below dir: nothing can top that
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis or answer == b'this':
                # keep scanning: a later matcher may still answer 'all'
                sawthis = True
                continue
            assert isinstance(answer, set)
            children = children.union(answer)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
|
1214 | 1214 | |
|
1215 | 1215 | |
|
def patkind(pattern, default=None):
    r"""Return the kind of a 'kind:pat' pattern, or default if it has none.

    Only known kinds are recognized.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _pat = _patsplit(pattern, default)
    return kind
|
1230 | 1230 | |
|
1231 | 1231 | |
|
def _patsplit(pattern, default):
    """Return (kind, pat) for a pattern with an optional 'kind:' prefix.

    When there is no colon, or the text before the first colon is not a
    known kind, the result is (default, pattern)."""
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
|
1240 | 1240 | |
|
1241 | 1241 | |
|
def _globre(pat):
    r"""Translate an extended glob (bytes) into the equivalent regexp (bytes).

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    escape = util.stringutil.regexbytesescapemap.get
    n = len(pat)
    pos = 0
    depth = 0  # number of currently open '{' groups
    pieces = []

    def peek():
        # next unread byte, or False at end of pattern
        return pos < n and pat[pos : pos + 1]

    while pos < n:
        c = pat[pos : pos + 1]
        pos += 1
        if c == b'*':
            if peek() == b'*':
                pos += 1
                if peek() == b'/':
                    # '**/' also matches zero directories
                    pos += 1
                    pieces.append(b'(?:.*/)?')
                else:
                    pieces.append(b'.*')
            else:
                pieces.append(b'[^/]*')
        elif c == b'?':
            pieces.append(b'.')
        elif c == b'[':
            end = pos
            # a leading '!' or ']' belongs to the class, it does not close it
            if end < n and pat[end : end + 1] in b'!]':
                end += 1
            while end < n and pat[end : end + 1] != b']':
                end += 1
            if end >= n:
                # unterminated class: treat '[' as a literal
                pieces.append(b'\\[')
            else:
                inner = pat[pos:end].replace(b'\\', b'\\\\')
                pos = end + 1
                first = inner[0:1]
                if first == b'!':
                    inner = b'^' + inner[1:]
                elif first == b'^':
                    inner = b'\\' + inner
                pieces.append(b'[' + inner + b']')
        elif c == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif c == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif c == b',' and depth:
            pieces.append(b'|')
        elif c == b'\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(c, c))
        else:
            # ordinary bytes, and '}' / ',' outside of any group
            pieces.append(escape(c, c))
    return b''.join(pieces)
|
1324 | 1324 | |
|
1325 | 1325 | |
|
def _regex(kind, pat, globsuffix):
    """Return the regular expression for a (normalized) pattern of any kind.

    globsuffix is appended to the regexp of globs.
    Raises error.ProgrammingError for a kind that has no regexp form."""
    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        if pat == b'.':
            dirprefix = b''
        else:
            # Pattern is a directory name.
            dirprefix = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        star = b'[^/]*'
        if globre.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(star) :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        return pat if pat.startswith(b'^') else b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
|
1360 | 1360 | |
|
1361 | 1361 | |
|
def _buildmatch(kindpats, globsuffix, root):
    """Return a (regexp string, matcher function) pair for kindpats.

    globsuffix is appended to the regexp of globs."""
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are built on first use, keyed by prefix
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # only rootfilesin patterns: a set lookup on the parent
            # directory replaces the regexp
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                head, slash, _tail = f.rpartition(b'/')
                parent = head if slash else b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
|
1408 | 1408 | |
|
1409 | 1409 | |
|
# Size threshold (in bytes) used by _buildregexmatch(): a single pattern
# regexp longer than this aborts, and the joined regexps are split into a new
# group whenever the running size would exceed it.
MAX_RE_SIZE = 20000
|
1411 | 1411 | |
|
1412 | 1412 | |
|
1413 | 1413 | def _joinregexes(regexps): |
|
1414 | 1414 | """gather multiple regular expressions into a single one""" |
|
1415 | 1415 | return b'|'.join(regexps) |
|
1416 | 1416 | |
|
1417 | 1417 | |
|
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        groups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if (groupsize + piecesize) > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                groups.append(_joinregexes(regexps[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # + 1 accounts for the '|' separator
            groupsize += piecesize + 1

        if groupstart == 0:
            # everything fits in a single regexp
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            groups.append(_joinregexes(regexps[groupstart:]))
            allmatchers = [_rematcher(g) for g in groups]

            def func(s):
                return any(m(s) for m in allmatchers)

        return fullregexp, func
    except re.error:
        # recompile the patterns one by one to report the offending one
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))
|
1470 | 1470 | |
|
1471 | 1471 | |
|
1472 | 1472 | def _patternrootsanddirs(kindpats): |
|
1473 | 1473 | """Returns roots and directories corresponding to each pattern. |
|
1474 | 1474 | |
|
1475 | 1475 | This calculates the roots and directories exactly matching the patterns and |
|
1476 | 1476 | returns a tuple of (roots, dirs) for each. It does not return other |
|
1477 | 1477 | directories which may also need to be considered, like the parent |
|
1478 | 1478 | directories. |
|
1479 | 1479 | """ |
|
1480 | 1480 | r = [] |
|
1481 | 1481 | d = [] |
|
1482 | 1482 | for kind, pat, source in kindpats: |
|
1483 | 1483 | if kind in (b'glob', b'rootglob'): # find the non-glob prefix |
|
1484 | 1484 | root = [] |
|
1485 | 1485 | for p in pat.split(b'/'): |
|
1486 | 1486 | if b'[' in p or b'{' in p or b'*' in p or b'?' in p: |
|
1487 | 1487 | break |
|
1488 | 1488 | root.append(p) |
|
1489 | 1489 | r.append(b'/'.join(root)) |
|
1490 | 1490 | elif kind in (b'relpath', b'path'): |
|
1491 | 1491 | if pat == b'.': |
|
1492 | 1492 | pat = b'' |
|
1493 | 1493 | r.append(pat) |
|
1494 | 1494 | elif kind in (b'rootfilesin',): |
|
1495 | 1495 | if pat == b'.': |
|
1496 | 1496 | pat = b'' |
|
1497 | 1497 | d.append(pat) |
|
1498 | 1498 | else: # relglob, re, relre |
|
1499 | 1499 | r.append(b'') |
|
1500 | 1500 | return r, d |
|
1501 | 1501 | |
|
1502 | 1502 | |
|
def _roots(kindpats):
    '''Return the roots computed by _patternrootsanddirs() for the patterns.'''
    return _patternrootsanddirs(kindpats)[0]
|
1507 | 1507 | |
|
1508 | 1508 | |
|
def _rootsdirsandparents(kindpats):
    """Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set()
    for paths in (dirs, roots):
        parents.update(pathutil.dirs(paths))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
|
1551 | 1551 | |
|
1552 | 1552 | |
|
def _explicitfiles(kindpats):
    """Return the filenames that the patterns may name explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # rootfilesin can only name directories, never files, so drop it
    filable = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(filable)
|
1565 | 1565 | |
|
1566 | 1566 | |
|
1567 | 1567 | def _prefix(kindpats): |
|
1568 | 1568 | '''Whether all the patterns match a prefix (i.e. recursively)''' |
|
1569 | 1569 | for kind, pat, source in kindpats: |
|
1570 | 1570 | if kind not in (b'path', b'relpath'): |
|
1571 | 1571 | return False |
|
1572 | 1572 | return True |
|
1573 | 1573 | |
|
1574 | 1574 | |
|
# Comment-stripping regexp for readpatternfile(); compiled lazily there the
# first time a line containing '#' is seen, then reused.
_commentre = None
|
1576 | 1576 | |
|
1577 | 1577 | |
|
def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    warn, when true, is called with a message for every "syntax:" line
    naming an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    """
    global _commentre

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even when parsing or the warn
    # callback raises; a bare open()/close() pair leaked it in that case.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(fp, start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # a per-line "kind:" prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
@@ -1,3261 +1,3261 b'' | |||
|
1 | 1 | # patch.py - patch file parsing routines |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2006 Brendan Cully <brendan@kublai.com> |
|
4 | 4 | # Copyright 2007 Chris Mason <chris.mason@oracle.com> |
|
5 | 5 | # |
|
6 | 6 | # This software may be used and distributed according to the terms of the |
|
7 | 7 | # GNU General Public License version 2 or any later version. |
|
8 | 8 | |
|
9 | 9 | |
|
10 | 10 | import collections |
|
11 | 11 | import contextlib |
|
12 | 12 | import copy |
|
13 | 13 | import errno |
|
14 | 14 | import os |
|
15 | 15 | import re |
|
16 | 16 | import shutil |
|
17 | 17 | import zlib |
|
18 | 18 | |
|
19 | 19 | from .i18n import _ |
|
20 | 20 | from .node import ( |
|
21 | 21 | hex, |
|
22 | 22 | sha1nodeconstants, |
|
23 | 23 | short, |
|
24 | 24 | ) |
|
25 | 25 | from .pycompat import open |
|
26 | 26 | from . import ( |
|
27 | 27 | copies, |
|
28 | 28 | diffhelper, |
|
29 | 29 | diffutil, |
|
30 | 30 | encoding, |
|
31 | 31 | error, |
|
32 | 32 | mail, |
|
33 | 33 | mdiff, |
|
34 | 34 | pathutil, |
|
35 | 35 | pycompat, |
|
36 | 36 | scmutil, |
|
37 | 37 | similar, |
|
38 | 38 | util, |
|
39 | 39 | vfs as vfsmod, |
|
40 | 40 | ) |
|
41 | 41 | from .utils import ( |
|
42 | 42 | dateutil, |
|
43 | 43 | hashutil, |
|
44 | 44 | procutil, |
|
45 | 45 | stringutil, |
|
46 | 46 | ) |
|
47 | 47 | |
|
48 | 48 | stringio = util.stringio |
|
49 | 49 | |
|
50 | 50 | gitre = re.compile(br'diff --git a/(.*) b/(.*)') |
|
51 | 51 | tabsplitter = re.compile(br'(\t+|[^\t]+)') |
|
52 | 52 | wordsplitter = re.compile( |
|
53 | 53 | br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|[^ \ta-zA-Z0-9_\x80-\xff])' |
|
54 | 54 | ) |
|
55 | 55 | |
|
56 | 56 | PatchError = error.PatchError |
|
57 | 57 | PatchParseError = error.PatchParseError |
|
58 | 58 | PatchApplicationError = error.PatchApplicationError |
|
59 | 59 | |
|
60 | 60 | # public functions |
|
61 | 61 | |
|
62 | 62 | |
|
def split(stream):
    '''return an iterator of individual patches from a stream'''

    def isheader(line, inheader):
        if inheader and line.startswith((b' ', b'\t')):
            # continuation
            return True
        if line.startswith((b' ', b'-', b'+')):
            # diff line - don't check for header pattern in there
            return False
        fields = line.split(b': ', 1)
        if len(fields) != 2:
            return False
        return b' ' not in fields[0]

    def chunk(lines):
        return stringio(b''.join(lines))

    def hgsplit(stream, cur):
        # split on every "# HG changeset patch" marker seen outside a header
        inheader = True

        for line in stream:
            if not line.strip():
                inheader = False
            if not inheader and line.startswith(b'# HG changeset patch'):
                yield chunk(cur)
                cur = []
                inheader = True

            cur.append(line)

        if cur:
            yield chunk(cur)

    def mboxsplit(stream, cur):
        # each mbox message (minus its "From " line) is split recursively
        for line in stream:
            if line.startswith(b'From '):
                for piece in split(chunk(cur[1:])):
                    yield piece
                cur = []

            cur.append(line)

        if cur:
            for piece in split(chunk(cur[1:])):
                yield piece

    def mimesplit(stream, cur):
        def msgfp(m):
            fp = stringio()
            # pytype: disable=wrong-arg-types
            g = mail.Generator(fp, mangle_from_=False)
            # pytype: enable=wrong-arg-types
            g.flatten(m)
            fp.seek(0)
            return fp

        # the email parser needs the whole remaining stream
        for line in stream:
            cur.append(line)
        message = mail.parse(chunk(cur))

        if not message.is_multipart():
            yield msgfp(message)
        else:
            ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
            for part in message.walk():
                ct = part.get_content_type()
                if ct not in ok_types:
                    continue
                yield msgfp(part)

    def headersplit(stream, cur):
        inheader = False

        for line in stream:
            if not inheader and isheader(line, inheader):
                yield chunk(cur)
                cur = []
                inheader = True
            if inheader and not isheader(line, inheader):
                inheader = False

            cur.append(line)

        if cur:
            yield chunk(cur)

    def remainder(cur):
        yield chunk(cur)

    class fiter(object):
        # iterator adapter for objects that only provide readline()
        def __init__(self, fp):
            self.fp = fp

        def __iter__(self):
            return self

        def next(self):
            line = self.fp.readline()
            if not line:
                raise StopIteration
            return line

        __next__ = next

    if not util.safehasattr(stream, b'next'):
        # http responses, for example, have readline but not next
        stream = fiter(stream)

    mimeheaders = [b'content-type']
    inheader = False
    cur = []

    # read just far enough to decide which splitter applies, then hand it
    # the lines consumed so far together with the rest of the stream
    for line in stream:
        cur.append(line)
        if line.startswith(b'# HG changeset patch'):
            return hgsplit(stream, cur)
        if line.startswith(b'From '):
            return mboxsplit(stream, cur)
        if isheader(line, inheader):
            inheader = True
            if line.split(b':', 1)[0].lower() in mimeheaders:
                # let email parser handle this
                return mimesplit(stream, cur)
        elif line.startswith(b'--- ') and inheader:
            # No evil headers seen by diff start, split by hand
            return headersplit(stream, cur)
        # Not enough info, keep reading

    # if we are here, we have a very plain patch
    return remainder(cur)
|
194 | 194 | |
|
195 | 195 | |
|
196 | 196 | ## Some facility for extensible patch parsing: |
|
197 | 197 | # list of pairs ("header to match", "data key") |
|
198 | 198 | patchheadermap = [ |
|
199 | 199 | (b'Date', b'date'), |
|
200 | 200 | (b'Branch', b'branch'), |
|
201 | 201 | (b'Node ID', b'nodeid'), |
|
202 | 202 | ] |
|
203 | 203 | |
|
204 | 204 | |
|
@contextlib.contextmanager
def extract(ui, fileobj):
    """extract patch from data read from fileobj.

    patch can be a normal patch or contained in an email message.

    This is a context manager yielding a dictionary. Standard keys are:
      - filename,
      - message,
      - user,
      - date,
      - branch,
      - node,
      - p1,
      - p2.
    Any item can be missing from the dictionary. If filename is missing,
    fileobj did not contain a patch.

    The temporary file backing filename is closed and removed when the
    context exits, so it must only be used inside the ``with`` block."""

    fd, tmpname = pycompat.mkstemp(prefix=b'hg-patch-')
    tmpfp = os.fdopen(fd, 'wb')
    try:
        yield _extract(ui, fileobj, tmpname, tmpfp)
    finally:
        # Nested so that the temporary file is removed even if close()
        # itself raises (e.g. a failed flush).
        try:
            tmpfp.close()
        finally:
            os.unlink(tmpname)
|
230 | 230 | |
|
231 | 231 | |
|
def _extract(ui, fileobj, tmpname, tmpfp):
    """Parse fileobj and return the patch metadata dictionary.

    The payload of every message part in which a diff is detected is
    written to tmpfp, which is closed before returning. tmpname is only
    recorded in the result (under b'filename') when at least one diff was
    found. The other keys that may be set are b'user', b'nodeid',
    b'message', b'p1', b'p2' and the data keys listed in patchheadermap.
    """

    # attempt to detect the start of a patch
    # (this heuristic is borrowed from quilt)
    diffre = re.compile(
        br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
        br'retrieving revision [0-9]+(\.[0-9]+)*$|'
        br'---[ \t].*?^\+\+\+[ \t]|'
        br'\*\*\*[ \t].*?^---[ \t])',
        re.MULTILINE | re.DOTALL,
    )

    data = {}

    msg = mail.parse(fileobj)

    # each header is decoded only when present; otherwise the falsy header
    # value itself is kept
    subject = msg['Subject'] and mail.headdecode(msg['Subject'])
    data[b'user'] = msg['From'] and mail.headdecode(msg['From'])
    if not subject and not data[b'user']:
        # Not an email, restore parsed headers if any
        subject = (
            b'\n'.join(
                b': '.join(map(encoding.strtolocal, h)) for h in msg.items()
            )
            + b'\n'
        )

    # should try to parse msg['Date']
    parents = []

    nodeid = msg['X-Mercurial-Node']
    if nodeid:
        data[b'nodeid'] = nodeid = mail.headdecode(nodeid)
        ui.debug(b'Node ID: %s\n' % nodeid)

    if subject:
        if subject.startswith(b'[PATCH'):
            # drop the leading "[PATCH ...]" tag
            pend = subject.find(b']')
            if pend >= 0:
                subject = subject[pend + 1 :].lstrip()
        # join continuation lines (newline followed by blanks) with a space
        subject = re.sub(br'\n[ \t]+', b' ', subject)
        ui.debug(b'Subject: %s\n' % subject)
    if data[b'user']:
        ui.debug(b'From: %s\n' % data[b'user'])
    diffs_seen = 0
    ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
    message = b''
    for part in msg.walk():
        content_type = pycompat.bytestr(part.get_content_type())
        ui.debug(b'Content-Type: %s\n' % content_type)
        if content_type not in ok_types:
            continue
        payload = part.get_payload(decode=True)
        m = diffre.search(payload)
        if m:
            # hgpatch: a "# HG changeset patch" marker was seen in this part
            # hgpatchheader: currently inside the "# ..." header lines
            # ignoretext: a lone "---" line was seen; skip the text after it
            hgpatch = False
            hgpatchheader = False
            ignoretext = False

            ui.debug(b'found patch at byte %d\n' % m.start(0))
            diffs_seen += 1
            cfp = stringio()
            # everything before the start of the diff is scanned for
            # metadata and for the commit message
            for line in payload[: m.start(0)].splitlines():
                if line.startswith(b'# HG changeset patch') and not hgpatch:
                    ui.debug(b'patch generated by hg export\n')
                    hgpatch = True
                    hgpatchheader = True
                    # drop earlier commit message content
                    cfp.seek(0)
                    cfp.truncate()
                    subject = None
                elif hgpatchheader:
                    if line.startswith(b'# User '):
                        data[b'user'] = line[7:]
                        ui.debug(b'From: %s\n' % data[b'user'])
                    elif line.startswith(b"# Parent "):
                        parents.append(line[9:].lstrip())
                    elif line.startswith(b"# "):
                        for header, key in patchheadermap:
                            prefix = b'# %s ' % header
                            if line.startswith(prefix):
                                data[key] = line[len(prefix) :]
                                ui.debug(b'%s: %s\n' % (header, data[key]))
                    else:
                        # first line not starting with "# " ends the header;
                        # that line itself is written to the message below
                        hgpatchheader = False
                elif line == b'---':
                    ignoretext = True
                if not hgpatchheader and not ignoretext:
                    cfp.write(line)
                    cfp.write(b'\n')
            message = cfp.getvalue()
            if tmpfp:
                tmpfp.write(payload)
                # keep successive payloads separated by a newline
                if not payload.endswith(b'\n'):
                    tmpfp.write(b'\n')
        elif not diffs_seen and message and content_type == b'text/plain':
            # NOTE(review): message is only assigned in the branch above,
            # which also increments diffs_seen, so this branch looks
            # unreachable from the code visible here -- confirm.
            message += b'\n' + payload

    if subject and not message.startswith(subject):
        message = b'%s\n%s' % (subject, message)
    data[b'message'] = message
    tmpfp.close()
    # the first "# Parent" line becomes p1, the second (if any) p2
    if parents:
        data[b'p1'] = parents.pop(0)
        if parents:
            data[b'p2'] = parents.pop(0)

    if diffs_seen:
        data[b'filename'] = tmpname

    return data
|
343 | 343 | |
|
344 | 344 | |
|
class patchmeta(object):
    """Metadata describing one file touched by a patch.

    'op' names the operation: ADD, DELETE, RENAME, MODIFY or COPY.
    'path' is the path of the patched file. 'oldpath' is the origin
    file for COPY and RENAME and None for the other operations. 'mode'
    stays None unless the file mode is changed, in which case it is an
    (islink, isexec) pair whose members are truthy when the file is a
    symlink or is executable, respectively. 'binary' tells whether the
    change is a git binary patch.
    """

    def __init__(self, path):
        self.path, self.oldpath = path, None
        self.mode = None
        self.op = b'MODIFY'
        self.binary = False

    def setmode(self, mode):
        """Record the link and exec bits of the numeric file mode."""
        # the masked integers are stored as-is, not converted to bool
        self.mode = (mode & 0o20000, mode & 0o100)

    def copy(self):
        """Return a new patchmeta carrying the same field values."""
        clone = patchmeta(self.path)
        for attr in ('oldpath', 'mode', 'op', 'binary'):
            setattr(clone, attr, getattr(self, attr))
        return clone

    def _ispatchinga(self, afile):
        # /dev/null on the "a" side is only consistent with an addition
        if afile != b'/dev/null':
            source = self.oldpath or self.path
            return afile == b'a/' + source
        return self.op == b'ADD'

    def _ispatchingb(self, bfile):
        # /dev/null on the "b" side is only consistent with a deletion
        if bfile != b'/dev/null':
            return bfile == b'b/' + self.path
        return self.op == b'DELETE'

    def ispatching(self, afile, bfile):
        """Tell whether the afile/bfile pair of a diff refers to this file."""
        if not self._ispatchinga(afile):
            return False
        return self._ispatchingb(bfile)

    def __repr__(self):
        return "<patchmeta %s %r>" % (self.op, self.path)
|
391 | 391 | |
|
392 | 392 | |
|
def readgitpatch(lr):
    """Collect git-style metadata from the patch lines produced by lr.

    Returns a list with one patchmeta per "diff --git" header recognised
    by gitre, in the order the headers appear.
    """
    # (line prefix, operation to record or None, attribute receiving the path)
    pathlines = (
        (b'rename from ', b'RENAME', 'oldpath'),
        (b'rename to ', None, 'path'),
        (b'copy from ', b'COPY', 'oldpath'),
        (b'copy to ', None, 'path'),
    )

    current = None
    found = []
    for raw in lr:
        line = raw.rstrip(b'\r\n')

        if line.startswith(b'diff --git a/'):
            m = gitre.match(line)
            if m:
                # a new file section starts: flush the previous one
                if current:
                    found.append(current)
                current = patchmeta(m.group(2))
            continue

        if not current:
            continue

        if line.startswith(b'--- '):
            # the extended header is over once the diff body begins
            found.append(current)
            current = None
            continue

        for prefix, op, attr in pathlines:
            if line.startswith(prefix):
                if op is not None:
                    current.op = op
                setattr(current, attr, line[len(prefix) :])
                break
        else:
            if line.startswith(b'deleted file'):
                current.op = b'DELETE'
            elif line.startswith(b'new file mode '):
                current.op = b'ADD'
                current.setmode(int(line[-6:], 8))
            elif line.startswith(b'new mode '):
                current.setmode(int(line[-6:], 8))
            elif line.startswith(b'GIT binary patch'):
                current.binary = True

    # a section without a diff body (e.g. a pure rename) is still pending
    if current:
        found.append(current)

    return found
|
436 | 436 | |
|
437 | 437 | |
|
class linereader(object):
    """Wrap a file object so that lines can be pushed back onto the input.

    Pushed lines are returned by readline(), oldest first, before anything
    more is read from the wrapped file object.
    """

    def __init__(self, fp):
        self.fp = fp
        self.buf = []

    def push(self, line):
        """Queue line for a later readline(); None is ignored."""
        if line is None:
            return
        self.buf.append(line)

    def readline(self):
        """Return the oldest pushed line, or the next line of the file."""
        if not self.buf:
            return self.fp.readline()
        return self.buf.pop(0)

    def __iter__(self):
        # iteration stops at the first empty bytes string readline() returns
        return iter(self.readline, b'')
|
457 | 457 | |
|
458 | 458 | |
|
class abstractbackend(object):
    """Interface of the objects through which patched files are read and
    written.

    Except for writerej(), every operation raises NotImplementedError
    here and has to be supplied by a subclass.
    """

    def __init__(self, ui):
        self.ui = ui

    def getfile(self, fname):
        """Return target file data and flags as a (data, (islink,
        isexec)) tuple. Data is None if file is missing/deleted.
        """
        raise NotImplementedError

    def setfile(self, fname, data, mode, copysource):
        """Write data to target file fname and set its mode. mode is a
        (islink, isexec) tuple. If data is None, the file content should
        be left unchanged. If the file is modified after being copied,
        copysource is set to the original file name.
        """
        raise NotImplementedError

    def unlink(self, fname):
        """Unlink target file."""
        raise NotImplementedError

    def writerej(self, fname, failed, total, lines):
        """Write rejected lines for fname. failed is the number of hunks
        which failed to apply and total the total number of hunks for this
        file.

        The base implementation does nothing.
        """

    def exists(self, fname):
        """Tell whether the target file fname exists."""
        raise NotImplementedError

    def close(self):
        """Finish the use of this backend."""
        raise NotImplementedError
|
492 | 492 | |
|
493 | 493 | |
|
class fsbackend(abstractbackend):
    """Backend operating on files below basedir through a vfs opener."""

    def __init__(self, ui, basedir):
        super(fsbackend, self).__init__(ui)
        self.opener = vfsmod.vfs(basedir)

    def getfile(self, fname):
        """Return (data, (islink, isexec)) for fname.

        For a symlink, data is the link target. (None, None) is returned
        when reading the file fails with ENOENT.
        """
        if self.opener.islink(fname):
            return (self.opener.readlink(fname), (True, False))

        isexec = False
        try:
            isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
        except OSError as e:
            # a missing file is reported by the read below
            if e.errno != errno.ENOENT:
                raise
        try:
            return (self.opener.read(fname), (False, isexec))
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            return None, None

    def setfile(self, fname, data, mode, copysource):
        """Write data to fname and apply the (islink, isexec) mode.

        When data is None only the flags are updated. copysource is not
        used by this backend.
        """
        islink, isexec = mode
        if data is None:
            self.opener.setflags(fname, islink, isexec)
            return
        if islink:
            self.opener.symlink(data, fname)
        else:
            self.opener.write(fname, data)
            if isexec:
                self.opener.setflags(fname, False, True)

    def unlink(self, fname):
        """Remove fname; a file that is already missing is not an error."""
        rmdir = self.ui.configbool(b'experimental', b'removeemptydirs')
        self.opener.unlinkpath(fname, ignoremissing=True, rmdir=rmdir)

    def writerej(self, fname, failed, total, lines):
        """Warn about the failed hunks and save lines to "<fname>.rej"."""
        fname = fname + b".rej"
        self.ui.warn(
            _(b"%d out of %d hunks FAILED -- saving rejects to file %s\n")
            % (failed, total, fname)
        )
        fp = self.opener(fname, b'w')
        try:
            fp.writelines(lines)
        finally:
            # do not leak the file handle when writing fails
            fp.close()

    def exists(self, fname):
        """Tell whether fname exists; uses lexists() on the opener."""
        return self.opener.lexists(fname)
|
544 | 544 | |
|
545 | 545 | |
|
class workingbackend(fsbackend):
    """Filesystem backend rooted at repo.root that also records which
    files were changed, removed or copied, so that close() can report
    them to the repository.
    """

    def __init__(self, ui, repo, similarity):
        super(workingbackend, self).__init__(ui, repo.root)
        self.repo = repo
        self.similarity = similarity
        self.removed = set()
        self.changed = set()
        self.copied = []

    def _checkknown(self, fname):
        """Refuse to touch a file that exists but is not tracked."""
        if self.repo.dirstate.get_entry(fname).any_tracked:
            return
        if not self.exists(fname):
            return
        raise PatchApplicationError(
            _(b'cannot patch %s: file is not tracked') % fname
        )

    def setfile(self, fname, data, mode, copysource):
        """Write the file as fsbackend does, then record the change."""
        self._checkknown(fname)
        super(workingbackend, self).setfile(fname, data, mode, copysource)
        if copysource is not None:
            self.copied.append((copysource, fname))
        self.changed.add(fname)

    def unlink(self, fname):
        """Remove the file as fsbackend does, then record the removal."""
        self._checkknown(fname)
        super(workingbackend, self).unlink(fname)
        self.removed.add(fname)
        self.changed.add(fname)

    def close(self):
        """Register copies, removals and touched files with the repository.

        Returns the sorted list of every file name recorded as changed.
        """
        wctx = self.repo[None]
        touched = set(self.changed)
        for source, dest in self.copied:
            scmutil.dirstatecopy(self.ui, self.repo, wctx, source, dest)
        if self.removed:
            wctx.forget(sorted(self.removed))
            # File was deleted and no longer belongs to the
            # dirstate, it was probably marked added then
            # deleted, and should not be considered by
            # marktouched().
            touched.difference_update(
                f for f in self.removed if f not in self.repo.dirstate
            )
        if touched:
            scmutil.marktouched(self.repo, touched, self.similarity)
        return sorted(self.changed)
|
593 | 593 | |
|
594 | 594 | |
|
class filestore(object):
    """Store of (data, mode, copied) entries keyed by file name.

    Entries are kept in memory while the total size of their data stays
    within maxsize bytes; further entries are written to a temporary
    directory that close() removes. maxsize defaults to 4 MiB and a
    negative value keeps everything in memory.
    """

    def __init__(self, maxsize=None):
        self.opener = None  # opener on the temporary directory, made lazily
        self.files = {}  # fname -> (name of the temporary file, mode, copied)
        self.created = 0  # number of temporary files written so far
        self.maxsize = maxsize
        if self.maxsize is None:
            self.maxsize = 4 * (2 ** 20)
        self.size = 0  # bytes of data currently held in self.data
        self.data = {}  # fname -> (data, mode, copied)

    def setfile(self, fname, data, mode, copied=None):
        """Store data, mode and copied for fname, replacing any earlier
        entry for the same name.
        """
        # Forget an earlier in-memory entry first: its bytes must not be
        # counted twice, and it must not hide the new content from
        # getfile() when that content ends up on disk.
        previous = self.data.pop(fname, None)
        if previous is not None:
            self.size -= len(previous[0])

        if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
            self.data[fname] = (data, mode, copied)
            self.size += len(data)
        else:
            if self.opener is None:
                root = pycompat.mkdtemp(prefix=b'hg-patch-')
                self.opener = vfsmod.vfs(root)
            # Avoid filename issues with these simple names
            fn = b'%d' % self.created
            self.opener.write(fn, data)
            self.created += 1
            self.files[fname] = (fn, mode, copied)

    def getfile(self, fname):
        """Return the (data, mode, copied) entry stored for fname, or
        (None, None, None) when there is none.
        """
        if fname in self.data:
            return self.data[fname]
        if not self.opener or fname not in self.files:
            return None, None, None
        fn, mode, copied = self.files[fname]
        return self.opener.read(fn), mode, copied

    def close(self):
        """Delete the temporary directory, if one was created."""
        if self.opener:
            shutil.rmtree(self.opener.base)
|
631 | 631 | |
|
632 | 632 | |
|
class repobackend(abstractbackend):
    """Backend that reads files from the context ctx and writes the
    patched results into store rather than to disk.
    """

    def __init__(self, ui, repo, ctx, store):
        super(repobackend, self).__init__(ui)
        self.repo, self.ctx, self.store = repo, ctx, store
        self.changed = set()
        self.removed = set()
        self.copied = {}

    def _checkknown(self, fname):
        """Raise PatchApplicationError unless fname is part of ctx."""
        if fname in self.ctx:
            return
        raise PatchApplicationError(
            _(b'cannot patch %s: file is not tracked') % fname
        )

    def getfile(self, fname):
        """Return (data, (islink, isexec)) for fname as found in ctx, or
        (None, None) when the lookup fails.
        """
        try:
            fctx = self.ctx[fname]
        except error.LookupError:
            return None, None
        flags = fctx.flags()
        islink = b'l' in flags
        isexec = b'x' in flags
        return fctx.data(), (islink, isexec)

    def setfile(self, fname, data, mode, copysource):
        """Record the new state of fname in the store.

        When data is None the content currently in ctx is stored.
        """
        if copysource:
            self._checkknown(copysource)
        content = self.ctx[fname].data() if data is None else data
        self.store.setfile(fname, content, mode, copysource)
        self.changed.add(fname)
        if copysource:
            self.copied[fname] = copysource

    def unlink(self, fname):
        """Record fname as removed; it has to be part of ctx."""
        self._checkknown(fname)
        self.removed.add(fname)

    def exists(self, fname):
        return fname in self.ctx

    def close(self):
        """Return the set of file names that were changed or removed."""
        return self.changed.union(self.removed)
|
676 | 676 | |
|
677 | 677 | |
|
# @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
# range line of the form "*** start,len ***" or "--- start,len ---";
# as in unidesc, the ",len" part is optional
contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
# names of the end-of-line modes; patchfile shows how each one is handled
eolmodes = [b'strict', b'crlf', b'lf', b'auto']
|
682 | 682 | |
|
683 | 683 | |
|
class patchfile(object):
    """State needed to apply the hunks of a patch to one target file.

    The current content is loaded as a list of lines, hunks are applied
    to that list by apply(), and close() writes the result and the
    rejected hunks through the backend.
    """

    def __init__(self, ui, gp, backend, store, eolmode=b'strict'):
        """Load the file described by the patchmeta gp.

        The content comes from backend, or from store (under the name
        gp.oldpath) when the file is a copy or a rename.
        """
        self.fname = gp.path
        self.eolmode = eolmode
        self.eol = None
        self.backend = backend
        self.ui = ui
        self.lines = []
        self.exists = False
        self.missing = True
        self.mode = gp.mode
        self.copysource = gp.oldpath
        self.create = gp.op in (b'ADD', b'COPY', b'RENAME')
        self.remove = gp.op == b'DELETE'
        if self.copysource is None:
            data, mode = backend.getfile(self.fname)
        else:
            # store.getfile() returns three items; only data and mode are used
            data, mode = store.getfile(self.copysource)[:2]
        if data is not None:
            # for a copy/rename, "exists" is about the destination
            self.exists = self.copysource is None or backend.exists(self.fname)
            self.missing = False
            if data:
                self.lines = mdiff.splitnewlines(data)
            if self.mode is None:
                self.mode = mode
            if self.lines:
                # Normalize line endings
                # (self.eol remembers the ending of the first line)
                if self.lines[0].endswith(b'\r\n'):
                    self.eol = b'\r\n'
                elif self.lines[0].endswith(b'\n'):
                    self.eol = b'\n'
                if eolmode != b'strict':
                    nlines = []
                    for l in self.lines:
                        if l.endswith(b'\r\n'):
                            l = l[:-2] + b'\n'
                        nlines.append(l)
                    self.lines = nlines
        else:
            # no content found: only acceptable when the patch creates it
            if self.create:
                self.missing = False
            if self.mode is None:
                self.mode = (False, False)
        if self.missing:
            self.ui.warn(_(b"unable to find '%s' for patching\n") % self.fname)
            self.ui.warn(
                _(
                    b"(use '--prefix' to apply patch relative to the "
                    b"current directory)\n"
                )
            )

        self.hash = {}  # line content -> list of indexes in self.lines
        self.dirty = 0  # set to True once self.lines has been modified
        self.offset = 0  # net number of lines added by the hunks applied so far
        self.skew = 0  # displacement at which the last fuzzy match was found
        self.rej = []  # hunks that could not be applied
        self.fileprinted = False
        self.printfile(False)
        self.hunks = 0  # number of hunks passed to apply()

    def writelines(self, fname, lines, mode):
        """Write lines to fname through the backend, converting a trailing
        newline to the end-of-line sequence selected by self.eolmode.
        """
        if self.eolmode == b'auto':
            eol = self.eol
        elif self.eolmode == b'crlf':
            eol = b'\r\n'
        else:
            eol = b'\n'

        # conversion is only needed when the target ending is not "\n"
        if self.eolmode != b'strict' and eol and eol != b'\n':
            rawlines = []
            for l in lines:
                if l and l.endswith(b'\n'):
                    l = l[:-1] + eol
                rawlines.append(l)
            lines = rawlines

        self.backend.setfile(fname, b''.join(lines), mode, self.copysource)

    def printfile(self, warn):
        """Announce the file being patched, as a warning if warn is true
        and as a note otherwise; does nothing once fileprinted is set.
        """
        if self.fileprinted:
            return
        if warn or self.ui.verbose:
            self.fileprinted = True
        s = _(b"patching file %s\n") % self.fname
        if warn:
            self.ui.warn(s)
        else:
            self.ui.note(s)

    def findlines(self, l, linenum):
        # looks through the hash and finds candidate lines. The
        # result is a list of line numbers sorted based on distance
        # from linenum

        cand = self.hash.get(l, [])
        if len(cand) > 1:
            # resort our list of potentials forward then back.
            # (this sorts the list stored in self.hash in place)
            cand.sort(key=lambda x: abs(x - linenum))
        return cand

    def write_rej(self):
        # our rejects are a little different from patch(1). This always
        # creates rejects in the same form as the original patch. A file
        # header is inserted so that you can run the reject through patch again
        # without having to type the filename.
        if not self.rej:
            return
        base = os.path.basename(self.fname)
        lines = [b"--- %s\n+++ %s\n" % (base, base)]
        for x in self.rej:
            for l in x.hunk:
                lines.append(l)
                # a hunk line lacking its newline gets the marker appended
                if l[-1:] != b'\n':
                    lines.append(b'\n' + diffhelper.MISSING_NEWLINE_MARKER)
        self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)

    def apply(self, h):
        """Apply hunk h to the in-memory lines.

        Returns -1 when the hunk is rejected, 0 when it is applied by one
        of the direct paths, and otherwise the fuzz length at which the
        search loop found a match (0 there means "applied at an offset").
        Raises PatchParseError when the hunk is not complete.
        """
        if not h.complete():
            raise PatchParseError(
                _(b"bad hunk #%d %s (%d %d %d %d)")
                % (h.number, h.desc, len(h.a), h.lena, len(h.b), h.lenb)
            )

        self.hunks += 1

        if self.missing:
            self.rej.append(h)
            return -1

        if self.exists and self.create:
            if self.copysource:
                self.ui.warn(
                    _(b"cannot create %s: destination already exists\n")
                    % self.fname
                )
            else:
                self.ui.warn(_(b"file %s already exists\n") % self.fname)
            self.rej.append(h)
            return -1

        if isinstance(h, binhunk):
            # a binary hunk replaces the whole content
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                l = h.new(self.lines)
                self.lines[:] = l
                self.offset += len(l)
                self.dirty = True
            return 0

        # keep the hunk as given so a reject is written in its original form
        horig = h
        # note: "and" binds tighter than "or" in this condition
        if (
            self.eolmode in (b'crlf', b'lf')
            or self.eolmode == b'auto'
            and self.eol
        ):
            # If new eols are going to be normalized, then normalize
            # hunk data before patching. Otherwise, preserve input
            # line-endings.
            h = h.getnormalized()

        # fast case first, no offsets, no fuzz
        old, oldstart, new, newstart = h.fuzzit(0, False)
        oldstart += self.offset
        orig_start = oldstart
        # if there's skew we want to emit the "(offset %d lines)" even
        # when the hunk cleanly applies at start + skew, so skip the
        # fast case code
        if self.skew == 0 and diffhelper.testhunk(old, self.lines, oldstart):
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                self.lines[oldstart : oldstart + len(old)] = new
                self.offset += len(new) - len(old)
                self.dirty = True
            return 0

        # ok, we couldn't match the hunk. Lets look for offsets and fuzz it
        self.hash = {}
        for x, s in enumerate(self.lines):
            self.hash.setdefault(s, []).append(x)

        # try increasing amounts of fuzz, up to the configured patch.fuzz
        for fuzzlen in pycompat.xrange(
            self.ui.configint(b"patch", b"fuzz") + 1
        ):
            for toponly in [True, False]:
                old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
                oldstart = oldstart + self.offset + self.skew
                oldstart = min(oldstart, len(self.lines))
                if old:
                    # candidates are the lines equal to the first old line
                    # once its leading character is dropped
                    cand = self.findlines(old[0][1:], oldstart)
                else:
                    # Only adding lines with no or fuzzed context, just
                    # take the skew in account
                    cand = [oldstart]

                for l in cand:
                    if not old or diffhelper.testhunk(old, self.lines, l):
                        self.lines[l : l + len(old)] = new
                        self.offset += len(new) - len(old)
                        self.skew = l - orig_start
                        self.dirty = True
                        offset = l - orig_start - fuzzlen
                        if fuzzlen:
                            msg = _(
                                b"Hunk #%d succeeded at %d "
                                b"with fuzz %d "
                                b"(offset %d lines).\n"
                            )
                            self.printfile(True)
                            self.ui.warn(
                                msg % (h.number, l + 1, fuzzlen, offset)
                            )
                        else:
                            msg = _(
                                b"Hunk #%d succeeded at %d "
                                b"(offset %d lines).\n"
                            )
                            self.ui.note(msg % (h.number, l + 1, offset))
                        return fuzzlen
        self.printfile(True)
        self.ui.warn(_(b"Hunk #%d FAILED at %d\n") % (h.number, orig_start))
        self.rej.append(horig)
        return -1

    def close(self):
        """Write the file if it was modified, write the rejects, and
        return the number of rejected hunks.
        """
        if self.dirty:
            self.writelines(self.fname, self.lines, self.mode)
        self.write_rej()
        return len(self.rej)
|
915 | 915 | |
|
916 | 916 | |
|
class header(object):
    """The header lines of one file section of a patch.

    'header' is the list of header lines; 'hunks' starts empty and is
    filled in by the code that builds the header.
    """

    diffgit_re = re.compile(b'diff --git a/(.*) b/(.*)$')
    diff_re = re.compile(b'diff -r .* (.*)$')
    allhunks_re = re.compile(b'(?:index|deleted file) ')
    pretty_re = re.compile(b'(?:new file|deleted file) ')
    special_re = re.compile(b'(?:index|deleted|copy|rename|new mode) ')
    newfile_re = re.compile(b'(?:new file|copy to|rename to)')

    def __init__(self, header):
        self.header = header
        self.hunks = []

    def binary(self):
        """Tell whether any header line starts with "index "."""
        for line in self.header:
            if line.startswith(b'index '):
                return True
        return False

    def pretty(self, fp):
        """Write a short, readable form of the header to fp.

        Header lines are copied until one of the lines handled specially
        below is reached; a summary replaces the rest.
        """
        for line in self.header:
            if line.startswith(b'index '):
                fp.write(_(b'this modifies a binary file (all or nothing)\n'))
                return
            if self.pretty_re.match(line):
                fp.write(line)
                if self.binary():
                    fp.write(_(b'this is a binary file\n'))
                return
            if line.startswith(b'---'):
                changed = sum(
                    max(hunk.added, hunk.removed) for hunk in self.hunks
                )
                fp.write(
                    _(b'%d hunks, %d lines changed\n')
                    % (len(self.hunks), changed)
                )
                return
            fp.write(line)

    def write(self, fp):
        """Write the header lines to fp unchanged."""
        fp.write(b''.join(self.header))

    def allhunks(self):
        """Tell whether a header line starts with "index " or
        "deleted file ".
        """
        for line in self.header:
            if self.allhunks_re.match(line):
                return True
        return False

    def files(self):
        """Return the file names found on the first header line.

        A "diff --git" line gives a list with one name when both sides
        are equal and two names otherwise; any other line is parsed with
        diff_re and gives its groups as a tuple.
        """
        first = self.header[0]
        gitmatch = self.diffgit_re.match(first)
        if not gitmatch:
            return self.diff_re.match(first).groups()
        source, dest = gitmatch.groups()
        if source == dest:
            return [source]
        return [source, dest]

    def filename(self):
        """Return the last of the names given by files()."""
        names = self.files()
        return names[-1]

    def __repr__(self):
        names = pycompat.rapply(pycompat.fsdecode, self.files())
        return '<header %s>' % (' '.join(names))

    def isnewfile(self):
        """Tell whether a header line starts with "new file", "copy to"
        or "rename to".
        """
        for line in self.header:
            if self.newfile_re.match(line):
                return True
        return False

    def special(self):
        # Special files are shown only at the header level and not at the hunk
        # level for example a file that has been deleted is a special file.
        # The user cannot change the content of the operation, in the case of
        # the deleted file he has to take the deletion or not take it, he
        # cannot take some of it.
        # Newly added files are special if they are empty, they are not special
        # if they have some content as we want to be able to change it
        if self.isnewfile() and len(self.header) == 2:
            return True
        for line in self.header:
            if self.special_re.match(line):
                return True
        return False
|
995 | 995 | |
|
996 | 996 | |
|
class recordhunk(object):
    """patch hunk

    XXX shouldn't we merge this with the other hunk class?
    """

    def __init__(
        self,
        header,
        fromline,
        toline,
        proc,
        before,
        hunk,
        after,
        maxcontext=None,
    ):
        def trimcontext(lines, reverse=False):
            # Returns (number of lines dropped, lines kept).  With
            # reverse=True the tail of ``lines`` is kept, otherwise the head.
            if maxcontext is None:
                return 0, lines
            excess = len(lines) - maxcontext
            if excess <= 0:
                return 0, lines
            if reverse:
                return excess, lines[excess:]
            return excess, lines[:maxcontext]

        dropped, self.before = trimcontext(before, True)
        _ignored, self.after = trimcontext(after, False)
        self.header = header
        # leading context that was trimmed shifts both start lines
        self.fromline = fromline + dropped
        self.toline = toline + dropped
        self.proc = proc
        self.hunk = hunk
        self.added, self.removed = self.countchanges(self.hunk)

    def __eq__(self, v):
        if not isinstance(v, recordhunk):
            return False

        theirs = (v.hunk, v.proc, v.fromline, v.header.files())
        mine = (self.hunk, self.proc, self.fromline, self.header.files())
        return theirs == mine

    def __hash__(self):
        key = (
            tuple(self.hunk),
            tuple(self.header.files()),
            self.fromline,
            self.proc,
        )
        return hash(key)

    def countchanges(self, hunk):
        """hunk -> (n+,n-)"""
        add = sum(1 for h in hunk if h.startswith(b'+'))
        rem = sum(1 for h in hunk if h.startswith(b'-'))
        return add, rem

    def reversehunk(self):
        """return another recordhunk which is the reverse of the hunk

        If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
        that, swap fromline/toline and +/- signs while keep other things
        unchanged.
        """
        swap = {b'+': b'-', b'-': b'+', b'\\': b'\\'}
        flipped = [swap[line[0:1]] + line[1:] for line in self.hunk]
        return recordhunk(
            self.header,
            self.toline,
            self.fromline,
            self.proc,
            self.before,
            flipped,
            self.after,
        )

    def write(self, fp):
        """Write the ``@@`` range line followed by all hunk lines to fp."""
        context = len(self.before) + len(self.after)
        if self.after and self.after[-1] == diffhelper.MISSING_NEWLINE_MARKER:
            # the marker line is not counted in the range lengths
            context -= 1
        rangeline = b'@@ -%d,%d +%d,%d @@%s\n' % (
            self.fromline,
            context + self.removed,
            self.toline,
            context + self.added,
            self.proc and (b' ' + self.proc),
        )
        fp.write(rangeline)
        fp.write(b''.join(self.before + self.hunk + self.after))

    pretty = write

    def filename(self):
        return self.header.filename()

    @encoding.strmethod
    def __repr__(self):
        return b'<hunk %r@%d>' % (self.filename(), self.fromline)
|
1105 | 1105 | |
|
1106 | 1106 | |
|
def getmessages():
    """Return the translated prompt strings used by filterpatch.

    The result maps b'multiple', b'single' and b'help' to dicts keyed by
    operation name (b'apply', b'discard', b'keep', b'record').
    """
    multiple = {
        b'apply': _(b"apply change %d/%d to '%s'?"),
        b'discard': _(b"discard change %d/%d to '%s'?"),
        b'keep': _(b"keep change %d/%d to '%s'?"),
        b'record': _(b"record change %d/%d to '%s'?"),
    }
    single = {
        b'apply': _(b"apply this change to '%s'?"),
        b'discard': _(b"discard this change to '%s'?"),
        b'keep': _(b"keep this change to '%s'?"),
        b'record': _(b"record this change to '%s'?"),
    }
    helptexts = {
        b'apply': _(
            b'[Ynesfdaq?]'
            b'$$ &Yes, apply this change'
            b'$$ &No, skip this change'
            b'$$ &Edit this change manually'
            b'$$ &Skip remaining changes to this file'
            b'$$ Apply remaining changes to this &file'
            b'$$ &Done, skip remaining changes and files'
            b'$$ Apply &all changes to all remaining files'
            b'$$ &Quit, applying no changes'
            b'$$ &? (display help)'
        ),
        b'discard': _(
            b'[Ynesfdaq?]'
            b'$$ &Yes, discard this change'
            b'$$ &No, skip this change'
            b'$$ &Edit this change manually'
            b'$$ &Skip remaining changes to this file'
            b'$$ Discard remaining changes to this &file'
            b'$$ &Done, skip remaining changes and files'
            b'$$ Discard &all changes to all remaining files'
            b'$$ &Quit, discarding no changes'
            b'$$ &? (display help)'
        ),
        b'keep': _(
            b'[Ynesfdaq?]'
            b'$$ &Yes, keep this change'
            b'$$ &No, skip this change'
            b'$$ &Edit this change manually'
            b'$$ &Skip remaining changes to this file'
            b'$$ Keep remaining changes to this &file'
            b'$$ &Done, skip remaining changes and files'
            b'$$ Keep &all changes to all remaining files'
            b'$$ &Quit, keeping all changes'
            b'$$ &? (display help)'
        ),
        b'record': _(
            b'[Ynesfdaq?]'
            b'$$ &Yes, record this change'
            b'$$ &No, skip this change'
            b'$$ &Edit this change manually'
            b'$$ &Skip remaining changes to this file'
            b'$$ Record remaining changes to this &file'
            b'$$ &Done, skip remaining changes and files'
            b'$$ Record &all changes to all remaining files'
            b'$$ &Quit, recording no changes'
            b'$$ &? (display help)'
        ),
    }
    return {
        b'multiple': multiple,
        b'single': single,
        b'help': helptexts,
    }
|
1172 | 1172 | |
|
1173 | 1173 | |
|
def filterpatch(ui, headers, match, operation=None):
    """Interactively filter patch chunks into applied-only chunks

    ``operation`` selects the wording of the prompts (a key of the tables
    returned by getmessages()); it defaults to b'record'.

    Returns a 2-tuple: the flat list of selected headers and hunks, and an
    empty dict.
    """
    messages = getmessages()

    if operation is None:
        operation = b'record'

    def prompt(skipfile, skipall, query, chunk):
        """prompt query, and process base inputs

        - y/n for the rest of file
        - y/n for the rest
        - ? (help)
        - q (quit)

        Return True/False and possibly updated skipfile and skipall.
        The full return value is (ret, skipfile, skipall, newpatches), where
        newpatches is the parsed result of a manual edit, or None.
        """
        newpatches = None
        if skipall is not None:
            return skipall, skipfile, skipall, newpatches
        if skipfile is not None:
            return skipfile, skipfile, skipall, newpatches
        while True:
            resps = messages[b'help'][operation]
            # IMPORTANT: keep the last line of this prompt short (<40 english
            # chars is a good target) because of issue6158.
            r = ui.promptchoice(b"%s\n(enter ? for help) %s" % (query, resps))
            ui.write(b"\n")
            if r == 8:  # ?
                for c, t in ui.extractchoices(resps)[1]:
                    ui.write(b'%s - %s\n' % (c, encoding.lower(t)))
                continue
            elif r == 0:  # yes
                ret = True
            elif r == 1:  # no
                ret = False
            elif r == 2:  # Edit patch
                if chunk is None:
                    ui.write(_(b'cannot edit patch for whole file'))
                    ui.write(b"\n")
                    continue
                if chunk.header.binary():
                    ui.write(_(b'cannot edit patch for binary file'))
                    ui.write(b"\n")
                    continue
                # Patch comment based on the Git one (based on comment at end of
                # https://mercurial-scm.org/wiki/RecordExtension)
                phelp = b'---' + _(
                    b"""
To remove '-' lines, make them ' ' lines (context).
To remove '+' lines, delete them.
Lines starting with # will be removed from the patch.

If the patch applies cleanly, the edited hunk will immediately be
added to the record list. If it does not apply cleanly, a rejects
file will be generated: you can use that when you try again. If
all lines of the hunk are removed, then the edit is aborted and
the hunk is left unchanged.
"""
                )
                (patchfd, patchfn) = pycompat.mkstemp(
                    prefix=b"hg-editor-", suffix=b".diff"
                )
                ncpatchfp = None
                try:
                    # Write the initial patch
                    f = util.nativeeolwriter(os.fdopen(patchfd, 'wb'))
                    try:
                        chunk.header.write(f)
                        chunk.write(f)
                        f.write(
                            b''.join(
                                [b'# ' + i + b'\n' for i in phelp.splitlines()]
                            )
                        )
                    finally:
                        # always release the handle, even if writing fails,
                        # so the unlink below never runs on an open file
                        f.close()
                    # Start the editor and wait for it to complete
                    editor = ui.geteditor()
                    ret = ui.system(
                        b"%s \"%s\"" % (editor, patchfn),
                        environ={b'HGUSER': ui.username()},
                        blockedtag=b'filterpatch',
                    )
                    if ret != 0:
                        ui.warn(_(b"editor exited with exit code %d\n") % ret)
                        continue
                    # Remove comment lines
                    ncpatchfp = stringio()
                    with open(patchfn, 'rb') as patchfp:
                        for line in patchfp:
                            line = util.fromnativeeol(line)
                            if not line.startswith(b'#'):
                                ncpatchfp.write(line)
                    ncpatchfp.seek(0)
                    newpatches = parsepatch(ncpatchfp)
                finally:
                    os.unlink(patchfn)
                    del ncpatchfp
                # Signal that the chunk shouldn't be applied as-is, but
                # provide the new patch to be used instead.
                ret = False
            elif r == 3:  # Skip
                ret = skipfile = False
            elif r == 4:  # file (Record remaining)
                ret = skipfile = True
            elif r == 5:  # done, skip remaining
                ret = skipall = False
            elif r == 6:  # all
                ret = skipall = True
            elif r == 7:  # quit
                raise error.CanceledError(_(b'user quit'))
            return ret, skipfile, skipall, newpatches

    seen = set()
    applied = {}  # 'filename' -> [] of chunks
    skipfile, skipall = None, None
    pos, total = 1, sum(len(h.hunks) for h in headers)
    for h in headers:
        pos += len(h.hunks)
        skipfile = None
        fixoffset = 0
        hdr = b''.join(h.header)
        if hdr in seen:
            continue
        seen.add(hdr)
        if skipall is None:
            h.pretty(ui)
        files = h.files()
        msg = _(b'examine changes to %s?') % _(b' and ').join(
            b"'%s'" % f for f in files
        )
        if all(match.exact(f) for f in files):
            r, skipall, np = True, None, None
        else:
            r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
        if not r:
            continue
        applied[h.filename()] = [h]
        if h.allhunks():
            applied[h.filename()] += h.hunks
            continue
        for i, chunk in enumerate(h.hunks):
            if skipfile is None and skipall is None:
                chunk.pretty(ui)
            if total == 1:
                msg = messages[b'single'][operation] % chunk.filename()
            else:
                idx = pos - len(h.hunks) + i
                msg = messages[b'multiple'][operation] % (
                    idx,
                    total,
                    chunk.filename(),
                )
            r, skipfile, skipall, newpatches = prompt(
                skipfile, skipall, msg, chunk
            )
            if r:
                if fixoffset:
                    # copy so the caller's hunk object is not modified
                    chunk = copy.copy(chunk)
                    chunk.toline += fixoffset
                applied[chunk.filename()].append(chunk)
            elif newpatches is not None:
                for newpatch in newpatches:
                    for newhunk in newpatch.hunks:
                        if fixoffset:
                            newhunk.toline += fixoffset
                        applied[newhunk.filename()].append(newhunk)
            else:
                # a skipped hunk shifts the target lines of later hunks
                fixoffset += chunk.removed - chunk.added
    return (
        sum(
            [h for h in applied.values() if h[0].special() or len(h) > 1],
            [],
        ),
        {},
    )
|
1350 | 1350 | |
|
1351 | 1351 | |
|
class hunk(object):
    """One text hunk of a patch, read from a unified or context diff.

    ``self.hunk`` holds the description line followed by the hunk lines,
    ``self.a``/``self.b`` hold the old/new side lines, and
    ``starta``/``lena``/``startb``/``lenb`` hold the parsed ranges.
    """

    def __init__(self, desc, num, lr, context):
        # desc: hunk description line; num: hunk number used in error
        # messages; lr: line reader (may be None to build an empty shell);
        # context: true to parse as a context diff, false for unified.
        self.number = num
        self.desc = desc
        self.hunk = [desc]
        self.a = []
        self.b = []
        self.starta = self.lena = None
        self.startb = self.lenb = None
        if lr is not None:
            if context:
                self.read_context_hunk(lr)
            else:
                self.read_unified_hunk(lr)

    def getnormalized(self):
        """Return a copy with line endings normalized to LF."""

        def normalize(lines):
            # rewrite a trailing CRLF as LF; other lines pass through as-is
            nlines = []
            for line in lines:
                if line.endswith(b'\r\n'):
                    line = line[:-2] + b'\n'
                nlines.append(line)
            return nlines

        # Dummy object, it is rebuilt manually
        nh = hunk(self.desc, self.number, None, None)
        nh.number = self.number
        nh.desc = self.desc
        # note: self.hunk is shared with the copy, not normalized
        nh.hunk = self.hunk
        nh.a = normalize(self.a)
        nh.b = normalize(self.b)
        nh.starta = self.starta
        nh.startb = self.startb
        nh.lena = self.lena
        nh.lenb = self.lenb
        return nh

    def read_unified_hunk(self, lr):
        """Parse a unified-diff hunk: ranges from self.desc, body from lr.

        Raises PatchParseError if the description does not match
        ``unidesc`` or if diffhelper.addlines reports a parse error.
        """
        m = unidesc.match(self.desc)
        if not m:
            raise PatchParseError(_(b"bad hunk #%d") % self.number)
        self.starta, self.lena, self.startb, self.lenb = m.groups()
        # a missing length group means a length of 1
        if self.lena is None:
            self.lena = 1
        else:
            self.lena = int(self.lena)
        if self.lenb is None:
            self.lenb = 1
        else:
            self.lenb = int(self.lenb)
        self.starta = int(self.starta)
        self.startb = int(self.startb)
        try:
            diffhelper.addlines(
                lr, self.hunk, self.lena, self.lenb, self.a, self.b
            )
        except error.ParseError as e:
            raise PatchParseError(_(b"bad hunk #%d: %s") % (self.number, e))
        # if we hit eof before finishing out the hunk, the last line will
        # be zero length.  Lets try to fix it up.
        while len(self.hunk[-1]) == 0:
            del self.hunk[-1]
            del self.a[-1]
            del self.b[-1]
            self.lena -= 1
            self.lenb -= 1
        self._fixnewline(lr)

    def read_context_hunk(self, lr):
        """Parse a context-diff hunk from lr and convert it to unified form.

        The old block is read first, then the new block is merged into
        self.hunk; finally self.desc and self.hunk[0] are rewritten as a
        unified ``@@`` description.  Raises PatchParseError on malformed
        input.
        """
        self.desc = lr.readline()
        m = contextdesc.match(self.desc)
        if not m:
            raise PatchParseError(_(b"bad hunk #%d") % self.number)
        self.starta, aend = m.groups()
        self.starta = int(self.starta)
        if aend is None:
            aend = self.starta
        self.lena = int(aend) - self.starta
        if self.starta:
            self.lena += 1
        for x in pycompat.xrange(self.lena):
            l = lr.readline()
            if l.startswith(b'---'):
                # lines addition, old block is empty
                lr.push(l)
                break
            # drop the two-character context-diff prefix
            s = l[2:]
            if l.startswith(b'- ') or l.startswith(b'! '):
                u = b'-' + s
            elif l.startswith(b' '):
                u = b' ' + s
            else:
                raise PatchParseError(
                    _(b"bad hunk #%d old text line %d") % (self.number, x)
                )
            self.a.append(u)
            self.hunk.append(u)

        l = lr.readline()
        if l.startswith(br'\ '):
            # "\ " marker line: strip the last character of the previous
            # old line
            s = self.a[-1][:-1]
            self.a[-1] = s
            self.hunk[-1] = s
            l = lr.readline()
        m = contextdesc.match(l)
        if not m:
            raise PatchParseError(_(b"bad hunk #%d") % self.number)
        self.startb, bend = m.groups()
        self.startb = int(self.startb)
        if bend is None:
            bend = self.startb
        self.lenb = int(bend) - self.startb
        if self.startb:
            self.lenb += 1
        # index into self.hunk where the next new-side line is merged
        hunki = 1
        for x in pycompat.xrange(self.lenb):
            l = lr.readline()
            if l.startswith(br'\ '):
                # XXX: the only way to hit this is with an invalid line range.
                # The no-eol marker is not counted in the line range, but I
                # guess there are diff(1) out there which behave differently.
                s = self.b[-1][:-1]
                self.b[-1] = s
                self.hunk[hunki - 1] = s
                continue
            if not l:
                # line deletions, new block is empty and we hit EOF
                lr.push(l)
                break
            s = l[2:]
            if l.startswith(b'+ ') or l.startswith(b'! '):
                u = b'+' + s
            elif l.startswith(b' '):
                u = b' ' + s
            elif len(self.b) == 0:
                # line deletions, new block is empty
                lr.push(l)
                break
            else:
                # NOTE(review): the message says "old text line" although
                # this loop reads the new block -- confirm whether intended.
                raise PatchParseError(
                    _(b"bad hunk #%d old text line %d") % (self.number, x)
                )
            self.b.append(s)
            # advance through self.hunk, skipping '-' lines, until the
            # matching line is found or the new line has to be inserted
            while True:
                if hunki >= len(self.hunk):
                    h = b""
                else:
                    h = self.hunk[hunki]
                hunki += 1
                if h == u:
                    break
                elif h.startswith(b'-'):
                    continue
                else:
                    self.hunk.insert(hunki - 1, u)
                    break

        if not self.a:
            # this happens when lines were only added to the hunk
            for x in self.hunk:
                if x.startswith(b'-') or x.startswith(b' '):
                    self.a.append(x)
        if not self.b:
            # this happens when lines were only deleted from the hunk
            for x in self.hunk:
                if x.startswith(b'+') or x.startswith(b' '):
                    self.b.append(x[1:])
        # @@ -start,len +start,len @@
        self.desc = b"@@ -%d,%d +%d,%d @@\n" % (
            self.starta,
            self.lena,
            self.startb,
            self.lenb,
        )
        self.hunk[0] = self.desc
        self._fixnewline(lr)

    def _fixnewline(self, lr):
        """Consume a following "\\ " marker line, if any.

        When the next line starts with "\\ ", diffhelper.fixnewline is
        applied to the hunk; otherwise the line is pushed back onto lr.
        """
        l = lr.readline()
        if l.startswith(br'\ '):
            diffhelper.fixnewline(self.hunk, self.a, self.b)
        else:
            lr.push(l)

    def complete(self):
        """Return True when both sides hold exactly lena/lenb lines."""
        return len(self.a) == self.lena and len(self.b) == self.lenb

    def _fuzzit(self, old, new, fuzz, toponly):
        """Trim up to ``fuzz`` context lines from old and new.

        Returns (old, new, top) where top is the number of lines removed
        from the start.  With ``toponly`` nothing is removed from the end.
        """
        # this removes context lines from the top and bottom of list 'l'.  It
        # checks the hunk to make sure only context lines are removed, and then
        # returns a new shortened list of lines.
        fuzz = min(fuzz, len(old))
        if fuzz:
            top = 0
            bot = 0
            hlen = len(self.hunk)
            for x in pycompat.xrange(hlen - 1):
                # the hunk starts with the @@ line, so use x+1
                if self.hunk[x + 1].startswith(b' '):
                    top += 1
                else:
                    break
            if not toponly:
                # count trailing context lines, walking back from the end
                for x in pycompat.xrange(hlen - 1):
                    if self.hunk[hlen - bot - 1].startswith(b' '):
                        bot += 1
                    else:
                        break

            bot = min(fuzz, bot)
            top = min(fuzz, top)
            return old[top : len(old) - bot], new[top : len(new) - bot], top
        return old, new, 0

    def fuzzit(self, fuzz, toponly):
        """Return (old, oldstart, new, newstart) after fuzzing the context.

        The start values are self.starta/self.startb shifted by the number
        of leading lines removed, then decremented by one when the
        corresponding length is non-zero and the start is positive.
        """
        old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
        oldstart = self.starta + top
        newstart = self.startb + top
        # zero length hunk ranges already have their start decremented
        if self.lena and oldstart > 0:
            oldstart -= 1
        if self.lenb and newstart > 0:
            newstart -= 1
        return old, oldstart, new, newstart
|
1578 | 1578 | |
|
1579 | 1579 | |
|
class binhunk(object):
    """A binary patch file."""

    def __init__(self, lr, fname):
        self.text = None
        self.delta = False
        self.hunk = [b'GIT binary patch\n']
        self._fname = fname
        self._read(lr)

    def complete(self):
        """Return True once the binary payload has been read."""
        return self.text is not None

    def new(self, lines):
        """Return the new content as a one-element list.

        For a delta hunk the stored text is applied to the joined ``lines``
        with applybindelta; otherwise the stored text is returned as-is.
        """
        if not self.delta:
            return [self.text]
        return [applybindelta(self.text, b''.join(lines))]

    def _read(self, lr):
        """Read the encoded payload from lr and store it in self.text.

        Every line consumed is also appended to self.hunk.  Raises
        PatchParseError when no 'literal'/'delta' line is found, when a line
        cannot be base85-decoded, or when the decompressed size differs from
        the announced one.
        """

        def nextline():
            raw = lr.readline()
            self.hunk.append(raw)
            return raw.rstrip(b'\r\n')

        # locate the "literal <size>" or "delta <size>" line
        while True:
            line = nextline()
            if not line:
                raise PatchParseError(
                    _(b'could not extract "%s" binary data') % self._fname
                )
            isdelta = line.startswith(b'delta ')
            if isdelta or line.startswith(b'literal '):
                prefixlen = 6 if isdelta else 8
                size = int(line[prefixlen:].rstrip())
                if isdelta:
                    self.delta = True
                break

        pieces = []
        line = nextline()
        while len(line) > 1:
            # the first character encodes the decoded length of the line:
            # 'A'..'Z' -> 1..26, anything else is taken relative to 'a' (27+)
            marker = line[0:1]
            if b'A' <= marker <= b'Z':
                nbytes = ord(marker) - ord(b'A') + 1
            else:
                nbytes = ord(marker) - ord(b'a') + 27
            try:
                decoded = util.b85decode(line[1:])
            except ValueError as e:
                raise PatchParseError(
                    _(b'could not decode "%s" binary patch: %s')
                    % (self._fname, stringutil.forcebytestr(e))
                )
            pieces.append(decoded[:nbytes])
            line = nextline()

        text = zlib.decompress(b''.join(pieces))
        if len(text) != size:
            raise PatchParseError(
                _(b'"%s" length is %d bytes, should be %d')
                % (self._fname, len(text), size)
            )
        self.text = text
|
1640 | 1640 | |
|
1641 | 1641 | |
|
def parsefilename(str):
    """Extract the file name from a ``--- ``/``+++ `` patch line.

    The first four bytes and any trailing CR/LF are dropped; the name then
    ends at the first tab or, when there is no tab, at the first space.
    """
    # --- filename \t|space stuff
    name = str[4:].rstrip(b'\r\n')
    for sep in (b'\t', b' '):
        cut = name.find(sep)
        if cut >= 0:
            return name[:cut]
    return name
|
1651 | 1651 | |
|
1652 | 1652 | |
|
def reversehunks(hunks):
    '''reverse the signs in the hunks given as argument

    This function operates on hunks coming out of patch.filterpatch, that is
    a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:

    >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,7 +1,7 @@
    ... +firstline
    ...  c
    ...  1
    ...  2
    ... + 3
    ... -4
    ...  5
    ...  d
    ... +lastline"""
    >>> hunks = parsepatch([rawpatch])
    >>> hunkscomingfromfilterpatch = []
    >>> for h in hunks:
    ...     hunkscomingfromfilterpatch.append(h)
    ...     hunkscomingfromfilterpatch.extend(h.hunks)

    >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
    >>> from . import util
    >>> fp = util.stringio()
    >>> for c in reversedhunks:
    ...     c.write(fp)
    >>> fp.seek(0) or None
    >>> reversedpatch = fp.read()
    >>> print(pycompat.sysstr(reversedpatch))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -1,4 +1,3 @@
    -firstline
     c
     1
     2
    @@ -2,6 +1,6 @@
     c
     1
     2
    - 3
    +4
     5
     d
    @@ -6,3 +5,2 @@
     5
     d
    -lastline

    '''

    # items without a reversehunk attribute (e.g. headers) pass through
    return [
        c.reversehunk() if util.safehasattr(c, b'reversehunk') else c
        for c in hunks
    ]
|
1715 | 1715 | |
|
1716 | 1716 | |
|
def parsepatch(originalchunks, maxcontext=None):
    """patch -> [] of headers -> [] of hunks

    If maxcontext is not None, trim context lines if necessary.

    Raises PatchParseError when scanpatch yields an event that has no
    transition from the current parser state.

    >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,8 +1,10 @@
    ...  1
    ...  2
    ... -3
    ...  4
    ...  5
    ...  6
    ... +6.1
    ... +6.2
    ...  7
    ...  8
    ... +9'''
    >>> out = util.stringio()
    >>> headers = parsepatch([rawpatch], maxcontext=1)
    >>> for header in headers:
    ...     header.write(out)
    ...     for hunk in header.hunks:
    ...         hunk.write(out)
    >>> print(pycompat.sysstr(out.getvalue()))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -2,3 +2,2 @@
     2
    -3
     4
    @@ -6,2 +5,4 @@
     6
    +6.1
    +6.2
     7
    @@ -8,1 +9,2 @@
     8
    +9
    """

    class parser(object):
        """patch parsing state machine"""

        def __init__(self):
            self.fromline = 0
            self.toline = 0
            self.proc = b''
            self.header = None
            self.context = []
            self.before = []
            self.hunk = []
            self.headers = []

        def addrange(self, limits):
            # flush any pending hunk before starting a new range
            self.addcontext([])
            fromstart, fromend, tostart, toend, proc = limits
            self.fromline = int(fromstart)
            self.toline = int(tostart)
            self.proc = proc

        def addcontext(self, context):
            if self.hunk:
                # pending changed lines: emit them as a recordhunk with
                # ``context`` as the trailing context
                h = recordhunk(
                    self.header,
                    self.fromline,
                    self.toline,
                    self.proc,
                    self.before,
                    self.hunk,
                    context,
                    maxcontext,
                )
                self.header.hunks.append(h)
                self.fromline += len(self.before) + h.removed
                self.toline += len(self.before) + h.added
                self.before = []
                self.hunk = []
            self.context = context

        def addhunk(self, hunk):
            if self.context:
                # the context seen so far becomes this hunk's leading context
                self.before = self.context
                self.context = []
            if self.hunk:
                self.addcontext([])
            self.hunk = hunk

        def newfile(self, hdr):
            self.addcontext([])
            h = header(hdr)
            self.headers.append(h)
            self.header = h

        def addother(self, line):
            pass  # 'other' lines are ignored

        def finished(self):
            self.addcontext([])
            return self.headers

        # current state -> {event from scanpatch -> handler}
        transitions = {
            b'file': {
                b'context': addcontext,
                b'file': newfile,
                b'hunk': addhunk,
                b'range': addrange,
            },
            b'context': {
                b'file': newfile,
                b'hunk': addhunk,
                b'range': addrange,
                b'other': addother,
            },
            b'hunk': {
                b'context': addcontext,
                b'file': newfile,
                b'range': addrange,
            },
            b'range': {b'context': addcontext, b'hunk': addhunk},
            b'other': {b'other': addother},
        }

    p = parser()
    fp = stringio()
    fp.write(b''.join(originalchunks))
    fp.seek(0)

    state = b'context'
    for newstate, data in scanpatch(fp):
        # Only the table lookup is guarded: a KeyError raised inside a
        # handler must not be reported as an unhandled transition.
        try:
            handler = p.transitions[state][newstate]
        except KeyError:
            raise PatchParseError(
                b'unhandled transition: %s -> %s' % (state, newstate)
            )
        handler(p, data)
        state = newstate
    del fp
    return p.finished()
|
1859 | 1859 | |
|
1860 | 1860 | |
|
def pathtransform(path, strip, prefix):
    """convert a path read from a patch into a repository path

    ``strip`` leading directory components are dropped from ``path`` (a run
    of consecutive slashes is consumed as one separator), trailing
    whitespace is removed and ``prefix`` is prepended to what is left.

    prefix, if not empty, is expected to be normalized with a / at the end.

    Returns (stripped components, path in repository).

    >>> pathtransform(b'a/b/c', 0, b'')
    ('', 'a/b/c')
    >>> pathtransform(b'   a/b/c   ', 0, b'')
    ('', '   a/b/c')
    >>> pathtransform(b'   a/b/c   ', 2, b'')
    ('a/b/', 'c')
    >>> pathtransform(b'a/b/c', 0, b'd/e/')
    ('', 'd/e/a/b/c')
    >>> pathtransform(b'   a//b/c   ', 2, b'd/e/')
    ('a//b/', 'd/e/c')
    >>> pathtransform(b'a/b/c', 3, b'')
    Traceback (most recent call last):
    PatchApplicationError: unable to strip away 1 of 3 dirs from a/b/c
    """
    if strip == 0:
        # nothing to strip: only the trailing whitespace is removed
        return b'', prefix + path.rstrip()

    lastindex = len(path) - 1
    cut = 0
    remaining = strip
    while remaining > 0:
        slash = path.find(b'/', cut)
        if slash == -1:
            raise PatchApplicationError(
                _(b"unable to strip away %d of %d dirs from %s")
                % (remaining, strip, path)
            )
        cut = slash + 1
        # consume '//' in the path
        while cut < lastindex and path[cut : cut + 1] == b'/':
            cut += 1
        remaining -= 1

    stripped = path[:cut].lstrip()
    kept = path[cut:].rstrip()
    return stripped, prefix + kept
|
1900 | 1900 | |
|
1901 | 1901 | |
|
def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
    """build a patchmeta record for a file header without git metadata

    ``afile_orig`` and ``bfile_orig`` are the source and destination names
    from the patch header; ``hunk`` is the first hunk for that file.  The
    target file name is picked by checking which of the transformed names
    exist in ``backend``.  The returned patchmeta has its ``op`` set to
    ``ADD`` or ``DELETE`` when the header and first hunk describe a creation
    or a removal.

    Raises PatchParseError when both names are /dev/null.
    """

    def dirpart(name):
        # everything up to and including the last '/', or b'' without one
        return name[: name.rfind(b'/') + 1]

    nulla = afile_orig == b"/dev/null"
    nullb = bfile_orig == b"/dev/null"
    create = nulla and hunk.starta == 0 and hunk.lena == 0
    remove = nullb and hunk.startb == 0 and hunk.lenb == 0

    abase, afile = pathtransform(afile_orig, strip, prefix)
    gooda = not nulla and backend.exists(afile)
    bbase, bfile = pathtransform(bfile_orig, strip, prefix)
    if afile != bfile:
        goodb = not nullb and backend.exists(bfile)
    else:
        goodb = gooda
    missing = not (goodb or gooda or create)

    # some diff programs apparently produce patches where the afile is
    # not /dev/null, but afile starts with bfile
    if (
        missing
        and dirpart(afile) == dirpart(bfile)
        and afile.startswith(bfile)
        and hunk.starta == 0
        and hunk.lena == 0
    ):
        create = True
        missing = False

    # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
    # diff is between a file and its backup. In this case, the original
    # file should be patched (see original mpatch code).
    isbackup = abase == bbase and bfile.startswith(afile)
    preferred = afile if isbackup else bfile

    fname = None
    if not missing:
        if gooda and goodb:
            fname = preferred
        elif gooda:
            fname = afile

    if not fname:
        # nothing usable exists yet: fall back on the names themselves
        if not nullb:
            fname = preferred
        elif not nulla:
            fname = afile
        else:
            raise PatchParseError(_(b"undefined source and destination files"))

    gp = patchmeta(fname)
    if create:
        gp.op = b'ADD'
    elif remove:
        gp.op = b'DELETE'
    return gp
|
1961 | 1961 | |
|
1962 | 1962 | |
|
def scanpatch(fp):
    """like patch.iterhunks, but yield different events

    - ('file',    [header_lines + fromfile + tofile])
    - ('context', [context_lines])
    - ('hunk',    [hunk_lines])
    - ('range',   (-start,len, +start,len, proc))

    Lines matching none of the above are yielded as ('other', line).
    """
    lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
    lr = linereader(fp)

    def scanwhile(first, p):
        """gather first plus the following lr lines for which p holds"""
        collected = [first]
        while True:
            candidate = lr.readline()
            if candidate == b'':
                break
            if not p(candidate):
                # not ours: hand it back for the main loop
                lr.push(candidate)
                break
            collected.append(candidate)
        return collected

    def notheader(line):
        words = line.split(None, 1)
        return not (words and words[0] in (b'---', b'diff'))

    def startswithany(prefixes):
        return lambda l: l.startswith(prefixes)

    for line in iter(lr.readline, b''):
        if line.startswith((b'diff --git a/', b'diff -r ')):
            header = scanwhile(line, notheader)
            fromfile = lr.readline()
            if not fromfile.startswith(b'---'):
                lr.push(fromfile)
            else:
                header += [fromfile, lr.readline()]
            yield b'file', header
        elif line.startswith(b' '):
            yield b'context', scanwhile(line, startswithany((b' ', b'\\')))
        elif line.startswith((b'-', b'+')):
            yield b'hunk', scanwhile(
                line, startswithany((b'-', b'+', b'\\'))
            )
        else:
            m = lines_re.match(line)
            if not m:
                yield b'other', line
            else:
                yield b'range', m.groups()
|
2012 | 2012 | |
|
2013 | 2013 | |
|
def scangitpatch(lr, firstline):
    """collect the git metadata of the patch read through ``lr``

    Git patches can emit:
    - rename a to b
    - change b
    - copy a to c
    - change c

    We cannot apply this sequence as-is, the renamed 'a' could not be
    found for it would have been renamed already. And we cannot copy
    from 'b' instead because 'b' would have been changed already. So
    we scan the git patch for copy and rename commands so we can
    perform the copies ahead of time.

    ``firstline`` is pushed back in front of the remaining input before
    scanning.  The scanned file object is rewound to where it was on entry
    before the result of readgitpatch() is returned.
    """
    source = lr.fp
    try:
        start = source.tell()
    except IOError:
        # position unavailable: scan an in-memory copy of the rest instead
        start = 0
        source = stringio(source.read())
    scanner = linereader(source)
    scanner.push(firstline)
    found = readgitpatch(scanner)
    source.seek(start)
    return found
|
2039 | 2039 | |
|
2040 | 2040 | |
|
def iterhunks(fp):
    """Read a patch and yield the following events:
    - ("file", (afile, bfile, firsthunk, gitpatch)): select a new target
      file. firsthunk is None for git entries emitted without a hunk, and
      gitpatch is None when no git metadata matched the file.
    - ("hunk", hunk): a new hunk is ready to be applied, follows a
      "file" event.
    - ("git", gitchanges): current diff is in git format, gitchanges
      is a list of copies of the COPY/RENAME gitpatch records. Emitted
      once, when the first "diff --git" line is seen.

    Raises PatchParseError when a "diff --git" header cannot be matched
    against the scanned git metadata.
    """
    afile = b""
    bfile = b""
    state = None
    hunknum = 0
    # newfile: a file header was just parsed on this line.
    # emitfile: the 'file' event for the current file is still pending and
    # is sent together with its first hunk.
    emitfile = newfile = False
    # None until the first "diff --git" line; afterwards a reversed list so
    # that the next expected record is at the end and can be pop()ed.
    gitpatches = None

    # our states
    BFILE = 1
    # None: diff flavour unknown, False: unified diff, True: context diff
    context = None
    lr = linereader(fp)

    for x in iter(lr.readline, b''):
        if state == BFILE and (
            (not context and x.startswith(b'@'))
            or (context is not False and x.startswith(b'***************'))
            or x.startswith(b'GIT binary patch')
        ):
            # start of a hunk for the current file
            gp = None
            if gitpatches and gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
            if x.startswith(b'GIT binary patch'):
                # NOTE(review): gp is still None here when no git metadata
                # matched, and gp.path would then fail -- confirm a binary
                # hunk always has a matching record.
                h = binhunk(lr, gp.path)
            else:
                if context is None and x.startswith(b'***************'):
                    context = True
                h = hunk(x, hunknum + 1, lr, context)
            hunknum += 1
            if emitfile:
                emitfile = False
                yield b'file', (afile, bfile, h, gp and gp.copy() or None)
            yield b'hunk', h
        elif x.startswith(b'diff --git a/'):
            m = gitre.match(x.rstrip(b'\r\n'))
            if not m:
                continue
            if gitpatches is None:
                # scan whole input for git metadata
                gitpatches = scangitpatch(lr, x)
                yield b'git', [
                    g.copy() for g in gitpatches if g.op in (b'COPY', b'RENAME')
                ]
                gitpatches.reverse()
            afile = b'a/' + m.group(1)
            bfile = b'b/' + m.group(2)
            # records preceding the current file had no hunk of their own:
            # emit them as hunk-less 'file' events
            while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
                yield b'file', (
                    b'a/' + gp.path,
                    b'b/' + gp.path,
                    None,
                    gp.copy(),
                )
            if not gitpatches:
                raise PatchParseError(
                    _(b'failed to synchronize metadata for "%s"') % afile[2:]
                )
            newfile = True
        elif x.startswith(b'---'):
            # check for a unified diff
            l2 = lr.readline()
            if not l2.startswith(b'+++'):
                lr.push(l2)
                continue
            newfile = True
            context = False
            afile = parsefilename(x)
            bfile = parsefilename(l2)
        elif x.startswith(b'***'):
            # check for a context diff
            l2 = lr.readline()
            if not l2.startswith(b'---'):
                lr.push(l2)
                continue
            # peek at the third line: it is pushed back in every case
            l3 = lr.readline()
            lr.push(l3)
            if not l3.startswith(b"***************"):
                lr.push(l2)
                continue
            newfile = True
            context = True
            afile = parsefilename(x)
            bfile = parsefilename(l2)

        if newfile:
            newfile = False
            emitfile = True
            state = BFILE
            hunknum = 0

    # git records left over after the last line never got a hunk
    while gitpatches:
        gp = gitpatches.pop()
        yield b'file', (b'a/' + gp.path, b'b/' + gp.path, None, gp.copy())
|
2142 | 2142 | |
|
2143 | 2143 | |
|
def applybindelta(binchunk, data):
    """Apply a binary delta hunk

    The algorithm used is the algorithm from git's patch-delta.c

    ``binchunk`` is the delta: two variable-length headers followed by a
    sequence of opcodes.  An opcode with the high bit set copies a range of
    ``data`` (the delta source); any other non-zero opcode inserts that many
    literal bytes taken from the delta itself.

    Returns the reconstructed bytes.  Raises PatchApplicationError on the
    reserved opcode 0.
    """

    def deltahead(binchunk):
        # length of the variable-length header at the start of binchunk:
        # every byte with the high bit set is followed by another one
        i = 0
        for i, byte in enumerate(bytearray(binchunk), 1):
            if not (byte & 0x80):
                return i
        return i

    # (flag bit in the opcode, shift of the operand byte it announces)
    offsetfields = ((0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24))
    sizefields = ((0x10, 0), (0x20, 8), (0x40, 16))

    # skip the two headers; their values are not used
    binchunk = binchunk[deltahead(binchunk) :]
    binchunk = binchunk[deltahead(binchunk) :]

    # pieces are joined once at the end instead of growing a bytes object
    # with += on every opcode
    pieces = []
    i = 0
    while i < len(binchunk):
        cmd = ord(binchunk[i : i + 1])
        i += 1
        if cmd & 0x80:
            # copy from the source data
            offset = 0
            size = 0
            for bit, shift in offsetfields:
                if cmd & bit:
                    offset |= ord(binchunk[i : i + 1]) << shift
                    i += 1
            for bit, shift in sizefields:
                if cmd & bit:
                    size |= ord(binchunk[i : i + 1]) << shift
                    i += 1
            if size == 0:
                size = 0x10000
            pieces.append(data[offset : offset + size])
        elif cmd != 0:
            # insert cmd literal bytes from the delta
            pieces.append(binchunk[i : i + cmd])
            i += cmd
        else:
            raise PatchApplicationError(_(b'unexpected delta opcode 0'))
    return b"".join(pieces)
|
2201 | 2201 | |
|
2202 | 2202 | |
|
def applydiff(ui, fp, backend, store, strip=1, prefix=b'', eolmode=b'strict'):
    """Read a patch from fp and try to apply it with ``patchfile``.

    Returns 0 for a clean patch, -1 if any rejects were found and 1 if
    there was any fuzz.

    If 'eolmode' is 'strict', the patch content and patched file are
    read in binary mode. Otherwise, line endings are ignored when
    patching then normalized according to 'eolmode'.
    """
    options = {'strip': strip, 'prefix': prefix, 'eolmode': eolmode}
    return _applydiff(ui, fp, patchfile, backend, store, **options)
|
2223 | 2223 | |
|
2224 | 2224 | |
|
2225 | 2225 | def _canonprefix(repo, prefix): |
|
2226 | 2226 | if prefix: |
|
2227 | 2227 | prefix = pathutil.canonpath(repo.root, repo.getcwd(), prefix) |
|
2228 | 2228 | if prefix != b'': |
|
2229 | 2229 | prefix += b'/' |
|
2230 | 2230 | return prefix |
|
2231 | 2231 | |
|
2232 | 2232 | |
|
def _applydiff(
    ui, fp, patcher, backend, store, strip=1, prefix=b'', eolmode=b'strict'
):
    """Apply the patch read from fp, driving ``patcher`` with iterhunks().

    ``patcher`` is called as patcher(ui, gp, backend, store, eolmode=eolmode)
    for every file that has hunks; the object it returns receives the hunks
    through apply() and is finished with close().  Files without hunks
    (deletions, copies, renames, mode changes, empty additions) are handled
    directly through ``backend`` and ``store``.

    Returns -1 if anything was rejected, otherwise 1 if any apply() call
    returned a positive value, otherwise 0.

    Raises PatchApplicationError when a copy/rename source is missing or a
    hunk-less creation targets an existing file, and error.Abort on an
    unknown iterhunks() event.
    """
    prefix = _canonprefix(backend.repo, prefix)

    def pstrip(p):
        # git metadata paths carry no a/ or b/ component, hence strip - 1
        # NOTE(review): rationale inferred from iterhunks() adding those
        # prefixes itself -- confirm.
        return pathtransform(p, strip - 1, prefix)[1]

    rejects = 0
    err = 0
    current_file = None

    for state, values in iterhunks(fp):
        if state == b'hunk':
            # hunks of a file whose patcher could not be created are dropped
            if not current_file:
                continue
            ret = current_file.apply(values)
            if ret > 0:
                err = 1
        elif state == b'file':
            # finish the previous file before switching to the new one
            if current_file:
                rejects += current_file.close()
                current_file = None
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pstrip(gp.path)
                if gp.oldpath:
                    gp.oldpath = pstrip(gp.oldpath)
            else:
                gp = makepatchmeta(
                    backend, afile, bfile, first_hunk, strip, prefix
                )
            if gp.op == b'RENAME':
                backend.unlink(gp.oldpath)
            if not first_hunk:
                # no content change: everything is done right here
                if gp.op == b'DELETE':
                    backend.unlink(gp.path)
                    continue
                data, mode = None, None
                if gp.op in (b'RENAME', b'COPY'):
                    data, mode = store.getfile(gp.oldpath)[:2]
                    if data is None:
                        # This means that the old path does not exist
                        raise PatchApplicationError(
                            _(b"source file '%s' does not exist") % gp.oldpath
                        )
                if gp.mode:
                    mode = gp.mode
                    if gp.op == b'ADD':
                        # Added files without content have no hunk and
                        # must be created
                        data = b''
                if data or mode:
                    if gp.op in (b'ADD', b'RENAME', b'COPY') and backend.exists(
                        gp.path
                    ):
                        raise PatchApplicationError(
                            _(
                                b"cannot create %s: destination "
                                b"already exists"
                            )
                            % gp.path
                        )
                    backend.setfile(gp.path, data, mode, gp.oldpath)
                continue
            try:
                current_file = patcher(ui, gp, backend, store, eolmode=eolmode)
            except PatchError as inst:
                # count the whole file as rejected and keep going
                ui.warn(stringutil.forcebytestr(inst) + b'\n')
                current_file = None
                rejects += 1
                continue
        elif state == b'git':
            # save the copy/rename sources in store before they get modified
            for gp in values:
                path = pstrip(gp.oldpath)
                data, mode = backend.getfile(path)
                if data is None:
                    # The error ignored here will trigger a getfile()
                    # error in a place more appropriate for error
                    # handling, and will not interrupt the patching
                    # process.
                    pass
                else:
                    store.setfile(path, data, mode)
        else:
            raise error.Abort(_(b'unsupported parser state: %s') % state)

    if current_file:
        rejects += current_file.close()

    if rejects:
        return -1
    return err
|
2326 | 2326 | |
|
2327 | 2327 | |
|
def _externalpatch(ui, repo, patcher, patchname, strip, files, similarity):
    """use <patcher> to apply <patchname> to the working directory.
    returns whether patch was applied with fuzz factor.

    The tool is run through the shell as
    ``<patcher> [-d <repo.root>] -p<strip> < <patchname>`` and its output is
    scanned line by line: names from "patching file" lines are added to
    ``files``, while fuzz, reject and failure lines are forwarded as
    warnings, preceded once by the name of the file they refer to.

    Raises PatchApplicationError if the command exits with a non-zero
    status.
    """

    fuzz = False
    args = []
    cwd = repo.root
    if cwd:
        args.append(b'-d %s' % procutil.shellquote(cwd))
    cmd = b'%s %s -p%d < %s' % (
        patcher,
        b' '.join(args),
        strip,
        procutil.shellquote(patchname),
    )
    ui.debug(b'Using external patch tool: %s\n' % cmd)
    fp = procutil.popen(cmd, b'rb')
    # Both are set by "patching file" lines. They start out defined so that
    # a fuzz or failure line arriving first is still reported instead of
    # hitting an unbound local.
    pf = None
    printed_file = False
    try:
        for line in fp:
            line = line.rstrip()
            ui.note(line + b'\n')
            if line.startswith(b'patching file '):
                pf = util.parsepatchoutput(line)
                printed_file = False
                files.add(pf)
            elif b'with fuzz' in line:
                fuzz = True
                if pf is not None and not printed_file:
                    ui.warn(pf + b'\n')
                    printed_file = True
                ui.warn(line + b'\n')
            elif b'saving rejects to file' in line:
                ui.warn(line + b'\n')
            elif b'FAILED' in line:
                if pf is not None and not printed_file:
                    ui.warn(pf + b'\n')
                    printed_file = True
                ui.warn(line + b'\n')
    finally:
        # record whatever was touched, even if reading the output failed
        if files:
            scmutil.marktouched(repo, files, similarity)
    code = fp.close()
    if code:
        raise PatchApplicationError(
            _(b"patch command failed: %s") % procutil.explainexit(code)
        )
    return fuzz
|
2375 | 2375 | |
|
2376 | 2376 | |
|
def patchbackend(
    ui, backend, patchobj, strip, prefix, files=None, eolmode=b'strict'
):
    """apply <patchobj> through ``backend`` using the builtin patcher

    ``patchobj`` is first handed to open(); if that raises TypeError it is
    used as is, as an already open file object, and is not closed here.  An
    ``eolmode`` of None is replaced by the ``patch.eol`` configuration
    value.  The files reported by backend.close() are added to ``files``.

    Returns whether applydiff() reported a positive result (fuzz).  Raises
    error.Abort on an unknown ``eolmode`` and PatchApplicationError if the
    patch failed to apply.
    """
    touched = set() if files is None else files

    if eolmode is None:
        eolmode = ui.config(b'patch', b'eol')
    normalized = eolmode.lower()
    if normalized not in eolmodes:
        raise error.Abort(_(b'unsupported line endings type: %s') % eolmode)

    store = filestore()
    try:
        fp = open(patchobj, b'rb')
    except TypeError:
        fp = patchobj
    try:
        ret = applydiff(
            ui,
            fp,
            backend,
            store,
            strip=strip,
            prefix=prefix,
            eolmode=normalized,
        )
    finally:
        # only close what was opened here
        if fp != patchobj:
            fp.close()
        touched.update(backend.close())
        store.close()

    if ret < 0:
        raise PatchApplicationError(_(b'patch failed to apply'))
    return ret > 0
|
2405 | 2405 | |
|
2406 | 2406 | |
|
def internalpatch(
    ui,
    repo,
    patchobj,
    strip,
    prefix=b'',
    files=None,
    eolmode=b'strict',
    similarity=0,
):
    """use builtin patch to apply <patchobj> to the working directory.

    The patch goes through a workingbackend built from ``repo`` and
    ``similarity``.  Returns whether patch was applied with fuzz factor."""
    return patchbackend(
        ui,
        workingbackend(ui, repo, similarity),
        patchobj,
        strip,
        prefix,
        files,
        eolmode,
    )
|
2421 | 2421 | |
|
2422 | 2422 | |
|
def patchrepo(
    ui, repo, ctx, store, patchobj, strip, prefix, files=None, eolmode=b'strict'
):
    """apply <patchobj> through a repobackend built from ``ctx`` and ``store``

    Returns the result of patchbackend()."""
    return patchbackend(
        ui,
        repobackend(ui, repo, ctx, store),
        patchobj,
        strip,
        prefix,
        files,
        eolmode,
    )
|
2428 | 2428 | |
|
2429 | 2429 | |
|
def patch(
    ui,
    repo,
    patchname,
    strip=1,
    prefix=b'',
    files=None,
    eolmode=b'strict',
    similarity=0,
):
    """Apply <patchname> to the working directory.

    The external program named by the ``ui.patch`` configuration is used
    when one is set, the builtin patcher otherwise.

    'eolmode' specifies how end of lines should be handled. It can be:
    - 'strict': inputs are read in binary mode, EOLs are preserved
    - 'crlf': EOLs are ignored when patching and reset to CRLF
    - 'lf': EOLs are ignored when patching and reset to LF
    - None: get it from user settings, default to 'strict'
    'eolmode' is ignored when using an external patcher program.

    Returns whether patch was applied with fuzz factor.
    """
    if files is None:
        files = set()
    external = ui.config(b'ui', b'patch')
    if not external:
        return internalpatch(
            ui, repo, patchname, strip, prefix, files, eolmode, similarity
        )
    # neither prefix nor eolmode is passed on to the external tool
    return _externalpatch(
        ui, repo, external, patchname, strip, files, similarity
    )
|
2461 | 2461 | |
|
2462 | 2462 | |
|
def changedfiles(ui, repo, patchpath, strip=1, prefix=b''):
    """return the set of paths named by the patch file at ``patchpath``

    Nothing is applied: the patch is only parsed.  For a rename both the
    new and the old path are included.  Raises error.Abort on an unknown
    iterhunks() event.
    """
    backend = fsbackend(ui, repo.root)
    prefix = _canonprefix(repo, prefix)

    def gitpath(p):
        return pathtransform(p, strip - 1, prefix)[1]

    with open(patchpath, b'rb') as fp:
        touched = set()
        for state, values in iterhunks(fp):
            if state in (b'hunk', b'git'):
                continue
            if state != b'file':
                raise error.Abort(_(b'unsupported parser state: %s') % state)
            afile, bfile, first_hunk, gp = values
            if not gp:
                gp = makepatchmeta(
                    backend, afile, bfile, first_hunk, strip, prefix
                )
            else:
                gp.path = gitpath(gp.path)
                if gp.oldpath:
                    gp.oldpath = gitpath(gp.oldpath)
            touched.add(gp.path)
            if gp.op == b'RENAME':
                touched.add(gp.oldpath)
    return touched
|
2487 | 2487 | |
|
2488 | 2488 | |
|
class GitDiffRequired(Exception):
    """Marker exception carrying no behaviour of its own.

    NOTE(review): presumably raised when a change can only be expressed in
    git diff format; the raise sites are outside this section -- confirm.
    """
|
2491 | 2491 | |
|
2492 | 2492 | |
|
# Option builders re-exported from diffutil under this module's namespace.
# Note that ``diffopts`` and ``diffallopts`` are bound to the same function,
# diffutil.diffallopts.
diffopts = diffutil.diffallopts
diffallopts = diffutil.diffallopts
difffeatureopts = diffutil.difffeatureopts
|
2496 | 2496 | |
|
2497 | 2497 | |
|
def diff(
    repo,
    node1=None,
    node2=None,
    match=None,
    changes=None,
    opts=None,
    losedatafn=None,
    pathfn=None,
    copy=None,
    copysourcematch=None,
    hunksfilterfn=None,
):
    """yields diff of changes to files between two nodes, or node and
    working directory.

    if neither node1 nor node2 is given, node1 is the first dirstate
    parent. The contexts compared are repo[node1] and repo[node2].

    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
    every time some change cannot be represented with the current
    patch format. Return False to upgrade to git patch format, True to
    accept the loss or raise an exception to abort the diff. It is
    called with the name of current file being diffed as 'fn'. If set
    to None, patches will always be upgraded to git format when
    necessary.

    pathfn is passed through unchanged to diffhunks().
    NOTE(review): earlier wording here described 'prefix' and 'relroot'
    parameters (a display prefix for subrepos, and a root outside of which
    match patterns are ignored); neither is in this signature. pathfn
    presumably replaces them -- confirm.

    copy, if not empty, should contain mappings {dst@y: src@x} of copy
    information.

    if copysourcematch is not None, then copy sources will be filtered by this
    matcher

    hunksfilterfn, if not None, should be a function taking a filectx and
    hunks generator that may yield filtered hunks.

    For each file, the header lines are yielded as one chunk when there is
    hunk text or more than one header line, followed by the hunk text (if
    any) as a second chunk.
    """
    if not node1 and not node2:
        node1 = repo.dirstate.p1()

    ctx1 = repo[node1]
    ctx2 = repo[node2]

    for fctx1, fctx2, hdr, hunks in diffhunks(
        repo,
        ctx1=ctx1,
        ctx2=ctx2,
        match=match,
        changes=changes,
        opts=opts,
        losedatafn=losedatafn,
        pathfn=pathfn,
        copy=copy,
        copysourcematch=copysourcematch,
    ):
        if hunksfilterfn is not None:
            # If the file has been removed, fctx2 is None; but this should
            # not occur here since we catch removed files early in
            # logcmdutil.getlinerangerevs() for 'hg log -L'.
            assert (
                fctx2 is not None
            ), b'fctx2 unexpectly None in diff hunks filtering'
            hunks = hunksfilterfn(fctx2, hunks)
        # hunks is consumed here: all hunk lines are flattened into one blob
        text = b''.join(b''.join(hlines) for hrange, hlines in hunks)
        # a lone header line with no text is dropped
        if hdr and (text or len(hdr) > 1):
            yield b'\n'.join(hdr) + b'\n'
        if text:
            yield text
|
2571 | 2571 | |
|
2572 | 2572 | |
|
def diffhunks(
    repo,
    ctx1,
    ctx2,
    match=None,
    changes=None,
    opts=None,
    losedatafn=None,
    pathfn=None,
    copy=None,
    copysourcematch=None,
):
    """Return an iterable describing the diff of changes between two contexts.

    Each item is what trydiff() yields, i.e. the tuple returned by
    diffcontent(): (`fctx1`, `fctx2`, `header`, `hunks`), where `header` is a
    list of diff headers and `hunks` is an iterable of
    (`hunkrange`, `hunklines`) tuples.

    The result is an empty list when there is nothing to diff, a fully
    buffered list when a plain diff could be produced in "upgrade" mode, and
    the trydiff() generator otherwise.

    See diff() for the meaning of parameters.
    """

    if opts is None:
        opts = mdiff.defaultopts

    def lrugetfilectx():
        # Build a getfilectx(f, ctx) function that remembers the filelog of
        # recently used files and hands it back to ctx.filectx().
        cache = {}
        # file names ordered from least to most recently used
        order = collections.deque()

        def getfilectx(f, ctx):
            fctx = ctx.filectx(f, filelog=cache.get(f))
            if f not in cache:
                # evict the least recently used entry once more than 20
                # filelogs are being held
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[f] = fctx.filelog()
            else:
                # will be re-appended below as the most recently used entry
                order.remove(f)
            order.append(f)
            return fctx

        return getfilectx

    getfilectx = lrugetfilectx()

    if not changes:
        changes = ctx1.status(ctx2, match=match)
    # `changes` may be a plain list or an object with named attributes
    if isinstance(changes, list):
        modified, added, removed = changes[:3]
    else:
        modified, added, removed = (
            changes.modified,
            changes.added,
            changes.removed,
        )

    if not modified and not added and not removed:
        return []

    if repo.ui.debugflag:
        hexfunc = hex
    else:
        hexfunc = short
    # falsy nodes are left out of the revision list
    revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]

    if copy is None:
        copy = {}
        # copy information is only computed for git-style (or upgradable)
        # diffs
        if opts.git or opts.upgrade:
            copy = copies.pathcopies(ctx1, ctx2, match=match)

    if copysourcematch:
        # filter out copies where source side isn't inside the matcher
        # (copies.pathcopies() already filtered out the destination)
        copy = {dst: src for dst, src in copy.items() if copysourcematch(src)}

    modifiedset = set(modified)
    addedset = set(added)
    removedset = set(removed)
    for f in modified:
        if f not in ctx1:
            # Fix up added, since merged-in additions appear as
            # modifications during merges
            modifiedset.remove(f)
            addedset.add(f)
    for f in removed:
        if f not in ctx1:
            # Merged-in additions that are then removed are reported as removed.
            # They are not in ctx1, so We don't want to show them in the diff.
            removedset.remove(f)
    modified = sorted(modifiedset)
    added = sorted(addedset)
    removed = sorted(removedset)
    # iterate over a snapshot because entries are deleted from `copy`
    for dst, src in list(copy.items()):
        if src not in ctx1:
            # Files merged in during a merge and then copied/renamed are
            # reported as copies. We want to show them in the diff as additions.
            del copy[dst]

    # ask scmutil to prefetch every file involved, for both revisions
    prefetchmatch = scmutil.matchfiles(
        repo, list(modifiedset | addedset | removedset)
    )
    revmatches = [
        (ctx1.rev(), prefetchmatch),
        (ctx2.rev(), prefetchmatch),
    ]
    scmutil.prefetchfiles(repo, revmatches)

    def difffn(opts, losedata):
        # run trydiff() with everything prepared above; only the diff options
        # and the data-loss callback vary between attempts
        return trydiff(
            repo,
            revs,
            ctx1,
            ctx2,
            modified,
            added,
            removed,
            copy,
            getfilectx,
            opts,
            losedata,
            pathfn,
        )

    if opts.upgrade and not opts.git:
        try:

            def losedata(fn):
                # abort the plain diff unless the caller's losedatafn exists
                # and returns a true value for this file
                if not losedatafn or not losedatafn(fn=fn):
                    raise GitDiffRequired

            # Buffer the whole output until we are sure it can be generated
            return list(difffn(opts.copy(git=False), losedata))
        except GitDiffRequired:
            # a plain diff would lose data: redo the whole diff in git format
            return difffn(opts.copy(git=True), None)
    else:
        return difffn(opts, None)
|
2705 | 2705 | |
|
2706 | 2706 | |
|
def diffsinglehunk(hunklines):
    """Yield (token, label) pairs for the '-'/'+' lines of a single hunk.

    Text gets the 'diff.deleted' or 'diff.inserted' label depending on the
    line's first byte, tokens starting with a tab get 'diff.tab', whitespace
    at the end of a line gets 'diff.trailingwhitespace' and the line
    terminator is yielded with an empty label.

    Raises error.ProgrammingError for a line starting with neither '-' nor
    '+'.
    """
    for rawline in hunklines:
        if rawline.startswith(b'-'):
            textlabel = b'diff.deleted'
        elif rawline.startswith(b'+'):
            textlabel = b'diff.inserted'
        else:
            raise error.ProgrammingError(b'unexpected hunk line: %s' % rawline)

        # drop the line terminator first, then any trailing whitespace
        noeol = rawline.rstrip(b'\r\n')
        body = noeol.rstrip()

        for piece in tabsplitter.findall(body):
            yield (piece, b'diff.tab' if piece.startswith(b'\t') else textlabel)

        # `body` is a prefix of `noeol`, itself a prefix of `rawline`, so the
        # remainders are non-empty exactly when the values differ
        trailing = noeol[len(body) :]
        if trailing:
            yield (trailing, b'diff.trailingwhitespace')
        terminator = rawline[len(noeol) :]
        if terminator:
            yield (terminator, b'')
|
2730 | 2730 | |
|
2731 | 2731 | |
|
def diffsinglehunkinline(hunklines):
    """yield tokens for a list of lines in a single hunk, with inline colors

    The deleted and the inserted side of the hunk are diffed against each
    other word by word, so that each yielded text token carries a label
    ending in '.changed' or '.unchanged'. Tabs, trailing whitespace and line
    ends are labelled like in diffsinglehunk().

    Raises error.ProgrammingError for a line starting with neither '-' nor
    '+'.
    """
    # prepare deleted, and inserted content
    a = bytearray()
    b = bytearray()
    for line in hunklines:
        if line[0:1] == b'-':
            a += line[1:]
        elif line[0:1] == b'+':
            b += line[1:]
        else:
            raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
    # fast path: if either side is empty, use diffsinglehunk
    if not a or not b:
        for t in diffsinglehunk(hunklines):
            yield t
        return
    # re-split the content into words
    al = wordsplitter.findall(bytes(a))
    bl = wordsplitter.findall(bytes(b))
    # re-arrange the words to lines since the diff algorithm is line-based
    aln = [s if s == b'\n' else s + b'\n' for s in al]
    bln = [s if s == b'\n' else s + b'\n' for s in bl]
    an = b''.join(aln)
    bn = b''.join(bln)
    # run the diff algorithm, prepare atokens and btokens
    atokens = []
    btokens = []
    blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
    for (a1, a2, b1, b2), btype in blocks:
        # blocks of type '!' are the ones labelled as changed
        changed = btype == b'!'
        # the block ranges index the word lists; the words are joined back
        # and split at newlines so that no token spans a line end
        for token in mdiff.splitnewlines(b''.join(al[a1:a2])):
            atokens.append((changed, token))
        for token in mdiff.splitnewlines(b''.join(bl[b1:b2])):
            btokens.append((changed, token))

    # yield deleted tokens, then inserted ones
    for prefix, label, tokens in [
        (b'-', b'diff.deleted', atokens),
        (b'+', b'diff.inserted', btokens),
    ]:
        # emit the '-'/'+' marker before the first token of every output line
        nextisnewline = True
        for changed, token in tokens:
            if nextisnewline:
                yield (prefix, label)
                nextisnewline = False
            # special handling line end
            isendofline = token.endswith(b'\n')
            if isendofline:
                chomp = token[:-1]  # chomp
                if chomp.endswith(b'\r'):
                    chomp = chomp[:-1]
                # the stripped terminator: b'\n' or b'\r\n'
                endofline = token[len(chomp) :]
                token = chomp.rstrip()  # detect spaces at the end
                endspaces = chomp[len(token) :]
            # scan tabs
            for maybetab in tabsplitter.findall(token):
                if b'\t' == maybetab[0:1]:
                    currentlabel = b'diff.tab'
                else:
                    if changed:
                        currentlabel = label + b'.changed'
                    else:
                        currentlabel = label + b'.unchanged'
                yield (maybetab, currentlabel)
            if isendofline:
                # endspaces/endofline were set in the isendofline branch above
                if endspaces:
                    yield (endspaces, b'diff.trailingwhitespace')
                yield (endofline, b'')
                nextisnewline = True
|
2802 | 2802 | |
|
2803 | 2803 | |
|
def difflabel(func, *args, **kw):
    '''yields 2-tuples of (output, label) based on the output of func()

    func(*args, **kw) must produce chunks of diff text as bytes. Header
    lines and hunk ('@') lines are labelled according to their prefix, runs
    of adjacent '-'/'+' lines are handed to diffsinglehunk() (or to
    diffsinglehunkinline() when kw['opts'].worddiff is set), and anything
    else is yielded with an empty label.
    '''
    if kw.get('opts') and kw['opts'].worddiff:
        dodiffhunk = diffsinglehunkinline
    else:
        dodiffhunk = diffsinglehunk
    # (line prefix, label) pairs tried in order on lines of a file header
    headprefixes = [
        (b'diff', b'diff.diffline'),
        (b'copy', b'diff.extended'),
        (b'rename', b'diff.extended'),
        (b'old', b'diff.extended'),
        (b'new', b'diff.extended'),
        (b'deleted', b'diff.extended'),
        (b'index', b'diff.extended'),
        (b'similarity', b'diff.extended'),
        (b'---', b'diff.file_a'),
        (b'+++', b'diff.file_b'),
    ]
    # (line prefix, label) pairs tried on lines outside of a file header
    textprefixes = [
        (b'@', b'diff.hunk'),
        # - and + are handled by diffsinglehunk
    ]
    # True while the lines being read belong to a file header
    head = False

    # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
    hunkbuffer = []

    def consumehunkbuffer():
        # yield the tokens of the buffered lines, then empty the buffer in
        # place so the enclosing scope sees the change
        if hunkbuffer:
            for token in dodiffhunk(hunkbuffer):
                yield token
            hunkbuffer[:] = []

    for chunk in func(*args, **kw):
        lines = chunk.split(b'\n')
        linecount = len(lines)
        for i, line in enumerate(lines):
            if head:
                # a hunk line ends the header
                if line.startswith(b'@'):
                    head = False
            else:
                # any non-empty line that cannot be part of a hunk starts a
                # header
                if line and not line.startswith(
                    (b' ', b'+', b'-', b'@', b'\\')
                ):
                    head = True
            diffline = False
            if not head and line and line.startswith((b'+', b'-')):
                diffline = True

            prefixes = textprefixes
            if head:
                prefixes = headprefixes
            if diffline:
                # buffered
                bufferedline = line
                # split() removed the newlines; every piece but the last one
                # of the chunk was followed by one
                if i + 1 < linecount:
                    bufferedline += b"\n"
                hunkbuffer.append(bufferedline)
            else:
                # unbuffered
                for token in consumehunkbuffer():
                    yield token
                stripline = line.rstrip()
                for prefix, label in prefixes:
                    if stripline.startswith(prefix):
                        yield (stripline, label)
                        if line != stripline:
                            yield (
                                line[len(stripline) :],
                                b'diff.trailingwhitespace',
                            )
                        break
                else:
                    # no prefix matched: plain, unlabelled output
                    yield (line, b'')
                if i + 1 < linecount:
                    yield (b'\n', b'')
        # flush lines still buffered when the chunk ends
        for token in consumehunkbuffer():
            yield token
|
2882 | 2882 | |
|
2883 | 2883 | |
|
def diffui(*args, **kw):
    """Label the output of diff() for ui.write().

    Takes the same arguments as diff() and returns the difflabel() generator
    of (output, label) 2-tuples built from it.
    """
    labelled = difflabel(diff, *args, **kw)
    return labelled
|
2887 | 2887 | |
|
2888 | 2888 | |
|
2889 | 2889 | def _filepairs(modified, added, removed, copy, opts): |
|
2890 | 2890 | """generates tuples (f1, f2, copyop), where f1 is the name of the file |
|
2891 | 2891 | before and f2 is the the name after. For added files, f1 will be None, |
|
2892 | 2892 | and for removed files, f2 will be None. copyop may be set to None, 'copy' |
|
2893 | 2893 | or 'rename' (the latter two only if opts.git is set).""" |
|
2894 | 2894 | gone = set() |
|
2895 | 2895 | |
|
2896 | 2896 | copyto = {v: k for k, v in copy.items()} |
|
2897 | 2897 | |
|
2898 | 2898 | addedset, removedset = set(added), set(removed) |
|
2899 | 2899 | |
|
2900 | 2900 | for f in sorted(modified + added + removed): |
|
2901 | 2901 | copyop = None |
|
2902 | 2902 | f1, f2 = f, f |
|
2903 | 2903 | if f in addedset: |
|
2904 | 2904 | f1 = None |
|
2905 | 2905 | if f in copy: |
|
2906 | 2906 | if opts.git: |
|
2907 | 2907 | f1 = copy[f] |
|
2908 | 2908 | if f1 in removedset and f1 not in gone: |
|
2909 | 2909 | copyop = b'rename' |
|
2910 | 2910 | gone.add(f1) |
|
2911 | 2911 | else: |
|
2912 | 2912 | copyop = b'copy' |
|
2913 | 2913 | elif f in removedset: |
|
2914 | 2914 | f2 = None |
|
2915 | 2915 | if opts.git: |
|
2916 | 2916 | # have we already reported a copy above? |
|
2917 | 2917 | if ( |
|
2918 | 2918 | f in copyto |
|
2919 | 2919 | and copyto[f] in addedset |
|
2920 | 2920 | and copy[copyto[f]] == f |
|
2921 | 2921 | ): |
|
2922 | 2922 | continue |
|
2923 | 2923 | yield f1, f2, copyop |
|
2924 | 2924 | |
|
2925 | 2925 | |
|
def _gitindex(text):
    """Return the hex SHA-1 of `text` prefixed with a 'blob <length>\\0' header.

    A false `text` (None or empty) is hashed as empty content.
    """
    content = text if text else b""
    hasher = hashutil.sha1(b'blob %d\0' % len(content))
    hasher.update(content)
    return hex(hasher.digest())
|
2933 | 2933 | |
|
2934 | 2934 | |
|
# Git file mode strings, keyed by the file flag values that trydiff() and
# diffcontent() look up (b'' meaning "no flag").
# NOTE(review): presumably b'l' is a symlink and b'x' an executable file,
# matching git's 120000 and 100755 modes -- confirm against the flag producers.
_gitmode = {b'l': b'120000', b'x': b'100755', b'': b'100644'}
|
2936 | 2936 | |
|
2937 | 2937 | |
|
def trydiff(
    repo,
    revs,
    ctx1,
    ctx2,
    modified,
    added,
    removed,
    copy,
    getfilectx,
    opts,
    losedatafn,
    pathfn,
):
    """given input data, generate a diff and yield it in blocks

    For every file pair produced by _filepairs() this yields the result of
    diffcontent(), i.e. a (fctx1, fctx2, header, hunks) tuple.

    revs is the list of revision identifiers shown on the "diff -r ..."
    line of non-git diffs. modified, added and removed are lists of file
    names and copy maps copy destinations to their sources. getfilectx is
    called as getfilectx(path, ctx) to obtain file contexts. repo is not
    used by this function itself.

    If generating a diff would lose data like flags or binary data and
    losedatafn is not None, it will be called.

    pathfn is applied to every path in the diff output.
    """

    if opts.noprefix:
        aprefix = bprefix = b''
    else:
        aprefix = b'a/'
        bprefix = b'b/'

    def diffline(f, revs):
        # header line of a non-git diff: "diff -r REV [-r REV] FILE"
        revinfo = b' '.join([b"-r %s" % rev for rev in revs])
        return b'diff %s %s' % (revinfo, f)

    def isempty(fctx):
        return fctx is None or fctx.size() == 0

    date1 = dateutil.datestr(ctx1.date())
    date2 = dateutil.datestr(ctx2.date())

    if not pathfn:
        pathfn = lambda f: f

    for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
        content1 = None
        content2 = None
        fctx1 = None
        fctx2 = None
        flag1 = None
        flag2 = None
        if f1:
            fctx1 = getfilectx(f1, ctx1)
            # flags are only needed for git headers and data-loss detection
            if opts.git or losedatafn:
                flag1 = ctx1.flags(f1)
        if f2:
            fctx2 = getfilectx(f2, ctx2)
            if opts.git or losedatafn:
                flag2 = ctx2.flags(f2)
        # if binary is True, output "summary" or "base85", but not "text diff"
        if opts.text:
            binary = False
        else:
            binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)

        if losedatafn and not opts.git:
            if (
                binary
                or
                # copy/rename
                f2 in copy
                or
                # empty file creation
                (not f1 and isempty(fctx2))
                or
                # empty file deletion
                (isempty(fctx1) and not f2)
                or
                # create with flags
                (not f1 and flag2)
                or
                # change flags
                (f1 and f2 and flag1 != flag2)
            ):
                losedatafn(f2 or f1)

        # path1/path2 are display paths; they may differ from the
        # repository paths f1/f2 when pathfn is not the identity
        path1 = pathfn(f1 or f2)
        path2 = pathfn(f2 or f1)
        header = []
        if opts.git:
            header.append(
                b'diff --git %s%s %s%s' % (aprefix, path1, bprefix, path2)
            )
            if not f1:  # added
                header.append(b'new file mode %s' % _gitmode[flag2])
            elif not f2:  # removed
                header.append(b'deleted file mode %s' % _gitmode[flag1])
            else:  # modified/copied/renamed
                mode1, mode2 = _gitmode[flag1], _gitmode[flag2]
                if mode1 != mode2:
                    header.append(b'old mode %s' % mode1)
                    header.append(b'new mode %s' % mode2)
                if copyop is not None:
                    if opts.showsimilarity:
                        # Score the file contexts obtained from the
                        # repository paths above. Indexing the changesets
                        # with the display paths (path1/path2) looks up the
                        # wrong name whenever pathfn rewrites paths. f1 and
                        # f2 are both set in this branch, so are the fctxs.
                        sim = similar.score(fctx1, fctx2) * 100
                        header.append(b'similarity index %d%%' % sim)
                    header.append(b'%s from %s' % (copyop, path1))
                    header.append(b'%s to %s' % (copyop, path2))
        elif revs:
            header.append(diffline(path1, revs))

        #  fctx.is  | diffopts                | what to   | is fctx.data()
        #  binary() | text nobinary git index | output?   | outputted?
        # ------------------------------------|----------------------------
        #  yes      | no   no       no  *     | summary   | no
        #  yes      | no   no       yes *     | base85    | yes
        #  yes      | no   yes      no  *     | summary   | no
        #  yes      | no   yes      yes 0     | summary   | no
        #  yes      | no   yes      yes >0    | summary   | semi [1]
        #  yes      | yes  *        *   *     | text diff | yes
        #  no       | *    *        *   *     | text diff | yes
        # [1]: hash(fctx.data()) is outputted. so fctx.data() cannot be faked
        if binary and (
            not opts.git or (opts.git and opts.nobinary and not opts.index)
        ):
            # fast path: no binary content will be displayed, content1 and
            # content2 are only used for equivalent test. cmp() could have a
            # fast path.
            if fctx1 is not None:
                content1 = b'\0'
            if fctx2 is not None:
                if fctx1 is not None and not fctx1.cmp(fctx2):
                    content2 = b'\0'  # not different
                else:
                    content2 = b'\0\0'
        else:
            # normal path: load contents
            if fctx1 is not None:
                content1 = fctx1.data()
            if fctx2 is not None:
                content2 = fctx2.data()

        data1 = (ctx1, fctx1, path1, flag1, content1, date1)
        data2 = (ctx2, fctx2, path2, flag2, content2, date2)
        yield diffcontent(data1, data2, header, binary, opts)
|
3080 | 3080 | |
|
3081 | 3081 | |
|
def diffcontent(data1, data2, header, binary, opts):
    """diffs two versions of a file.

    data1 and data2 are tuples containing:

        * ctx: changeset for the file
        * fctx: file context for that file
        * path: name of the file, as it is written in the diff output
        * flag: flags of the file
        * content: full content of the file (can be null in case of binary)
        * date: date of the changeset

    header: the patch header; it is extended in place with the index line
            and the headers of the unified diff
    binary: whether any of the versions of the file is binary or not
    opts: user passed options

    Returns a (fctx1, fctx2, header, hunks) tuple where hunks is an
    iterable of (hunkrange, hunklines) tuples.

    It exists as a separate function so that extensions like extdiff can wrap
    it and use the file content directly.
    """

    ctx1, fctx1, path1, flag1, content1, date1 = data1
    ctx2, fctx2, path2, flag2, content2, date2 = data2

    def gitindexes():
        # Hashing both contents is only worth it when an "index" header is
        # really written, so it is done on demand rather than for every
        # file. A file missing from its changeset gets the null hash.
        # NOTE(review): path1/path2 are the display paths handed over by the
        # caller; if they can differ from the repository paths this
        # membership test may need the repository path instead -- confirm.
        index1 = (
            _gitindex(content1)
            if path1 in ctx1
            else sha1nodeconstants.nullhex
        )
        index2 = (
            _gitindex(content2)
            if path2 in ctx2
            else sha1nodeconstants.nullhex
        )
        return index1, index2

    if binary and opts.git and not opts.nobinary:
        text = mdiff.b85diff(content1, content2)
        if text:
            header.append(b'index %s..%s' % gitindexes())
        hunks = ((None, [text]),)
    else:
        if opts.git and opts.index > 0:
            # use the flag of the old file when it is set, else the new one
            flag = flag1
            if flag is None:
                flag = flag2
            index1, index2 = gitindexes()
            header.append(
                b'index %s..%s %s'
                % (
                    index1[0 : opts.index],
                    index2[0 : opts.index],
                    _gitmode[flag],
                )
            )

        uheaders, hunks = mdiff.unidiff(
            content1,
            date1,
            content2,
            date2,
            path1,
            path2,
            binary=binary,
            opts=opts,
        )
        header.extend(uheaders)
    return fctx1, fctx2, header, hunks
|
3137 | 3137 | |
|
3138 | 3138 | |
|
def diffstatsum(stats):
    """Summarize (filename, adds, removes, isbinary) tuples.

    Returns (maxfile, maxtotal, addtotal, removetotal, binary): the widest
    file name in display columns, the largest adds + removes of a single
    file, the total number of added and of removed lines, and whether any
    entry is binary.
    """
    maxfile = maxtotal = addtotal = removetotal = 0
    binary = False
    for name, nadds, nremoves, isbin in stats:
        maxfile = max(maxfile, encoding.colwidth(name))
        maxtotal = max(maxtotal, nadds + nremoves)
        addtotal = addtotal + nadds
        removetotal = removetotal + nremoves
        binary = binary or isbin
    return maxfile, maxtotal, addtotal, removetotal, binary
|
3149 | 3149 | |
|
3150 | 3150 | |
|
def diffstatdata(lines):
    """Parse diff output into a list of per-file statistics.

    lines is an iterable of the lines (bytes) of a diff. The result is a
    list of (filename, adds, removes, isbinary) tuples, one per file whose
    name could be determined. For renames the name has the form
    b'old => new'.
    """
    # format: "diff -r ... -r ... filename"
    hgheader = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')

    collected = []
    current = None
    nadds = nremoves = 0
    binaryseen = False

    # True between a "diff" line and the first "@@" line, so that the
    # '---'/'+++' header lines are not counted as changes
    inheader = False

    for line in lines:
        if line.startswith(b'diff'):
            # a new file starts: record the previous one and reset counters
            if current:
                collected.append((current, nadds, nremoves, binaryseen))
            inheader = True
            nadds, nremoves, binaryseen = 0, 0, False
            if line.startswith(b'diff --git a/'):
                current = gitre.search(line).group(2)
            elif line.startswith(b'diff -r'):
                current = hgheader.search(line).group(1)
            continue
        if line.startswith(b'@@'):
            inheader = False
            continue
        if not inheader:
            if line.startswith(b'+'):
                nadds += 1
                continue
            if line.startswith(b'-'):
                nremoves += 1
                continue
        if line.startswith((b'GIT binary patch', b'Binary file')):
            binaryseen = True
        elif line.startswith(b'rename from'):
            current = line[12:]
        elif line.startswith(b'rename to'):
            current += b' => %s' % line[10:]

    # record the last file
    if current:
        collected.append((current, nadds, nremoves, binaryseen))
    return collected
|
3194 | 3194 | |
|
3195 | 3195 | |
|
def diffstat(lines, width=80):
    """Render a diffstat for the diff in `lines` and return it as bytes.

    One row is produced per file, holding the file name, the number of
    changed lines (or 'Bin' for binary files) and a +/- histogram that is
    scaled down when the counts do not fit into `width` columns. A summary
    line follows when at least one file was found.
    """
    stats = diffstatdata(lines)
    maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)

    countwidth = len(str(maxtotal))
    if hasbinary:
        # leave room for the 'Bin' marker
        countwidth = max(countwidth, 3)
    # the histogram never gets narrower than 10 columns
    graphwidth = max(width - countwidth - maxname - 6, 10)

    def scale(i):
        if maxtotal > graphwidth:
            # If diffstat runs out of room it doesn't print anything,
            # which isn't very useful, so always print at least one + or -
            # if there were at least some changes.
            return max(i * graphwidth // maxtotal, int(bool(i)))
        return i

    output = []
    for filename, adds, removes, isbinary in stats:
        count = b'Bin' if isbinary else b'%d' % (adds + removes)
        padding = b' ' * (maxname - encoding.colwidth(filename))
        pluses = b'+' * scale(adds)
        minuses = b'-' * scale(removes)
        row = b' %s%s | %*s %s%s\n' % (
            filename,
            padding,
            countwidth,
            count,
            pluses,
            minuses,
        )
        output.append(row)

    if stats:
        summary = _(b' %d files changed, %d insertions(+), %d deletions(-)\n')
        output.append(summary % (len(stats), totaladds, totalremoves))

    return b''.join(output)
|
3242 | 3242 | |
|
3243 | 3243 | |
|
def diffstatui(*args, **kw):
    """Yield the output of diffstat() as (output, label) 2-tuples for
    ui.write().

    Takes the same arguments as diffstat(). For rows ending in a histogram
    the runs of '+' and '-' are labelled 'diffstat.inserted' and
    'diffstat.deleted'; everything else has an empty label.
    """
    histogram = (
        (br'\++', b'diffstat.inserted'),
        (br'-+', b'diffstat.deleted'),
    )
    for row in diffstat(*args, **kw).splitlines():
        if not (row and row[-1] in b'+-'):
            yield (row, b'')
        else:
            # the histogram is whatever follows the last space of the row
            name, graph = row.rsplit(b' ', 1)
            yield (name + b' ', b'')
            for pattern, label in histogram:
                found = re.search(pattern, graph)
                if found:
                    yield (found.group(0), label)
        yield (b'\n', b'')
@@ -1,849 +1,849 b'' | |||
|
1 | 1 | # store.py - repository store handling for Mercurial |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2008 Olivia Mackall <olivia@selenic.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | |
|
9 | 9 | import errno |
|
10 | 10 | import functools |
|
11 | 11 | import os |
|
12 | 12 | import re |
|
13 | 13 | import stat |
|
14 | 14 | |
|
15 | 15 | from .i18n import _ |
|
16 | 16 | from .pycompat import getattr |
|
17 | 17 | from .node import hex |
|
18 | 18 | from . import ( |
|
19 | 19 | changelog, |
|
20 | 20 | error, |
|
21 | 21 | manifest, |
|
22 | 22 | policy, |
|
23 | 23 | pycompat, |
|
24 | 24 | util, |
|
25 | 25 | vfs as vfsmod, |
|
26 | 26 | ) |
|
27 | 27 | from .utils import hashutil |
|
28 | 28 | |
|
# the 'parsers' module as resolved by policy.importmod()
parsers = policy.importmod('parsers')
# how many bytes should be read from the fncache file in one read() call;
# reading in bounded chunks prevents loading a large fncache file into
# memory all at once
fncache_chunksize = 10 ** 6
|
33 | 33 | |
|
34 | 34 | |
|
35 | 35 | def _matchtrackedpath(path, matcher): |
|
36 | 36 | """parses a fncache entry and returns whether the entry is tracking a path |
|
37 | 37 | matched by matcher or not. |
|
38 | 38 | |
|
39 | 39 | If matcher is None, returns True""" |
|
40 | 40 | |
|
41 | 41 | if matcher is None: |
|
42 | 42 | return True |
|
43 | 43 | path = decodedir(path) |
|
44 | 44 | if path.startswith(b'data/'): |
|
45 | 45 | return matcher(path[len(b'data/') : -len(b'.i')]) |
|
46 | 46 | elif path.startswith(b'meta/'): |
|
47 | 47 | return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')]) |
|
48 | 48 | |
|
49 | 49 | raise error.ProgrammingError(b"cannot decode path %s" % path) |
|
50 | 50 | |
|
51 | 51 | |
|
52 | 52 | # This avoids a collision between a file named foo and a dir named |
|
53 | 53 | # foo.i or foo.d |
|
54 | 54 | def _encodedir(path): |
|
55 | 55 | """ |
|
56 | 56 | >>> _encodedir(b'data/foo.i') |
|
57 | 57 | 'data/foo.i' |
|
58 | 58 | >>> _encodedir(b'data/foo.i/bla.i') |
|
59 | 59 | 'data/foo.i.hg/bla.i' |
|
60 | 60 | >>> _encodedir(b'data/foo.i.hg/bla.i') |
|
61 | 61 | 'data/foo.i.hg.hg/bla.i' |
|
62 | 62 | >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n') |
|
63 | 63 | 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n' |
|
64 | 64 | """ |
|
65 | 65 | return ( |
|
66 | 66 | path.replace(b".hg/", b".hg.hg/") |
|
67 | 67 | .replace(b".i/", b".i.hg/") |
|
68 | 68 | .replace(b".d/", b".d.hg/") |
|
69 | 69 | ) |
|
70 | 70 | |
|
71 | 71 | |
|
# use the 'parsers' module's encodedir when it has one, else the Python one
encodedir = getattr(parsers, 'encodedir', _encodedir)
|
73 | 73 | |
|
74 | 74 | |
|
def decodedir(path):
    """Undo the directory-name escaping performed by _encodedir().

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" in path:
        # reverse order of _encodedir(): '.hg.hg/' is unescaped last
        for dirsuffix in (b".d/", b".i/", b".hg/"):
            path = path.replace(dirsuffix[:-1] + b".hg/", dirsuffix)
    return path
|
91 | 91 | |
|
92 | 92 | |
|
93 | 93 | def _reserved(): |
|
94 | 94 | """characters that are problematic for filesystems |
|
95 | 95 | |
|
96 | 96 | * ascii escapes (0..31) |
|
97 | 97 | * ascii hi (126..255) |
|
98 | 98 | * windows specials |
|
99 | 99 | |
|
100 | 100 | these characters will be escaped by encodefunctions |
|
101 | 101 | """ |
|
102 | 102 | winreserved = [ord(x) for x in u'\\:*?"<>|'] |
|
103 | 103 | for x in range(32): |
|
104 | 104 | yield x |
|
105 | 105 | for x in range(126, 256): |
|
106 | 106 | yield x |
|
107 | 107 | for x in winreserved: |
|
108 | 108 | yield x |
|
109 | 109 | |
|
110 | 110 | |
|
def _buildencodefun():
    """Build and return the ``(encode, decode)`` pair for file names.

    Bytes yielded by _reserved() become ``~xx`` (two hex digits), capital
    letters and the underscore become ``_`` followed by the lowercased
    byte, and every other 7-bit byte maps to itself. The decoder raises
    KeyError when no 1 to 3 byte token matches at the current position.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    xchr = pycompat.bytechr

    # identity mapping first; later loops overwrite the special bytes
    cmap = {}
    for code in range(127):
        char = xchr(code)
        cmap[char] = char
    for code in _reserved():
        cmap[xchr(code)] = b"~%02x" % code
    for code in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        char = xchr(code)
        cmap[char] = escape + char.lower()

    # reverse table, keyed by encoded token (1 to 3 bytes long)
    dmap = {encoded: char for char, encoded in cmap.items()}

    def iterdecoded(s):
        pos = 0
        while pos < len(s):
            # try the shortest token first
            for width in pycompat.xrange(1, 4):
                token = s[pos : pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                raise KeyError

    def encode(s):
        return b''.join(
            [cmap[s[pos : pos + 1]] for pos in pycompat.xrange(len(s))]
        )

    def decode(s):
        return b''.join(list(iterdecoded(s)))

    return encode, decode
|
169 | 169 | |
|
170 | 170 | |
|
# encoder/decoder pair built once when the module is loaded
_encodefname, _decodefname = _buildencodefun()
|
172 | 172 | |
|
173 | 173 | |
|
def encodefilename(s):
    """Apply encodedir() and then _encodefname() to ``s``.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    direncoded = encodedir(s)
    return _encodefname(direncoded)
|
180 | 180 | |
|
181 | 181 | |
|
def decodefilename(s):
    """Apply _decodefname() and then decodedir() to ``s``.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
|
188 | 188 | |
|
189 | 189 | |
|
def _buildlowerencodefun():
    """Build and return the lowercasing (one-way) file name encoder.

    Bytes yielded by _reserved() become ``~xx``, capital letters are
    lowercased, and every other 7-bit byte maps to itself.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr

    # identity mapping first; later loops overwrite the special bytes
    cmap = {}
    for code in pycompat.xrange(127):
        cmap[xchr(code)] = xchr(code)
    for code in _reserved():
        cmap[xchr(code)] = b"~%02x" % code
    for code in range(ord(b"A"), ord(b"Z") + 1):
        char = xchr(code)
        cmap[char] = char.lower()

    def lowerencode(s):
        pieces = [cmap[char] for char in pycompat.iterbytestr(s)]
        return b"".join(pieces)

    return lowerencode
|
213 | 213 | |
|
214 | 214 | |
|
# use the 'parsers' module's lowerencode when it has one, else build ours
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
|
220 | 220 | |
|
221 | 221 | |
|
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    ``path`` is a list of segments; it is modified in place and returned.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for idx, segment in enumerate(path):
        if not segment:
            continue
        if dotencode and segment[0] in b'. ':
            segment = b"~%02x" % ord(segment[0:1]) + segment[1:]
            path[idx] = segment
        else:
            # length of the part before the first period (whole segment
            # when there is no period)
            stemlen = segment.find(b'.')
            if stemlen == -1:
                stemlen = len(segment)
            if stemlen == 3:
                isreserved = segment[:3] in _winres3
            elif stemlen == 4:
                isreserved = (
                    segment[3:4] <= b'9'
                    and segment[3:4] >= b'1'
                    and segment[:3] in _winres4
                )
            else:
                isreserved = False
            if isreserved:
                # encode third letter ('aux' -> 'au~78')
                third = b"~%02x" % ord(segment[2:3])
                segment = segment[0:2] + third + segment[3:]
                path[idx] = segment
        if segment[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[idx] = segment[:-1] + b"~%02x" % ord(segment[-1:])
    return path
|
268 | 268 | |
|
269 | 269 | |
|
270 | 270 | _maxstorepathlen = 120 |
|
271 | 271 | _dirprefixlen = 8 |
|
272 | 272 | _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4 |
|
273 | 273 | |
|
274 | 274 | |
|
def _hashencode(path, dotencode):
    """Return the non-reversible hashed store name (``dh/...``) for path.

    The result is ``dh/`` + shortened directory levels + filler + the hex
    sha1 digest of ``path`` + the extension of the basename. The filler is
    the start of the lowerencoded basename, as long as fits within
    _maxstorepathlen.
    """
    digest = hex(hashutil.sha1(path).digest())
    # skips prefix 'data/' or 'meta/'
    lowered = lowerencode(path[5:]).split(b'/')
    parts = _auxencode(lowered, dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]

    shortdirs = []
    shortlen = 0
    for part in parts[:-1]:
        prefix = part[:_dirprefixlen]
        if prefix[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + b'_'
        if shortlen == 0:
            total = len(prefix)
        else:
            # one extra byte for the separating '/'
            total = shortlen + 1 + len(prefix)
            if total > _maxshortdirslen:
                break
        shortdirs.append(prefix)
        shortlen = total

    dirs = b'/'.join(shortdirs)
    if dirs:
        dirs += b'/'
    hashed = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(hashed)
    if spaceleft > 0:
        hashed = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return hashed
|
305 | 305 | |
|
306 | 306 | |
|
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    direncoded = encodedir(path)
    segments = _encodefname(direncoded).split(b'/')
    encoded = b'/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    # too long for the reversible form: fall back to the hashed form
    return _hashencode(direncoded, dotencode)
|
344 | 344 | |
|
345 | 345 | |
|
def _pathencode(path):
    """Encode ``path`` like _hybridencode() with dotencode enabled.

    The hashed form is used as soon as the raw ``path`` is already longer
    than _maxstorepathlen, without computing the reversible form first.
    """
    direncoded = encodedir(path)
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(direncoded).split(b'/')
        encoded = b'/'.join(_auxencode(segments, True))
        if len(encoded) <= _maxstorepathlen:
            return encoded
    return _hashencode(direncoded, True)
|
355 | 355 | |
|
356 | 356 | |
|
# use the 'parsers' module's pathencode when it has one, else the Python one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
|
358 | 358 | |
|
359 | 359 | |
|
def _plainhybridencode(f):
    """Encode ``f`` with _hybridencode(), dotencode disabled."""
    return _hybridencode(f, dotencode=False)
|
362 | 362 | |
|
363 | 363 | |
|
364 | 364 | def _calcmode(vfs): |
|
365 | 365 | try: |
|
366 | 366 | # files in .hg/ will be created using this mode |
|
367 | 367 | mode = vfs.stat().st_mode |
|
368 | 368 | # avoid some useless chmods |
|
369 | 369 | if (0o777 & ~util.umask) == (0o777 & mode): |
|
370 | 370 | mode = None |
|
371 | 371 | except OSError: |
|
372 | 372 | mode = None |
|
373 | 373 | return mode |
|
374 | 374 | |
|
375 | 375 | |
|
376 | 376 | _data = [ |
|
377 | 377 | b'bookmarks', |
|
378 | 378 | b'narrowspec', |
|
379 | 379 | b'data', |
|
380 | 380 | b'meta', |
|
381 | 381 | b'00manifest.d', |
|
382 | 382 | b'00manifest.i', |
|
383 | 383 | b'00changelog.d', |
|
384 | 384 | b'00changelog.i', |
|
385 | 385 | b'phaseroots', |
|
386 | 386 | b'obsstore', |
|
387 | 387 | b'requires', |
|
388 | 388 | ] |
|
389 | 389 | |
|
390 | 390 | REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored') |
|
391 | 391 | REVLOG_FILES_OTHER_EXT = ( |
|
392 | 392 | b'.idx', |
|
393 | 393 | b'.d', |
|
394 | 394 | b'.dat', |
|
395 | 395 | b'.n', |
|
396 | 396 | b'.nd', |
|
397 | 397 | b'.sda', |
|
398 | 398 | b'd.tmpcensored', |
|
399 | 399 | ) |
|
400 | 400 | # files that are "volatile" and might change between listing and streaming |
|
401 | 401 | # |
|
402 | 402 | # note: the ".nd" file are nodemap data and won't "change" but they might be |
|
403 | 403 | # deleted. |
|
404 | 404 | REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd') |
|
405 | 405 | |
|
406 | 406 | # some exception to the above matching |
|
407 | 407 | # |
|
408 | 408 | # XXX This is currently not in use because of issue6542 |
|
409 | 409 | EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$') |
|
410 | 410 | |
|
411 | 411 | |
|
412 | 412 | def is_revlog(f, kind, st): |
|
413 | 413 | if kind != stat.S_IFREG: |
|
414 | 414 | return None |
|
415 | 415 | return revlog_type(f) |
|
416 | 416 | |
|
417 | 417 | |
|
418 | 418 | def revlog_type(f): |
|
419 | 419 | # XXX we need to filter `undo.` created by the transaction here, however |
|
420 | 420 | # being naive about it also filter revlog for `undo.*` files, leading to |
|
421 | 421 | # issue6542. So we no longer use EXCLUDED. |
|
422 | 422 | if f.endswith(REVLOG_FILES_MAIN_EXT): |
|
423 | 423 | return FILEFLAGS_REVLOG_MAIN |
|
424 | 424 | elif f.endswith(REVLOG_FILES_OTHER_EXT): |
|
425 | 425 | t = FILETYPE_FILELOG_OTHER |
|
426 | 426 | if f.endswith(REVLOG_FILES_VOLATILE_EXT): |
|
427 | 427 | t |= FILEFLAGS_VOLATILE |
|
428 | 428 | return t |
|
429 | 429 | return None |
|
430 | 430 | |
|
431 | 431 | |
|
432 | 432 | # the file is part of changelog data |
|
433 | 433 | FILEFLAGS_CHANGELOG = 1 << 13 |
|
434 | 434 | # the file is part of manifest data |
|
435 | 435 | FILEFLAGS_MANIFESTLOG = 1 << 12 |
|
436 | 436 | # the file is part of filelog data |
|
437 | 437 | FILEFLAGS_FILELOG = 1 << 11 |
|
438 | 438 | # file that are not directly part of a revlog |
|
439 | 439 | FILEFLAGS_OTHER = 1 << 10 |
|
440 | 440 | |
|
441 | 441 | # the main entry point for a revlog |
|
442 | 442 | FILEFLAGS_REVLOG_MAIN = 1 << 1 |
|
443 | 443 | # a secondary file for a revlog |
|
444 | 444 | FILEFLAGS_REVLOG_OTHER = 1 << 0 |
|
445 | 445 | |
|
446 | 446 | # files that are "volatile" and might change between listing and streaming |
|
447 | 447 | FILEFLAGS_VOLATILE = 1 << 20 |
|
448 | 448 | |
|
449 | 449 | FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN |
|
450 | 450 | FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER |
|
451 | 451 | FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN |
|
452 | 452 | FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER |
|
453 | 453 | FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN |
|
454 | 454 | FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER |
|
455 | 455 | FILETYPE_OTHER = FILEFLAGS_OTHER |
|
456 | 456 | |
|
457 | 457 | |
|
class basicstore(object):
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        # file names go through encodedir() before reaching the raw vfs
        self.vfs = vfsmod.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """Return the store path of ``f`` after encodedir()."""
        return b'/'.join((self.path, encodedir(f)))

    def _walk(self, relpath, recurse):
        '''return a sorted list of (revlog_type, unencoded, size)'''
        root = self.path
        if relpath:
            root += b'/' + relpath
        # length of the store prefix (and its '/') to drop from full paths
        striplen = len(self.path) + 1
        found = []
        if self.rawvfs.isdir(root):
            readdir = self.rawvfs.readdir
            pending = [root]
            while pending:
                current = pending.pop()
                for name, kind, st in readdir(current, stat=True):
                    full = current + b'/' + name
                    rl_type = is_revlog(name, kind, st)
                    if rl_type is not None:
                        unencoded = util.pconvert(full[striplen:])
                        entry = (rl_type, decodedir(unencoded), st.st_size)
                        found.append(entry)
                    elif kind == stat.S_IFDIR and recurse:
                        pending.append(full)
        found.sort()
        return found

    def changelog(self, trypending, concurrencychecker=None):
        """Return a changelog object reading through this store's vfs."""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """Return a manifestlog object reading through this store's vfs."""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(self, matcher=None, undecodable=None):
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed.

        Note: this base implementation consults neither ``matcher`` nor
        ``undecodable``."""
        entries = self._walk(b'data', True)
        entries = entries + self._walk(b'meta', True)
        for rl_type, unencoded, size in entries:
            yield (FILEFLAGS_FILELOG | rl_type, unencoded, size)

    def topfiles(self):
        """Yield (file_type, unencoded, size) for top-level store files."""
        # yield manifest before changelog
        for rl_type, unencoded, size in reversed(self._walk(b'', False)):
            if unencoded.startswith(b'00changelog'):
                category = FILEFLAGS_CHANGELOG
            elif unencoded.startswith(b'00manifest'):
                category = FILEFLAGS_MANIFESTLOG
            else:
                category = FILETYPE_OTHER
            yield (category | rl_type, unencoded, size)

    def walk(self, matcher=None):
        """return file related to data storage (ie: revlogs)

        yields (file_type, unencoded, size)

        if a matcher is passed, it is forwarded to datafiles() so that only
        storage files of tracked paths matching it are reported
        """
        # yield data files first
        for entry in self.datafiles(matcher):
            yield entry
        for entry in self.topfiles():
            yield entry

    def copylist(self):
        """Return the list of store entries to copy."""
        return _data

    def write(self, tr):
        """Nothing to write for this store type."""

    def invalidatecaches(self):
        """Nothing is cached by this store type."""

    def markremoved(self, fn):
        """Nothing to record for this store type."""

    def __contains__(self, path):
        '''Checks if the store contains path'''
        candidate = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(candidate + b".i"):
            return True
        # dir?
        if not candidate.endswith(b"/"):
            candidate += b"/"
        return self.vfs.exists(candidate)
|
565 | 565 | |
|
566 | 566 | |
|
class encodedstore(basicstore):
    """store living in ``<path>/store`` whose names use encodefilename()"""

    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + b'/store')
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.vfs = vfsmod.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(self, matcher=None, undecodable=None):
        """Yield (file_type, decoded name, size) for data files.

        A name that decodefilename() cannot decode raises
        error.StorageError when ``undecodable`` is None; otherwise the
        name is appended to ``undecodable`` and skipped. Entries rejected
        by ``matcher`` are skipped.
        """
        for ftype, encoded, size in super(encodedstore, self).datafiles():
            try:
                decoded = decodefilename(encoded)
            except KeyError:
                if undecodable is not None:
                    undecodable.append(encoded)
                    continue
                msg = _(b'undecodable revlog name %s') % encoded
                raise error.StorageError(msg)
            if _matchtrackedpath(decoded, matcher):
                yield ftype, decoded, size

    def join(self, f):
        """Return the store path of ``f`` after encodefilename()."""
        return b'/'.join((self.path, encodefilename(f)))

    def copylist(self):
        """Return the list of entries to copy, relative to the parent of
        the store directory."""
        storefiles = [b'store/' + f for f in _data]
        return [b'requires', b'00changelog.i'] + storefiles
|
602 | 602 | |
|
603 | 603 | |
|
class fncache(object):
    """In-memory view of the ``fncache`` file read through ``vfs``.

    ``entries`` is None until the file has been loaded, then a set of
    names. ``addls`` holds names added since the last write; they are
    appended to the file by write() unless a full rewrite is pending
    (``_dirty``).
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        # make sure the file is closed even when loading aborts
        try:
            self.entries = set()
            chunk = b''
            readchunk = functools.partial(fp.read, fncache_chunksize)
            for c in iter(readchunk, b''):
                chunk += c
                try:
                    p = chunk.rindex(b'\n')
                except ValueError:
                    # substring '\n' not found, maybe the entry is bigger
                    # than the chunksize, so let's keep iterating
                    continue
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]

            if chunk:
                # leftover bytes: the last entry has no terminating newline
                msg = _(b"fncache does not ends with a newline")
                if warn:
                    warn(msg + b'\n')
                else:
                    raise error.Abort(
                        msg,
                        hint=_(
                            b"use 'hg debugrebuildfncache' to "
                            b"rebuild the fncache"
                        ),
                    )
            self._checkentries(fp, warn)
        finally:
            fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-read the file to report the offending line number(s)
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Write pending changes to the fncache file.

        A removal (``_dirty``) rewrites the whole file; otherwise pending
        additions are appended. ``tr.addbackup`` is called before the file
        is touched.
        """
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # force a reload on next access
            self.entries = None
            self.addls = set()

    def add(self, fn):
        """Record ``fn`` as a pending addition unless already loaded."""
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget ``fn``; a removal from loaded entries marks us dirty."""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        if fn in self.entries:
            self.entries.remove(fn)
            self._dirty = True

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
|
722 | 722 | |
|
723 | 723 | |
|
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that encodes names and records new files in the fncache"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        writing = mode not in (b'r', b'rb')
        if writing and path.startswith((b'data/', b'meta/')):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            skipadd = self.fncache.entries is None and self.vfs.exists(encoded)
            if skipadd and b'r+' in mode:
                # when appending to an existing file, if the file has size
                # zero, it should be considered as missing. Such zero-size
                # files are the result of truncation when a transaction is
                # aborted.
                if not self.vfs.stat(encoded).st_size:
                    skipadd = False
            if not skipadd:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        # an empty path is passed through without encoding
        target = self.encode(path) if path else path
        return self.vfs.join(target)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith((b'data/', b'meta/')):
            self.fncache.add(path)
|
757 | 757 | |
|
758 | 758 | |
|
class fncachestore(basicstore):
    """store living in ``<path>/store`` that lists its data files in fncache"""

    def __init__(self, path, vfstype, dotencode):
        self.encode = _pathencode if dotencode else _plainhybridencode
        rawvfs = vfstype(path + b'/store')
        self.path = rawvfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.fncache = fncache(rawvfs)
        self.vfs = _fncachevfs(rawvfs, self.fncache, self.encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the store path of ``f`` after encoding."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Return the size of the (already encoded) ``path``."""
        return self.rawvfs.stat(path).st_size

    def datafiles(self, matcher=None, undecodable=None):
        """Yield (file_type, name, size) for the fncache entries.

        Entries rejected by ``matcher`` and entries whose file does not
        exist (ENOENT) are skipped. ``undecodable`` is not consulted.
        """
        for name in sorted(self.fncache):
            if not _matchtrackedpath(name, matcher):
                continue
            encoded = self.encode(name)
            ftype = revlog_type(name)
            assert ftype is not None, name
            ftype |= FILEFLAGS_FILELOG
            try:
                size = self.getsize(encoded)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
                continue
            yield ftype, name, size

    def copylist(self):
        """Return the list of entries to copy, relative to the parent of
        the store directory."""
        storefiles = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        prefixed = [b'store/' + f for f in storefiles]
        return [b'requires', b'00changelog.i'] + prefixed

    def write(self, tr):
        """Write pending fncache changes."""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """Drop the loaded fncache content and pending additions."""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """Remove ``fn`` from the fncache."""
        self.fncache.remove(fn)

    def _exists(self, f):
        """Tell whether the encoded form of ``f`` can be stat()ed."""
        encoded = self.encode(f)
        try:
            self.getsize(encoded)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        else:
            return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        candidate = b"/".join((b"data", path))
        # check for files (exact match)
        indexfile = candidate + b'.i'
        if indexfile in self.fncache and self._exists(indexfile):
            return True
        # now check for directories (prefix match)
        if not candidate.endswith(b'/'):
            candidate += b'/'
        return any(
            entry.startswith(candidate) and self._exists(entry)
            for entry in self.fncache
        )
General Comments 0
You need to be logged in to leave comments.
Login now