@@ -1,1071 +1,1071 @@ hgext/convert/cvsps.py
|
1 | 1 | # Mercurial built-in replacement for cvsps. |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | import functools |
|
9 | 9 | import os |
|
10 | 10 | import pickle |
|
11 | 11 | import re |
|
12 | 12 | |
|
13 | 13 | from mercurial.i18n import _ |
|
14 | 14 | from mercurial.pycompat import open |
|
15 | 15 | from mercurial import ( |
|
16 | 16 | encoding, |
|
17 | 17 | error, |
|
18 | 18 | hook, |
|
19 | 19 | pycompat, |
|
20 | 20 | util, |
|
21 | 21 | ) |
|
22 | 22 | from mercurial.utils import ( |
|
23 | 23 | dateutil, |
|
24 | 24 | procutil, |
|
25 | 25 | stringutil, |
|
26 | 26 | ) |
|
27 | 27 | |
|
28 | 28 | |
|
29 | 29 | class logentry: |
|
30 | 30 | """Class logentry has the following attributes: |
|
31 | 31 | .author - author name as CVS knows it |
|
32 | 32 | .branch - name of branch this revision is on |
|
33 | 33 | .branches - revision tuple of branches starting at this revision |
|
34 | 34 | .comment - commit message |
|
35 | 35 | .commitid - CVS commitid or None |
|
36 | 36 | .date - the commit date as a (time, tz) tuple |
|
37 | 37 | .dead - true if file revision is dead |
|
38 | 38 | .file - Name of file |
|
39 | 39 | .lines - a tuple (+lines, -lines) or None |
|
40 | 40 | .parent - Previous revision of this entry |
|
41 | 41 | .rcs - name of file as returned from CVS |
|
42 | 42 | .revision - revision number as tuple |
|
43 | 43 | .tags - list of tags on the file |
|
44 | 44 | .synthetic - is this a synthetic "file ... added on ..." revision? |
|
45 | 45 | .mergepoint - the branch that has been merged from (if present in |
|
46 | 46 | rlog output) or None |
|
47 | 47 | .branchpoints - the branches that start at the current entry or empty |
|
48 | 48 | """ |
|
49 | 49 | |
|
50 | 50 | def __init__(self, **entries): |
|
51 | 51 | self.synthetic = False |
|
52 | 52 | self.__dict__.update(entries) |
|
53 | 53 | |
|
54 | 54 | def __repr__(self): |
|
55 | 55 | items = ("%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__)) |
|
56 | 56 | return "%s(%s)" % (type(self).__name__, ", ".join(items)) |
|
57 | 57 | |
|
58 | 58 | |
|
59 | 59 | class logerror(Exception): |
|
60 | 60 | pass |
|
61 | 61 | |
|
62 | 62 | |
|
63 | 63 | def getrepopath(cvspath): |
|
64 | 64 | """Return the repository path from a CVS path. |
|
65 | 65 | |
|
66 | 66 | >>> getrepopath(b'/foo/bar') |
|
67 | 67 | '/foo/bar' |
|
68 | 68 | >>> getrepopath(b'c:/foo/bar') |
|
69 | 69 | '/foo/bar' |
|
70 | 70 | >>> getrepopath(b':pserver:10/foo/bar') |
|
71 | 71 | '/foo/bar' |
|
72 | 72 | >>> getrepopath(b':pserver:10c:/foo/bar') |
|
73 | 73 | '/foo/bar' |
|
74 | 74 | >>> getrepopath(b':pserver:/foo/bar') |
|
75 | 75 | '/foo/bar' |
|
76 | 76 | >>> getrepopath(b':pserver:c:/foo/bar') |
|
77 | 77 | '/foo/bar' |
|
78 | 78 | >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar') |
|
79 | 79 | '/foo/bar' |
|
80 | 80 | >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar') |
|
81 | 81 | '/foo/bar' |
|
82 | 82 | >>> getrepopath(b'user@server/path/to/repository') |
|
83 | 83 | '/path/to/repository' |
|
84 | 84 | """ |
|
85 | 85 | # According to CVS manual, CVS paths are expressed like: |
|
86 | 86 | # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository |
|
87 | 87 | # |
|
88 | 88 | # The CVS path is split into parts, and the position of the first occurrence |

89 | 89 | # of the '/' char after the '@' is located. The result is the rest of the |

90 | 90 | # string from that '/' onwards, inclusive. |
|
91 | 91 | |
|
92 | 92 | parts = cvspath.split(b':') |
|
93 | 93 | atposition = parts[-1].find(b'@') |
|
94 | 94 | start = 0 |
|
95 | 95 | |
|
96 | 96 | if atposition != -1: |
|
97 | 97 | start = atposition |
|
98 | 98 | |
|
99 | 99 | repopath = parts[-1][parts[-1].find(b'/', start) :] |
|
100 | 100 | return repopath |
|
101 | 101 | |
|
102 | 102 | |
|
103 | 103 | def createlog(ui, directory=None, root=b"", rlog=True, cache=None): |
|
104 | 104 | '''Collect the CVS rlog''' |
|
105 | 105 | |
|
106 | 106 | # Because we store many duplicate commit log messages, reusing strings |
|
107 | 107 | # saves a lot of memory and pickle storage space. |
|
108 | 108 | _scache = {} |
|
109 | 109 | |
|
110 | 110 | def scache(s): |
|
111 | 111 | """return a shared version of a string""" |
|
112 | 112 | return _scache.setdefault(s, s) |
|
113 | 113 | |
|
114 | 114 | ui.status(_(b'collecting CVS rlog\n')) |
|
115 | 115 | |
|
116 | 116 | log = [] # list of logentry objects containing the CVS state |
|
117 | 117 | |
|
118 | 118 | # patterns to match in CVS (r)log output, by state of use |
|
119 | 119 | re_00 = re.compile(b'RCS file: (.+)$') |
|
120 | 120 | re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$') |
|
121 | 121 | re_02 = re.compile(b'cvs (r?log|server): (.+)\n$') |
|
122 | 122 | re_03 = re.compile( |
|
123 | 123 | b"(Cannot access.+CVSROOT)|(can't create temporary directory.+)$" |
|
124 | 124 | ) |
|
125 | 125 | re_10 = re.compile(b'Working file: (.+)$') |
|
126 | 126 | re_20 = re.compile(b'symbolic names:') |
|
127 | 127 | re_30 = re.compile(b'\t(.+): ([\\d.]+)$') |
|
128 | 128 | re_31 = re.compile(b'----------------------------$') |
|
129 | 129 | re_32 = re.compile( |
|
130 | 130 | b'=======================================' |
|
131 | 131 | b'======================================$' |
|
132 | 132 | ) |
|
133 | 133 | re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$') |
|
134 | 134 | re_60 = re.compile( |
|
135 | 135 | br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);' |
|
136 | 136 | br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?' |
|
137 | 137 | br'(\s+commitid:\s+([^;]+);)?' |
|
138 | 138 | br'(.*mergepoint:\s+([^;]+);)?' |
|
139 | 139 | ) |
|
140 | 140 | re_70 = re.compile(b'branches: (.+);$') |
|
141 | 141 | |
|
142 | 142 | file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch') |
|
143 | 143 | |
|
144 | 144 | prefix = b'' # leading path to strip from what we get from CVS |
|
145 | 145 | |
|
146 | 146 | if directory is None: |
|
147 | 147 | # Current working directory |
|
148 | 148 | |
|
149 | 149 | # Get the real directory in the repository |
|
150 | 150 | try: |
|
151 | 151 | with open(os.path.join(b'CVS', b'Repository'), b'rb') as f: |
|
152 | 152 | prefix = f.read().strip() |
|
153 | 153 | directory = prefix |
|
154 | 154 | if prefix == b".": |
|
155 | 155 | prefix = b"" |
|
156 | 156 | except IOError: |
|
157 | 157 | raise logerror(_(b'not a CVS sandbox')) |
|
158 | 158 | |
|
159 | 159 | if prefix and not prefix.endswith(pycompat.ossep): |
|
160 | 160 | prefix += pycompat.ossep |
|
161 | 161 | |
|
162 | 162 | # Use the Root file in the sandbox, if it exists |
|
163 | 163 | try: |
|
164 | 164 | root = open(os.path.join(b'CVS', b'Root'), b'rb').read().strip() |
|
165 | 165 | except IOError: |
|
166 | 166 | pass |
|
167 | 167 | |
|
168 | 168 | if not root: |
|
169 | 169 | root = encoding.environ.get(b'CVSROOT', b'') |
|
170 | 170 | |
|
171 | 171 | # read log cache if one exists |
|
172 | 172 | oldlog = [] |
|
173 | 173 | date = None |
|
174 | 174 | |
|
175 | 175 | if cache: |
|
176 | 176 | cachedir = os.path.expanduser(b'~/.hg.cvsps') |
|
177 | 177 | if not os.path.exists(cachedir): |
|
178 | 178 | os.mkdir(cachedir) |
|
179 | 179 | |
|
180 | 180 | # The cvsps cache pickle needs a uniquified name, based on the |
|
181 | 181 | # repository location. The address may have all sorts of nasties |
|
182 | 182 | # in it, slashes, colons and such. So here we take just the |
|
183 | 183 | # alphanumeric characters, concatenated in a way that does not |
|
184 | 184 | # mix up the various components, so that |
|
185 | 185 | # :pserver:user@server:/path |
|
186 | 186 | # and |
|
187 | 187 | # /pserver/user/server/path |
|
188 | 188 | # are mapped to different cache file names. |
|
189 | 189 | cachefile = root.split(b":") + [directory, b"cache"] |
|
190 | 190 | cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s] |
|
191 | 191 | cachefile = os.path.join( |
|
192 | 192 | cachedir, b'.'.join([s for s in cachefile if s]) |
|
193 | 193 | ) |
|
194 | 194 | |
|
195 | 195 | if cache == b'update': |
|
196 | 196 | try: |
|
197 | 197 | ui.note(_(b'reading cvs log cache %s\n') % cachefile) |
|
198 | 198 | oldlog = pickle.load(open(cachefile, b'rb')) |
|
199 | 199 | for e in oldlog: |
|
200 | 200 | if not ( |
|
201 | | util.safehasattr(e, b'branchpoints') |

202 | | and util.safehasattr(e, b'commitid') |

203 | | and util.safehasattr(e, b'mergepoint') |

| 201 | hasattr(e, 'branchpoints') |

| 202 | and hasattr(e, 'commitid') |

| 203 | and hasattr(e, 'mergepoint') |
|
204 | 204 | ): |
|
205 | 205 | ui.status(_(b'ignoring old cache\n')) |
|
206 | 206 | oldlog = [] |
|
207 | 207 | break |
|
208 | 208 | |
|
209 | 209 | ui.note(_(b'cache has %d log entries\n') % len(oldlog)) |
|
210 | 210 | except Exception as e: |
|
211 | 211 | ui.note(_(b'error reading cache: %r\n') % e) |
|
212 | 212 | |
|
213 | 213 | if oldlog: |
|
214 | 214 | date = oldlog[-1].date # last commit date as a (time,tz) tuple |
|
215 | 215 | date = dateutil.datestr(date, b'%Y/%m/%d %H:%M:%S %1%2') |
|
216 | 216 | |
|
217 | 217 | # build the CVS commandline |
|
218 | 218 | cmd = [b'cvs', b'-q'] |
|
219 | 219 | if root: |
|
220 | 220 | cmd.append(b'-d%s' % root) |
|
221 | 221 | p = util.normpath(getrepopath(root)) |
|
222 | 222 | if not p.endswith(b'/'): |
|
223 | 223 | p += b'/' |
|
224 | 224 | if prefix: |
|
225 | 225 | # looks like normpath replaces "" by "." |
|
226 | 226 | prefix = p + util.normpath(prefix) |
|
227 | 227 | else: |
|
228 | 228 | prefix = p |
|
229 | 229 | cmd.append([b'log', b'rlog'][rlog]) |
|
230 | 230 | if date: |
|
231 | 231 | # no space between option and date string |
|
232 | 232 | cmd.append(b'-d>%s' % date) |
|
233 | 233 | cmd.append(directory) |
|
234 | 234 | |
|
235 | 235 | # state machine begins here |
|
236 | 236 | tags = {} # dictionary of revisions on current file with their tags |
|
237 | 237 | branchmap = {} # mapping between branch names and revision numbers |
|
238 | 238 | rcsmap = {} |
|
239 | 239 | state = 0 |
|
240 | 240 | store = False # set when a new record can be appended |
|
241 | 241 | |
|
242 | 242 | cmd = [procutil.shellquote(arg) for arg in cmd] |
|
243 | 243 | ui.note(_(b"running %s\n") % (b' '.join(cmd))) |
|
244 | 244 | ui.debug(b"prefix=%r directory=%r root=%r\n" % (prefix, directory, root)) |
|
245 | 245 | |
|
246 | 246 | pfp = procutil.popen(b' '.join(cmd), b'rb') |
|
247 | 247 | peek = util.fromnativeeol(pfp.readline()) |
|
248 | 248 | while True: |
|
249 | 249 | line = peek |
|
250 | 250 | if line == b'': |
|
251 | 251 | break |
|
252 | 252 | peek = util.fromnativeeol(pfp.readline()) |
|
253 | 253 | if line.endswith(b'\n'): |
|
254 | 254 | line = line[:-1] |
|
255 | 255 | # ui.debug('state=%d line=%r\n' % (state, line)) |
|
256 | 256 | |
|
257 | 257 | if state == 0: |
|
258 | 258 | # initial state, consume input until we see 'RCS file' |
|
259 | 259 | match = re_00.match(line) |
|
260 | 260 | if match: |
|
261 | 261 | rcs = match.group(1) |
|
262 | 262 | tags = {} |
|
263 | 263 | if rlog: |
|
264 | 264 | filename = util.normpath(rcs[:-2]) |
|
265 | 265 | if filename.startswith(prefix): |
|
266 | 266 | filename = filename[len(prefix) :] |
|
267 | 267 | if filename.startswith(b'/'): |
|
268 | 268 | filename = filename[1:] |
|
269 | 269 | if filename.startswith(b'Attic/'): |
|
270 | 270 | filename = filename[6:] |
|
271 | 271 | else: |
|
272 | 272 | filename = filename.replace(b'/Attic/', b'/') |
|
273 | 273 | state = 2 |
|
274 | 274 | continue |
|
275 | 275 | state = 1 |
|
276 | 276 | continue |
|
277 | 277 | match = re_01.match(line) |
|
278 | 278 | if match: |
|
279 | 279 | raise logerror(match.group(1)) |
|
280 | 280 | match = re_02.match(line) |
|
281 | 281 | if match: |
|
282 | 282 | raise logerror(match.group(2)) |
|
283 | 283 | if re_03.match(line): |
|
284 | 284 | raise logerror(line) |
|
285 | 285 | |
|
286 | 286 | elif state == 1: |
|
287 | 287 | # expect 'Working file' (only when using log instead of rlog) |
|
288 | 288 | match = re_10.match(line) |
|
289 | 289 | assert match, _(b'RCS file must be followed by working file') |
|
290 | 290 | filename = util.normpath(match.group(1)) |
|
291 | 291 | state = 2 |
|
292 | 292 | |
|
293 | 293 | elif state == 2: |
|
294 | 294 | # expect 'symbolic names' |
|
295 | 295 | if re_20.match(line): |
|
296 | 296 | branchmap = {} |
|
297 | 297 | state = 3 |
|
298 | 298 | |
|
299 | 299 | elif state == 3: |
|
300 | 300 | # read the symbolic names and store as tags |
|
301 | 301 | match = re_30.match(line) |
|
302 | 302 | if match: |
|
303 | 303 | rev = [int(x) for x in match.group(2).split(b'.')] |
|
304 | 304 | |
|
305 | 305 | # Convert magic branch number to an odd-numbered one |
|
306 | 306 | revn = len(rev) |
|
307 | 307 | if revn > 3 and (revn % 2) == 0 and rev[-2] == 0: |
|
308 | 308 | rev = rev[:-2] + rev[-1:] |
|
309 | 309 | rev = tuple(rev) |
|
310 | 310 | |
|
311 | 311 | if rev not in tags: |
|
312 | 312 | tags[rev] = [] |
|
313 | 313 | tags[rev].append(match.group(1)) |
|
314 | 314 | branchmap[match.group(1)] = match.group(2) |
|
315 | 315 | |
|
316 | 316 | elif re_31.match(line): |
|
317 | 317 | state = 5 |
|
318 | 318 | elif re_32.match(line): |
|
319 | 319 | state = 0 |
|
320 | 320 | |
|
321 | 321 | elif state == 4: |
|
322 | 322 | # expecting '------' separator before first revision |
|
323 | 323 | if re_31.match(line): |
|
324 | 324 | state = 5 |
|
325 | 325 | else: |
|
326 | 326 | assert not re_32.match(line), _( |
|
327 | 327 | b'must have at least some revisions' |
|
328 | 328 | ) |
|
329 | 329 | |
|
330 | 330 | elif state == 5: |
|
331 | 331 | # expecting revision number and possibly (ignored) lock indication |
|
332 | 332 | # we create the logentry here from values stored in states 0 to 4, |
|
333 | 333 | # as this state is re-entered for subsequent revisions of a file. |
|
334 | 334 | match = re_50.match(line) |
|
335 | 335 | assert match, _(b'expected revision number') |
|
336 | 336 | e = logentry( |
|
337 | 337 | rcs=scache(rcs), |
|
338 | 338 | file=scache(filename), |
|
339 | 339 | revision=tuple([int(x) for x in match.group(1).split(b'.')]), |
|
340 | 340 | branches=[], |
|
341 | 341 | parent=None, |
|
342 | 342 | commitid=None, |
|
343 | 343 | mergepoint=None, |
|
344 | 344 | branchpoints=set(), |
|
345 | 345 | ) |
|
346 | 346 | |
|
347 | 347 | state = 6 |
|
348 | 348 | |
|
349 | 349 | elif state == 6: |
|
350 | 350 | # expecting date, author, state, lines changed |
|
351 | 351 | match = re_60.match(line) |
|
352 | 352 | assert match, _(b'revision must be followed by date line') |
|
353 | 353 | d = match.group(1) |
|
354 | 354 | if d[2] == b'/': |
|
355 | 355 | # Y2K |
|
356 | 356 | d = b'19' + d |
|
357 | 357 | |
|
358 | 358 | if len(d.split()) != 3: |
|
359 | 359 | # cvs log dates always in GMT |
|
360 | 360 | d = d + b' UTC' |
|
361 | 361 | e.date = dateutil.parsedate( |
|
362 | 362 | d, |
|
363 | 363 | [ |
|
364 | 364 | b'%y/%m/%d %H:%M:%S', |
|
365 | 365 | b'%Y/%m/%d %H:%M:%S', |
|
366 | 366 | b'%Y-%m-%d %H:%M:%S', |
|
367 | 367 | ], |
|
368 | 368 | ) |
|
369 | 369 | e.author = scache(match.group(2)) |
|
370 | 370 | e.dead = match.group(3).lower() == b'dead' |
|
371 | 371 | |
|
372 | 372 | if match.group(5): |
|
373 | 373 | if match.group(6): |
|
374 | 374 | e.lines = (int(match.group(5)), int(match.group(6))) |
|
375 | 375 | else: |
|
376 | 376 | e.lines = (int(match.group(5)), 0) |
|
377 | 377 | elif match.group(6): |
|
378 | 378 | e.lines = (0, int(match.group(6))) |
|
379 | 379 | else: |
|
380 | 380 | e.lines = None |
|
381 | 381 | |
|
382 | 382 | if match.group(7): # cvs 1.12 commitid |
|
383 | 383 | e.commitid = match.group(8) |
|
384 | 384 | |
|
385 | 385 | if match.group(9): # cvsnt mergepoint |
|
386 | 386 | myrev = match.group(10).split(b'.') |
|
387 | 387 | if len(myrev) == 2: # head |
|
388 | 388 | e.mergepoint = b'HEAD' |
|
389 | 389 | else: |
|
390 | 390 | myrev = b'.'.join(myrev[:-2] + [b'0', myrev[-2]]) |
|
391 | 391 | branches = [b for b in branchmap if branchmap[b] == myrev] |
|
392 | 392 | assert len(branches) == 1, ( |
|
393 | 393 | b'unknown branch: %s' % e.mergepoint |
|
394 | 394 | ) |
|
395 | 395 | e.mergepoint = branches[0] |
|
396 | 396 | |
|
397 | 397 | e.comment = [] |
|
398 | 398 | state = 7 |
|
399 | 399 | |
|
400 | 400 | elif state == 7: |
|
401 | 401 | # read the revision numbers of branches that start at this revision |
|
402 | 402 | # or store the commit log message otherwise |
|
403 | 403 | m = re_70.match(line) |
|
404 | 404 | if m: |
|
405 | 405 | e.branches = [ |
|
406 | 406 | tuple([int(y) for y in x.strip().split(b'.')]) |
|
407 | 407 | for x in m.group(1).split(b';') |
|
408 | 408 | ] |
|
409 | 409 | state = 8 |
|
410 | 410 | elif re_31.match(line) and re_50.match(peek): |
|
411 | 411 | state = 5 |
|
412 | 412 | store = True |
|
413 | 413 | elif re_32.match(line): |
|
414 | 414 | state = 0 |
|
415 | 415 | store = True |
|
416 | 416 | else: |
|
417 | 417 | e.comment.append(line) |
|
418 | 418 | |
|
419 | 419 | elif state == 8: |
|
420 | 420 | # store commit log message |
|
421 | 421 | if re_31.match(line): |
|
422 | 422 | cpeek = peek |
|
423 | 423 | if cpeek.endswith(b'\n'): |
|
424 | 424 | cpeek = cpeek[:-1] |
|
425 | 425 | if re_50.match(cpeek): |
|
426 | 426 | state = 5 |
|
427 | 427 | store = True |
|
428 | 428 | else: |
|
429 | 429 | e.comment.append(line) |
|
430 | 430 | elif re_32.match(line): |
|
431 | 431 | state = 0 |
|
432 | 432 | store = True |
|
433 | 433 | else: |
|
434 | 434 | e.comment.append(line) |
|
435 | 435 | |
|
436 | 436 | # When a file is added on a branch B1, CVS creates a synthetic |
|
437 | 437 | # dead trunk revision 1.1 so that the branch has a root. |
|
438 | 438 | # Likewise, if you merge such a file to a later branch B2 (one |
|
439 | 439 | # that already existed when the file was added on B1), CVS |
|
440 | 440 | # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop |
|
441 | 441 | # these revisions now, but mark them synthetic so |
|
442 | 442 | # createchangeset() can take care of them. |
|
443 | 443 | if ( |
|
444 | 444 | store |
|
445 | 445 | and e.dead |
|
446 | 446 | and e.revision[-1] == 1 |
|
447 | 447 | and len(e.comment) == 1 # 1.1 or 1.1.x.1 |
|
448 | 448 | and file_added_re.match(e.comment[0]) |
|
449 | 449 | ): |
|
450 | 450 | ui.debug( |
|
451 | 451 | b'found synthetic revision in %s: %r\n' % (e.rcs, e.comment[0]) |
|
452 | 452 | ) |
|
453 | 453 | e.synthetic = True |
|
454 | 454 | |
|
455 | 455 | if store: |
|
456 | 456 | # clean up the results and save in the log. |
|
457 | 457 | store = False |
|
458 | 458 | e.tags = sorted([scache(x) for x in tags.get(e.revision, [])]) |
|
459 | 459 | e.comment = scache(b'\n'.join(e.comment)) |
|
460 | 460 | |
|
461 | 461 | revn = len(e.revision) |
|
462 | 462 | if revn > 3 and (revn % 2) == 0: |
|
463 | 463 | e.branch = tags.get(e.revision[:-1], [None])[0] |
|
464 | 464 | else: |
|
465 | 465 | e.branch = None |
|
466 | 466 | |
|
467 | 467 | # find the branches starting from this revision |
|
468 | 468 | branchpoints = set() |
|
469 | 469 | for branch, revision in branchmap.items(): |
|
470 | 470 | revparts = tuple([int(i) for i in revision.split(b'.')]) |
|
471 | 471 | if len(revparts) < 2: # bad tags |
|
472 | 472 | continue |
|
473 | 473 | if revparts[-2] == 0 and revparts[-1] % 2 == 0: |
|
474 | 474 | # normal branch |
|
475 | 475 | if revparts[:-2] == e.revision: |
|
476 | 476 | branchpoints.add(branch) |
|
477 | 477 | elif revparts == (1, 1, 1): # vendor branch |
|
478 | 478 | if revparts in e.branches: |
|
479 | 479 | branchpoints.add(branch) |
|
480 | 480 | e.branchpoints = branchpoints |
|
481 | 481 | |
|
482 | 482 | log.append(e) |
|
483 | 483 | |
|
484 | 484 | rcsmap[e.rcs.replace(b'/Attic/', b'/')] = e.rcs |
|
485 | 485 | |
|
486 | 486 | if len(log) % 100 == 0: |
|
487 | 487 | ui.status( |
|
488 | 488 | stringutil.ellipsis(b'%d %s' % (len(log), e.file), 80) |
|
489 | 489 | + b'\n' |
|
490 | 490 | ) |
|
491 | 491 | |
|
492 | 492 | log.sort(key=lambda x: (x.rcs, x.revision)) |
|
493 | 493 | |
|
494 | 494 | # find parent revisions of individual files |
|
495 | 495 | versions = {} |
|
496 | 496 | for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)): |
|
497 | 497 | rcs = e.rcs.replace(b'/Attic/', b'/') |
|
498 | 498 | if rcs in rcsmap: |
|
499 | 499 | e.rcs = rcsmap[rcs] |
|
500 | 500 | branch = e.revision[:-1] |
|
501 | 501 | versions[(e.rcs, branch)] = e.revision |
|
502 | 502 | |
|
503 | 503 | for e in log: |
|
504 | 504 | branch = e.revision[:-1] |
|
505 | 505 | p = versions.get((e.rcs, branch), None) |
|
506 | 506 | if p is None: |
|
507 | 507 | p = e.revision[:-2] |
|
508 | 508 | e.parent = p |
|
509 | 509 | versions[(e.rcs, branch)] = e.revision |
|
510 | 510 | |
|
511 | 511 | # update the log cache |
|
512 | 512 | if cache: |
|
513 | 513 | if log: |
|
514 | 514 | # join up the old and new logs |
|
515 | 515 | log.sort(key=lambda x: x.date) |
|
516 | 516 | |
|
517 | 517 | if oldlog and oldlog[-1].date >= log[0].date: |
|
518 | 518 | raise logerror( |
|
519 | 519 | _( |
|
520 | 520 | b'log cache overlaps with new log entries,' |
|
521 | 521 | b' re-run without cache.' |
|
522 | 522 | ) |
|
523 | 523 | ) |
|
524 | 524 | |
|
525 | 525 | log = oldlog + log |
|
526 | 526 | |
|
527 | 527 | # write the new cachefile |
|
528 | 528 | ui.note(_(b'writing cvs log cache %s\n') % cachefile) |
|
529 | 529 | pickle.dump(log, open(cachefile, b'wb')) |
|
530 | 530 | else: |
|
531 | 531 | log = oldlog |
|
532 | 532 | |
|
533 | 533 | ui.status(_(b'%d log entries\n') % len(log)) |
|
534 | 534 | |
|
535 | 535 | encodings = ui.configlist(b'convert', b'cvsps.logencoding') |
|
536 | 536 | if encodings: |
|
537 | 537 | |
|
538 | 538 | def revstr(r): |
|
539 | 539 | # this is needed, because logentry.revision is a tuple of "int" |
|
540 | 540 | # (e.g. (1, 2) for "1.2") |
|
541 | 541 | return b'.'.join(pycompat.maplist(pycompat.bytestr, r)) |
|
542 | 542 | |
|
543 | 543 | for entry in log: |
|
544 | 544 | comment = entry.comment |
|
545 | 545 | for e in encodings: |
|
546 | 546 | try: |
|
547 | 547 | entry.comment = comment.decode(pycompat.sysstr(e)).encode( |
|
548 | 548 | 'utf-8' |
|
549 | 549 | ) |
|
550 | 550 | if ui.debugflag: |
|
551 | 551 | ui.debug( |
|
552 | 552 | b"transcoding by %s: %s of %s\n" |
|
553 | 553 | % (e, revstr(entry.revision), entry.file) |
|
554 | 554 | ) |
|
555 | 555 | break |
|
556 | 556 | except UnicodeDecodeError: |
|
557 | 557 | pass # try next encoding |
|
558 | 558 | except LookupError as inst: # unknown encoding, maybe |
|
559 | 559 | raise error.Abort( |
|
560 | 560 | pycompat.bytestr(inst), |
|
561 | 561 | hint=_( |
|
562 | 562 | b'check convert.cvsps.logencoding configuration' |
|
563 | 563 | ), |
|
564 | 564 | ) |
|
565 | 565 | else: |
|
566 | 566 | raise error.Abort( |
|
567 | 567 | _( |
|
568 | 568 | b"no encoding can transcode" |
|
569 | 569 | b" CVS log message for %s of %s" |
|
570 | 570 | ) |
|
571 | 571 | % (revstr(entry.revision), entry.file), |
|
572 | 572 | hint=_(b'check convert.cvsps.logencoding configuration'), |
|
573 | 573 | ) |
|
574 | 574 | |
|
575 | 575 | hook.hook(ui, None, b"cvslog", True, log=log) |
|
576 | 576 | |
|
577 | 577 | return log |
|
578 | 578 | |
|
579 | 579 | |
|
580 | 580 | class changeset: |
|
581 | 581 | """Class changeset has the following attributes: |
|
582 | 582 | .id - integer identifying this changeset (list index) |
|
583 | 583 | .author - author name as CVS knows it |
|
584 | 584 | .branch - name of branch this changeset is on, or None |
|
585 | 585 | .comment - commit message |
|
586 | 586 | .commitid - CVS commitid or None |
|
587 | 587 | .date - the commit date as a (time,tz) tuple |
|
588 | 588 | .entries - list of logentry objects in this changeset |
|
589 | 589 | .parents - list of one or two parent changesets |
|
590 | 590 | .tags - list of tags on this changeset |
|
591 | 591 | .synthetic - from synthetic revision "file ... added on branch ..." |
|
592 | 592 | .mergepoint- the branch that has been merged from or None |
|
593 | 593 | .branchpoints- the branches that start at the current entry or empty |
|
594 | 594 | """ |
|
595 | 595 | |
|
596 | 596 | def __init__(self, **entries): |
|
597 | 597 | self.id = None |
|
598 | 598 | self.synthetic = False |
|
599 | 599 | self.__dict__.update(entries) |
|
600 | 600 | |
|
601 | 601 | def __repr__(self): |
|
602 | 602 | items = ( |
|
603 | 603 | b"%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__) |
|
604 | 604 | ) |
|
605 | 605 | return b"%s(%s)" % (type(self).__name__, b", ".join(items)) |
|
606 | 606 | |
|
607 | 607 | |
|
608 | 608 | def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None): |
|
609 | 609 | '''Convert log into changesets.''' |
|
610 | 610 | |
|
611 | 611 | ui.status(_(b'creating changesets\n')) |
|
612 | 612 | |
|
613 | 613 | # try to order commitids by date |
|
614 | 614 | mindate = {} |
|
615 | 615 | for e in log: |
|
616 | 616 | if e.commitid: |
|
617 | 617 | if e.commitid not in mindate: |
|
618 | 618 | mindate[e.commitid] = e.date |
|
619 | 619 | else: |
|
620 | 620 | mindate[e.commitid] = min(e.date, mindate[e.commitid]) |
|
621 | 621 | |
|
622 | 622 | # Merge changesets |
|
623 | 623 | log.sort( |
|
624 | 624 | key=lambda x: ( |
|
625 | 625 | mindate.get(x.commitid, (-1, 0)), |
|
626 | 626 | x.commitid or b'', |
|
627 | 627 | x.comment, |
|
628 | 628 | x.author, |
|
629 | 629 | x.branch or b'', |
|
630 | 630 | x.date, |
|
631 | 631 | x.branchpoints, |
|
632 | 632 | ) |
|
633 | 633 | ) |
|
634 | 634 | |
|
635 | 635 | changesets = [] |
|
636 | 636 | files = set() |
|
637 | 637 | c = None |
|
638 | 638 | for i, e in enumerate(log): |
|
639 | 639 | |
|
640 | 640 | # Check if log entry belongs to the current changeset or not. |
|
641 | 641 | |
|
642 | 642 | # Since CVS is file-centric, two different file revisions with |
|
643 | 643 | # different branchpoints should be treated as belonging to two |
|
644 | 644 | # different changesets (and the ordering is important and not |
|
645 | 645 | # honoured by cvsps at this point). |
|
646 | 646 | # |
|
647 | 647 | # Consider the following case: |
|
648 | 648 | # foo 1.1 branchpoints: [MYBRANCH] |
|
649 | 649 | # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2] |
|
650 | 650 | # |
|
651 | 651 | # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a |
|
652 | 652 | # later version of foo may be in MYBRANCH2, so foo should be the |
|
653 | 653 | # first changeset and bar the next and MYBRANCH and MYBRANCH2 |
|
654 | 654 | # should both start off of the bar changeset. No provisions are |
|
655 | 655 | # made to ensure that this is, in fact, what happens. |
|
656 | 656 | if not ( |
|
657 | 657 | c |
|
658 | 658 | and e.branchpoints == c.branchpoints |
|
659 | 659 | and ( # cvs commitids |
|
660 | 660 | (e.commitid is not None and e.commitid == c.commitid) |
|
661 | 661 | or ( # no commitids, use fuzzy commit detection |
|
662 | 662 | (e.commitid is None or c.commitid is None) |
|
663 | 663 | and e.comment == c.comment |
|
664 | 664 | and e.author == c.author |
|
665 | 665 | and e.branch == c.branch |
|
666 | 666 | and ( |
|
667 | 667 | (c.date[0] + c.date[1]) |
|
668 | 668 | <= (e.date[0] + e.date[1]) |
|
669 | 669 | <= (c.date[0] + c.date[1]) + fuzz |
|
670 | 670 | ) |
|
671 | 671 | and e.file not in files |
|
672 | 672 | ) |
|
673 | 673 | ) |
|
674 | 674 | ): |
|
675 | 675 | c = changeset( |
|
676 | 676 | comment=e.comment, |
|
677 | 677 | author=e.author, |
|
678 | 678 | branch=e.branch, |
|
679 | 679 | date=e.date, |
|
680 | 680 | entries=[], |
|
681 | 681 | mergepoint=e.mergepoint, |
|
682 | 682 | branchpoints=e.branchpoints, |
|
683 | 683 | commitid=e.commitid, |
|
684 | 684 | ) |
|
685 | 685 | changesets.append(c) |
|
686 | 686 | |
|
687 | 687 | files = set() |
|
688 | 688 | if len(changesets) % 100 == 0: |
|
689 | 689 | t = b'%d %s' % ( |
|
690 | 690 | len(changesets), |
|
691 | 691 | pycompat.byterepr(e.comment)[2:-1], |
|
692 | 692 | ) |
|
693 | 693 | ui.status(stringutil.ellipsis(t, 80) + b'\n') |
|
694 | 694 | |
|
695 | 695 | c.entries.append(e) |
|
696 | 696 | files.add(e.file) |
|
697 | 697 | c.date = e.date # changeset date is date of latest commit in it |
|
698 | 698 | |
|
699 | 699 | # Mark synthetic changesets |
|
700 | 700 | |
|
701 | 701 | for c in changesets: |
|
702 | 702 | # Synthetic revisions always get their own changeset, because |
|
703 | 703 | # the log message includes the filename. E.g. if you add file3 |
|
704 | 704 | # and file4 on a branch, you get four log entries and three |
|
705 | 705 | # changesets: |
|
706 | 706 | # "File file3 was added on branch ..." (synthetic, 1 entry) |
|
707 | 707 | # "File file4 was added on branch ..." (synthetic, 1 entry) |
|
708 | 708 | # "Add file3 and file4 to fix ..." (real, 2 entries) |
|
709 | 709 | # Hence the check for 1 entry here. |
|
710 | 710 | c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic |
|
711 | 711 | |
|
712 | 712 | # Sort files in each changeset |
|
713 | 713 | |
|
714 | 714 | def entitycompare(l, r): |
|
715 | 715 | """Mimic cvsps sorting order""" |
|
716 | 716 | l = l.file.split(b'/') |
|
717 | 717 | r = r.file.split(b'/') |
|
718 | 718 | nl = len(l) |
|
719 | 719 | nr = len(r) |
|
720 | 720 | n = min(nl, nr) |
|
721 | 721 | for i in range(n): |
|
722 | 722 | if i + 1 == nl and nl < nr: |
|
723 | 723 | return -1 |
|
724 | 724 | elif i + 1 == nr and nl > nr: |
|
725 | 725 | return +1 |
|
726 | 726 | elif l[i] < r[i]: |
|
727 | 727 | return -1 |
|
728 | 728 | elif l[i] > r[i]: |
|
729 | 729 | return +1 |
|
730 | 730 | return 0 |
|
731 | 731 | |
|
732 | 732 | for c in changesets: |
|
733 | 733 | c.entries.sort(key=functools.cmp_to_key(entitycompare)) |
|
734 | 734 | |
|
735 | 735 | # Sort changesets by date |
|
736 | 736 | |
|
737 | 737 | odd = set() |
|
738 | 738 | |
|
739 | 739 | def cscmp(l, r): |
|
740 | 740 | d = sum(l.date) - sum(r.date) |
|
741 | 741 | if d: |
|
742 | 742 | return d |
|
743 | 743 | |
|
744 | 744 | # detect vendor branches and initial commits on a branch |
|
745 | 745 | le = {} |
|
746 | 746 | for e in l.entries: |
|
747 | 747 | le[e.rcs] = e.revision |
|
748 | 748 | re = {} |
|
749 | 749 | for e in r.entries: |
|
750 | 750 | re[e.rcs] = e.revision |
|
751 | 751 | |
|
752 | 752 | d = 0 |
|
753 | 753 | for e in l.entries: |
|
754 | 754 | if re.get(e.rcs, None) == e.parent: |
|
755 | 755 | assert not d |
|
756 | 756 | d = 1 |
|
757 | 757 | break |
|
758 | 758 | |
|
759 | 759 | for e in r.entries: |
|
760 | 760 | if le.get(e.rcs, None) == e.parent: |
|
761 | 761 | if d: |
|
762 | 762 | odd.add((l, r)) |
|
763 | 763 | d = -1 |
|
764 | 764 | break |
|
765 | 765 | # By this point, the changesets are sufficiently compared that |
|
766 | 766 | # we don't really care about ordering. However, this leaves |
|
767 | 767 | # some race conditions in the tests, so we compare on the |
|
768 | 768 | # number of files modified, the files contained in each |
|
769 | 769 | # changeset, and the branchpoints in the change to ensure test |
|
770 | 770 | # output remains stable. |
|
771 | 771 | |
|
772 | 772 | # recommended replacement for cmp from |
|
773 | 773 | # https://docs.python.org/3.0/whatsnew/3.0.html |
|
774 | 774 | c = lambda x, y: (x > y) - (x < y) |
|
775 | 775 | # Sort bigger changes first. |
|
776 | 776 | if not d: |
|
777 | 777 | d = c(len(l.entries), len(r.entries)) |
|
778 | 778 | # Try sorting by filename in the change. |
|
779 | 779 | if not d: |
|
780 | 780 | d = c([e.file for e in l.entries], [e.file for e in r.entries]) |
|
781 | 781 | # Try and put changes without a branch point before ones with |
|
782 | 782 | # a branch point. |
|
783 | 783 | if not d: |
|
784 | 784 | d = c(len(l.branchpoints), len(r.branchpoints)) |
|
785 | 785 | return d |
|
786 | 786 | |
|
787 | 787 | changesets.sort(key=functools.cmp_to_key(cscmp)) |
|
788 | 788 | |
|
789 | 789 | # Collect tags |
|
790 | 790 | |
|
791 | 791 | globaltags = {} |
|
792 | 792 | for c in changesets: |
|
793 | 793 | for e in c.entries: |
|
794 | 794 | for tag in e.tags: |
|
795 | 795 | # remember which is the latest changeset to have this tag |
|
796 | 796 | globaltags[tag] = c |
|
797 | 797 | |
|
798 | 798 | for c in changesets: |
|
799 | 799 | tags = set() |
|
800 | 800 | for e in c.entries: |
|
801 | 801 | tags.update(e.tags) |
|
802 | 802 | # remember tags only if this is the latest changeset to have it |
|
803 | 803 | c.tags = sorted(tag for tag in tags if globaltags[tag] is c) |
|
804 | 804 | |
|
805 | 805 | # Find parent changesets, handle {{mergetobranch BRANCHNAME}} |
|
806 | 806 | # by inserting dummy changesets with two parents, and handle |
|
807 | 807 | # {{mergefrombranch BRANCHNAME}} by setting two parents. |
|
808 | 808 | |
|
809 | 809 | if mergeto is None: |
|
810 | 810 | mergeto = br'{{mergetobranch ([-\w]+)}}' |
|
811 | 811 | if mergeto: |
|
812 | 812 | mergeto = re.compile(mergeto) |
|
813 | 813 | |
|
814 | 814 | if mergefrom is None: |
|
815 | 815 | mergefrom = br'{{mergefrombranch ([-\w]+)}}' |
|
816 | 816 | if mergefrom: |
|
817 | 817 | mergefrom = re.compile(mergefrom) |
|
818 | 818 | |
|
819 | 819 | versions = {} # changeset index where we saw any particular file version |
|
820 | 820 | branches = {} # changeset index where we saw a branch |
|
821 | 821 | n = len(changesets) |
|
822 | 822 | i = 0 |
|
823 | 823 | while i < n: |
|
824 | 824 | c = changesets[i] |
|
825 | 825 | |
|
826 | 826 | for f in c.entries: |
|
827 | 827 | versions[(f.rcs, f.revision)] = i |
|
828 | 828 | |
|
829 | 829 | p = None |
|
830 | 830 | if c.branch in branches: |
|
831 | 831 | p = branches[c.branch] |
|
832 | 832 | else: |
|
833 | 833 | # first changeset on a new branch |
|
834 | 834 | # the parent is a changeset with the branch in its |
|
835 | 835 | # branchpoints such that it is the latest possible |
|
836 | 836 | # commit without any intervening, unrelated commits. |
|
837 | 837 | |
|
838 | 838 | for candidate in range(i): |
|
839 | 839 | if c.branch not in changesets[candidate].branchpoints: |
|
840 | 840 | if p is not None: |
|
841 | 841 | break |
|
842 | 842 | continue |
|
843 | 843 | p = candidate |
|
844 | 844 | |
|
845 | 845 | c.parents = [] |
|
846 | 846 | if p is not None: |
|
847 | 847 | p = changesets[p] |
|
848 | 848 | |
|
849 | 849 | # Ensure no changeset has a synthetic changeset as a parent. |
|
850 | 850 | while p.synthetic: |
|
851 | 851 | assert len(p.parents) <= 1, _( |
|
852 | 852 | b'synthetic changeset cannot have multiple parents' |
|
853 | 853 | ) |
|
854 | 854 | if p.parents: |
|
855 | 855 | p = p.parents[0] |
|
856 | 856 | else: |
|
857 | 857 | p = None |
|
858 | 858 | break |
|
859 | 859 | |
|
860 | 860 | if p is not None: |
|
861 | 861 | c.parents.append(p) |
|
862 | 862 | |
|
863 | 863 | if c.mergepoint: |
|
864 | 864 | if c.mergepoint == b'HEAD': |
|
865 | 865 | c.mergepoint = None |
|
866 | 866 | c.parents.append(changesets[branches[c.mergepoint]]) |
|
867 | 867 | |
|
868 | 868 | if mergefrom: |
|
869 | 869 | m = mergefrom.search(c.comment) |
|
870 | 870 | if m: |
|
871 | 871 | m = m.group(1) |
|
872 | 872 | if m == b'HEAD': |
|
873 | 873 | m = None |
|
874 | 874 | try: |
|
875 | 875 | candidate = changesets[branches[m]] |
|
876 | 876 | except KeyError: |
|
877 | 877 | ui.warn( |
|
878 | 878 | _( |
|
879 | 879 | b"warning: CVS commit message references " |
|
880 | 880 | b"non-existent branch %r:\n%s\n" |
|
881 | 881 | ) |
|
882 | 882 | % (pycompat.bytestr(m), c.comment) |
|
883 | 883 | ) |
|
884 | 884 | if m in branches and c.branch != m and not candidate.synthetic: |
|
885 | 885 | c.parents.append(candidate) |
|
886 | 886 | |
|
887 | 887 | if mergeto: |
|
888 | 888 | m = mergeto.search(c.comment) |
|
889 | 889 | if m: |
|
890 | 890 | if m.groups(): |
|
891 | 891 | m = m.group(1) |
|
892 | 892 | if m == b'HEAD': |
|
893 | 893 | m = None |
|
894 | 894 | else: |
|
895 | 895 | m = None # if no group found then merge to HEAD |
|
896 | 896 | if m in branches and c.branch != m: |
|
897 | 897 | # insert empty changeset for merge |
|
898 | 898 | cc = changeset( |
|
899 | 899 | author=c.author, |
|
900 | 900 | branch=m, |
|
901 | 901 | date=c.date, |
|
902 | 902 | comment=b'convert-repo: CVS merge from branch %s' |
|
903 | 903 | % c.branch, |
|
904 | 904 | entries=[], |
|
905 | 905 | tags=[], |
|
906 | 906 | parents=[changesets[branches[m]], c], |
|
907 | 907 | ) |
|
908 | 908 | changesets.insert(i + 1, cc) |
|
909 | 909 | branches[m] = i + 1 |
|
910 | 910 | |
|
911 | 911 | # adjust our loop counters now we have inserted a new entry |
|
912 | 912 | n += 1 |
|
913 | 913 | i += 2 |
|
914 | 914 | continue |
|
915 | 915 | |
|
916 | 916 | branches[c.branch] = i |
|
917 | 917 | i += 1 |
|
918 | 918 | |
|
919 | 919 | # Drop synthetic changesets (safe now that we have ensured no other |
|
920 | 920 | # changesets can have them as parents). |
|
921 | 921 | i = 0 |
|
922 | 922 | while i < len(changesets): |
|
923 | 923 | if changesets[i].synthetic: |
|
924 | 924 | del changesets[i] |
|
925 | 925 | else: |
|
926 | 926 | i += 1 |
|
927 | 927 | |
|
928 | 928 | # Number changesets |
|
929 | 929 | |
|
930 | 930 | for i, c in enumerate(changesets): |
|
931 | 931 | c.id = i + 1 |
|
932 | 932 | |
|
933 | 933 | if odd: |
|
934 | 934 | for l, r in odd: |
|
935 | 935 | if l.id is not None and r.id is not None: |
|
936 | 936 | ui.warn( |
|
937 | 937 | _(b'changeset %d is both before and after %d\n') |
|
938 | 938 | % (l.id, r.id) |
|
939 | 939 | ) |
|
940 | 940 | |
|
941 | 941 | ui.status(_(b'%d changeset entries\n') % len(changesets)) |
|
942 | 942 | |
|
943 | 943 | hook.hook(ui, None, b"cvschangesets", True, changesets=changesets) |
|
944 | 944 | |
|
945 | 945 | return changesets |
|
946 | 946 | |
|
947 | 947 | |
|
948 | 948 | def debugcvsps(ui, *args, **opts): |
|
949 | 949 | """Read CVS rlog for current directory or named path in |
|
950 | 950 | repository, and convert the log to changesets based on matching |
|
951 | 951 | commit log entries and dates. |
|
952 | 952 | """ |
|
953 | 953 | opts = pycompat.byteskwargs(opts) |
|
954 | 954 | if opts[b"new_cache"]: |
|
955 | 955 | cache = b"write" |
|
956 | 956 | elif opts[b"update_cache"]: |
|
957 | 957 | cache = b"update" |
|
958 | 958 | else: |
|
959 | 959 | cache = None |
|
960 | 960 | |
|
961 | 961 | revisions = opts[b"revisions"] |
|
962 | 962 | |
|
963 | 963 | try: |
|
964 | 964 | if args: |
|
965 | 965 | log = [] |
|
966 | 966 | for d in args: |
|
967 | 967 | log += createlog(ui, d, root=opts[b"root"], cache=cache) |
|
968 | 968 | else: |
|
969 | 969 | log = createlog(ui, root=opts[b"root"], cache=cache) |
|
970 | 970 | except logerror as e: |
|
971 | 971 | ui.write(b"%r\n" % e) |
|
972 | 972 | return |
|
973 | 973 | |
|
974 | 974 | changesets = createchangeset(ui, log, opts[b"fuzz"]) |
|
975 | 975 | del log |
|
976 | 976 | |
|
977 | 977 | # Print changesets (optionally filtered) |
|
978 | 978 | |
|
979 | 979 | off = len(revisions) |
|
980 | 980 | branches = {} # latest version number in each branch |
|
981 | 981 | ancestors = {} # parent branch |
|
982 | 982 | for cs in changesets: |
|
983 | 983 | |
|
984 | 984 | if opts[b"ancestors"]: |
|
985 | 985 | if cs.branch not in branches and cs.parents and cs.parents[0].id: |
|
986 | 986 | ancestors[cs.branch] = ( |
|
987 | 987 | changesets[cs.parents[0].id - 1].branch, |
|
988 | 988 | cs.parents[0].id, |
|
989 | 989 | ) |
|
990 | 990 | branches[cs.branch] = cs.id |
|
991 | 991 | |
|
992 | 992 | # limit by branches |
|
993 | 993 | if ( |
|
994 | 994 | opts[b"branches"] |
|
995 | 995 | and (cs.branch or b'HEAD') not in opts[b"branches"] |
|
996 | 996 | ): |
|
997 | 997 | continue |
|
998 | 998 | |
|
999 | 999 | if not off: |
|
1000 | 1000 | # Note: trailing spaces on several lines here are needed to have |
|
1001 | 1001 | # bug-for-bug compatibility with cvsps. |
|
1002 | 1002 | ui.write(b'---------------------\n') |
|
1003 | 1003 | ui.write((b'PatchSet %d \n' % cs.id)) |
|
1004 | 1004 | ui.write( |
|
1005 | 1005 | ( |
|
1006 | 1006 | b'Date: %s\n' |
|
1007 | 1007 | % dateutil.datestr(cs.date, b'%Y/%m/%d %H:%M:%S %1%2') |
|
1008 | 1008 | ) |
|
1009 | 1009 | ) |
|
1010 | 1010 | ui.write((b'Author: %s\n' % cs.author)) |
|
1011 | 1011 | ui.write((b'Branch: %s\n' % (cs.branch or b'HEAD'))) |
|
1012 | 1012 | ui.write( |
|
1013 | 1013 | ( |
|
1014 | 1014 | b'Tag%s: %s \n' |
|
1015 | 1015 | % ( |
|
1016 | 1016 | [b'', b's'][len(cs.tags) > 1], |
|
1017 | 1017 | b','.join(cs.tags) or b'(none)', |
|
1018 | 1018 | ) |
|
1019 | 1019 | ) |
|
1020 | 1020 | ) |
|
1021 | 1021 | if cs.branchpoints: |
|
1022 | 1022 | ui.writenoi18n( |
|
1023 | 1023 | b'Branchpoints: %s \n' % b', '.join(sorted(cs.branchpoints)) |
|
1024 | 1024 | ) |
|
1025 | 1025 | if opts[b"parents"] and cs.parents: |
|
1026 | 1026 | if len(cs.parents) > 1: |
|
1027 | 1027 | ui.write( |
|
1028 | 1028 | ( |
|
1029 | 1029 | b'Parents: %s\n' |
|
1030 | 1030 | % (b','.join([(b"%d" % p.id) for p in cs.parents])) |
|
1031 | 1031 | ) |
|
1032 | 1032 | ) |
|
1033 | 1033 | else: |
|
1034 | 1034 | ui.write((b'Parent: %d\n' % cs.parents[0].id)) |
|
1035 | 1035 | |
|
1036 | 1036 | if opts[b"ancestors"]: |
|
1037 | 1037 | b = cs.branch |
|
1038 | 1038 | r = [] |
|
1039 | 1039 | while b: |
|
1040 | 1040 | b, c = ancestors[b] |
|
1041 | 1041 | r.append(b'%s:%d:%d' % (b or b"HEAD", c, branches[b])) |
|
1042 | 1042 | if r: |
|
1043 | 1043 | ui.write((b'Ancestors: %s\n' % (b','.join(r)))) |
|
1044 | 1044 | |
|
1045 | 1045 | ui.writenoi18n(b'Log:\n') |
|
1046 | 1046 | ui.write(b'%s\n\n' % cs.comment) |
|
1047 | 1047 | ui.writenoi18n(b'Members: \n') |
|
1048 | 1048 | for f in cs.entries: |
|
1049 | 1049 | fn = f.file |
|
1050 | 1050 | if fn.startswith(opts[b"prefix"]): |
|
1051 | 1051 | fn = fn[len(opts[b"prefix"]) :] |
|
1052 | 1052 | ui.write( |
|
1053 | 1053 | b'\t%s:%s->%s%s \n' |
|
1054 | 1054 | % ( |
|
1055 | 1055 | fn, |
|
1056 | 1056 | b'.'.join([b"%d" % x for x in f.parent]) or b'INITIAL', |
|
1057 | 1057 | b'.'.join([(b"%d" % x) for x in f.revision]), |
|
1058 | 1058 | [b'', b'(DEAD)'][f.dead], |
|
1059 | 1059 | ) |
|
1060 | 1060 | ) |
|
1061 | 1061 | ui.write(b'\n') |
|
1062 | 1062 | |
|
1063 | 1063 | # have we seen the start tag? |
|
1064 | 1064 | if revisions and off: |
|
1065 | 1065 | if revisions[0] == (b"%d" % cs.id) or revisions[0] in cs.tags: |
|
1066 | 1066 | off = False |
|
1067 | 1067 | |
|
1068 | 1068 | # see if we reached the end tag |
|
1069 | 1069 | if len(revisions) > 1 and not off: |
|
1070 | 1070 | if revisions[1] == (b"%d" % cs.id) or revisions[1] in cs.tags: |
|
1071 | 1071 | break |
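The only substantive change in this file is at lines 201-203 above: the cache-validation probe moved from util.safehasattr(e, b'...') to the built-in hasattr(e, '...'). The following is a minimal, self-contained sketch of that pattern, not part of the diff; LogEntry and load_compatible_cache are hypothetical stand-ins for cvsps.logentry and the cache-reading code in createlog().

import pickle


class LogEntry:
    """Hypothetical stand-in for cvsps.logentry."""

    def __init__(self, **attrs):
        self.__dict__.update(attrs)


def load_compatible_cache(path):
    """Load a pickled entry list, discarding it entirely if any entry
    predates an attribute the newer code relies on (the same probe as
    lines 199-207 of the diff above)."""
    with open(path, 'rb') as f:
        entries = pickle.load(f)
    for e in entries:
        # hasattr() on Python 3 takes str attribute names, hence the
        # plain-str names in the new code.
        if not (
            hasattr(e, 'branchpoints')
            and hasattr(e, 'commitid')
            and hasattr(e, 'mergepoint')
        ):
            return []  # stale cache: ignore it, as the diff does
    return entries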
@@ -1,1016 +1,1016 @@ hgext/fsmonitor/__init__.py
|
1 | 1 | # __init__.py - fsmonitor initialization and overrides |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2013-2016 Facebook, Inc. |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | '''Faster status operations with the Watchman file monitor (EXPERIMENTAL) |
|
9 | 9 | |
|
10 | 10 | Integrates the file-watching program Watchman with Mercurial to produce faster |
|
11 | 11 | status results. |
|
12 | 12 | |
|
13 | 13 | On a particular Linux system, for a real-world repository with over 400,000 |
|
14 | 14 | files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same |
|
15 | 15 | system, with fsmonitor it takes about 0.3 seconds. |
|
16 | 16 | |
|
17 | 17 | fsmonitor requires no configuration -- it will tell Watchman about your |
|
18 | 18 | repository as necessary. You'll need to install Watchman from |
|
19 | 19 | https://facebook.github.io/watchman/ and make sure it is in your PATH. |
|
20 | 20 | |
|
21 | 21 | fsmonitor is incompatible with the largefiles and eol extensions, and |
|
22 | 22 | will disable itself if any of those are active. |
|
23 | 23 | |
|
24 | 24 | The following configuration options exist: |
|
25 | 25 | |
|
26 | 26 | :: |
|
27 | 27 | |
|
28 | 28 | [fsmonitor] |
|
29 | 29 | mode = {off, on, paranoid} |
|
30 | 30 | |
|
31 | 31 | When `mode = off`, fsmonitor will disable itself (similar to not loading the |
|
32 | 32 | extension at all). When `mode = on`, fsmonitor will be enabled (the default). |
|
33 | 33 | When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem, |
|
34 | 34 | and ensure that the results are consistent. |
|
35 | 35 | |
|
36 | 36 | :: |
|
37 | 37 | |
|
38 | 38 | [fsmonitor] |
|
39 | 39 | timeout = (float) |
|
40 | 40 | |
|
41 | 41 | A value, in seconds, that determines how long fsmonitor will wait for Watchman |
|
42 | 42 | to return results. Defaults to `2.0`. |
|
43 | 43 | |
|
44 | 44 | :: |
|
45 | 45 | |
|
46 | 46 | [fsmonitor] |
|
47 | 47 | blacklistusers = (list of userids) |
|
48 | 48 | |
|
49 | 49 | A list of usernames for which fsmonitor will disable itself altogether. |
|
50 | 50 | |
|
51 | 51 | :: |
|
52 | 52 | |
|
53 | 53 | [fsmonitor] |
|
54 | 54 | walk_on_invalidate = (boolean) |
|
55 | 55 | |
|
56 | 56 | Whether or not to walk the whole repo ourselves when our cached state has been |
|
57 | 57 | invalidated, for example when Watchman has been restarted or .hgignore rules |
|
58 | 58 | have been changed. Walking the repo in that case can result in competing for |
|
59 | 59 | I/O with Watchman. For large repos it is recommended to set this value to |
|
60 | 60 | false. You may wish to set this to true if you have a very fast filesystem |
|
61 | 61 | that can outpace the IPC overhead of getting the result data for the full repo |
|
62 | 62 | from Watchman. Defaults to false. |
|
63 | 63 | |
|
64 | 64 | :: |
|
65 | 65 | |
|
66 | 66 | [fsmonitor] |
|
67 | 67 | warn_when_unused = (boolean) |
|
68 | 68 | |
|
69 | 69 | Whether to print a warning during certain operations when fsmonitor would be |
|
70 | 70 | beneficial to performance but isn't enabled. |
|
71 | 71 | |
|
72 | 72 | :: |
|
73 | 73 | |
|
74 | 74 | [fsmonitor] |
|
75 | 75 | warn_update_file_count = (integer) |
|
76 | 76 | # or when mercurial is built with rust support |
|
77 | 77 | warn_update_file_count_rust = (integer) |
|
78 | 78 | |
|
79 | 79 | If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will |
|
80 | 80 | be printed during working directory updates if this many files will be |
|
81 | 81 | created. |
|
82 | 82 | ''' |
|
83 | 83 | |
|
84 | 84 | # Platforms Supported |
|
85 | 85 | # =================== |
|
86 | 86 | # |
|
87 | 87 | # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably, |
|
88 | 88 | # even under severe loads. |
|
89 | 89 | # |
|
90 | 90 | # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor |
|
91 | 91 | # turned on, on case-insensitive HFS+. There has been a reasonable amount of |
|
92 | 92 | # user testing under normal loads. |
|
93 | 93 | # |
|
94 | 94 | # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but |
|
95 | 95 | # very little testing has been done. |
|
96 | 96 | # |
|
97 | 97 | # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet. |
|
98 | 98 | # |
|
99 | 99 | # Known Issues |
|
100 | 100 | # ============ |
|
101 | 101 | # |
|
102 | 102 | # * fsmonitor will disable itself if any of the following extensions are |
|
103 | 103 | # enabled: largefiles, inotify, eol; or if the repository has subrepos. |
|
104 | 104 | # * fsmonitor will produce incorrect results if nested repos that are not |
|
105 | 105 | # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`. |
|
106 | 106 | # |
|
107 | 107 | # The issues related to nested repos and subrepos are probably not fundamental |
|
108 | 108 | # ones. Patches to fix them are welcome. |
|
109 | 109 | |
|
110 | 110 | |
|
111 | 111 | import codecs |
|
112 | 112 | import os |
|
113 | 113 | import stat |
|
114 | 114 | import sys |
|
115 | 115 | import tempfile |
|
116 | 116 | import weakref |
|
117 | 117 | |
|
118 | 118 | from mercurial.i18n import _ |
|
119 | 119 | from mercurial.node import hex |
|
120 | 120 | |
|
121 | 121 | from mercurial.pycompat import open |
|
122 | 122 | from mercurial import ( |
|
123 | 123 | context, |
|
124 | 124 | encoding, |
|
125 | 125 | error, |
|
126 | 126 | extensions, |
|
127 | 127 | localrepo, |
|
128 | 128 | merge, |
|
129 | 129 | pathutil, |
|
130 | 130 | pycompat, |
|
131 | 131 | registrar, |
|
132 | 132 | scmutil, |
|
133 | 133 | util, |
|
134 | 134 | ) |
|
135 | 135 | |
|
136 | 136 | # no-check-code because we're accessing private information only public in pure |
|
137 | 137 | from mercurial.pure import parsers |
|
138 | 138 | from mercurial import match as matchmod |
|
139 | 139 | from mercurial.utils import ( |
|
140 | 140 | hashutil, |
|
141 | 141 | stringutil, |
|
142 | 142 | ) |
|
143 | 143 | |
|
144 | 144 | from . import ( |
|
145 | 145 | pywatchman, |
|
146 | 146 | state, |
|
147 | 147 | watchmanclient, |
|
148 | 148 | ) |
|
149 | 149 | |
|
150 | 150 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for |
|
151 | 151 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should |
|
152 | 152 | # be specifying the version(s) of Mercurial they are tested with, or |
|
153 | 153 | # leave the attribute unspecified. |
|
154 | 154 | testedwith = b'ships-with-hg-core' |
|
155 | 155 | |
|
156 | 156 | configtable = {} |
|
157 | 157 | configitem = registrar.configitem(configtable) |
|
158 | 158 | |
|
159 | 159 | configitem( |
|
160 | 160 | b'fsmonitor', |
|
161 | 161 | b'mode', |
|
162 | 162 | default=b'on', |
|
163 | 163 | ) |
|
164 | 164 | configitem( |
|
165 | 165 | b'fsmonitor', |
|
166 | 166 | b'walk_on_invalidate', |
|
167 | 167 | default=False, |
|
168 | 168 | ) |
|
169 | 169 | configitem( |
|
170 | 170 | b'fsmonitor', |
|
171 | 171 | b'timeout', |
|
172 | 172 | default=b'2', |
|
173 | 173 | ) |
|
174 | 174 | configitem( |
|
175 | 175 | b'fsmonitor', |
|
176 | 176 | b'blacklistusers', |
|
177 | 177 | default=list, |
|
178 | 178 | ) |
|
179 | 179 | configitem( |
|
180 | 180 | b'fsmonitor', |
|
181 | 181 | b'watchman_exe', |
|
182 | 182 | default=b'watchman', |
|
183 | 183 | ) |
|
184 | 184 | configitem( |
|
185 | 185 | b'fsmonitor', |
|
186 | 186 | b'verbose', |
|
187 | 187 | default=True, |
|
188 | 188 | experimental=True, |
|
189 | 189 | ) |
|
190 | 190 | configitem( |
|
191 | 191 | b'experimental', |
|
192 | 192 | b'fsmonitor.transaction_notify', |
|
193 | 193 | default=False, |
|
194 | 194 | ) |
|
195 | 195 | |
|
196 | 196 | # This extension is incompatible with the following blacklisted extensions |
|
197 | 197 | # and will disable itself when encountering one of these: |
|
198 | 198 | _blacklist = [b'largefiles', b'eol'] |
|
199 | 199 | |
|
200 | 200 | |
|
201 | 201 | def debuginstall(ui, fm): |
|
202 | 202 | fm.write( |
|
203 | 203 | b"fsmonitor-watchman", |
|
204 | 204 | _(b"fsmonitor checking for watchman binary... (%s)\n"), |
|
205 | 205 | ui.configpath(b"fsmonitor", b"watchman_exe"), |
|
206 | 206 | ) |
|
207 | 207 | root = tempfile.mkdtemp() |
|
208 | 208 | c = watchmanclient.client(ui, root) |
|
209 | 209 | err = None |
|
210 | 210 | try: |
|
211 | 211 | v = c.command(b"version") |
|
212 | 212 | fm.write( |
|
213 | 213 | b"fsmonitor-watchman-version", |
|
214 | 214 | _(b" watchman binary version %s\n"), |
|
215 | 215 | pycompat.bytestr(v["version"]), |
|
216 | 216 | ) |
|
217 | 217 | except watchmanclient.Unavailable as e: |
|
218 | 218 | err = stringutil.forcebytestr(e) |
|
219 | 219 | fm.condwrite( |
|
220 | 220 | err, |
|
221 | 221 | b"fsmonitor-watchman-error", |
|
222 | 222 | _(b" watchman binary missing or broken: %s\n"), |
|
223 | 223 | err, |
|
224 | 224 | ) |
|
225 | 225 | return 1 if err else 0 |
|
226 | 226 | |
|
227 | 227 | |
|
228 | 228 | def _handleunavailable(ui, state, ex): |
|
229 | 229 | """Exception handler for Watchman interaction exceptions""" |
|
230 | 230 | if isinstance(ex, watchmanclient.Unavailable): |
|
231 | 231 | # experimental config: fsmonitor.verbose |
|
232 | 232 | if ex.warn and ui.configbool(b'fsmonitor', b'verbose'): |
|
233 | 233 | if b'illegal_fstypes' not in stringutil.forcebytestr(ex): |
|
234 | 234 | ui.warn(stringutil.forcebytestr(ex) + b'\n') |
|
235 | 235 | if ex.invalidate: |
|
236 | 236 | state.invalidate() |
|
237 | 237 | # experimental config: fsmonitor.verbose |
|
238 | 238 | if ui.configbool(b'fsmonitor', b'verbose'): |
|
239 | 239 | ui.log( |
|
240 | 240 | b'fsmonitor', |
|
241 | 241 | b'Watchman unavailable: %s\n', |
|
242 | 242 | stringutil.forcebytestr(ex.msg), |
|
243 | 243 | ) |
|
244 | 244 | else: |
|
245 | 245 | ui.log( |
|
246 | 246 | b'fsmonitor', |
|
247 | 247 | b'Watchman exception: %s\n', |
|
248 | 248 | stringutil.forcebytestr(ex), |
|
249 | 249 | ) |
|
250 | 250 | |
|
251 | 251 | |
|
252 | 252 | def _hashignore(ignore): |
|
253 | 253 | """Calculate hash for ignore patterns and filenames |
|
254 | 254 | |
|
255 | 255 | If this information changes between Mercurial invocations, we can't |
|
256 | 256 | rely on Watchman information anymore and have to re-scan the working |
|
257 | 257 | copy. |
|
258 | 258 | |
|
259 | 259 | """ |
|
260 | 260 | sha1 = hashutil.sha1() |
|
261 | 261 | sha1.update(pycompat.byterepr(ignore)) |
|
262 | 262 | return pycompat.sysbytes(sha1.hexdigest()) |
|
263 | 263 | |
|
264 | 264 | |
|
265 | 265 | _watchmanencoding = pywatchman.encoding.get_local_encoding() |
|
266 | 266 | _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() |
|
267 | 267 | _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding) |
|
268 | 268 | |
|
269 | 269 | |
|
270 | 270 | def _watchmantofsencoding(path): |
|
271 | 271 | """Fix path to match watchman and local filesystem encoding |
|
272 | 272 | |
|
273 | 273 | watchman's paths encoding can differ from filesystem encoding. For example, |
|
274 | 274 | on Windows, it's always utf-8. |
|
275 | 275 | """ |
|
276 | 276 | try: |
|
277 | 277 | decoded = path.decode(_watchmanencoding) |
|
278 | 278 | except UnicodeDecodeError as e: |
|
279 | 279 | raise error.Abort( |
|
280 | 280 | stringutil.forcebytestr(e), hint=b'watchman encoding error' |
|
281 | 281 | ) |
|
282 | 282 | |
|
283 | 283 | try: |
|
284 | 284 | encoded = decoded.encode(_fsencoding, 'strict') |
|
285 | 285 | except UnicodeEncodeError as e: |
|
286 | 286 | raise error.Abort(stringutil.forcebytestr(e)) |
|
287 | 287 | |
|
288 | 288 | return encoded |
|
289 | 289 | |
|
290 | 290 | |
|
291 | 291 | def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): |
|
292 | 292 | """Replacement for dirstate.walk, hooking into Watchman. |
|
293 | 293 | |
|
294 | 294 | Whenever full is False, ignored is False, and the Watchman client is |
|
295 | 295 | available, use Watchman combined with saved state to possibly return only a |
|
296 | 296 | subset of files.""" |
|
297 | 297 | |
|
298 | 298 | def bail(reason): |
|
299 | 299 | self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason) |
|
300 | 300 | return orig(match, subrepos, unknown, ignored, full=True) |
|
301 | 301 | |
|
302 | 302 | if full: |
|
303 | 303 | return bail(b'full rewalk requested') |
|
304 | 304 | if ignored: |
|
305 | 305 | return bail(b'listing ignored files') |
|
306 | 306 | if not self._watchmanclient.available(): |
|
307 | 307 | return bail(b'client unavailable') |
|
308 | 308 | state = self._fsmonitorstate |
|
309 | 309 | clock, ignorehash, notefiles = state.get() |
|
310 | 310 | if not clock: |
|
311 | 311 | if state.walk_on_invalidate: |
|
312 | 312 | return bail(b'no clock') |
|
313 | 313 | # Initial NULL clock value, see |
|
314 | 314 | # https://facebook.github.io/watchman/docs/clockspec.html |
|
315 | 315 | clock = b'c:0:0' |
|
316 | 316 | notefiles = [] |
|
317 | 317 | |
|
318 | 318 | ignore = self._ignore |
|
319 | 319 | dirignore = self._dirignore |
|
320 | 320 | if unknown: |
|
321 | 321 | if _hashignore(ignore) != ignorehash and clock != b'c:0:0': |
|
322 | 322 | # ignore list changed -- can't rely on Watchman state any more |
|
323 | 323 | if state.walk_on_invalidate: |
|
324 | 324 | return bail(b'ignore rules changed') |
|
325 | 325 | notefiles = [] |
|
326 | 326 | clock = b'c:0:0' |
|
327 | 327 | else: |
|
328 | 328 | # always ignore |
|
329 | 329 | ignore = util.always |
|
330 | 330 | dirignore = util.always |
|
331 | 331 | |
|
332 | 332 | matchfn = match.matchfn |
|
333 | 333 | matchalways = match.always() |
|
334 | 334 | dmap = self._map |
|
335 | | if util.safehasattr(dmap, b'_map'): |

| 335 | if hasattr(dmap, '_map'): |
|
336 | 336 | # for better performance, directly access the inner dirstate map if the |
|
337 | 337 | # standard dirstate implementation is in use. |
|
338 | 338 | dmap = dmap._map |
|
339 | 339 | |
|
340 | 340 | has_mtime = parsers.DIRSTATE_V2_HAS_MTIME |
|
341 | 341 | mtime_is_ambiguous = parsers.DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS |
|
342 | 342 | mask = has_mtime | mtime_is_ambiguous |
|
343 | 343 | |
|
344 | 344 | # All entries that may not be clean |
|
345 | 345 | nonnormalset = { |
|
346 | 346 | f |
|
347 | 347 | for f, e in self._map.items() |
|
348 | 348 | if not e.maybe_clean |
|
349 | 349 | # same as "not has_mtime or has_ambiguous_mtime", but factored to only

350 | 350 | # need a single access to flags for performance.

351 | 351 | # `mask` removes all irrelevant bits, then we flip the `mtime` bit so

352 | 352 | # its `true` value is NOT having an mtime, then check if either bit
|
353 | 353 | # is set. |
|
354 | 354 | or bool((e.v2_data()[0] & mask) ^ has_mtime) |
|
355 | 355 | } |
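
The mask trick in that comprehension folds two flag tests into a single read: masking keeps only the two mtime bits, and XOR-ing with has_mtime flips that bit so the result is nonzero exactly when the mtime is missing or ambiguous. The same logic in isolation (bit positions are illustrative):

    HAS_MTIME = 0b01
    MTIME_AMBIGUOUS = 0b10
    mask = HAS_MTIME | MTIME_AMBIGUOUS

    def needs_visit(flags):
        # nonzero iff the mtime is absent, or present but ambiguous
        return bool((flags & mask) ^ HAS_MTIME)

    assert needs_visit(0)                            # no mtime
    assert needs_visit(HAS_MTIME | MTIME_AMBIGUOUS)  # ambiguous mtime
    assert not needs_visit(HAS_MTIME)                # clean mtime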
|
356 | 356 | |
|
357 | 357 | copymap = self._map.copymap |
|
358 | 358 | getkind = stat.S_IFMT |
|
359 | 359 | dirkind = stat.S_IFDIR |
|
360 | 360 | regkind = stat.S_IFREG |
|
361 | 361 | lnkkind = stat.S_IFLNK |
|
362 | 362 | join = self._join |
|
363 | 363 | normcase = util.normcase |
|
364 | 364 | fresh_instance = False |
|
365 | 365 | |
|
366 | 366 | exact = skipstep3 = False |
|
367 | 367 | if match.isexact(): # match.exact |
|
368 | 368 | exact = True |
|
369 | 369 | dirignore = util.always # skip step 2 |
|
370 | 370 | elif match.prefix(): # match.match, no patterns |
|
371 | 371 | skipstep3 = True |
|
372 | 372 | |
|
373 | 373 | if not exact and self._checkcase: |
|
374 | 374 | # note that even though we could receive directory entries, we're only |
|
375 | 375 | # interested in checking if a file with the same name exists. So only |
|
376 | 376 | # normalize files if possible. |
|
377 | 377 | normalize = self._normalizefile |
|
378 | 378 | skipstep3 = False |
|
379 | 379 | else: |
|
380 | 380 | normalize = None |
|
381 | 381 | |
|
382 | 382 | # step 1: find all explicit files |
|
383 | 383 | results, work, dirsnotfound = self._walkexplicit(match, subrepos) |
|
384 | 384 | |
|
385 | 385 | skipstep3 = skipstep3 and not (work or dirsnotfound) |
|
386 | 386 | work = [d for d in work if not dirignore(d[0])] |
|
387 | 387 | |
|
388 | 388 | if not work and (exact or skipstep3): |
|
389 | 389 | for s in subrepos: |
|
390 | 390 | del results[s] |
|
391 | 391 | del results[b'.hg'] |
|
392 | 392 | return results |
|
393 | 393 | |
|
394 | 394 | # step 2: query Watchman |
|
395 | 395 | try: |
|
396 | 396 | # Use the user-configured timeout for the query. |
|
397 | 397 | # Add a little slack over the top of the user query to allow for |
|
398 | 398 | # overheads while transferring the data |
|
399 | 399 | self._watchmanclient.settimeout(state.timeout + 0.1) |
|
400 | 400 | result = self._watchmanclient.command( |
|
401 | 401 | b'query', |
|
402 | 402 | { |
|
403 | 403 | b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'], |
|
404 | 404 | b'since': clock, |
|
405 | 405 | b'expression': [ |
|
406 | 406 | b'not', |
|
407 | 407 | [ |
|
408 | 408 | b'anyof', |
|
409 | 409 | [b'dirname', b'.hg'], |
|
410 | 410 | [b'name', b'.hg', b'wholename'], |
|
411 | 411 | ], |
|
412 | 412 | ], |
|
413 | 413 | b'sync_timeout': int(state.timeout * 1000), |
|
414 | 414 | b'empty_on_fresh_instance': state.walk_on_invalidate, |
|
415 | 415 | }, |
|
416 | 416 | ) |
|
417 | 417 | except Exception as ex: |
|
418 | 418 | _handleunavailable(self._ui, state, ex) |
|
419 | 419 | self._watchmanclient.clearconnection() |
|
420 | 420 | return bail(b'exception during run') |
|
421 | 421 | else: |
|
422 | 422 | # We need to propagate the last observed clock up so that we |
|
423 | 423 | # can use it for our next query |
|
424 | 424 | state.setlastclock(pycompat.sysbytes(result[b'clock'])) |
|
425 | 425 | if result[b'is_fresh_instance']: |
|
426 | 426 | if state.walk_on_invalidate: |
|
427 | 427 | state.invalidate() |
|
428 | 428 | return bail(b'fresh instance') |
|
429 | 429 | fresh_instance = True |
|
430 | 430 | # Ignore any prior notable files from the state info
|
431 | 431 | notefiles = [] |
|
432 | 432 | |
|
433 | 433 | # for file paths which require normalization and we encounter a case |
|
434 | 434 | # collision, we store our own foldmap |
|
435 | 435 | if normalize: |
|
436 | 436 | foldmap = {normcase(k): k for k in results} |
|
437 | 437 | |
|
438 | 438 | switch_slashes = pycompat.ossep == b'\\' |
|
439 | 439 | # The order of the results is, strictly speaking, undefined. |
|
440 | 440 | # For case changes on a case insensitive filesystem we may receive |
|
441 | 441 | # two entries, one with exists=True and another with exists=False. |
|
442 | 442 | # The exists=True entries in the same response should be interpreted |
|
443 | 443 | # as being happens-after the exists=False entries due to the way that |
|
444 | 444 | # Watchman tracks files. We use this property to reconcile deletes |
|
445 | 445 | # for name case changes. |
|
446 | 446 | for entry in result[b'files']: |
|
447 | 447 | fname = entry[b'name'] |
|
448 | 448 | |
|
449 | 449 | # Watchman always gives us a str. Normalize to bytes on Python 3
|
450 | 450 | # using Watchman's encoding, if needed. |
|
451 | 451 | if not isinstance(fname, bytes): |
|
452 | 452 | fname = fname.encode(_watchmanencoding) |
|
453 | 453 | |
|
454 | 454 | if _fixencoding: |
|
455 | 455 | fname = _watchmantofsencoding(fname) |
|
456 | 456 | |
|
457 | 457 | if switch_slashes: |
|
458 | 458 | fname = fname.replace(b'\\', b'/') |
|
459 | 459 | if normalize: |
|
460 | 460 | normed = normcase(fname) |
|
461 | 461 | fname = normalize(fname, True, True) |
|
462 | 462 | foldmap[normed] = fname |
|
463 | 463 | fmode = entry[b'mode'] |
|
464 | 464 | fexists = entry[b'exists'] |
|
465 | 465 | kind = getkind(fmode) |
|
466 | 466 | |
|
467 | 467 | if b'/.hg/' in fname or fname.endswith(b'/.hg'): |
|
468 | 468 | return bail(b'nested-repo-detected') |
|
469 | 469 | |
|
470 | 470 | if not fexists: |
|
471 | 471 | # if marked as deleted and we don't already have a change |
|
472 | 472 | # record, mark it as deleted. If we already have an entry |
|
473 | 473 | # for fname then it was either part of walkexplicit or was |
|
474 | 474 | # an earlier result that was a case change |
|
475 | 475 | if ( |
|
476 | 476 | fname not in results |
|
477 | 477 | and fname in dmap |
|
478 | 478 | and (matchalways or matchfn(fname)) |
|
479 | 479 | ): |
|
480 | 480 | results[fname] = None |
|
481 | 481 | elif kind == dirkind: |
|
482 | 482 | if fname in dmap and (matchalways or matchfn(fname)): |
|
483 | 483 | results[fname] = None |
|
484 | 484 | elif kind == regkind or kind == lnkkind: |
|
485 | 485 | if fname in dmap: |
|
486 | 486 | if matchalways or matchfn(fname): |
|
487 | 487 | results[fname] = entry |
|
488 | 488 | elif (matchalways or matchfn(fname)) and not ignore(fname): |
|
489 | 489 | results[fname] = entry |
|
490 | 490 | elif fname in dmap and (matchalways or matchfn(fname)): |
|
491 | 491 | results[fname] = None |
|
492 | 492 | |
|
493 | 493 | # step 3: query notable files we don't already know about |
|
494 | 494 | # XXX try not to iterate over the entire dmap |
|
495 | 495 | if normalize: |
|
496 | 496 | # any notable files that have changed case will already be handled |
|
497 | 497 | # above, so just check membership in the foldmap |
|
498 | 498 | notefiles = { |
|
499 | 499 | normalize(f, True, True) |
|
500 | 500 | for f in notefiles |
|
501 | 501 | if normcase(f) not in foldmap |
|
502 | 502 | } |
|
503 | 503 | visit = { |
|
504 | 504 | f |
|
505 | 505 | for f in notefiles |
|
506 | 506 | if (f not in results and matchfn(f) and (f in dmap or not ignore(f))) |
|
507 | 507 | } |
|
508 | 508 | |
|
509 | 509 | if not fresh_instance: |
|
510 | 510 | if matchalways: |
|
511 | 511 | visit.update(f for f in nonnormalset if f not in results) |
|
512 | 512 | visit.update(f for f in copymap if f not in results) |
|
513 | 513 | else: |
|
514 | 514 | visit.update( |
|
515 | 515 | f for f in nonnormalset if f not in results and matchfn(f) |
|
516 | 516 | ) |
|
517 | 517 | visit.update(f for f in copymap if f not in results and matchfn(f)) |
|
518 | 518 | else: |
|
519 | 519 | if matchalways: |
|
520 | 520 | visit.update(f for f, st in dmap.items() if f not in results) |
|
521 | 521 | visit.update(f for f in copymap if f not in results) |
|
522 | 522 | else: |
|
523 | 523 | visit.update( |
|
524 | 524 | f for f, st in dmap.items() if f not in results and matchfn(f) |
|
525 | 525 | ) |
|
526 | 526 | visit.update(f for f in copymap if f not in results and matchfn(f)) |
|
527 | 527 | |
|
528 | 528 | audit = pathutil.pathauditor(self._root, cached=True).check |
|
529 | 529 | auditpass = [f for f in visit if audit(f)] |
|
530 | 530 | auditpass.sort() |
|
531 | 531 | auditfail = visit.difference(auditpass) |
|
532 | 532 | for f in auditfail: |
|
533 | 533 | results[f] = None |
|
534 | 534 | |
|
535 | 535 | nf = iter(auditpass) |
|
536 | 536 | for st in util.statfiles([join(f) for f in auditpass]): |
|
537 | 537 | f = next(nf) |
|
538 | 538 | if st or f in dmap: |
|
539 | 539 | results[f] = st |
|
540 | 540 | |
|
541 | 541 | for s in subrepos: |
|
542 | 542 | del results[s] |
|
543 | 543 | del results[b'.hg'] |
|
544 | 544 | return results |
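
For reference, the step-2 "since" query above can be reproduced outside Mercurial with the pywatchman package. A sketch, assuming watchman is running; the repository path and the NULL clock are illustrative:

    import pywatchman

    client = pywatchman.client()
    # Ensure the root is watched; 'watch' in the reply is the watched root.
    watch = client.query('watch-project', '/path/to/repo')
    result = client.query('query', watch['watch'], {
        'fields': ['mode', 'mtime', 'size', 'exists', 'name'],
        'since': 'c:0:0',  # the NULL clock: report every file
        'expression': ['not', ['anyof',
                               ['dirname', '.hg'],
                               ['name', '.hg', 'wholename']]],
    })
    print(result['clock'], len(result['files']))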
|
545 | 545 | |
|
546 | 546 | |
|
547 | 547 | def overridestatus( |
|
548 | 548 | orig, |
|
549 | 549 | self, |
|
550 | 550 | node1=b'.', |
|
551 | 551 | node2=None, |
|
552 | 552 | match=None, |
|
553 | 553 | ignored=False, |
|
554 | 554 | clean=False, |
|
555 | 555 | unknown=False, |
|
556 | 556 | listsubrepos=False, |
|
557 | 557 | ): |
|
558 | 558 | listignored = ignored |
|
559 | 559 | listclean = clean |
|
560 | 560 | listunknown = unknown |
|
561 | 561 | |
|
562 | 562 | def _cmpsets(l1, l2): |
|
563 | 563 | try: |
|
564 | 564 | if b'FSMONITOR_LOG_FILE' in encoding.environ: |
|
565 | 565 | fn = encoding.environ[b'FSMONITOR_LOG_FILE'] |
|
566 | 566 | f = open(fn, b'wb') |
|
567 | 567 | else: |
|
568 | 568 | fn = b'fsmonitorfail.log' |
|
569 | 569 | f = self.vfs.open(fn, b'wb') |
|
570 | 570 | except (IOError, OSError): |
|
571 | 571 | self.ui.warn(_(b'warning: unable to write to %s\n') % fn) |
|
572 | 572 | return |
|
573 | 573 | |
|
574 | 574 | try: |
|
575 | 575 | for i, (s1, s2) in enumerate(zip(l1, l2)): |
|
576 | 576 | if set(s1) != set(s2): |
|
577 | 577 | f.write(b'sets at position %d are unequal\n' % i) |
|
578 | 578 | f.write(b'watchman returned: %r\n' % s1) |
|
579 | 579 | f.write(b'stat returned: %r\n' % s2) |
|
580 | 580 | finally: |
|
581 | 581 | f.close() |
|
582 | 582 | |
|
583 | 583 | if isinstance(node1, context.changectx): |
|
584 | 584 | ctx1 = node1 |
|
585 | 585 | else: |
|
586 | 586 | ctx1 = self[node1] |
|
587 | 587 | if isinstance(node2, context.changectx): |
|
588 | 588 | ctx2 = node2 |
|
589 | 589 | else: |
|
590 | 590 | ctx2 = self[node2] |
|
591 | 591 | |
|
592 | 592 | working = ctx2.rev() is None |
|
593 | 593 | parentworking = working and ctx1 == self[b'.'] |
|
594 | 594 | match = match or matchmod.always() |
|
595 | 595 | |
|
596 | 596 | # Maybe we can use this opportunity to update Watchman's state. |
|
597 | 597 | # Mercurial uses workingcommitctx and/or memctx to represent the part of |
|
598 | 598 | # the workingctx that is to be committed. So don't update the state in |
|
599 | 599 | # that case. |
|
600 | 600 | # HG_PENDING is set in the environment when the dirstate is being updated |
|
601 | 601 | # in the middle of a transaction; we must not update our state in that |
|
602 | 602 | # case, or we risk forgetting about changes in the working copy. |
|
603 | 603 | updatestate = ( |
|
604 | 604 | parentworking |
|
605 | 605 | and match.always() |
|
606 | 606 | and not isinstance(ctx2, (context.workingcommitctx, context.memctx)) |
|
607 | 607 | and b'HG_PENDING' not in encoding.environ |
|
608 | 608 | ) |
|
609 | 609 | |
|
610 | 610 | try: |
|
611 | 611 | if self._fsmonitorstate.walk_on_invalidate: |
|
612 | 612 | # Use a short timeout to query the current clock. If that |
|
613 | 613 | # takes too long then we assume that the service will be slow |
|
614 | 614 | # to answer our query. |
|
615 | 615 | # walk_on_invalidate indicates that we prefer to walk the |
|
616 | 616 | # tree ourselves because we can ignore portions that Watchman |
|
617 | 617 | # cannot and we tend to be faster in the warmer buffer cache |
|
618 | 618 | # cases. |
|
619 | 619 | self._watchmanclient.settimeout(0.1) |
|
620 | 620 | else: |
|
621 | 621 | # Give Watchman more time to potentially complete its walk |
|
622 | 622 | # and return the initial clock. In this mode we assume that |
|
623 | 623 | # the filesystem will be slower than parsing a potentially |
|
624 | 624 | # very large Watchman result set. |
|
625 | 625 | self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1) |
|
626 | 626 | startclock = self._watchmanclient.getcurrentclock() |
|
627 | 627 | except Exception as ex: |
|
628 | 628 | self._watchmanclient.clearconnection() |
|
629 | 629 | _handleunavailable(self.ui, self._fsmonitorstate, ex) |
|
630 | 630 | # boo, Watchman failed. bail |
|
631 | 631 | return orig( |
|
632 | 632 | node1, |
|
633 | 633 | node2, |
|
634 | 634 | match, |
|
635 | 635 | listignored, |
|
636 | 636 | listclean, |
|
637 | 637 | listunknown, |
|
638 | 638 | listsubrepos, |
|
639 | 639 | ) |
|
640 | 640 | |
|
641 | 641 | if updatestate: |
|
642 | 642 | # We need info about unknown files. This may make things slower the |
|
643 | 643 | # first time, but whatever. |
|
644 | 644 | stateunknown = True |
|
645 | 645 | else: |
|
646 | 646 | stateunknown = listunknown |
|
647 | 647 | |
|
648 | 648 | if updatestate: |
|
649 | 649 | ps = poststatus(startclock) |
|
650 | 650 | self.addpostdsstatus(ps) |
|
651 | 651 | |
|
652 | 652 | r = orig( |
|
653 | 653 | node1, node2, match, listignored, listclean, stateunknown, listsubrepos |
|
654 | 654 | ) |
|
655 | 655 | modified, added, removed, deleted, unknown, ignored, clean = r |
|
656 | 656 | |
|
657 | 657 | if not listunknown: |
|
658 | 658 | unknown = [] |
|
659 | 659 | |
|
660 | 660 | # don't do paranoid checks if we're not going to query Watchman anyway |
|
661 | 661 | full = listclean or match.traversedir is not None |
|
662 | 662 | if self._fsmonitorstate.mode == b'paranoid' and not full: |
|
663 | 663 | # run status again and fall back to the old walk this time |
|
664 | 664 | self.dirstate._fsmonitordisable = True |
|
665 | 665 | |
|
666 | 666 | # shut the UI up |
|
667 | 667 | quiet = self.ui.quiet |
|
668 | 668 | self.ui.quiet = True |
|
669 | 669 | fout, ferr = self.ui.fout, self.ui.ferr |
|
670 | 670 | self.ui.fout = self.ui.ferr = open(os.devnull, b'wb') |
|
671 | 671 | |
|
672 | 672 | try: |
|
673 | 673 | rv2 = orig( |
|
674 | 674 | node1, |
|
675 | 675 | node2, |
|
676 | 676 | match, |
|
677 | 677 | listignored, |
|
678 | 678 | listclean, |
|
679 | 679 | listunknown, |
|
680 | 680 | listsubrepos, |
|
681 | 681 | ) |
|
682 | 682 | finally: |
|
683 | 683 | self.dirstate._fsmonitordisable = False |
|
684 | 684 | self.ui.quiet = quiet |
|
685 | 685 | self.ui.fout, self.ui.ferr = fout, ferr |
|
686 | 686 | |
|
687 | 687 | # clean isn't tested since it's set to True above |
|
688 | 688 | with self.wlock(): |
|
689 | 689 | _cmpsets( |
|
690 | 690 | [modified, added, removed, deleted, unknown, ignored, clean], |
|
691 | 691 | rv2, |
|
692 | 692 | ) |
|
693 | 693 | modified, added, removed, deleted, unknown, ignored, clean = rv2 |
|
694 | 694 | |
|
695 | 695 | return scmutil.status( |
|
696 | 696 | modified, added, removed, deleted, unknown, ignored, clean |
|
697 | 697 | ) |
|
698 | 698 | |
|
699 | 699 | |
|
700 | 700 | class poststatus: |
|
701 | 701 | def __init__(self, startclock): |
|
702 | 702 | self._startclock = pycompat.sysbytes(startclock) |
|
703 | 703 | |
|
704 | 704 | def __call__(self, wctx, status): |
|
705 | 705 | clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock |
|
706 | 706 | hashignore = _hashignore(wctx.repo().dirstate._ignore) |
|
707 | 707 | notefiles = ( |
|
708 | 708 | status.modified |
|
709 | 709 | + status.added |
|
710 | 710 | + status.removed |
|
711 | 711 | + status.deleted |
|
712 | 712 | + status.unknown |
|
713 | 713 | ) |
|
714 | 714 | wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles) |
|
715 | 715 | |
|
716 | 716 | |
|
717 | 717 | def makedirstate(repo, dirstate): |
|
718 | 718 | class fsmonitordirstate(dirstate.__class__): |
|
719 | 719 | def _fsmonitorinit(self, repo): |
|
720 | 720 | # _fsmonitordisable is used in paranoid mode |
|
721 | 721 | self._fsmonitordisable = False |
|
722 | 722 | self._fsmonitorstate = repo._fsmonitorstate |
|
723 | 723 | self._watchmanclient = repo._watchmanclient |
|
724 | 724 | self._repo = weakref.proxy(repo) |
|
725 | 725 | |
|
726 | 726 | def walk(self, *args, **kwargs): |
|
727 | 727 | orig = super(fsmonitordirstate, self).walk |
|
728 | 728 | if self._fsmonitordisable: |
|
729 | 729 | return orig(*args, **kwargs) |
|
730 | 730 | return overridewalk(orig, self, *args, **kwargs) |
|
731 | 731 | |
|
732 | 732 | def rebuild(self, *args, **kwargs): |
|
733 | 733 | self._fsmonitorstate.invalidate() |
|
734 | 734 | return super(fsmonitordirstate, self).rebuild(*args, **kwargs) |
|
735 | 735 | |
|
736 | 736 | def invalidate(self, *args, **kwargs): |
|
737 | 737 | self._fsmonitorstate.invalidate() |
|
738 | 738 | return super(fsmonitordirstate, self).invalidate(*args, **kwargs) |
|
739 | 739 | |
|
740 | 740 | dirstate.__class__ = fsmonitordirstate |
|
741 | 741 | dirstate._fsmonitorinit(repo) |
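
makedirstate extends a live dirstate object in place: it creates a subclass of the instance's current class and assigns it to __class__, so existing references pick up the new behavior. The pattern in isolation (names are illustrative):

    class Base:
        def walk(self):
            return 'base walk'

    obj = Base()

    class Wrapped(obj.__class__):
        # subclass the instance's *current* class so prior mixins survive
        def walk(self):
            return 'wrapped ' + super().walk()

    obj.__class__ = Wrapped  # retype the existing instance in place
    assert obj.walk() == 'wrapped base walk'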
|
742 | 742 | |
|
743 | 743 | |
|
744 | 744 | def wrapdirstate(orig, self): |
|
745 | 745 | ds = orig(self) |
|
746 | 746 | # only override the dirstate when Watchman is available for the repo |
|
747 | 747 | if hasattr(self, '_fsmonitorstate'):
|
748 | 748 | makedirstate(self, ds) |
|
749 | 749 | return ds |
|
750 | 750 | |
|
751 | 751 | |
|
752 | 752 | def extsetup(ui): |
|
753 | 753 | extensions.wrapfilecache( |
|
754 | 754 | localrepo.localrepository, b'dirstate', wrapdirstate |
|
755 | 755 | ) |
|
756 | 756 | if pycompat.isdarwin: |
|
757 | 757 | # An assist for avoiding the dangling-symlink fsevents bug |
|
758 | 758 | extensions.wrapfunction(os, 'symlink', wrapsymlink) |
|
759 | 759 | |
|
760 | 760 | extensions.wrapfunction(merge, '_update', wrapupdate) |
|
761 | 761 | |
|
762 | 762 | |
|
763 | 763 | def wrapsymlink(orig, source, link_name): |
|
764 | 764 | """if we create a dangling symlink, also touch the parent dir |
|
765 | 765 | to encourage fsevents notifications to work more correctly""" |
|
766 | 766 | try: |
|
767 | 767 | return orig(source, link_name) |
|
768 | 768 | finally: |
|
769 | 769 | try: |
|
770 | 770 | os.utime(os.path.dirname(link_name), None) |
|
771 | 771 | except OSError: |
|
772 | 772 | pass |
|
773 | 773 | |
|
774 | 774 | |
|
775 | 775 | class state_update: |
|
776 | 776 | """This context manager is responsible for dispatching the state-enter |
|
777 | 777 | and state-leave signals to the watchman service. The enter and leave |
|
778 | 778 | methods can be invoked manually (for scenarios where context manager |
|
779 | 779 | semantics are not possible). If parameters oldnode and newnode are None, |
|
780 | 780 | they will be populated based on the current working copy in enter and

781 | 781 | leave, respectively. Similarly, if the distance is None, it will be
|
782 | 782 | calculated based on the oldnode and newnode in the leave method.""" |
|
783 | 783 | |
|
784 | 784 | def __init__( |
|
785 | 785 | self, |
|
786 | 786 | repo, |
|
787 | 787 | name, |
|
788 | 788 | oldnode=None, |
|
789 | 789 | newnode=None, |
|
790 | 790 | distance=None, |
|
791 | 791 | partial=False, |
|
792 | 792 | ): |
|
793 | 793 | self.repo = repo.unfiltered() |
|
794 | 794 | self.name = name |
|
795 | 795 | self.oldnode = oldnode |
|
796 | 796 | self.newnode = newnode |
|
797 | 797 | self.distance = distance |
|
798 | 798 | self.partial = partial |
|
799 | 799 | self._lock = None |
|
800 | 800 | self.need_leave = False |
|
801 | 801 | |
|
802 | 802 | def __enter__(self): |
|
803 | 803 | self.enter() |
|
804 | 804 | |
|
805 | 805 | def enter(self): |
|
806 | 806 | # Make sure we have a wlock prior to sending notifications to watchman. |
|
807 | 807 | # We don't want to race with other actors. In the update case, |
|
808 | 808 | # merge.update is going to take the wlock almost immediately. We are |
|
809 | 809 | # effectively extending the lock around several short sanity checks. |
|
810 | 810 | if self.oldnode is None: |
|
811 | 811 | self.oldnode = self.repo[b'.'].node() |
|
812 | 812 | |
|
813 | 813 | if self.repo.currentwlock() is None: |
|
814 | 814 | if hasattr(self.repo, 'wlocknostateupdate'):
|
815 | 815 | self._lock = self.repo.wlocknostateupdate() |
|
816 | 816 | else: |
|
817 | 817 | self._lock = self.repo.wlock() |
|
818 | 818 | self.need_leave = self._state(b'state-enter', hex(self.oldnode)) |
|
819 | 819 | return self |
|
820 | 820 | |
|
821 | 821 | def __exit__(self, type_, value, tb): |
|
822 | 822 | abort = True if type_ else False |
|
823 | 823 | self.exit(abort=abort) |
|
824 | 824 | |
|
825 | 825 | def exit(self, abort=False): |
|
826 | 826 | try: |
|
827 | 827 | if self.need_leave: |
|
828 | 828 | status = b'failed' if abort else b'ok' |
|
829 | 829 | if self.newnode is None: |
|
830 | 830 | self.newnode = self.repo[b'.'].node() |
|
831 | 831 | if self.distance is None: |
|
832 | 832 | self.distance = calcdistance( |
|
833 | 833 | self.repo, self.oldnode, self.newnode |
|
834 | 834 | ) |
|
835 | 835 | self._state(b'state-leave', hex(self.newnode), status=status) |
|
836 | 836 | finally: |
|
837 | 837 | self.need_leave = False |
|
838 | 838 | if self._lock: |
|
839 | 839 | self._lock.release() |
|
840 | 840 | |
|
841 | 841 | def _state(self, cmd, commithash, status=b'ok'): |
|
842 | 842 | if not hasattr(self.repo, '_watchmanclient'):
|
843 | 843 | return False |
|
844 | 844 | try: |
|
845 | 845 | self.repo._watchmanclient.command( |
|
846 | 846 | cmd, |
|
847 | 847 | { |
|
848 | 848 | b'name': self.name, |
|
849 | 849 | b'metadata': { |
|
850 | 850 | # the target revision |
|
851 | 851 | b'rev': commithash, |
|
852 | 852 | # approximate number of commits between current and target |
|
853 | 853 | b'distance': self.distance if self.distance else 0, |
|
854 | 854 | # success/failure (only really meaningful for state-leave) |
|
855 | 855 | b'status': status, |
|
856 | 856 | # whether the working copy parent is changing |
|
857 | 857 | b'partial': self.partial, |
|
858 | 858 | }, |
|
859 | 859 | }, |
|
860 | 860 | ) |
|
861 | 861 | return True |
|
862 | 862 | except Exception as e: |
|
863 | 863 | # Swallow any errors; fire and forget |
|
864 | 864 | self.repo.ui.log( |
|
865 | 865 | b'watchman', b'Exception %s while running %s\n', e, cmd |
|
866 | 866 | ) |
|
867 | 867 | return False |
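
The enter/exit pair above amounts to a bracketing context manager that always emits a "leave" whose status records whether the body failed. A generic, runnable sketch of that shape (print stands in for the watchman commands):

    class bracket:
        def __init__(self, name):
            self.name = name

        def __enter__(self):
            print('state-enter', self.name)
            return self

        def __exit__(self, exc_type, exc, tb):
            status = 'failed' if exc_type else 'ok'
            print('state-leave', self.name, status)
            return False  # never swallow the exception

    with bracket('hg.update'):
        pass  # the working-copy mutation would run here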
|
868 | 868 | |
|
869 | 869 | |
|
870 | 870 | # Estimate the distance between two nodes |
|
871 | 871 | def calcdistance(repo, oldnode, newnode): |
|
872 | 872 | anc = repo.changelog.ancestor(oldnode, newnode) |
|
873 | 873 | ancrev = repo[anc].rev() |
|
874 | 874 | distance = abs(repo[oldnode].rev() - ancrev) + abs( |
|
875 | 875 | repo[newnode].rev() - ancrev |
|
876 | 876 | ) |
|
877 | 877 | return distance |
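
A quick worked example of the formula: with the old node at revision 12, the new node at revision 20, and their common ancestor at revision 10, the distance is |12 - 10| + |20 - 10| = 12 (illustrative revision numbers):

    def distance(oldrev, newrev, ancrev):
        # the same arithmetic as calcdistance, on plain revision numbers
        return abs(oldrev - ancrev) + abs(newrev - ancrev)

    assert distance(12, 20, 10) == 12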
|
878 | 878 | |
|
879 | 879 | |
|
880 | 880 | # Bracket working copy updates with calls to the watchman state-enter |
|
881 | 881 | # and state-leave commands. This allows clients to perform more intelligent |
|
882 | 882 | # settling during bulk file change scenarios |
|
883 | 883 | # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling |
|
884 | 884 | def wrapupdate( |
|
885 | 885 | orig, |
|
886 | 886 | repo, |
|
887 | 887 | node, |
|
888 | 888 | branchmerge, |
|
889 | 889 | force, |
|
890 | 890 | ancestor=None, |
|
891 | 891 | mergeancestor=False, |
|
892 | 892 | labels=None, |
|
893 | 893 | matcher=None, |
|
894 | 894 | **kwargs |
|
895 | 895 | ): |
|
896 | 896 | |
|
897 | 897 | distance = 0 |
|
898 | 898 | partial = True |
|
899 | 899 | oldnode = repo[b'.'].node() |
|
900 | 900 | newnode = repo[node].node() |
|
901 | 901 | if matcher is None or matcher.always(): |
|
902 | 902 | partial = False |
|
903 | 903 | distance = calcdistance(repo.unfiltered(), oldnode, newnode) |
|
904 | 904 | |
|
905 | 905 | with state_update( |
|
906 | 906 | repo, |
|
907 | 907 | name=b"hg.update", |
|
908 | 908 | oldnode=oldnode, |
|
909 | 909 | newnode=newnode, |
|
910 | 910 | distance=distance, |
|
911 | 911 | partial=partial, |
|
912 | 912 | ): |
|
913 | 913 | return orig( |
|
914 | 914 | repo, |
|
915 | 915 | node, |
|
916 | 916 | branchmerge, |
|
917 | 917 | force, |
|
918 | 918 | ancestor, |
|
919 | 919 | mergeancestor, |
|
920 | 920 | labels, |
|
921 | 921 | matcher, |
|
922 | 922 | **kwargs |
|
923 | 923 | ) |
|
924 | 924 | |
|
925 | 925 | |
|
926 | 926 | def repo_has_depth_one_nested_repo(repo): |
|
927 | 927 | for f in repo.wvfs.listdir(): |
|
928 | 928 | if os.path.isdir(os.path.join(repo.root, f, b'.hg')): |
|
929 | 929 | msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n' |
|
930 | 930 | repo.ui.debug(msg % f) |
|
931 | 931 | return True |
|
932 | 932 | return False |
|
933 | 933 | |
|
934 | 934 | |
|
935 | 935 | def reposetup(ui, repo): |
|
936 | 936 | # We don't work with largefiles or inotify |
|
937 | 937 | exts = extensions.enabled() |
|
938 | 938 | for ext in _blacklist: |
|
939 | 939 | if ext in exts: |
|
940 | 940 | ui.warn( |
|
941 | 941 | _( |
|
942 | 942 | b'The fsmonitor extension is incompatible with the %s ' |
|
943 | 943 | b'extension and has been disabled.\n' |
|
944 | 944 | ) |
|
945 | 945 | % ext |
|
946 | 946 | ) |
|
947 | 947 | return |
|
948 | 948 | |
|
949 | 949 | if repo.local(): |
|
950 | 950 | # We don't work with subrepos either. |
|
951 | 951 | # |
|
952 | 952 | # checking repo[None].substate can cause a dirstate parse, which is too

953 | 953 | # slow. Instead, look for the .hgsubstate and .hgsub files.
|
954 | 954 | if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'): |
|
955 | 955 | return |
|
956 | 956 | |
|
957 | 957 | if repo_has_depth_one_nested_repo(repo): |
|
958 | 958 | return |
|
959 | 959 | |
|
960 | 960 | fsmonitorstate = state.state(repo) |
|
961 | 961 | if fsmonitorstate.mode == b'off': |
|
962 | 962 | return |
|
963 | 963 | |
|
964 | 964 | try: |
|
965 | 965 | client = watchmanclient.client(repo.ui, repo.root) |
|
966 | 966 | except Exception as ex: |
|
967 | 967 | _handleunavailable(ui, fsmonitorstate, ex) |
|
968 | 968 | return |
|
969 | 969 | |
|
970 | 970 | repo._fsmonitorstate = fsmonitorstate |
|
971 | 971 | repo._watchmanclient = client |
|
972 | 972 | |
|
973 | 973 | dirstate, cached = localrepo.isfilecached(repo, b'dirstate') |
|
974 | 974 | if cached: |
|
975 | 975 | # at this point since fsmonitorstate wasn't present, |
|
976 | 976 | # repo.dirstate is not a fsmonitordirstate |
|
977 | 977 | makedirstate(repo, dirstate) |
|
978 | 978 | |
|
979 | 979 | class fsmonitorrepo(repo.__class__): |
|
980 | 980 | def status(self, *args, **kwargs): |
|
981 | 981 | orig = super(fsmonitorrepo, self).status |
|
982 | 982 | return overridestatus(orig, self, *args, **kwargs) |
|
983 | 983 | |
|
984 | 984 | def wlocknostateupdate(self, *args, **kwargs): |
|
985 | 985 | return super(fsmonitorrepo, self).wlock(*args, **kwargs) |
|
986 | 986 | |
|
987 | 987 | def wlock(self, *args, **kwargs): |
|
988 | 988 | l = super(fsmonitorrepo, self).wlock(*args, **kwargs) |
|
989 | 989 | if not ui.configbool( |
|
990 | 990 | b"experimental", b"fsmonitor.transaction_notify" |
|
991 | 991 | ): |
|
992 | 992 | return l |
|
993 | 993 | if l.held != 1: |
|
994 | 994 | return l |
|
995 | 995 | origrelease = l.releasefn |
|
996 | 996 | |
|
997 | 997 | def staterelease(): |
|
998 | 998 | if origrelease: |
|
999 | 999 | origrelease() |
|
1000 | 1000 | if l.stateupdate: |
|
1001 | 1001 | l.stateupdate.exit() |
|
1002 | 1002 | l.stateupdate = None |
|
1003 | 1003 | |
|
1004 | 1004 | try: |
|
1005 | 1005 | l.stateupdate = None |
|
1006 | 1006 | l.stateupdate = state_update(self, name=b"hg.transaction") |
|
1007 | 1007 | l.stateupdate.enter() |
|
1008 | 1008 | l.releasefn = staterelease |
|
1009 | 1009 | except Exception as e: |
|
1010 | 1010 | # Swallow any errors; fire and forget |
|
1011 | 1011 | self.ui.log( |
|
1012 | 1012 | b'watchman', b'Exception in state update %s\n', e |
|
1013 | 1013 | ) |
|
1014 | 1014 | return l |
|
1015 | 1015 | |
|
1016 | 1016 | repo.__class__ = fsmonitorrepo |
@@ -1,449 +1,449 b'' | |||
|
1 | 1 | import os |
|
2 | 2 | import shutil |
|
3 | 3 | import stat |
|
4 | 4 | import time |
|
5 | 5 | |
|
6 | 6 | from mercurial.i18n import _ |
|
7 | 7 | from mercurial.node import bin, hex |
|
8 | 8 | from mercurial.pycompat import open |
|
9 | 9 | from mercurial import ( |
|
10 | 10 | error, |
|
11 | 11 | pycompat, |
|
12 | 12 | util, |
|
13 | 13 | ) |
|
14 | 14 | from mercurial.utils import hashutil |
|
15 | 15 | from . import ( |
|
16 | 16 | constants, |
|
17 | 17 | shallowutil, |
|
18 | 18 | ) |
|
19 | 19 | |
|
20 | 20 | |
|
21 | 21 | class basestore: |
|
22 | 22 | def __init__(self, repo, path, reponame, shared=False): |
|
23 | 23 | """Creates a remotefilelog store object for the given repo name. |
|
24 | 24 | |
|
25 | 25 | `path` - The file path where this store keeps its data |
|
26 | 26 | `reponame` - The name of the repo. This is used to partition data from |
|
27 | 27 | many repos. |
|
28 | 28 | `shared` - True if this store is a shared cache of data from the central |
|
29 | 29 | server, for many repos on this machine. False means this store is for |
|
30 | 30 | the local data for one repo. |
|
31 | 31 | """ |
|
32 | 32 | self.repo = repo |
|
33 | 33 | self.ui = repo.ui |
|
34 | 34 | self._path = path |
|
35 | 35 | self._reponame = reponame |
|
36 | 36 | self._shared = shared |
|
37 | 37 | self._uid = os.getuid() if not pycompat.iswindows else None |
|
38 | 38 | |
|
39 | 39 | self._validatecachelog = self.ui.config( |
|
40 | 40 | b"remotefilelog", b"validatecachelog" |
|
41 | 41 | ) |
|
42 | 42 | self._validatecache = self.ui.config( |
|
43 | 43 | b"remotefilelog", b"validatecache", b'on' |
|
44 | 44 | ) |
|
45 | 45 | if self._validatecache not in (b'on', b'strict', b'off'): |
|
46 | 46 | self._validatecache = b'on' |
|
47 | 47 | if self._validatecache == b'off': |
|
48 | 48 | self._validatecache = False |
|
49 | 49 | |
|
50 | 50 | if shared: |
|
51 | 51 | shallowutil.mkstickygroupdir(self.ui, path) |
|
52 | 52 | |
|
53 | 53 | def getmissing(self, keys): |
|
54 | 54 | missing = [] |
|
55 | 55 | for name, node in keys: |
|
56 | 56 | filepath = self._getfilepath(name, node) |
|
57 | 57 | exists = os.path.exists(filepath) |
|
58 | 58 | if ( |
|
59 | 59 | exists |
|
60 | 60 | and self._validatecache == b'strict' |
|
61 | 61 | and not self._validatekey(filepath, b'contains') |
|
62 | 62 | ): |
|
63 | 63 | exists = False |
|
64 | 64 | if not exists: |
|
65 | 65 | missing.append((name, node)) |
|
66 | 66 | |
|
67 | 67 | return missing |
|
68 | 68 | |
|
69 | 69 | # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE |
|
70 | 70 | |
|
71 | 71 | def markledger(self, ledger, options=None): |
|
72 | 72 | if options and options.get(constants.OPTION_PACKSONLY): |
|
73 | 73 | return |
|
74 | 74 | if self._shared: |
|
75 | 75 | for filename, nodes in self._getfiles(): |
|
76 | 76 | for node in nodes: |
|
77 | 77 | ledger.markdataentry(self, filename, node) |
|
78 | 78 | ledger.markhistoryentry(self, filename, node) |
|
79 | 79 | |
|
80 | 80 | def cleanup(self, ledger): |
|
81 | 81 | ui = self.ui |
|
82 | 82 | entries = ledger.sources.get(self, []) |
|
83 | 83 | count = 0 |
|
84 | 84 | progress = ui.makeprogress( |
|
85 | 85 | _(b"cleaning up"), unit=b"files", total=len(entries) |
|
86 | 86 | ) |
|
87 | 87 | for entry in entries: |
|
88 | 88 | if entry.gced or (entry.datarepacked and entry.historyrepacked): |
|
89 | 89 | progress.update(count) |
|
90 | 90 | path = self._getfilepath(entry.filename, entry.node) |
|
91 | 91 | util.tryunlink(path) |
|
92 | 92 | count += 1 |
|
93 | 93 | progress.complete() |
|
94 | 94 | |
|
95 | 95 | # Clean up the repo cache directory. |
|
96 | 96 | self._cleanupdirectory(self._getrepocachepath()) |
|
97 | 97 | |
|
98 | 98 | # BELOW THIS ARE NON-STANDARD APIS |
|
99 | 99 | |
|
100 | 100 | def _cleanupdirectory(self, rootdir): |
|
101 | 101 | """Removes the empty directories and unnecessary files within the root |
|
102 | 102 | directory recursively. Note that this method does not remove the root |
|
103 | 103 | directory itself.""" |
|
104 | 104 | |
|
105 | 105 | oldfiles = set() |
|
106 | 106 | otherfiles = set() |
|
107 | 107 | # osutil.listdir returns stat information which saves some rmdir/listdir |
|
108 | 108 | # syscalls. |
|
109 | 109 | for name, mode in util.osutil.listdir(rootdir): |
|
110 | 110 | if stat.S_ISDIR(mode): |
|
111 | 111 | dirpath = os.path.join(rootdir, name) |
|
112 | 112 | self._cleanupdirectory(dirpath) |
|
113 | 113 | |
|
114 | 114 | # Now that the directory specified by dirpath is potentially |
|
115 | 115 | # empty, try and remove it. |
|
116 | 116 | try: |
|
117 | 117 | os.rmdir(dirpath) |
|
118 | 118 | except OSError: |
|
119 | 119 | pass |
|
120 | 120 | |
|
121 | 121 | elif stat.S_ISREG(mode): |
|
122 | 122 | if name.endswith(b'_old'): |
|
123 | 123 | oldfiles.add(name[:-4]) |
|
124 | 124 | else: |
|
125 | 125 | otherfiles.add(name) |
|
126 | 126 | |
|
127 | 127 | # Remove the files which end with suffix '_old' and have no |
|
128 | 128 | # corresponding file without the suffix '_old'. See addremotefilelognode |
|
129 | 129 | # method for the generation/purpose of files with '_old' suffix. |
|
130 | 130 | for filename in oldfiles - otherfiles: |
|
131 | 131 | filepath = os.path.join(rootdir, filename + b'_old') |
|
132 | 132 | util.tryunlink(filepath) |
|
133 | 133 | |
|
134 | 134 | def _getfiles(self): |
|
135 | 135 | """Return a list of (filename, [node,...]) for all the revisions that |
|
136 | 136 | exist in the store. |
|
137 | 137 | |
|
138 | 138 | This is useful for obtaining a list of all the contents of the store |
|
139 | 139 | when performing a repack to another store, since the store API requires |
|
140 | 140 | name+node keys and not namehash+node keys. |
|
141 | 141 | """ |
|
142 | 142 | existing = {} |
|
143 | 143 | for filenamehash, node in self._listkeys(): |
|
144 | 144 | existing.setdefault(filenamehash, []).append(node) |
|
145 | 145 | |
|
146 | 146 | filenamemap = self._resolvefilenames(existing.keys()) |
|
147 | 147 | |
|
148 | 148 | for filename, sha in filenamemap.items(): |
|
149 | 149 | yield (filename, existing[sha]) |
|
150 | 150 | |
|
151 | 151 | def _resolvefilenames(self, hashes): |
|
152 | 152 | """Given a list of filename hashes that are present in the |
|
153 | 153 | remotefilelog store, return a mapping from filename->hash. |
|
154 | 154 | |
|
155 | 155 | This is useful when converting remotefilelog blobs into other storage |
|
156 | 156 | formats. |
|
157 | 157 | """ |
|
158 | 158 | if not hashes: |
|
159 | 159 | return {} |
|
160 | 160 | |
|
161 | 161 | filenames = {} |
|
162 | 162 | missingfilename = set(hashes) |
|
163 | 163 | |
|
164 | 164 | # Start with a full manifest, since it'll cover the majority of files |
|
165 | 165 | for filename in self.repo[b'tip'].manifest(): |
|
166 | 166 | sha = hashutil.sha1(filename).digest() |
|
167 | 167 | if sha in missingfilename: |
|
168 | 168 | filenames[filename] = sha |
|
169 | 169 | missingfilename.discard(sha) |
|
170 | 170 | |
|
171 | 171 | # Scan the changelog until we've found every file name |
|
172 | 172 | cl = self.repo.unfiltered().changelog |
|
173 | 173 | for rev in range(len(cl) - 1, -1, -1): |
|
174 | 174 | if not missingfilename: |
|
175 | 175 | break |
|
176 | 176 | files = cl.readfiles(cl.node(rev)) |
|
177 | 177 | for filename in files: |
|
178 | 178 | sha = hashutil.sha1(filename).digest() |
|
179 | 179 | if sha in missingfilename: |
|
180 | 180 | filenames[filename] = sha |
|
181 | 181 | missingfilename.discard(sha) |
|
182 | 182 | |
|
183 | 183 | return filenames |
|
184 | 184 | |
|
185 | 185 | def _getrepocachepath(self): |
|
186 | 186 | return ( |
|
187 | 187 | os.path.join(self._path, self._reponame) |
|
188 | 188 | if self._shared |
|
189 | 189 | else self._path |
|
190 | 190 | ) |
|
191 | 191 | |
|
192 | 192 | def _listkeys(self): |
|
193 | 193 | """List all the remotefilelog keys that exist in the store. |
|
194 | 194 | |
|
195 | 195 | Returns an iterator of (filename hash, filecontent hash) tuples.
|
196 | 196 | """ |
|
197 | 197 | |
|
198 | 198 | for root, dirs, files in os.walk(self._getrepocachepath()): |
|
199 | 199 | for filename in files: |
|
200 | 200 | if len(filename) != 40: |
|
201 | 201 | continue |
|
202 | 202 | node = filename |
|
203 | 203 | if self._shared: |
|
204 | 204 | # .../1a/85ffda..be21 |
|
205 | 205 | filenamehash = root[-41:-39] + root[-38:] |
|
206 | 206 | else: |
|
207 | 207 | filenamehash = root[-40:] |
|
208 | 208 | yield (bin(filenamehash), bin(node)) |
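
In the shared cache the 40-hex filename hash is fanned out over two directory levels (.../1a/85ffda..be21), so the slicing above stitches it back together while skipping the path separator. A sketch with an illustrative hash (the indices assume a one-character separator):

    import os

    pathhash = '1a' + 'b' * 38   # illustrative 40-hex filename hash
    root = os.path.join('cache', 'repo', pathhash[:2], pathhash[2:])

    reassembled = root[-41:-39] + root[-38:]
    assert reassembled == pathhash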
|
209 | 209 | |
|
210 | 210 | def _getfilepath(self, name, node): |
|
211 | 211 | node = hex(node) |
|
212 | 212 | if self._shared: |
|
213 | 213 | key = shallowutil.getcachekey(self._reponame, name, node) |
|
214 | 214 | else: |
|
215 | 215 | key = shallowutil.getlocalkey(name, node) |
|
216 | 216 | |
|
217 | 217 | return os.path.join(self._path, key) |
|
218 | 218 | |
|
219 | 219 | def _getdata(self, name, node): |
|
220 | 220 | filepath = self._getfilepath(name, node) |
|
221 | 221 | try: |
|
222 | 222 | data = shallowutil.readfile(filepath) |
|
223 | 223 | if self._validatecache and not self._validatedata(data, filepath): |
|
224 | 224 | if self._validatecachelog: |
|
225 | 225 | with open(self._validatecachelog, b'ab+') as f: |
|
226 | 226 | f.write(b"corrupt %s during read\n" % filepath) |
|
227 | 227 | os.rename(filepath, filepath + b".corrupt") |
|
228 | 228 | raise KeyError(b"corrupt local cache file %s" % filepath) |
|
229 | 229 | except IOError: |
|
230 | 230 | raise KeyError( |
|
231 | 231 | b"no file found at %s for %s:%s" % (filepath, name, hex(node)) |
|
232 | 232 | ) |
|
233 | 233 | |
|
234 | 234 | return data |
|
235 | 235 | |
|
236 | 236 | def addremotefilelognode(self, name, node, data): |
|
237 | 237 | filepath = self._getfilepath(name, node) |
|
238 | 238 | |
|
239 | 239 | oldumask = os.umask(0o002) |
|
240 | 240 | try: |
|
241 | 241 | # if this node already exists, save the old version for |
|
242 | 242 | # recovery/debugging purposes. |
|
243 | 243 | if os.path.exists(filepath): |
|
244 | 244 | newfilename = filepath + b'_old' |
|
245 | 245 | # newfilename can be read-only and shutil.copy will fail. |
|
246 | 246 | # Delete newfilename to avoid it |
|
247 | 247 | if os.path.exists(newfilename): |
|
248 | 248 | shallowutil.unlinkfile(newfilename) |
|
249 | 249 | shutil.copy(filepath, newfilename) |
|
250 | 250 | |
|
251 | 251 | shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath)) |
|
252 | 252 | shallowutil.writefile(filepath, data, readonly=True) |
|
253 | 253 | |
|
254 | 254 | if self._validatecache: |
|
255 | 255 | if not self._validatekey(filepath, b'write'): |
|
256 | 256 | raise error.Abort( |
|
257 | 257 | _(b"local cache write was corrupted %s") % filepath |
|
258 | 258 | ) |
|
259 | 259 | finally: |
|
260 | 260 | os.umask(oldumask) |
|
261 | 261 | |
|
262 | 262 | def markrepo(self, path): |
|
263 | 263 | """Call this to add the given repo path to the store's list of |
|
264 | 264 | repositories that are using it. This is useful later when doing garbage |
|
265 | 265 | collection, since it allows us to insecpt the repos to see what nodes |
|
266 | 266 | they want to be kept alive in the store. |
|
267 | 267 | """ |
|
268 | 268 | repospath = os.path.join(self._path, b"repos") |
|
269 | 269 | with open(repospath, b'ab') as reposfile: |
|
270 | 270 | reposfile.write(os.path.dirname(path) + b"\n") |
|
271 | 271 | |
|
272 | 272 | repospathstat = os.stat(repospath) |
|
273 | 273 | if repospathstat.st_uid == self._uid: |
|
274 | 274 | os.chmod(repospath, 0o0664) |
|
275 | 275 | |
|
276 | 276 | def _validatekey(self, path, action): |
|
277 | 277 | with open(path, b'rb') as f: |
|
278 | 278 | data = f.read() |
|
279 | 279 | |
|
280 | 280 | if self._validatedata(data, path): |
|
281 | 281 | return True |
|
282 | 282 | |
|
283 | 283 | if self._validatecachelog: |
|
284 | 284 | with open(self._validatecachelog, b'ab+') as f: |
|
285 | 285 | f.write(b"corrupt %s during %s\n" % (path, action)) |
|
286 | 286 | |
|
287 | 287 | os.rename(path, path + b".corrupt") |
|
288 | 288 | return False |
|
289 | 289 | |
|
290 | 290 | def _validatedata(self, data, path): |
|
291 | 291 | try: |
|
292 | 292 | if len(data) > 0: |
|
293 | 293 | # see remotefilelogserver.createfileblob for the format |
|
294 | 294 | offset, size, flags = shallowutil.parsesizeflags(data) |
|
295 | 295 | if len(data) <= size: |
|
296 | 296 | # it is truncated |
|
297 | 297 | return False |
|
298 | 298 | |
|
299 | 299 | # extract the node from the metadata |
|
300 | 300 | offset += size |
|
301 | 301 | datanode = data[offset : offset + 20] |
|
302 | 302 | |
|
303 | 303 | # and compare against the path |
|
304 | 304 | if os.path.basename(path) == hex(datanode): |
|
305 | 305 | # Content matches the intended path |
|
306 | 306 | return True |
|
307 | 307 | return False |
|
308 | 308 | except (ValueError, shallowutil.BadRemotefilelogHeader): |
|
309 | 309 | pass |
|
310 | 310 | |
|
311 | 311 | return False |
|
312 | 312 | |
|
313 | 313 | def gc(self, keepkeys): |
|
314 | 314 | ui = self.ui |
|
315 | 315 | cachepath = self._path |
|
316 | 316 | |
|
317 | 317 | # prune cache |
|
318 | 318 | queue = pycompat.queue.PriorityQueue() |
|
319 | 319 | originalsize = 0 |
|
320 | 320 | size = 0 |
|
321 | 321 | count = 0 |
|
322 | 322 | removed = 0 |
|
323 | 323 | |
|
324 | 324 | # keep files newer than a day even if they aren't needed |
|
325 | 325 | limit = time.time() - (60 * 60 * 24) |
|
326 | 326 | |
|
327 | 327 | progress = ui.makeprogress( |
|
328 | 328 | _(b"removing unnecessary files"), unit=b"files" |
|
329 | 329 | ) |
|
330 | 330 | progress.update(0) |
|
331 | 331 | for root, dirs, files in os.walk(cachepath): |
|
332 | 332 | for file in files: |
|
333 | 333 | if file == b'repos': |
|
334 | 334 | continue |
|
335 | 335 | |
|
336 | 336 | # Don't delete pack files |
|
337 | 337 | if b'/packs/' in root: |
|
338 | 338 | continue |
|
339 | 339 | |
|
340 | 340 | progress.update(count) |
|
341 | 341 | path = os.path.join(root, file) |
|
342 | 342 | key = os.path.relpath(path, cachepath) |
|
343 | 343 | count += 1 |
|
344 | 344 | try: |
|
345 | 345 | pathstat = os.stat(path) |
|
346 | 346 | except FileNotFoundError: |
|
347 | 347 | msg = _( |
|
348 | 348 | b"warning: file %s was removed by another process\n" |
|
349 | 349 | ) |
|
350 | 350 | ui.warn(msg % path) |
|
351 | 351 | continue |
|
352 | 352 | |
|
353 | 353 | originalsize += pathstat.st_size |
|
354 | 354 | |
|
355 | 355 | if key in keepkeys or pathstat.st_atime > limit: |
|
356 | 356 | queue.put((pathstat.st_atime, path, pathstat)) |
|
357 | 357 | size += pathstat.st_size |
|
358 | 358 | else: |
|
359 | 359 | try: |
|
360 | 360 | shallowutil.unlinkfile(path) |
|
361 | 361 | except FileNotFoundError: |
|
362 | 362 | msg = _( |
|
363 | 363 | b"warning: file %s was removed by another " |
|
364 | 364 | b"process\n" |
|
365 | 365 | ) |
|
366 | 366 | ui.warn(msg % path) |
|
367 | 367 | continue |
|
368 | 368 | removed += 1 |
|
369 | 369 | progress.complete() |
|
370 | 370 | |
|
371 | 371 | # remove oldest files until under limit |
|
372 | 372 | limit = ui.configbytes(b"remotefilelog", b"cachelimit") |
|
373 | 373 | if size > limit: |
|
374 | 374 | excess = size - limit |
|
375 | 375 | progress = ui.makeprogress( |
|
376 | 376 | _(b"enforcing cache limit"), unit=b"bytes", total=excess |
|
377 | 377 | ) |
|
378 | 378 | removedexcess = 0 |
|
379 | 379 | while queue and size > limit and size > 0: |
|
380 | 380 | progress.update(removedexcess) |
|
381 | 381 | atime, oldpath, oldpathstat = queue.get() |
|
382 | 382 | try: |
|
383 | 383 | shallowutil.unlinkfile(oldpath) |
|
384 | 384 | except FileNotFoundError: |
|
385 | 385 | msg = _( |
|
386 | 386 | b"warning: file %s was removed by another process\n" |
|
387 | 387 | ) |
|
388 | 388 | ui.warn(msg % oldpath) |
|
389 | 389 | size -= oldpathstat.st_size |
|
390 | 390 | removed += 1 |
|
391 | 391 | removedexcess += oldpathstat.st_size |
|
392 | 392 | progress.complete() |
|
393 | 393 | |
|
394 | 394 | ui.status( |
|
395 | 395 | _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n") |
|
396 | 396 | % ( |
|
397 | 397 | removed, |
|
398 | 398 | count, |
|
399 | 399 | float(originalsize) / 1024.0 / 1024.0 / 1024.0, |
|
400 | 400 | float(size) / 1024.0 / 1024.0 / 1024.0, |
|
401 | 401 | ) |
|
402 | 402 | ) |
|
403 | 403 | |
|
404 | 404 | |
|
405 | 405 | class baseunionstore: |
|
406 | 406 | def __init__(self, *args, **kwargs): |
|
407 | 407 | # If one of the functions that iterates all of the stores is about to |
|
408 | 408 | # throw a KeyError, try this many times with a full refresh between |
|
409 | 409 | # attempts. A repack operation may have moved data from one store to |
|
410 | 410 | # another while we were running. |
|
411 | 411 | self.numattempts = kwargs.get('numretries', 0) + 1 |
|
412 | 412 | # If not-None, call this function on every retry and if the attempts are |
|
413 | 413 | # exhausted. |
|
414 | 414 | self.retrylog = kwargs.get('retrylog', None) |
|
415 | 415 | |
|
416 | 416 | def markforrefresh(self): |
|
417 | 417 | for store in self.stores: |
|
418 | 418 | if hasattr(store, 'markforrefresh'):
|
419 | 419 | store.markforrefresh() |
|
420 | 420 | |
|
421 | 421 | @staticmethod |
|
422 | 422 | def retriable(fn): |
|
423 | 423 | def noop(*args): |
|
424 | 424 | pass |
|
425 | 425 | |
|
426 | 426 | def wrapped(self, *args, **kwargs): |
|
427 | 427 | retrylog = self.retrylog or noop |
|
428 | 428 | funcname = fn.__name__ |
|
429 | 429 | i = 0 |
|
430 | 430 | while i < self.numattempts: |
|
431 | 431 | if i > 0: |
|
432 | 432 | retrylog( |
|
433 | 433 | b're-attempting (n=%d) %s\n' |
|
434 | 434 | % (i, pycompat.sysbytes(funcname)) |
|
435 | 435 | ) |
|
436 | 436 | self.markforrefresh() |
|
437 | 437 | i += 1 |
|
438 | 438 | try: |
|
439 | 439 | return fn(self, *args, **kwargs) |
|
440 | 440 | except KeyError: |
|
441 | 441 | if i == self.numattempts: |
|
442 | 442 | # retries exhausted |
|
443 | 443 | retrylog( |
|
444 | 444 | b'retries exhausted in %s, raising KeyError\n' |
|
445 | 445 | % pycompat.sysbytes(funcname) |
|
446 | 446 | ) |
|
447 | 447 | raise |
|
448 | 448 | |
|
449 | 449 | return wrapped |
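
The decorator generalizes to any "refresh and retry" situation where a concurrent repack can move data between stores mid-read. A standalone sketch of the same shape (the store and its failure mode are illustrative):

    def retriable(attempts):
        def deco(fn):
            def wrapped(self, *args, **kwargs):
                for i in range(attempts):
                    if i > 0:
                        self.refresh()  # data may have moved; rescan
                    try:
                        return fn(self, *args, **kwargs)
                    except KeyError:
                        if i == attempts - 1:
                            raise       # retries exhausted
            return wrapped
        return deco

    class Store:
        def __init__(self):
            self.ready = False

        def refresh(self):
            self.ready = True

        @retriable(attempts=2)
        def get(self, key):
            if not self.ready:
                raise KeyError(key)
            return 'value'

    assert Store().get('k') == 'value'  # first try fails, the retry succeeds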
@@ -1,670 +1,670 b'' | |||
|
1 | 1 | # fileserverclient.py - client for communicating with the cache process |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2013 Facebook, Inc. |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | |
|
9 | 9 | import io |
|
10 | 10 | import os |
|
11 | 11 | import threading |
|
12 | 12 | import time |
|
13 | 13 | import zlib |
|
14 | 14 | |
|
15 | 15 | from mercurial.i18n import _ |
|
16 | 16 | from mercurial.node import bin, hex |
|
17 | 17 | from mercurial import ( |
|
18 | 18 | error, |
|
19 | 19 | pycompat, |
|
20 | 20 | revlog, |
|
21 | 21 | sshpeer, |
|
22 | 22 | util, |
|
23 | 23 | wireprotov1peer, |
|
24 | 24 | ) |
|
25 | 25 | from mercurial.utils import ( |
|
26 | 26 | hashutil, |
|
27 | 27 | procutil, |
|
28 | 28 | ) |
|
29 | 29 | |
|
30 | 30 | from . import ( |
|
31 | 31 | constants, |
|
32 | 32 | contentstore, |
|
33 | 33 | metadatastore, |
|
34 | 34 | ) |
|
35 | 35 | |
|
36 | 36 | _sshv1peer = sshpeer.sshv1peer |
|
37 | 37 | |
|
38 | 38 | # Statistics for debugging |
|
39 | 39 | fetchcost = 0 |
|
40 | 40 | fetches = 0 |
|
41 | 41 | fetched = 0 |
|
42 | 42 | fetchmisses = 0 |
|
43 | 43 | |
|
44 | 44 | _lfsmod = None |
|
45 | 45 | |
|
46 | 46 | |
|
47 | 47 | def getcachekey(reponame, file, id): |
|
48 | 48 | pathhash = hex(hashutil.sha1(file).digest()) |
|
49 | 49 | return os.path.join(reponame, pathhash[:2], pathhash[2:], id) |
|
50 | 50 | |
|
51 | 51 | |
|
52 | 52 | def getlocalkey(file, id): |
|
53 | 53 | pathhash = hex(hashutil.sha1(file).digest()) |
|
54 | 54 | return os.path.join(pathhash, id) |
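
Worked example of the key layout: sha1 the filename, split the hex digest after two characters for the directory fan-out, and use the blob id as the leaf (repo name and id are illustrative):

    import hashlib
    import os

    def cache_key(reponame, filename, blob_id):
        # mirrors getcachekey above, with str arguments for readability
        pathhash = hashlib.sha1(filename).hexdigest()
        return os.path.join(reponame, pathhash[:2], pathhash[2:], blob_id)

    print(cache_key('myrepo', b'src/main.py', '0' * 40))
    # -> myrepo/<2 hex chars>/<38 hex chars>/000...0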
|
55 | 55 | |
|
56 | 56 | |
|
57 | 57 | def peersetup(ui, peer): |
|
58 | 58 | class remotefilepeer(peer.__class__): |
|
59 | 59 | @wireprotov1peer.batchable |
|
60 | 60 | def x_rfl_getfile(self, file, node): |
|
61 | 61 | if not self.capable(b'x_rfl_getfile'): |
|
62 | 62 | raise error.Abort( |
|
63 | 63 | b'configured remotefile server does not support getfile' |
|
64 | 64 | ) |
|
65 | 65 | |
|
66 | 66 | def decode(d): |
|
67 | 67 | code, data = d.split(b'\0', 1) |
|
68 | 68 | if int(code): |
|
69 | 69 | raise error.LookupError(file, node, data) |
|
70 | 70 | return data |
|
71 | 71 | |
|
72 | 72 | return {b'file': file, b'node': node}, decode |
|
73 | 73 | |
|
74 | 74 | @wireprotov1peer.batchable |
|
75 | 75 | def x_rfl_getflogheads(self, path): |
|
76 | 76 | if not self.capable(b'x_rfl_getflogheads'): |
|
77 | 77 | raise error.Abort( |
|
78 | 78 | b'configured remotefile server does not ' |
|
79 | 79 | b'support getflogheads' |
|
80 | 80 | ) |
|
81 | 81 | |
|
82 | 82 | def decode(d): |
|
83 | 83 | return d.split(b'\n') if d else [] |
|
84 | 84 | |
|
85 | 85 | return {b'path': path}, decode |
|
86 | 86 | |
|
87 | 87 | def _updatecallstreamopts(self, command, opts): |
|
88 | 88 | if command != b'getbundle': |
|
89 | 89 | return |
|
90 | 90 | if ( |
|
91 | 91 | constants.NETWORK_CAP_LEGACY_SSH_GETFILES |
|
92 | 92 | not in self.capabilities() |
|
93 | 93 | ): |
|
94 | 94 | return |
|
95 | 95 | if not hasattr(self, '_localrepo'): |
|
96 | 96 | return |
|
97 | 97 | if ( |
|
98 | 98 | constants.SHALLOWREPO_REQUIREMENT |
|
99 | 99 | not in self._localrepo.requirements |
|
100 | 100 | ): |
|
101 | 101 | return |
|
102 | 102 | |
|
103 | 103 | bundlecaps = opts.get(b'bundlecaps') |
|
104 | 104 | if bundlecaps: |
|
105 | 105 | bundlecaps = [bundlecaps] |
|
106 | 106 | else: |
|
107 | 107 | bundlecaps = [] |
|
108 | 108 | |
|
109 | 109 | # shallow, includepattern, and excludepattern are a hacky way of |
|
110 | 110 | # carrying over data from the local repo to this getbundle |
|
111 | 111 | # command. We need to do it this way because bundle1 getbundle |
|
112 | 112 | # doesn't provide any other place we can hook in to manipulate |
|
113 | 113 | # getbundle args before it goes across the wire. Once we get rid |
|
114 | 114 | # of bundle1, we can use bundle2's _pullbundle2extraprepare to |
|
115 | 115 | # do this more cleanly. |
|
116 | 116 | bundlecaps.append(constants.BUNDLE2_CAPABLITY) |
|
117 | 117 | if self._localrepo.includepattern: |
|
118 | 118 | patterns = b'\0'.join(self._localrepo.includepattern) |
|
119 | 119 | includecap = b"includepattern=" + patterns |
|
120 | 120 | bundlecaps.append(includecap) |
|
121 | 121 | if self._localrepo.excludepattern: |
|
122 | 122 | patterns = b'\0'.join(self._localrepo.excludepattern) |
|
123 | 123 | excludecap = b"excludepattern=" + patterns |
|
124 | 124 | bundlecaps.append(excludecap) |
|
125 | 125 | opts[b'bundlecaps'] = b','.join(bundlecaps) |
|
126 | 126 | |
|
127 | 127 | def _sendrequest(self, command, args, **opts): |
|
128 | 128 | self._updatecallstreamopts(command, args) |
|
129 | 129 | return super(remotefilepeer, self)._sendrequest( |
|
130 | 130 | command, args, **opts |
|
131 | 131 | ) |
|
132 | 132 | |
|
133 | 133 | def _callstream(self, command, **opts): |
|
134 | 134 | supertype = super(remotefilepeer, self) |
|
135 | 135 | if not hasattr(supertype, '_sendrequest'): |
|
136 | 136 | self._updatecallstreamopts(command, pycompat.byteskwargs(opts)) |
|
137 | 137 | return super(remotefilepeer, self)._callstream(command, **opts) |
|
138 | 138 | |
|
139 | 139 | peer.__class__ = remotefilepeer |
|
140 | 140 | |
|
141 | 141 | |
|
142 | 142 | class cacheconnection: |
|
143 | 143 | """The connection for communicating with the remote cache. Performs |
|
144 | 144 | gets and sets by communicating with an external process that has the |
|
145 | 145 | cache-specific implementation. |
|
146 | 146 | """ |
|
147 | 147 | |
|
148 | 148 | def __init__(self): |
|
149 | 149 | self.pipeo = self.pipei = self.pipee = None |
|
150 | 150 | self.subprocess = None |
|
151 | 151 | self.connected = False |
|
152 | 152 | |
|
153 | 153 | def connect(self, cachecommand): |
|
154 | 154 | if self.pipeo: |
|
155 | 155 | raise error.Abort(_(b"cache connection already open")) |
|
156 | 156 | self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4( |
|
157 | 157 | cachecommand |
|
158 | 158 | ) |
|
159 | 159 | self.connected = True |
|
160 | 160 | |
|
161 | 161 | def close(self): |
|
162 | 162 | def tryclose(pipe): |
|
163 | 163 | try: |
|
164 | 164 | pipe.close() |
|
165 | 165 | except Exception: |
|
166 | 166 | pass |
|
167 | 167 | |
|
168 | 168 | if self.connected: |
|
169 | 169 | try: |
|
170 | 170 | self.pipei.write(b"exit\n") |
|
171 | 171 | except Exception: |
|
172 | 172 | pass |
|
173 | 173 | tryclose(self.pipei) |
|
174 | 174 | self.pipei = None |
|
175 | 175 | tryclose(self.pipeo) |
|
176 | 176 | self.pipeo = None |
|
177 | 177 | tryclose(self.pipee) |
|
178 | 178 | self.pipee = None |
|
179 | 179 | try: |
|
180 | 180 | # Wait for process to terminate, making sure to avoid deadlock. |
|
181 | 181 | # See https://docs.python.org/2/library/subprocess.html for |
|
182 | 182 | # warnings about wait() and deadlocking. |
|
183 | 183 | self.subprocess.communicate() |
|
184 | 184 | except Exception: |
|
185 | 185 | pass |
|
186 | 186 | self.subprocess = None |
|
187 | 187 | self.connected = False |
|
188 | 188 | |
|
189 | 189 | def request(self, request, flush=True): |
|
190 | 190 | if self.connected: |
|
191 | 191 | try: |
|
192 | 192 | self.pipei.write(request) |
|
193 | 193 | if flush: |
|
194 | 194 | self.pipei.flush() |
|
195 | 195 | except IOError: |
|
196 | 196 | self.close() |
|
197 | 197 | |
|
198 | 198 | def receiveline(self): |
|
199 | 199 | if not self.connected: |
|
200 | 200 | return None |
|
201 | 201 | try: |
|
202 | 202 | result = self.pipeo.readline()[:-1] |
|
203 | 203 | if not result: |
|
204 | 204 | self.close() |
|
205 | 205 | except IOError: |
|
206 | 206 | self.close() |
|
207 | 207 | |
|
208 | 208 | return result |
|
209 | 209 | |
|
210 | 210 | |
|
211 | 211 | def _getfilesbatch( |
|
212 | 212 | remote, receivemissing, progresstick, missed, idmap, batchsize |
|
213 | 213 | ): |
|
214 | 214 | # Over http(s), iterbatch is a streamy method and we can start |
|
215 | 215 | # looking at results early. This means we send one (potentially |
|
216 | 216 | # large) request, but then we show nice progress as we process |
|
217 | 217 | # file results, rather than showing chunks of $batchsize in |
|
218 | 218 | # progress. |
|
219 | 219 | # |
|
220 | 220 | # Over ssh, iterbatch isn't streamy because batch() wasn't |
|
221 | 221 | # explicitly designed as a streaming method. In the future we |
|
222 | 222 | # should probably introduce a streambatch() method upstream and |
|
223 | 223 | # use that for this. |
|
224 | 224 | with remote.commandexecutor() as e: |
|
225 | 225 | futures = [] |
|
226 | 226 | for m in missed: |
|
227 | 227 | futures.append( |
|
228 | 228 | e.callcommand( |
|
229 | 229 | b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]} |
|
230 | 230 | ) |
|
231 | 231 | ) |
|
232 | 232 | |
|
233 | 233 | for i, m in enumerate(missed): |
|
234 | 234 | r = futures[i].result() |
|
235 | 235 | futures[i] = None # release memory |
|
236 | 236 | file_ = idmap[m] |
|
237 | 237 | node = m[-40:] |
|
238 | 238 | receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node) |
|
239 | 239 | progresstick() |
|
240 | 240 | |
|
241 | 241 | |
|
242 | 242 | def _getfiles_optimistic( |
|
243 | 243 | remote, receivemissing, progresstick, missed, idmap, step |
|
244 | 244 | ): |
|
245 | 245 | remote._callstream(b"x_rfl_getfiles") |
|
246 | 246 | i = 0 |
|
247 | 247 | pipeo = remote._pipeo |
|
248 | 248 | pipei = remote._pipei |
|
249 | 249 | while i < len(missed): |
|
250 | 250 | # issue a batch of requests |
|
251 | 251 | start = i |
|
252 | 252 | end = min(len(missed), start + step) |
|
253 | 253 | i = end |
|
254 | 254 | for missingid in missed[start:end]: |
|
255 | 255 | # issue new request |
|
256 | 256 | versionid = missingid[-40:] |
|
257 | 257 | file = idmap[missingid] |
|
258 | 258 | sshrequest = b"%s%s\n" % (versionid, file) |
|
259 | 259 | pipeo.write(sshrequest) |
|
260 | 260 | pipeo.flush() |
|
261 | 261 | |
|
262 | 262 | # receive batch results |
|
263 | 263 | for missingid in missed[start:end]: |
|
264 | 264 | versionid = missingid[-40:] |
|
265 | 265 | file = idmap[missingid] |
|
266 | 266 | receivemissing(pipei, file, versionid) |
|
267 | 267 | progresstick() |
|
268 | 268 | |
|
269 | 269 | # End the command |
|
270 | 270 | pipeo.write(b'\n') |
|
271 | 271 | pipeo.flush() |
|
272 | 272 | |
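# Sketch of the key format and windowing used by _getfiles_optimistic():
# each cache key ends in a 40-character hex node (hence missingid[-40:]),
# and requests are written in windows of `step` before the matching
# replies are drained, so at most one window is in flight at a time.
# The windows() helper is hypothetical.
node = b'a' * 40
missingid = b'somerepo/packedkey' + node
assert missingid[-40:] == node

def windows(items, step):
    for start in range(0, len(items), step):
        yield items[start:start + step]

assert list(windows([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]]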
|
273 | 273 | |
|
274 | 274 | def _getfiles_threaded( |
|
275 | 275 | remote, receivemissing, progresstick, missed, idmap, step |
|
276 | 276 | ): |
|
277 | 277 | remote._callstream(b"x_rfl_getfiles") |
|
278 | 278 | pipeo = remote._pipeo |
|
279 | 279 | pipei = remote._pipei |
|
280 | 280 | |
|
281 | 281 | def writer(): |
|
282 | 282 | for missingid in missed: |
|
283 | 283 | versionid = missingid[-40:] |
|
284 | 284 | file = idmap[missingid] |
|
285 | 285 | sshrequest = b"%s%s\n" % (versionid, file) |
|
286 | 286 | pipeo.write(sshrequest) |
|
287 | 287 | pipeo.flush() |
|
288 | 288 | |
|
289 | 289 | writerthread = threading.Thread(target=writer) |
|
290 | 290 | writerthread.daemon = True |
|
291 | 291 | writerthread.start() |
|
292 | 292 | |
|
293 | 293 | for missingid in missed: |
|
294 | 294 | versionid = missingid[-40:] |
|
295 | 295 | file = idmap[missingid] |
|
296 | 296 | receivemissing(pipei, file, versionid) |
|
297 | 297 | progresstick() |
|
298 | 298 | |
|
299 | 299 | writerthread.join() |
|
300 | 300 | # End the command |
|
301 | 301 | pipeo.write(b'\n') |
|
302 | 302 | pipeo.flush() |
|
303 | 303 | |
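# The producer/consumer split in _getfiles_threaded(), reduced to a
# self-contained sketch: a daemon thread keeps the request channel full
# while the main thread consumes replies in the same order. A queue
# stands in for the ssh pipes here; nothing below is the extension's API.
import queue
import threading

channel = queue.Queue()

def writer(ids):
    for missingid in ids:
        channel.put(missingid)  # stands in for pipeo.write(sshrequest)

requests = [b'node1', b'node2', b'node3']
writerthread = threading.Thread(target=writer, args=(requests,))
writerthread.daemon = True
writerthread.start()
for _ in requests:
    channel.get()  # stands in for receivemissing(pipei, file, versionid)
writerthread.join()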
|
304 | 304 | |
|
305 | 305 | class fileserverclient: |
|
306 | 306 | """A client for requesting files from the remote file server.""" |
|
307 | 307 | |
|
308 | 308 | def __init__(self, repo): |
|
309 | 309 | ui = repo.ui |
|
310 | 310 | self.repo = repo |
|
311 | 311 | self.ui = ui |
|
312 | 312 | self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess") |
|
313 | 313 | if self.cacheprocess: |
|
314 | 314 | self.cacheprocess = util.expandpath(self.cacheprocess) |
|
315 | 315 | |
|
316 | 316 | # This option causes remotefilelog to pass the full file path to the |
|
317 | 317 | # cacheprocess instead of a hashed key. |
|
318 | 318 | self.cacheprocesspasspath = ui.configbool( |
|
319 | 319 | b"remotefilelog", b"cacheprocess.includepath" |
|
320 | 320 | ) |
|
321 | 321 | |
|
322 | 322 | self.debugoutput = ui.configbool(b"remotefilelog", b"debug") |
|
323 | 323 | |
|
324 | 324 | self.remotecache = cacheconnection() |
|
325 | 325 | |
|
326 | 326 | def setstore(self, datastore, historystore, writedata, writehistory): |
|
327 | 327 | self.datastore = datastore |
|
328 | 328 | self.historystore = historystore |
|
329 | 329 | self.writedata = writedata |
|
330 | 330 | self.writehistory = writehistory |
|
331 | 331 | |
|
332 | 332 | def _connect(self): |
|
333 | 333 | return self.repo.connectionpool.get(self.repo.fallbackpath) |
|
334 | 334 | |
|
335 | 335 | def request(self, fileids): |
|
336 | 336 | """Takes a list of filename/node pairs and fetches them from the |
|
337 | 337 | server. Files are stored in the local cache. |
|
338 | 338 | A list of nodes that the server couldn't find is returned. |
|
339 | 339 | If the connection fails, an exception is raised. |
|
340 | 340 | """ |
|
341 | 341 | if not self.remotecache.connected: |
|
342 | 342 | self.connect() |
|
343 | 343 | cache = self.remotecache |
|
344 | 344 | writedata = self.writedata |
|
345 | 345 | |
|
346 | 346 | repo = self.repo |
|
347 | 347 | total = len(fileids) |
|
348 | 348 | request = b"get\n%d\n" % total |
|
349 | 349 | idmap = {} |
|
350 | 350 | reponame = repo.name |
|
351 | 351 | for file, id in fileids: |
|
352 | 352 | fullid = getcachekey(reponame, file, id) |
|
353 | 353 | if self.cacheprocesspasspath: |
|
354 | 354 | request += file + b'\0' |
|
355 | 355 | request += fullid + b"\n" |
|
356 | 356 | idmap[fullid] = file |
|
357 | 357 | |
|
358 | 358 | cache.request(request) |
|
359 | 359 | |
|
360 | 360 | progress = self.ui.makeprogress(_(b'downloading'), total=total) |
|
361 | 361 | progress.update(0) |
|
362 | 362 | |
|
363 | 363 | missed = [] |
|
364 | 364 | while True: |
|
365 | 365 | missingid = cache.receiveline() |
|
366 | 366 | if not missingid: |
|
367 | 367 | missedset = set(missed) |
|
368 | 368 | for missingid in idmap: |
|
369 | 369 | if missingid not in missedset:
|
370 | 370 | missed.append(missingid) |
|
371 | 371 | self.ui.warn( |
|
372 | 372 | _( |
|
373 | 373 | b"warning: cache connection closed early - " |
|
374 | 374 | + b"falling back to server\n" |
|
375 | 375 | ) |
|
376 | 376 | ) |
|
377 | 377 | break |
|
378 | 378 | if missingid == b"0": |
|
379 | 379 | break |
|
380 | 380 | if missingid.startswith(b"_hits_"): |
|
381 | 381 | # receive progress reports |
|
382 | 382 | parts = missingid.split(b"_") |
|
383 | 383 | progress.increment(int(parts[2])) |
|
384 | 384 | continue |
|
385 | 385 | |
|
386 | 386 | missed.append(missingid) |
|
387 | 387 | |
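# The cache-process exchange implemented above, as a standalone sketch:
# the client sends b"get\n<count>\n<key>\n..." and then reads lines
# until b"0" (all keys answered) or EOF (fall back to the server);
# b"_hits_<n>_..." lines are progress reports, and every other line
# names a missing key. parse_reply() is hypothetical.
def parse_reply(lines):
    missed = []
    for line in lines:
        if line == b"0":
            break
        if line.startswith(b"_hits_"):
            continue  # e.g. b"_hits_10_" -> 10 more cache hits
        missed.append(line)
    return missed

assert parse_reply([b"_hits_2_", b"key1", b"0"]) == [b"key1"]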
|
388 | 388 | global fetchmisses |
|
389 | 389 | fetchmisses += len(missed) |
|
390 | 390 | |
|
391 | 391 | fromcache = total - len(missed) |
|
392 | 392 | progress.update(fromcache, total=total) |
|
393 | 393 | self.ui.log( |
|
394 | 394 | b"remotefilelog", |
|
395 | 395 | b"remote cache hit rate is %r of %r\n", |
|
396 | 396 | fromcache, |
|
397 | 397 | total, |
|
398 | 398 | hit=fromcache, |
|
399 | 399 | total=total, |
|
400 | 400 | ) |
|
401 | 401 | |
|
402 | 402 | oldumask = os.umask(0o002) |
|
403 | 403 | try: |
|
404 | 404 | # receive cache misses from master |
|
405 | 405 | if missed: |
|
406 | 406 | # When verbose is true, sshpeer prints 'running ssh...' |
|
407 | 407 | # to stdout, which can interfere with some command |
|
408 | 408 | # outputs |
|
409 | 409 | verbose = self.ui.verbose |
|
410 | 410 | self.ui.verbose = False |
|
411 | 411 | try: |
|
412 | 412 | with self._connect() as conn: |
|
413 | 413 | remote = conn.peer |
|
414 | 414 | if remote.capable( |
|
415 | 415 | constants.NETWORK_CAP_LEGACY_SSH_GETFILES |
|
416 | 416 | ): |
|
417 | 417 | if not isinstance(remote, _sshv1peer): |
|
418 | 418 | raise error.Abort( |
|
419 | 419 | b'remotefilelog requires ssh servers' |
|
420 | 420 | ) |
|
421 | 421 | step = self.ui.configint( |
|
422 | 422 | b'remotefilelog', b'getfilesstep' |
|
423 | 423 | ) |
|
424 | 424 | getfilestype = self.ui.config( |
|
425 | 425 | b'remotefilelog', b'getfilestype' |
|
426 | 426 | ) |
|
427 | 427 | if getfilestype == b'threaded': |
|
428 | 428 | _getfiles = _getfiles_threaded |
|
429 | 429 | else: |
|
430 | 430 | _getfiles = _getfiles_optimistic |
|
431 | 431 | _getfiles( |
|
432 | 432 | remote, |
|
433 | 433 | self.receivemissing, |
|
434 | 434 | progress.increment, |
|
435 | 435 | missed, |
|
436 | 436 | idmap, |
|
437 | 437 | step, |
|
438 | 438 | ) |
|
439 | 439 | elif remote.capable(b"x_rfl_getfile"): |
|
440 | 440 | if remote.capable(b'batch'): |
|
441 | 441 | batchdefault = 100 |
|
442 | 442 | else: |
|
443 | 443 | batchdefault = 10 |
|
444 | 444 | batchsize = self.ui.configint( |
|
445 | 445 | b'remotefilelog', b'batchsize', batchdefault |
|
446 | 446 | ) |
|
447 | 447 | self.ui.debug( |
|
448 | 448 | b'requesting %d files from ' |
|
449 | 449 | b'remotefilelog server...\n' % len(missed) |
|
450 | 450 | ) |
|
451 | 451 | _getfilesbatch( |
|
452 | 452 | remote, |
|
453 | 453 | self.receivemissing, |
|
454 | 454 | progress.increment, |
|
455 | 455 | missed, |
|
456 | 456 | idmap, |
|
457 | 457 | batchsize, |
|
458 | 458 | ) |
|
459 | 459 | else: |
|
460 | 460 | raise error.Abort( |
|
461 | 461 | b"configured remotefilelog server" |
|
462 | 462 | b" does not support remotefilelog" |
|
463 | 463 | ) |
|
464 | 464 | |
|
465 | 465 | self.ui.log( |
|
466 | 466 | b"remotefilefetchlog", |
|
467 | 467 | b"Success\n", |
|
468 | 468 | fetched_files=progress.pos - fromcache, |
|
469 | 469 | total_to_fetch=total - fromcache, |
|
470 | 470 | ) |
|
471 | 471 | except Exception: |
|
472 | 472 | self.ui.log( |
|
473 | 473 | b"remotefilefetchlog", |
|
474 | 474 | b"Fail\n", |
|
475 | 475 | fetched_files=progress.pos - fromcache, |
|
476 | 476 | total_to_fetch=total - fromcache, |
|
477 | 477 | ) |
|
478 | 478 | raise |
|
479 | 479 | finally: |
|
480 | 480 | self.ui.verbose = verbose |
|
481 | 481 | # send to memcache |
|
482 | 482 | request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed)) |
|
483 | 483 | cache.request(request) |
|
484 | 484 | |
|
485 | 485 | progress.complete() |
|
486 | 486 | |
|
487 | 487 | # mark ourselves as a user of this cache |
|
488 | 488 | writedata.markrepo(self.repo.path) |
|
489 | 489 | finally: |
|
490 | 490 | os.umask(oldumask) |
|
491 | 491 | |
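# The umask save/restore idiom wrapped around the fetch above, extracted
# into a reusable sketch (not part of the extension):
import os
from contextlib import contextmanager

@contextmanager
def umask(value):
    old = os.umask(value)  # os.umask returns the previous mask
    try:
        yield
    finally:
        os.umask(old)

# usage: with umask(0o002): ...  # leave cache files group-writable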
|
492 | 492 | def receivemissing(self, pipe, filename, node): |
|
493 | 493 | line = pipe.readline()[:-1] |
|
494 | 494 | if not line: |
|
495 | 495 | raise error.ResponseError( |
|
496 | 496 | _(b"error downloading file contents:"), |
|
497 | 497 | _(b"connection closed early"), |
|
498 | 498 | ) |
|
499 | 499 | size = int(line) |
|
500 | 500 | data = pipe.read(size) |
|
501 | 501 | if len(data) != size: |
|
502 | 502 | raise error.ResponseError( |
|
503 | 503 | _(b"error downloading file contents:"), |
|
504 | 504 | _(b"only received %s of %s bytes") % (len(data), size), |
|
505 | 505 | ) |
|
506 | 506 | |
|
507 | 507 | self.writedata.addremotefilelognode( |
|
508 | 508 | filename, bin(node), zlib.decompress(data) |
|
509 | 509 | ) |
|
510 | 510 | |
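# Round trip of the payload format receivemissing() consumes: a decimal
# size line, then exactly `size` bytes of zlib-compressed file contents.
# Sketch only; io.BytesIO stands in for the pipe.
import io
import zlib

blob = zlib.compress(b'file contents')
pipe = io.BytesIO(b'%d\n%s' % (len(blob), blob))

size = int(pipe.readline()[:-1])
data = pipe.read(size)
assert len(data) == size
assert zlib.decompress(data) == b'file contents'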
|
511 | 511 | def connect(self): |
|
512 | 512 | if self.cacheprocess: |
|
513 | 513 | cmd = b"%s %s" % (self.cacheprocess, self.writedata._path) |
|
514 | 514 | self.remotecache.connect(cmd) |
|
515 | 515 | else: |
|
516 | 516 | # If no cache process is specified, we fake one that always |
|
517 | 517 | # returns cache misses. This enables tests to run easily |
|
518 | 518 | # and may eventually allow us to be a drop in replacement |
|
519 | 519 | # for the largefiles extension. |
|
520 | 520 | class simplecache: |
|
521 | 521 | def __init__(self): |
|
522 | 522 | self.missingids = [] |
|
523 | 523 | self.connected = True |
|
524 | 524 | |
|
525 | 525 | def close(self): |
|
526 | 526 | pass |
|
527 | 527 | |
|
528 | 528 | def request(self, value, flush=True): |
|
529 | 529 | lines = value.split(b"\n") |
|
530 | 530 | if lines[0] != b"get": |
|
531 | 531 | return |
|
532 | 532 | self.missingids = lines[2:-1] |
|
533 | 533 | self.missingids.append(b'0') |
|
534 | 534 | |
|
535 | 535 | def receiveline(self): |
|
536 | 536 | if len(self.missingids) > 0: |
|
537 | 537 | return self.missingids.pop(0) |
|
538 | 538 | return None |
|
539 | 539 | |
|
540 | 540 | self.remotecache = simplecache() |
|
541 | 541 | |
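# What the fake cache above does, shown as a usage sketch: every "get"
# request is answered with each requested key as a miss, terminated by
# b'0', so callers always fall back to the server. (Assumes the
# simplecache class defined above is in scope.)
cache = simplecache()
cache.request(b"get\n2\nkey1\nkey2\n")
assert cache.receiveline() == b"key1"
assert cache.receiveline() == b"key2"
assert cache.receiveline() == b"0"
assert cache.receiveline() is None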
|
542 | 542 | def close(self): |
|
543 | 543 | if fetches: |
|
544 | 544 | msg = ( |
|
545 | 545 | b"%d files fetched over %d fetches - " |
|
546 | 546 | + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n" |
|
547 | 547 | ) % ( |
|
548 | 548 | fetched, |
|
549 | 549 | fetches, |
|
550 | 550 | fetchmisses, |
|
551 | 551 | float(fetched - fetchmisses) / float(fetched) * 100.0, |
|
552 | 552 | fetchcost, |
|
553 | 553 | ) |
|
554 | 554 | if self.debugoutput: |
|
555 | 555 | self.ui.warn(msg) |
|
556 | 556 | self.ui.log( |
|
557 | 557 | b"remotefilelog.prefetch", |
|
558 | 558 | msg.replace(b"%", b"%%"), |
|
559 | 559 | remotefilelogfetched=fetched, |
|
560 | 560 | remotefilelogfetches=fetches, |
|
561 | 561 | remotefilelogfetchmisses=fetchmisses, |
|
562 | 562 | remotefilelogfetchtime=fetchcost * 1000, |
|
563 | 563 | ) |
|
564 | 564 | |
|
565 | 565 | if self.remotecache.connected: |
|
566 | 566 | self.remotecache.close() |
|
567 | 567 | |
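# Worked example of the hit-ratio formula logged by close() above, with
# made-up numbers:
fetched, fetchmisses = 200, 50
ratio = float(fetched - fetchmisses) / float(fetched) * 100.0
assert ratio == 75.0  # 150 of 200 requested files came from the cache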
|
568 | 568 | def prefetch( |
|
569 | 569 | self, fileids, force=False, fetchdata=True, fetchhistory=False |
|
570 | 570 | ): |
|
571 | 571 | """downloads the given file versions to the cache""" |
|
572 | 572 | repo = self.repo |
|
573 | 573 | idstocheck = [] |
|
574 | 574 | for file, id in fileids: |
|
575 | 575 | # hack |
|
576 | 576 | # - we don't use .hgtags |
|
577 | 577 | # - workingctx produces ids with length 42, |
|
578 | 578 | # which we skip since they aren't in any cache |
|
579 | 579 | if ( |
|
580 | 580 | file == b'.hgtags' |
|
581 | 581 | or len(id) == 42 |
|
582 | 582 | or not repo.shallowmatch(file) |
|
583 | 583 | ): |
|
584 | 584 | continue |
|
585 | 585 | |
|
586 | 586 | idstocheck.append((file, bin(id))) |
|
587 | 587 | |
|
588 | 588 | datastore = self.datastore |
|
589 | 589 | historystore = self.historystore |
|
590 | 590 | if force: |
|
591 | 591 | datastore = contentstore.unioncontentstore(*repo.shareddatastores) |
|
592 | 592 | historystore = metadatastore.unionmetadatastore( |
|
593 | 593 | *repo.sharedhistorystores |
|
594 | 594 | ) |
|
595 | 595 | |
|
596 | 596 | missingids = set() |
|
597 | 597 | if fetchdata: |
|
598 | 598 | missingids.update(datastore.getmissing(idstocheck)) |
|
599 | 599 | if fetchhistory: |
|
600 | 600 | missingids.update(historystore.getmissing(idstocheck)) |
|
601 | 601 | |
|
602 | 602 | # partition missing nodes into nullid and not-nullid so we can |
|
603 | 603 | # warn about this filtering potentially shadowing bugs. |
|
604 | 604 | nullids = len( |
|
605 | 605 | [None for unused, id in missingids if id == self.repo.nullid] |
|
606 | 606 | ) |
|
607 | 607 | if nullids: |
|
608 | 608 | missingids = [ |
|
609 | 609 | (f, id) for f, id in missingids if id != self.repo.nullid |
|
610 | 610 | ] |
|
611 | 611 | repo.ui.develwarn( |
|
612 | 612 | ( |
|
613 | 613 | b'remotefilelog not fetching %d null revs' |
|
614 | 614 | b' - this is likely hiding bugs' % nullids |
|
615 | 615 | ), |
|
616 | 616 | config=b'remotefilelog-ext', |
|
617 | 617 | ) |
|
618 | 618 | if missingids: |
|
619 | 619 | global fetches, fetched, fetchcost |
|
620 | 620 | fetches += 1 |
|
621 | 621 | |
|
622 | 622 | # We want to be able to detect excess individual file downloads, so |
|
623 | 623 | # let's log that information for debugging. |
|
624 | 624 | if 15 <= fetches < 18:
|
625 | 625 | if fetches == 15: |
|
626 | 626 | fetchwarning = self.ui.config( |
|
627 | 627 | b'remotefilelog', b'fetchwarning' |
|
628 | 628 | ) |
|
629 | 629 | if fetchwarning: |
|
630 | 630 | self.ui.warn(fetchwarning + b'\n') |
|
631 | 631 | self.logstacktrace() |
|
632 | 632 | missingids = [(file, hex(id)) for file, id in sorted(missingids)] |
|
633 | 633 | fetched += len(missingids) |
|
634 | 634 | start = time.time() |
|
635 | 635 | missingids = self.request(missingids) |
|
636 | 636 | if missingids: |
|
637 | 637 | raise error.Abort( |
|
638 | 638 | _(b"unable to download %d files") % len(missingids) |
|
639 | 639 | ) |
|
640 | 640 | fetchcost += time.time() - start |
|
641 | 641 | self._lfsprefetch(fileids) |
|
642 | 642 | |
|
643 | 643 | def _lfsprefetch(self, fileids): |
|
644 | 644 | if not _lfsmod or not hasattr(self.repo.svfs, 'lfslocalblobstore'):
|
645 | 645 | return |
|
646 | 646 | if not _lfsmod.wrapper.candownload(self.repo): |
|
647 | 647 | return |
|
648 | 648 | pointers = [] |
|
649 | 649 | store = self.repo.svfs.lfslocalblobstore |
|
650 | 650 | for file, id in fileids: |
|
651 | 651 | node = bin(id) |
|
652 | 652 | rlog = self.repo.file(file) |
|
653 | 653 | if rlog.flags(node) & revlog.REVIDX_EXTSTORED: |
|
654 | 654 | text = rlog.rawdata(node) |
|
655 | 655 | p = _lfsmod.pointer.deserialize(text) |
|
656 | 656 | oid = p.oid() |
|
657 | 657 | if not store.has(oid): |
|
658 | 658 | pointers.append(p) |
|
659 | 659 | if len(pointers) > 0: |
|
660 | 660 | self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store) |
|
661 | 661 | assert all(store.has(p.oid()) for p in pointers) |
|
662 | 662 | |
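# The flag test _lfsprefetch() performs, sketched with an assumed flag
# value: REVIDX_EXTSTORED marks a revision whose stored text is an LFS
# pointer rather than the file data (the real constant lives in
# mercurial.revlog).
REVIDX_EXTSTORED = 1 << 13  # assumed value, for illustration only
flags = REVIDX_EXTSTORED | (1 << 15)
assert flags & REVIDX_EXTSTORED  # revision data is externally stored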
|
663 | 663 | def logstacktrace(self): |
|
664 | 664 | import traceback |
|
665 | 665 | |
|
666 | 666 | self.ui.log( |
|
667 | 667 | b'remotefilelog', |
|
668 | 668 | b'excess remotefilelog fetching:\n%s\n', |
|
669 | 669 | b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()), |
|
670 | 670 | ) |