Show More
@@ -1,263 +1,262 | |||||
1 | # Copyright 2016-present Facebook. All Rights Reserved. |
|
1 | # Copyright 2016-present Facebook. All Rights Reserved. | |
2 | # |
|
2 | # | |
3 | # protocol: logic for a server providing fastannotate support |
|
3 | # protocol: logic for a server providing fastannotate support | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 | from __future__ import annotations |
|
7 | from __future__ import annotations | |
8 |
|
8 | |||
9 | import contextlib |
|
9 | import contextlib | |
10 | import os |
|
10 | import os | |
11 |
|
11 | |||
12 | from mercurial.i18n import _ |
|
12 | from mercurial.i18n import _ | |
13 | from mercurial.pycompat import open |
|
|||
14 | from mercurial import ( |
|
13 | from mercurial import ( | |
15 | error, |
|
14 | error, | |
16 | extensions, |
|
15 | extensions, | |
17 | hg, |
|
16 | hg, | |
18 | util, |
|
17 | util, | |
19 | wireprotov1peer, |
|
18 | wireprotov1peer, | |
20 | wireprotov1server, |
|
19 | wireprotov1server, | |
21 | ) |
|
20 | ) | |
22 | from mercurial.utils import ( |
|
21 | from mercurial.utils import ( | |
23 | urlutil, |
|
22 | urlutil, | |
24 | ) |
|
23 | ) | |
25 | from . import context |
|
24 | from . import context | |
26 |
|
25 | |||
27 | # common |
|
26 | # common | |
28 |
|
27 | |||
29 |
|
28 | |||
30 | def _getmaster(ui): |
|
29 | def _getmaster(ui): | |
31 | """get the mainbranch, and enforce it is set""" |
|
30 | """get the mainbranch, and enforce it is set""" | |
32 | master = ui.config(b'fastannotate', b'mainbranch') |
|
31 | master = ui.config(b'fastannotate', b'mainbranch') | |
33 | if not master: |
|
32 | if not master: | |
34 | raise error.Abort( |
|
33 | raise error.Abort( | |
35 | _( |
|
34 | _( | |
36 | b'fastannotate.mainbranch is required ' |
|
35 | b'fastannotate.mainbranch is required ' | |
37 | b'for both the client and the server' |
|
36 | b'for both the client and the server' | |
38 | ) |
|
37 | ) | |
39 | ) |
|
38 | ) | |
40 | return master |
|
39 | return master | |
41 |
|
40 | |||
42 |
|
41 | |||
43 | # server-side |
|
42 | # server-side | |
44 |
|
43 | |||
45 |
|
44 | |||
46 | def _capabilities(orig, repo, proto): |
|
45 | def _capabilities(orig, repo, proto): | |
47 | result = orig(repo, proto) |
|
46 | result = orig(repo, proto) | |
48 | result.append(b'getannotate') |
|
47 | result.append(b'getannotate') | |
49 | return result |
|
48 | return result | |
50 |
|
49 | |||
51 |
|
50 | |||
52 | def _getannotate(repo, proto, path, lastnode): |
|
51 | def _getannotate(repo, proto, path, lastnode): | |
53 | # output: |
|
52 | # output: | |
54 | # FILE := vfspath + '\0' + str(size) + '\0' + content |
|
53 | # FILE := vfspath + '\0' + str(size) + '\0' + content | |
55 | # OUTPUT := '' | FILE + OUTPUT |
|
54 | # OUTPUT := '' | FILE + OUTPUT | |
56 | result = b'' |
|
55 | result = b'' | |
57 | buildondemand = repo.ui.configbool( |
|
56 | buildondemand = repo.ui.configbool( | |
58 | b'fastannotate', b'serverbuildondemand', True |
|
57 | b'fastannotate', b'serverbuildondemand', True | |
59 | ) |
|
58 | ) | |
60 | with context.annotatecontext(repo, path) as actx: |
|
59 | with context.annotatecontext(repo, path) as actx: | |
61 | if buildondemand: |
|
60 | if buildondemand: | |
62 | # update before responding to the client |
|
61 | # update before responding to the client | |
63 | master = _getmaster(repo.ui) |
|
62 | master = _getmaster(repo.ui) | |
64 | try: |
|
63 | try: | |
65 | if not actx.isuptodate(master): |
|
64 | if not actx.isuptodate(master): | |
66 | actx.annotate(master, master) |
|
65 | actx.annotate(master, master) | |
67 | except Exception: |
|
66 | except Exception: | |
68 | # non-fast-forward move or corrupted. rebuild automically. |
|
67 | # non-fast-forward move or corrupted. rebuild automically. | |
69 | actx.rebuild() |
|
68 | actx.rebuild() | |
70 | try: |
|
69 | try: | |
71 | actx.annotate(master, master) |
|
70 | actx.annotate(master, master) | |
72 | except Exception: |
|
71 | except Exception: | |
73 | actx.rebuild() # delete files |
|
72 | actx.rebuild() # delete files | |
74 | finally: |
|
73 | finally: | |
75 | # although the "with" context will also do a close/flush, we |
|
74 | # although the "with" context will also do a close/flush, we | |
76 | # need to do it early so we can send the correct respond to |
|
75 | # need to do it early so we can send the correct respond to | |
77 | # client. |
|
76 | # client. | |
78 | actx.close() |
|
77 | actx.close() | |
79 | # send back the full content of revmap and linelog, in the future we |
|
78 | # send back the full content of revmap and linelog, in the future we | |
80 | # may want to do some rsync-like fancy updating. |
|
79 | # may want to do some rsync-like fancy updating. | |
81 | # the lastnode check is not necessary if the client and the server |
|
80 | # the lastnode check is not necessary if the client and the server | |
82 | # agree where the main branch is. |
|
81 | # agree where the main branch is. | |
83 | if actx.lastnode != lastnode: |
|
82 | if actx.lastnode != lastnode: | |
84 | for p in [actx.revmappath, actx.linelogpath]: |
|
83 | for p in [actx.revmappath, actx.linelogpath]: | |
85 | if not os.path.exists(p): |
|
84 | if not os.path.exists(p): | |
86 | continue |
|
85 | continue | |
87 |
with open(p, |
|
86 | with open(p, 'rb') as f: | |
88 | content = f.read() |
|
87 | content = f.read() | |
89 | vfsbaselen = len(repo.vfs.base + b'/') |
|
88 | vfsbaselen = len(repo.vfs.base + b'/') | |
90 | relpath = p[vfsbaselen:] |
|
89 | relpath = p[vfsbaselen:] | |
91 | result += b'%s\0%d\0%s' % (relpath, len(content), content) |
|
90 | result += b'%s\0%d\0%s' % (relpath, len(content), content) | |
92 | return result |
|
91 | return result | |
93 |
|
92 | |||
94 |
|
93 | |||
95 | def _registerwireprotocommand(): |
|
94 | def _registerwireprotocommand(): | |
96 | if b'getannotate' in wireprotov1server.commands: |
|
95 | if b'getannotate' in wireprotov1server.commands: | |
97 | return |
|
96 | return | |
98 | wireprotov1server.wireprotocommand(b'getannotate', b'path lastnode')( |
|
97 | wireprotov1server.wireprotocommand(b'getannotate', b'path lastnode')( | |
99 | _getannotate |
|
98 | _getannotate | |
100 | ) |
|
99 | ) | |
101 |
|
100 | |||
102 |
|
101 | |||
103 | def serveruisetup(ui): |
|
102 | def serveruisetup(ui): | |
104 | _registerwireprotocommand() |
|
103 | _registerwireprotocommand() | |
105 | extensions.wrapfunction(wireprotov1server, '_capabilities', _capabilities) |
|
104 | extensions.wrapfunction(wireprotov1server, '_capabilities', _capabilities) | |
106 |
|
105 | |||
107 |
|
106 | |||
108 | # client-side |
|
107 | # client-side | |
109 |
|
108 | |||
110 |
|
109 | |||
111 | def _parseresponse(payload): |
|
110 | def _parseresponse(payload): | |
112 | result = {} |
|
111 | result = {} | |
113 | i = 0 |
|
112 | i = 0 | |
114 | l = len(payload) - 1 |
|
113 | l = len(payload) - 1 | |
115 | state = 0 # 0: vfspath, 1: size |
|
114 | state = 0 # 0: vfspath, 1: size | |
116 | vfspath = size = b'' |
|
115 | vfspath = size = b'' | |
117 | while i < l: |
|
116 | while i < l: | |
118 | ch = payload[i : i + 1] |
|
117 | ch = payload[i : i + 1] | |
119 | if ch == b'\0': |
|
118 | if ch == b'\0': | |
120 | if state == 1: |
|
119 | if state == 1: | |
121 | result[vfspath] = payload[i + 1 : i + 1 + int(size)] |
|
120 | result[vfspath] = payload[i + 1 : i + 1 + int(size)] | |
122 | i += int(size) |
|
121 | i += int(size) | |
123 | state = 0 |
|
122 | state = 0 | |
124 | vfspath = size = b'' |
|
123 | vfspath = size = b'' | |
125 | elif state == 0: |
|
124 | elif state == 0: | |
126 | state = 1 |
|
125 | state = 1 | |
127 | else: |
|
126 | else: | |
128 | if state == 1: |
|
127 | if state == 1: | |
129 | size += ch |
|
128 | size += ch | |
130 | elif state == 0: |
|
129 | elif state == 0: | |
131 | vfspath += ch |
|
130 | vfspath += ch | |
132 | i += 1 |
|
131 | i += 1 | |
133 | return result |
|
132 | return result | |
134 |
|
133 | |||
135 |
|
134 | |||
136 | def peersetup(ui, peer): |
|
135 | def peersetup(ui, peer): | |
137 | class fastannotatepeer(peer.__class__): |
|
136 | class fastannotatepeer(peer.__class__): | |
138 | @wireprotov1peer.batchable |
|
137 | @wireprotov1peer.batchable | |
139 | def getannotate(self, path, lastnode=None): |
|
138 | def getannotate(self, path, lastnode=None): | |
140 | if not self.capable(b'getannotate'): |
|
139 | if not self.capable(b'getannotate'): | |
141 | ui.warn(_(b'remote peer cannot provide annotate cache\n')) |
|
140 | ui.warn(_(b'remote peer cannot provide annotate cache\n')) | |
142 | return None, None |
|
141 | return None, None | |
143 | else: |
|
142 | else: | |
144 | args = {b'path': path, b'lastnode': lastnode or b''} |
|
143 | args = {b'path': path, b'lastnode': lastnode or b''} | |
145 | return args, _parseresponse |
|
144 | return args, _parseresponse | |
146 |
|
145 | |||
147 | peer.__class__ = fastannotatepeer |
|
146 | peer.__class__ = fastannotatepeer | |
148 |
|
147 | |||
149 |
|
148 | |||
150 | @contextlib.contextmanager |
|
149 | @contextlib.contextmanager | |
151 | def annotatepeer(repo): |
|
150 | def annotatepeer(repo): | |
152 | ui = repo.ui |
|
151 | ui = repo.ui | |
153 |
|
152 | |||
154 | remotedest = ui.config(b'fastannotate', b'remotepath', b'default') |
|
153 | remotedest = ui.config(b'fastannotate', b'remotepath', b'default') | |
155 | remotepath = urlutil.get_unique_pull_path_obj( |
|
154 | remotepath = urlutil.get_unique_pull_path_obj( | |
156 | b'fastannotate', |
|
155 | b'fastannotate', | |
157 | ui, |
|
156 | ui, | |
158 | remotedest, |
|
157 | remotedest, | |
159 | ) |
|
158 | ) | |
160 | peer = hg.peer(ui, {}, remotepath) |
|
159 | peer = hg.peer(ui, {}, remotepath) | |
161 |
|
160 | |||
162 | try: |
|
161 | try: | |
163 | yield peer |
|
162 | yield peer | |
164 | finally: |
|
163 | finally: | |
165 | peer.close() |
|
164 | peer.close() | |
166 |
|
165 | |||
167 |
|
166 | |||
168 | def clientfetch(repo, paths, lastnodemap=None, peer=None): |
|
167 | def clientfetch(repo, paths, lastnodemap=None, peer=None): | |
169 | """download annotate cache from the server for paths""" |
|
168 | """download annotate cache from the server for paths""" | |
170 | if not paths: |
|
169 | if not paths: | |
171 | return |
|
170 | return | |
172 |
|
171 | |||
173 | if peer is None: |
|
172 | if peer is None: | |
174 | with annotatepeer(repo) as peer: |
|
173 | with annotatepeer(repo) as peer: | |
175 | return clientfetch(repo, paths, lastnodemap, peer) |
|
174 | return clientfetch(repo, paths, lastnodemap, peer) | |
176 |
|
175 | |||
177 | if lastnodemap is None: |
|
176 | if lastnodemap is None: | |
178 | lastnodemap = {} |
|
177 | lastnodemap = {} | |
179 |
|
178 | |||
180 | ui = repo.ui |
|
179 | ui = repo.ui | |
181 | results = [] |
|
180 | results = [] | |
182 | with peer.commandexecutor() as batcher: |
|
181 | with peer.commandexecutor() as batcher: | |
183 | ui.debug(b'fastannotate: requesting %d files\n' % len(paths)) |
|
182 | ui.debug(b'fastannotate: requesting %d files\n' % len(paths)) | |
184 | for p in paths: |
|
183 | for p in paths: | |
185 | results.append( |
|
184 | results.append( | |
186 | batcher.callcommand( |
|
185 | batcher.callcommand( | |
187 | b'getannotate', |
|
186 | b'getannotate', | |
188 | {b'path': p, b'lastnode': lastnodemap.get(p)}, |
|
187 | {b'path': p, b'lastnode': lastnodemap.get(p)}, | |
189 | ) |
|
188 | ) | |
190 | ) |
|
189 | ) | |
191 |
|
190 | |||
192 | for result in results: |
|
191 | for result in results: | |
193 | r = result.result() |
|
192 | r = result.result() | |
194 | # TODO: pconvert these paths on the server? |
|
193 | # TODO: pconvert these paths on the server? | |
195 | r = {util.pconvert(p): v for p, v in r.items()} |
|
194 | r = {util.pconvert(p): v for p, v in r.items()} | |
196 | for path in sorted(r): |
|
195 | for path in sorted(r): | |
197 | # ignore malicious paths |
|
196 | # ignore malicious paths | |
198 | if not path.startswith(b'fastannotate/') or b'/../' in ( |
|
197 | if not path.startswith(b'fastannotate/') or b'/../' in ( | |
199 | path + b'/' |
|
198 | path + b'/' | |
200 | ): |
|
199 | ): | |
201 | ui.debug( |
|
200 | ui.debug( | |
202 | b'fastannotate: ignored malicious path %s\n' % path |
|
201 | b'fastannotate: ignored malicious path %s\n' % path | |
203 | ) |
|
202 | ) | |
204 | continue |
|
203 | continue | |
205 | content = r[path] |
|
204 | content = r[path] | |
206 | if ui.debugflag: |
|
205 | if ui.debugflag: | |
207 | ui.debug( |
|
206 | ui.debug( | |
208 | b'fastannotate: writing %d bytes to %s\n' |
|
207 | b'fastannotate: writing %d bytes to %s\n' | |
209 | % (len(content), path) |
|
208 | % (len(content), path) | |
210 | ) |
|
209 | ) | |
211 | repo.vfs.makedirs(os.path.dirname(path)) |
|
210 | repo.vfs.makedirs(os.path.dirname(path)) | |
212 | with repo.vfs(path, b'wb') as f: |
|
211 | with repo.vfs(path, b'wb') as f: | |
213 | f.write(content) |
|
212 | f.write(content) | |
214 |
|
213 | |||
215 |
|
214 | |||
216 | def _filterfetchpaths(repo, paths): |
|
215 | def _filterfetchpaths(repo, paths): | |
217 | """return a subset of paths whose history is long and need to fetch linelog |
|
216 | """return a subset of paths whose history is long and need to fetch linelog | |
218 | from the server. works with remotefilelog and non-remotefilelog repos. |
|
217 | from the server. works with remotefilelog and non-remotefilelog repos. | |
219 | """ |
|
218 | """ | |
220 | threshold = repo.ui.configint(b'fastannotate', b'clientfetchthreshold', 10) |
|
219 | threshold = repo.ui.configint(b'fastannotate', b'clientfetchthreshold', 10) | |
221 | if threshold <= 0: |
|
220 | if threshold <= 0: | |
222 | return paths |
|
221 | return paths | |
223 |
|
222 | |||
224 | result = [] |
|
223 | result = [] | |
225 | for path in paths: |
|
224 | for path in paths: | |
226 | try: |
|
225 | try: | |
227 | if len(repo.file(path)) >= threshold: |
|
226 | if len(repo.file(path)) >= threshold: | |
228 | result.append(path) |
|
227 | result.append(path) | |
229 | except Exception: # file not found etc. |
|
228 | except Exception: # file not found etc. | |
230 | result.append(path) |
|
229 | result.append(path) | |
231 |
|
230 | |||
232 | return result |
|
231 | return result | |
233 |
|
232 | |||
234 |
|
233 | |||
235 | def localreposetup(ui, repo): |
|
234 | def localreposetup(ui, repo): | |
236 | class fastannotaterepo(repo.__class__): |
|
235 | class fastannotaterepo(repo.__class__): | |
237 | def prefetchfastannotate(self, paths, peer=None): |
|
236 | def prefetchfastannotate(self, paths, peer=None): | |
238 | master = _getmaster(self.ui) |
|
237 | master = _getmaster(self.ui) | |
239 | needupdatepaths = [] |
|
238 | needupdatepaths = [] | |
240 | lastnodemap = {} |
|
239 | lastnodemap = {} | |
241 | try: |
|
240 | try: | |
242 | for path in _filterfetchpaths(self, paths): |
|
241 | for path in _filterfetchpaths(self, paths): | |
243 | with context.annotatecontext(self, path) as actx: |
|
242 | with context.annotatecontext(self, path) as actx: | |
244 | if not actx.isuptodate(master, strict=False): |
|
243 | if not actx.isuptodate(master, strict=False): | |
245 | needupdatepaths.append(path) |
|
244 | needupdatepaths.append(path) | |
246 | lastnodemap[path] = actx.lastnode |
|
245 | lastnodemap[path] = actx.lastnode | |
247 | if needupdatepaths: |
|
246 | if needupdatepaths: | |
248 | clientfetch(self, needupdatepaths, lastnodemap, peer) |
|
247 | clientfetch(self, needupdatepaths, lastnodemap, peer) | |
249 | except Exception as ex: |
|
248 | except Exception as ex: | |
250 | # could be directory not writable or so, not fatal |
|
249 | # could be directory not writable or so, not fatal | |
251 | self.ui.debug(b'fastannotate: prefetch failed: %r\n' % ex) |
|
250 | self.ui.debug(b'fastannotate: prefetch failed: %r\n' % ex) | |
252 |
|
251 | |||
253 | repo.__class__ = fastannotaterepo |
|
252 | repo.__class__ = fastannotaterepo | |
254 |
|
253 | |||
255 |
|
254 | |||
256 | def clientreposetup(ui, repo): |
|
255 | def clientreposetup(ui, repo): | |
257 | _registerwireprotocommand() |
|
256 | _registerwireprotocommand() | |
258 | if repo.local(): |
|
257 | if repo.local(): | |
259 | localreposetup(ui, repo) |
|
258 | localreposetup(ui, repo) | |
260 | # TODO: this mutates global state, but only if at least one repo |
|
259 | # TODO: this mutates global state, but only if at least one repo | |
261 | # has the extension enabled. This is probably bad for hgweb. |
|
260 | # has the extension enabled. This is probably bad for hgweb. | |
262 | if peersetup not in hg.wirepeersetupfuncs: |
|
261 | if peersetup not in hg.wirepeersetupfuncs: | |
263 | hg.wirepeersetupfuncs.append(peersetup) |
|
262 | hg.wirepeersetupfuncs.append(peersetup) |
@@ -1,260 +1,259 | |||||
1 | # Copyright 2016-present Facebook. All Rights Reserved. |
|
1 | # Copyright 2016-present Facebook. All Rights Reserved. | |
2 | # |
|
2 | # | |
3 | # revmap: trivial hg hash - linelog rev bidirectional map |
|
3 | # revmap: trivial hg hash - linelog rev bidirectional map | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | from __future__ import annotations |
|
8 | from __future__ import annotations | |
9 |
|
9 | |||
10 | import bisect |
|
10 | import bisect | |
11 | import io |
|
11 | import io | |
12 | import os |
|
12 | import os | |
13 | import struct |
|
13 | import struct | |
14 |
|
14 | |||
15 | from mercurial.node import hex |
|
15 | from mercurial.node import hex | |
16 | from mercurial.pycompat import open |
|
|||
17 | from mercurial import ( |
|
16 | from mercurial import ( | |
18 | error as hgerror, |
|
17 | error as hgerror, | |
19 | ) |
|
18 | ) | |
20 | from . import error |
|
19 | from . import error | |
21 |
|
20 | |||
22 | # the revmap file format is straightforward: |
|
21 | # the revmap file format is straightforward: | |
23 | # |
|
22 | # | |
24 | # 8 bytes: header |
|
23 | # 8 bytes: header | |
25 | # 1 byte : flag for linelog revision 1 |
|
24 | # 1 byte : flag for linelog revision 1 | |
26 | # ? bytes: (optional) '\0'-terminated path string |
|
25 | # ? bytes: (optional) '\0'-terminated path string | |
27 | # only exists if (flag & renameflag) != 0 |
|
26 | # only exists if (flag & renameflag) != 0 | |
28 | # 20 bytes: hg hash for linelog revision 1 |
|
27 | # 20 bytes: hg hash for linelog revision 1 | |
29 | # 1 byte : flag for linelog revision 2 |
|
28 | # 1 byte : flag for linelog revision 2 | |
30 | # ? bytes: (optional) '\0'-terminated path string |
|
29 | # ? bytes: (optional) '\0'-terminated path string | |
31 | # 20 bytes: hg hash for linelog revision 2 |
|
30 | # 20 bytes: hg hash for linelog revision 2 | |
32 | # .... |
|
31 | # .... | |
33 | # |
|
32 | # | |
34 | # the implementation is kinda stupid: __init__ loads the whole revmap. |
|
33 | # the implementation is kinda stupid: __init__ loads the whole revmap. | |
35 | # no laziness. benchmark shows loading 10000 revisions is about 0.015 |
|
34 | # no laziness. benchmark shows loading 10000 revisions is about 0.015 | |
36 | # seconds, which looks enough for our use-case. if this implementation |
|
35 | # seconds, which looks enough for our use-case. if this implementation | |
37 | # becomes a bottleneck, we can change it to lazily read the file |
|
36 | # becomes a bottleneck, we can change it to lazily read the file | |
38 | # from the end. |
|
37 | # from the end. | |
39 |
|
38 | |||
40 | # whether the changeset is in the side branch. i.e. not in the linear main |
|
39 | # whether the changeset is in the side branch. i.e. not in the linear main | |
41 | # branch but only got referenced by lines in merge changesets. |
|
40 | # branch but only got referenced by lines in merge changesets. | |
42 | sidebranchflag = 1 |
|
41 | sidebranchflag = 1 | |
43 |
|
42 | |||
44 | # whether the changeset changes the file path (ie. is a rename) |
|
43 | # whether the changeset changes the file path (ie. is a rename) | |
45 | renameflag = 2 |
|
44 | renameflag = 2 | |
46 |
|
45 | |||
47 | # len(mercurial.node.nullid) |
|
46 | # len(mercurial.node.nullid) | |
48 | _hshlen = 20 |
|
47 | _hshlen = 20 | |
49 |
|
48 | |||
50 |
|
49 | |||
51 | class revmap: |
|
50 | class revmap: | |
52 | """trivial hg bin hash - linelog rev bidirectional map |
|
51 | """trivial hg bin hash - linelog rev bidirectional map | |
53 |
|
52 | |||
54 | also stores a flag (uint8) for each revision, and track renames. |
|
53 | also stores a flag (uint8) for each revision, and track renames. | |
55 | """ |
|
54 | """ | |
56 |
|
55 | |||
57 | HEADER = b'REVMAP1\0' |
|
56 | HEADER = b'REVMAP1\0' | |
58 |
|
57 | |||
59 | def __init__(self, path=None): |
|
58 | def __init__(self, path=None): | |
60 | """create or load the revmap, optionally associate to a file |
|
59 | """create or load the revmap, optionally associate to a file | |
61 |
|
60 | |||
62 | if path is None, the revmap is entirely in-memory. the caller is |
|
61 | if path is None, the revmap is entirely in-memory. the caller is | |
63 | responsible for locking. concurrent writes to a same file is unsafe. |
|
62 | responsible for locking. concurrent writes to a same file is unsafe. | |
64 | the caller needs to make sure one file is associated to at most one |
|
63 | the caller needs to make sure one file is associated to at most one | |
65 | revmap object at a time.""" |
|
64 | revmap object at a time.""" | |
66 | self.path = path |
|
65 | self.path = path | |
67 | self._rev2hsh = [None] |
|
66 | self._rev2hsh = [None] | |
68 | self._rev2flag = [None] |
|
67 | self._rev2flag = [None] | |
69 | self._hsh2rev = {} |
|
68 | self._hsh2rev = {} | |
70 | # since rename does not happen frequently, do not store path for every |
|
69 | # since rename does not happen frequently, do not store path for every | |
71 | # revision. self._renamerevs can be used for bisecting. |
|
70 | # revision. self._renamerevs can be used for bisecting. | |
72 | self._renamerevs = [0] |
|
71 | self._renamerevs = [0] | |
73 | self._renamepaths = [b''] |
|
72 | self._renamepaths = [b''] | |
74 | self._lastmaxrev = -1 |
|
73 | self._lastmaxrev = -1 | |
75 | if path: |
|
74 | if path: | |
76 | if os.path.exists(path): |
|
75 | if os.path.exists(path): | |
77 | self._load() |
|
76 | self._load() | |
78 | else: |
|
77 | else: | |
79 | # write the header so "append" can do incremental updates |
|
78 | # write the header so "append" can do incremental updates | |
80 | self.flush() |
|
79 | self.flush() | |
81 |
|
80 | |||
82 | def copyfrom(self, rhs): |
|
81 | def copyfrom(self, rhs): | |
83 | """copy the map data from another revmap. do not affect self.path""" |
|
82 | """copy the map data from another revmap. do not affect self.path""" | |
84 | self._rev2hsh = rhs._rev2hsh[:] |
|
83 | self._rev2hsh = rhs._rev2hsh[:] | |
85 | self._rev2flag = rhs._rev2flag[:] |
|
84 | self._rev2flag = rhs._rev2flag[:] | |
86 | self._hsh2rev = rhs._hsh2rev.copy() |
|
85 | self._hsh2rev = rhs._hsh2rev.copy() | |
87 | self._renamerevs = rhs._renamerevs[:] |
|
86 | self._renamerevs = rhs._renamerevs[:] | |
88 | self._renamepaths = rhs._renamepaths[:] |
|
87 | self._renamepaths = rhs._renamepaths[:] | |
89 | self._lastmaxrev = -1 |
|
88 | self._lastmaxrev = -1 | |
90 |
|
89 | |||
91 | @property |
|
90 | @property | |
92 | def maxrev(self): |
|
91 | def maxrev(self): | |
93 | """return max linelog revision number""" |
|
92 | """return max linelog revision number""" | |
94 | return len(self._rev2hsh) - 1 |
|
93 | return len(self._rev2hsh) - 1 | |
95 |
|
94 | |||
96 | def append(self, hsh, sidebranch=False, path=None, flush=False): |
|
95 | def append(self, hsh, sidebranch=False, path=None, flush=False): | |
97 | """add a binary hg hash and return the mapped linelog revision. |
|
96 | """add a binary hg hash and return the mapped linelog revision. | |
98 | if flush is True, incrementally update the file. |
|
97 | if flush is True, incrementally update the file. | |
99 | """ |
|
98 | """ | |
100 | if hsh in self._hsh2rev: |
|
99 | if hsh in self._hsh2rev: | |
101 | raise error.CorruptedFileError( |
|
100 | raise error.CorruptedFileError( | |
102 | b'%r is in revmap already' % hex(hsh) |
|
101 | b'%r is in revmap already' % hex(hsh) | |
103 | ) |
|
102 | ) | |
104 | if len(hsh) != _hshlen: |
|
103 | if len(hsh) != _hshlen: | |
105 | raise hgerror.ProgrammingError( |
|
104 | raise hgerror.ProgrammingError( | |
106 | b'hsh must be %d-char long' % _hshlen |
|
105 | b'hsh must be %d-char long' % _hshlen | |
107 | ) |
|
106 | ) | |
108 | idx = len(self._rev2hsh) |
|
107 | idx = len(self._rev2hsh) | |
109 | flag = 0 |
|
108 | flag = 0 | |
110 | if sidebranch: |
|
109 | if sidebranch: | |
111 | flag |= sidebranchflag |
|
110 | flag |= sidebranchflag | |
112 | if path is not None and path != self._renamepaths[-1]: |
|
111 | if path is not None and path != self._renamepaths[-1]: | |
113 | flag |= renameflag |
|
112 | flag |= renameflag | |
114 | self._renamerevs.append(idx) |
|
113 | self._renamerevs.append(idx) | |
115 | self._renamepaths.append(path) |
|
114 | self._renamepaths.append(path) | |
116 | self._rev2hsh.append(hsh) |
|
115 | self._rev2hsh.append(hsh) | |
117 | self._rev2flag.append(flag) |
|
116 | self._rev2flag.append(flag) | |
118 | self._hsh2rev[hsh] = idx |
|
117 | self._hsh2rev[hsh] = idx | |
119 | if flush: |
|
118 | if flush: | |
120 | self.flush() |
|
119 | self.flush() | |
121 | return idx |
|
120 | return idx | |
122 |
|
121 | |||
123 | def rev2hsh(self, rev): |
|
122 | def rev2hsh(self, rev): | |
124 | """convert linelog revision to hg hash. return None if not found.""" |
|
123 | """convert linelog revision to hg hash. return None if not found.""" | |
125 | if rev > self.maxrev or rev < 0: |
|
124 | if rev > self.maxrev or rev < 0: | |
126 | return None |
|
125 | return None | |
127 | return self._rev2hsh[rev] |
|
126 | return self._rev2hsh[rev] | |
128 |
|
127 | |||
129 | def rev2flag(self, rev): |
|
128 | def rev2flag(self, rev): | |
130 | """get the flag (uint8) for a given linelog revision. |
|
129 | """get the flag (uint8) for a given linelog revision. | |
131 | return None if revision does not exist. |
|
130 | return None if revision does not exist. | |
132 | """ |
|
131 | """ | |
133 | if rev > self.maxrev or rev < 0: |
|
132 | if rev > self.maxrev or rev < 0: | |
134 | return None |
|
133 | return None | |
135 | return self._rev2flag[rev] |
|
134 | return self._rev2flag[rev] | |
136 |
|
135 | |||
137 | def rev2path(self, rev): |
|
136 | def rev2path(self, rev): | |
138 | """get the path for a given linelog revision. |
|
137 | """get the path for a given linelog revision. | |
139 | return None if revision does not exist. |
|
138 | return None if revision does not exist. | |
140 | """ |
|
139 | """ | |
141 | if rev > self.maxrev or rev < 0: |
|
140 | if rev > self.maxrev or rev < 0: | |
142 | return None |
|
141 | return None | |
143 | idx = bisect.bisect_right(self._renamerevs, rev) - 1 |
|
142 | idx = bisect.bisect_right(self._renamerevs, rev) - 1 | |
144 | return self._renamepaths[idx] |
|
143 | return self._renamepaths[idx] | |
145 |
|
144 | |||
146 | def hsh2rev(self, hsh): |
|
145 | def hsh2rev(self, hsh): | |
147 | """convert hg hash to linelog revision. return None if not found.""" |
|
146 | """convert hg hash to linelog revision. return None if not found.""" | |
148 | return self._hsh2rev.get(hsh) |
|
147 | return self._hsh2rev.get(hsh) | |
149 |
|
148 | |||
150 | def clear(self, flush=False): |
|
149 | def clear(self, flush=False): | |
151 | """make the map empty. if flush is True, write to disk""" |
|
150 | """make the map empty. if flush is True, write to disk""" | |
152 | # rev 0 is reserved, real rev starts from 1 |
|
151 | # rev 0 is reserved, real rev starts from 1 | |
153 | self._rev2hsh = [None] |
|
152 | self._rev2hsh = [None] | |
154 | self._rev2flag = [None] |
|
153 | self._rev2flag = [None] | |
155 | self._hsh2rev = {} |
|
154 | self._hsh2rev = {} | |
156 | self._rev2path = [b''] |
|
155 | self._rev2path = [b''] | |
157 | self._lastmaxrev = -1 |
|
156 | self._lastmaxrev = -1 | |
158 | if flush: |
|
157 | if flush: | |
159 | self.flush() |
|
158 | self.flush() | |
160 |
|
159 | |||
161 | def flush(self): |
|
160 | def flush(self): | |
162 | """write the state down to the file""" |
|
161 | """write the state down to the file""" | |
163 | if not self.path: |
|
162 | if not self.path: | |
164 | return |
|
163 | return | |
165 | if self._lastmaxrev == -1: # write the entire file |
|
164 | if self._lastmaxrev == -1: # write the entire file | |
166 |
with open(self.path, |
|
165 | with open(self.path, 'wb') as f: | |
167 | f.write(self.HEADER) |
|
166 | f.write(self.HEADER) | |
168 | for i in range(1, len(self._rev2hsh)): |
|
167 | for i in range(1, len(self._rev2hsh)): | |
169 | self._writerev(i, f) |
|
168 | self._writerev(i, f) | |
170 | else: # append incrementally |
|
169 | else: # append incrementally | |
171 |
with open(self.path, |
|
170 | with open(self.path, 'ab') as f: | |
172 | for i in range(self._lastmaxrev + 1, len(self._rev2hsh)): |
|
171 | for i in range(self._lastmaxrev + 1, len(self._rev2hsh)): | |
173 | self._writerev(i, f) |
|
172 | self._writerev(i, f) | |
174 | self._lastmaxrev = self.maxrev |
|
173 | self._lastmaxrev = self.maxrev | |
175 |
|
174 | |||
176 | def _load(self): |
|
175 | def _load(self): | |
177 | """load state from file""" |
|
176 | """load state from file""" | |
178 | if not self.path: |
|
177 | if not self.path: | |
179 | return |
|
178 | return | |
180 | # use local variables in a loop. CPython uses LOAD_FAST for them, |
|
179 | # use local variables in a loop. CPython uses LOAD_FAST for them, | |
181 | # which is faster than both LOAD_CONST and LOAD_GLOBAL. |
|
180 | # which is faster than both LOAD_CONST and LOAD_GLOBAL. | |
182 | flaglen = 1 |
|
181 | flaglen = 1 | |
183 | hshlen = _hshlen |
|
182 | hshlen = _hshlen | |
184 |
with open(self.path, |
|
183 | with open(self.path, 'rb') as f: | |
185 | if f.read(len(self.HEADER)) != self.HEADER: |
|
184 | if f.read(len(self.HEADER)) != self.HEADER: | |
186 | raise error.CorruptedFileError() |
|
185 | raise error.CorruptedFileError() | |
187 | self.clear(flush=False) |
|
186 | self.clear(flush=False) | |
188 | while True: |
|
187 | while True: | |
189 | buf = f.read(flaglen) |
|
188 | buf = f.read(flaglen) | |
190 | if not buf: |
|
189 | if not buf: | |
191 | break |
|
190 | break | |
192 | flag = ord(buf) |
|
191 | flag = ord(buf) | |
193 | rev = len(self._rev2hsh) |
|
192 | rev = len(self._rev2hsh) | |
194 | if flag & renameflag: |
|
193 | if flag & renameflag: | |
195 | path = self._readcstr(f) |
|
194 | path = self._readcstr(f) | |
196 | self._renamerevs.append(rev) |
|
195 | self._renamerevs.append(rev) | |
197 | self._renamepaths.append(path) |
|
196 | self._renamepaths.append(path) | |
198 | hsh = f.read(hshlen) |
|
197 | hsh = f.read(hshlen) | |
199 | if len(hsh) != hshlen: |
|
198 | if len(hsh) != hshlen: | |
200 | raise error.CorruptedFileError() |
|
199 | raise error.CorruptedFileError() | |
201 | self._hsh2rev[hsh] = rev |
|
200 | self._hsh2rev[hsh] = rev | |
202 | self._rev2flag.append(flag) |
|
201 | self._rev2flag.append(flag) | |
203 | self._rev2hsh.append(hsh) |
|
202 | self._rev2hsh.append(hsh) | |
204 | self._lastmaxrev = self.maxrev |
|
203 | self._lastmaxrev = self.maxrev | |
205 |
|
204 | |||
206 | def _writerev(self, rev, f): |
|
205 | def _writerev(self, rev, f): | |
207 | """append a revision data to file""" |
|
206 | """append a revision data to file""" | |
208 | flag = self._rev2flag[rev] |
|
207 | flag = self._rev2flag[rev] | |
209 | hsh = self._rev2hsh[rev] |
|
208 | hsh = self._rev2hsh[rev] | |
210 | f.write(struct.pack(b'B', flag)) |
|
209 | f.write(struct.pack(b'B', flag)) | |
211 | if flag & renameflag: |
|
210 | if flag & renameflag: | |
212 | path = self.rev2path(rev) |
|
211 | path = self.rev2path(rev) | |
213 | if path is None: |
|
212 | if path is None: | |
214 | raise error.CorruptedFileError(b'cannot find path for %s' % rev) |
|
213 | raise error.CorruptedFileError(b'cannot find path for %s' % rev) | |
215 | f.write(path + b'\0') |
|
214 | f.write(path + b'\0') | |
216 | f.write(hsh) |
|
215 | f.write(hsh) | |
217 |
|
216 | |||
218 | @staticmethod |
|
217 | @staticmethod | |
219 | def _readcstr(f): |
|
218 | def _readcstr(f): | |
220 | """read a C-language-like '\0'-terminated string""" |
|
219 | """read a C-language-like '\0'-terminated string""" | |
221 | buf = b'' |
|
220 | buf = b'' | |
222 | while True: |
|
221 | while True: | |
223 | ch = f.read(1) |
|
222 | ch = f.read(1) | |
224 | if not ch: # unexpected eof |
|
223 | if not ch: # unexpected eof | |
225 | raise error.CorruptedFileError() |
|
224 | raise error.CorruptedFileError() | |
226 | if ch == b'\0': |
|
225 | if ch == b'\0': | |
227 | break |
|
226 | break | |
228 | buf += ch |
|
227 | buf += ch | |
229 | return buf |
|
228 | return buf | |
230 |
|
229 | |||
231 | def __contains__(self, f): |
|
230 | def __contains__(self, f): | |
232 | """(fctx or (node, path)) -> bool. |
|
231 | """(fctx or (node, path)) -> bool. | |
233 | test if (node, path) is in the map, and is not in a side branch. |
|
232 | test if (node, path) is in the map, and is not in a side branch. | |
234 | f can be either a tuple of (node, path), or a fctx. |
|
233 | f can be either a tuple of (node, path), or a fctx. | |
235 | """ |
|
234 | """ | |
236 | if isinstance(f, tuple): # f: (node, path) |
|
235 | if isinstance(f, tuple): # f: (node, path) | |
237 | hsh, path = f |
|
236 | hsh, path = f | |
238 | else: # f: fctx |
|
237 | else: # f: fctx | |
239 | hsh, path = f.node(), f.path() |
|
238 | hsh, path = f.node(), f.path() | |
240 | rev = self.hsh2rev(hsh) |
|
239 | rev = self.hsh2rev(hsh) | |
241 | if rev is None: |
|
240 | if rev is None: | |
242 | return False |
|
241 | return False | |
243 | if path is not None and path != self.rev2path(rev): |
|
242 | if path is not None and path != self.rev2path(rev): | |
244 | return False |
|
243 | return False | |
245 | return (self.rev2flag(rev) & sidebranchflag) == 0 |
|
244 | return (self.rev2flag(rev) & sidebranchflag) == 0 | |
246 |
|
245 | |||
247 |
|
246 | |||
248 | def getlastnode(path): |
|
247 | def getlastnode(path): | |
249 | """return the last hash in a revmap, without loading its full content. |
|
248 | """return the last hash in a revmap, without loading its full content. | |
250 | this is equivalent to `m = revmap(path); m.rev2hsh(m.maxrev)`, but faster. |
|
249 | this is equivalent to `m = revmap(path); m.rev2hsh(m.maxrev)`, but faster. | |
251 | """ |
|
250 | """ | |
252 | hsh = None |
|
251 | hsh = None | |
253 | try: |
|
252 | try: | |
254 |
with open(path, |
|
253 | with open(path, 'rb') as f: | |
255 | f.seek(-_hshlen, io.SEEK_END) |
|
254 | f.seek(-_hshlen, io.SEEK_END) | |
256 | if f.tell() > len(revmap.HEADER): |
|
255 | if f.tell() > len(revmap.HEADER): | |
257 | hsh = f.read(_hshlen) |
|
256 | hsh = f.read(_hshlen) | |
258 | except IOError: |
|
257 | except IOError: | |
259 | pass |
|
258 | pass | |
260 | return hsh |
|
259 | return hsh |
General Comments 0
You need to be logged in to leave comments.
Login now