@@ -1,256 +1,252 @@
 # filelog.py - file history class for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 from . import (
     error,
     repository,
     revlog,
 )
 from .utils import (
     interfaceutil,
     storageutil,
 )

 @interfaceutil.implementer(repository.ifilestorage)
 class filelog(object):
     def __init__(self, opener, path):
         self._revlog = revlog.revlog(opener,
                                      '/'.join(('data', path + '.i')),
                                      censorable=True)
         # Full name of the user visible file, relative to the repository root.
         # Used by LFS.
         self._revlog.filename = path

     def __len__(self):
         return len(self._revlog)

     def __iter__(self):
         return self._revlog.__iter__()

     def revs(self, start=0, stop=None):
         return self._revlog.revs(start=start, stop=stop)

     def parents(self, node):
         return self._revlog.parents(node)

     def parentrevs(self, rev):
         return self._revlog.parentrevs(rev)

     def rev(self, node):
         return self._revlog.rev(node)

     def node(self, rev):
         return self._revlog.node(rev)

     def lookup(self, node):
         return self._revlog.lookup(node)

     def linkrev(self, rev):
         return self._revlog.linkrev(rev)

     def commonancestorsheads(self, node1, node2):
         return self._revlog.commonancestorsheads(node1, node2)

     # Used by dagop.blockdescendants().
     def descendants(self, revs):
         return self._revlog.descendants(revs)

     def heads(self, start=None, stop=None):
         return self._revlog.heads(start, stop)

     # Used by hgweb, children extension.
     def children(self, node):
         return self._revlog.children(node)

     def iscensored(self, rev):
         return self._revlog.iscensored(rev)

     # Might be unused.
     def checkhash(self, text, node, p1=None, p2=None, rev=None):
         return self._revlog.checkhash(text, node, p1=p1, p2=p2, rev=rev)

     def revision(self, node, _df=None, raw=False):
         return self._revlog.revision(node, _df=_df, raw=raw)

     def revdiff(self, rev1, rev2):
         return self._revlog.revdiff(rev1, rev2)

     def emitrevisions(self, nodes, nodesorder=None,
                       revisiondata=False, assumehaveparentrevisions=False,
                       deltaprevious=False):
         return self._revlog.emitrevisions(
             nodes, nodesorder=nodesorder, revisiondata=revisiondata,
             assumehaveparentrevisions=assumehaveparentrevisions,
             deltaprevious=deltaprevious)

     def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
                     node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
                     cachedelta=None):
         return self._revlog.addrevision(revisiondata, transaction, linkrev,
                                         p1, p2, node=node, flags=flags,
                                         cachedelta=cachedelta)

     def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
         return self._revlog.addgroup(deltas, linkmapper, transaction,
                                      addrevisioncb=addrevisioncb)

     def getstrippoint(self, minlink):
         return self._revlog.getstrippoint(minlink)

     def strip(self, minlink, transaction):
         return self._revlog.strip(minlink, transaction)

     def censorrevision(self, tr, node, tombstone=b''):
         return self._revlog.censorrevision(node, tombstone=tombstone)

     def files(self):
         return self._revlog.files()

     def read(self, node):
-        t = self.revision(node)
-        if not t.startswith('\1\n'):
-            return t
-        s = t.index('\1\n', 2)
-        return t[s + 2:]
+        return storageutil.filtermetadata(self.revision(node))
|
     def add(self, text, meta, transaction, link, p1=None, p2=None):
         if meta or text.startswith('\1\n'):
             text = storageutil.packmeta(meta, text)
         return self.addrevision(text, transaction, link, p1, p2)

     def renamed(self, node):
         if self.parents(node)[0] != revlog.nullid:
             return False
         t = self.revision(node)
         m = storageutil.parsemeta(t)[0]
         # copy and copyrev occur in pairs. In rare cases due to bugs,
         # one can occur without the other.
         if m and "copy" in m and "copyrev" in m:
             return (m["copy"], revlog.bin(m["copyrev"]))
         return False

     def size(self, rev):
         """return the size of a given revision"""

         # for revisions with renames, we have to go the slow way
         node = self.node(rev)
         if self.renamed(node):
             return len(self.read(node))
         if self.iscensored(rev):
             return 0

         # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
         return self._revlog.size(rev)

     def cmp(self, node, text):
         """compare text with a given file revision

         returns True if text is different than what is stored.
         """

         t = text
         if text.startswith('\1\n'):
             t = '\1\n\1\n' + text

         samehashes = not self._revlog.cmp(node, t)
         if samehashes:
             return False

         # censored files compare against the empty file
         if self.iscensored(self.rev(node)):
             return text != ''

         # renaming a file produces a different hash, even if the data
         # remains unchanged. Check if it's the case (slow):
         if self.renamed(node):
             t2 = self.read(node)
             return t2 != text

         return True

     def verifyintegrity(self, state):
         return self._revlog.verifyintegrity(state)

     def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                     revisionscount=False, trackedsize=False,
                     storedsize=False):
         return self._revlog.storageinfo(
             exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
             revisionscount=revisionscount, trackedsize=trackedsize,
             storedsize=storedsize)

     # TODO these aren't part of the interface and aren't internal methods.
     # Callers should be fixed to not use them.

     # Used by bundlefilelog, unionfilelog.
     @property
     def indexfile(self):
         return self._revlog.indexfile

     @indexfile.setter
     def indexfile(self, value):
         self._revlog.indexfile = value

     # Used by repo upgrade.
     def clone(self, tr, destrevlog, **kwargs):
         if not isinstance(destrevlog, filelog):
             raise error.ProgrammingError('expected filelog to clone()')

         return self._revlog.clone(tr, destrevlog._revlog, **kwargs)

 class narrowfilelog(filelog):
     """Filelog variation to be used with narrow stores."""

     def __init__(self, opener, path, narrowmatch):
         super(narrowfilelog, self).__init__(opener, path)
         self._narrowmatch = narrowmatch

     def renamed(self, node):
         res = super(narrowfilelog, self).renamed(node)

         # Renames that come from outside the narrowspec are problematic
         # because we may lack the base text for the rename. This can result
         # in code attempting to walk the ancestry or compute a diff
         # encountering a missing revision. We address this by silently
         # removing rename metadata if the source file is outside the
         # narrow spec.
         #
         # A better solution would be to see if the base revision is available,
         # rather than assuming it isn't.
         #
         # An even better solution would be to teach all consumers of rename
         # metadata that the base revision may not be available.
         #
         # TODO consider better ways of doing this.
         if res and not self._narrowmatch(res[0]):
             return None

         return res

     def size(self, rev):
         # Because we have a custom renamed() that may lie, we need to call
         # the base renamed() to report accurate results.
         node = self.node(rev)
         if super(narrowfilelog, self).renamed(node):
             return len(self.read(node))
         else:
             return super(narrowfilelog, self).size(rev)

     def cmp(self, node, text):
         different = super(narrowfilelog, self).cmp(node, text)

         # Because renamed() may lie, we may get false positives for
         # different content. Check for this by comparing against the original
         # renamed() implementation.
         if different:
             if super(narrowfilelog, self).renamed(node):
                 t2 = self.read(node)
                 return t2 != text

         return different
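
An aside on the '\1\n' framing that read(), add() and cmp() above all rely on: revision text that carries metadata (for example the copy/copyrev pair recorded for a rename) is stored behind a "\x01\n key: value\n ... \x01\n" header, and file content that merely happens to begin with \x01\n is stored behind an empty header so it cannot be mistaken for metadata; that is also why cmp() prepends '\1\n\1\n' before comparing hashes. The sketch below is illustrative only, using stand-in helpers (pack/unpack) rather than Mercurial's own functions:

# Minimal sketch of the filelog metadata framing (stand-in helpers,
# not Mercurial's API).

def pack(meta, text):
    # Prepend a "\x01\n<key>: <value>\n...\x01\n" header. An empty meta
    # dict still yields an empty header, which is how content that itself
    # starts with b'\x01\n' is escaped.
    metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in sorted(meta))
    return b'\x01\n%s\x01\n%s' % (metatext, text)

def unpack(stored):
    # Mirrors what read() does: strip the header, if any, and return the
    # user-visible file data.
    if not stored.startswith(b'\x01\n'):
        return stored
    end = stored.index(b'\x01\n', 2)
    return stored[end + 2:]

# A rename records where the content came from ("copy"/"copyrev").
renamed = pack({b'copy': b'old/name.txt', b'copyrev': b'0' * 40},
               b'file body\n')
assert unpack(renamed) == b'file body\n'

# Content starting with the marker gets an empty header on the way in,
# which matches the '\1\n\1\n' + text trick in cmp() above.
tricky = b'\x01\nnot metadata\n'
assert pack({}, tricky).startswith(b'\x01\n\x01\n')
assert unpack(pack({}, tricky)) == tricky
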
@@ -1,71 +1,83 @@
 # storageutil.py - Storage functionality agnostic of backend implementation.
 #
 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import hashlib
 import re

 from ..node import (
     nullid,
 )

 _nullhash = hashlib.sha1(nullid)

 def hashrevisionsha1(text, p1, p2):
     """Compute the SHA-1 for revision data and its parents.

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         if p1 < p2:
             a = p1
             b = p2
         else:
             a = p2
             b = p1
         s = hashlib.sha1(a)
         s.update(b)
     s.update(text)
     return s.digest()

 METADATA_RE = re.compile(b'\x01\n')

 def parsemeta(text):
     """Parse metadata header from revision data.

     Returns a 2-tuple of (metadata, offset), where both can be None if there
     is no metadata.
     """
     # text can be buffer, so we can't use .startswith or .index
     if text[:2] != b'\x01\n':
         return None, None
     s = METADATA_RE.search(text, 2).start()
     mtext = text[2:s]
     meta = {}
     for l in mtext.splitlines():
         k, v = l.split(b': ', 1)
         meta[k] = v
     return meta, s + 2

 def packmeta(meta, text):
     """Add metadata to fulltext to produce revision text."""
     keys = sorted(meta)
     metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
     return b'\x01\n%s\x01\n%s' % (metatext, text)

 def iscensoredtext(text):
     meta = parsemeta(text)[0]
     return meta and b'censored' in meta
+
+def filtermetadata(text):
+    """Extract just the revision data from source text.
+
+    Returns ``text`` unless it has a metadata header, in which case we return
+    a new buffer without the metadata.
+    """
+    if not text.startswith(b'\x01\n'):
+        return text
+
+    offset = text.index(b'\x01\n', 2)
+    return text[offset + 2:]
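
For the hashing side of this module, the hashrevisionsha1() docstring above is the whole story: a node is the SHA-1 of the two parent nodes (in sorted byte order) followed by the revision text, which is why identical content reached through different history still gets a distinct node. A rough, self-contained illustration of that scheme; the helper below is a paraphrase, not the function itself, and the null-parent branch in the real code is just a precomputed-hash shortcut for the same ordering:

import hashlib

NULLID = b'\0' * 20  # nullid is 20 zero bytes

def node_sha1(text, p1, p2):
    # Hash the parents in sorted byte order, then the text. Because the
    # null id is all zero bytes it always sorts first, so this also covers
    # the p2 == nullid fast path taken by hashrevisionsha1().
    a, b = sorted((p1, p2))
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

root = node_sha1(b'hello\n', NULLID, NULLID)
child = node_sha1(b'hello\n', root, NULLID)

# Same file content, different parents -> different nodes.
assert root != child
# Parent order does not matter.
assert node_sha1(b'x', root, child) == node_sha1(b'x', child, root)
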