##// END OF EJS Templates
storageutil: new function for extracting metadata-less content from text...
Gregory Szorc -
r39916:1b65fb4d default
parent child Browse files
Show More
@@ -1,256 +1,252
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from . import (
11 11 error,
12 12 repository,
13 13 revlog,
14 14 )
15 15 from .utils import (
16 16 interfaceutil,
17 17 storageutil,
18 18 )
19 19
20 20 @interfaceutil.implementer(repository.ifilestorage)
21 21 class filelog(object):
22 22 def __init__(self, opener, path):
23 23 self._revlog = revlog.revlog(opener,
24 24 '/'.join(('data', path + '.i')),
25 25 censorable=True)
26 26 # Full name of the user visible file, relative to the repository root.
27 27 # Used by LFS.
28 28 self._revlog.filename = path
29 29
30 30 def __len__(self):
31 31 return len(self._revlog)
32 32
33 33 def __iter__(self):
34 34 return self._revlog.__iter__()
35 35
36 36 def revs(self, start=0, stop=None):
37 37 return self._revlog.revs(start=start, stop=stop)
38 38
39 39 def parents(self, node):
40 40 return self._revlog.parents(node)
41 41
42 42 def parentrevs(self, rev):
43 43 return self._revlog.parentrevs(rev)
44 44
45 45 def rev(self, node):
46 46 return self._revlog.rev(node)
47 47
48 48 def node(self, rev):
49 49 return self._revlog.node(rev)
50 50
51 51 def lookup(self, node):
52 52 return self._revlog.lookup(node)
53 53
54 54 def linkrev(self, rev):
55 55 return self._revlog.linkrev(rev)
56 56
57 57 def commonancestorsheads(self, node1, node2):
58 58 return self._revlog.commonancestorsheads(node1, node2)
59 59
60 60 # Used by dagop.blockdescendants().
61 61 def descendants(self, revs):
62 62 return self._revlog.descendants(revs)
63 63
64 64 def heads(self, start=None, stop=None):
65 65 return self._revlog.heads(start, stop)
66 66
67 67 # Used by hgweb, children extension.
68 68 def children(self, node):
69 69 return self._revlog.children(node)
70 70
71 71 def iscensored(self, rev):
72 72 return self._revlog.iscensored(rev)
73 73
74 74 # Might be unused.
75 75 def checkhash(self, text, node, p1=None, p2=None, rev=None):
76 76 return self._revlog.checkhash(text, node, p1=p1, p2=p2, rev=rev)
77 77
78 78 def revision(self, node, _df=None, raw=False):
79 79 return self._revlog.revision(node, _df=_df, raw=raw)
80 80
81 81 def revdiff(self, rev1, rev2):
82 82 return self._revlog.revdiff(rev1, rev2)
83 83
84 84 def emitrevisions(self, nodes, nodesorder=None,
85 85 revisiondata=False, assumehaveparentrevisions=False,
86 86 deltaprevious=False):
87 87 return self._revlog.emitrevisions(
88 88 nodes, nodesorder=nodesorder, revisiondata=revisiondata,
89 89 assumehaveparentrevisions=assumehaveparentrevisions,
90 90 deltaprevious=deltaprevious)
91 91
92 92 def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
93 93 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
94 94 cachedelta=None):
95 95 return self._revlog.addrevision(revisiondata, transaction, linkrev,
96 96 p1, p2, node=node, flags=flags,
97 97 cachedelta=cachedelta)
98 98
99 99 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
100 100 return self._revlog.addgroup(deltas, linkmapper, transaction,
101 101 addrevisioncb=addrevisioncb)
102 102
103 103 def getstrippoint(self, minlink):
104 104 return self._revlog.getstrippoint(minlink)
105 105
106 106 def strip(self, minlink, transaction):
107 107 return self._revlog.strip(minlink, transaction)
108 108
109 109 def censorrevision(self, tr, node, tombstone=b''):
110 110 return self._revlog.censorrevision(node, tombstone=tombstone)
111 111
112 112 def files(self):
113 113 return self._revlog.files()
114 114
115 115 def read(self, node):
116 t = self.revision(node)
117 if not t.startswith('\1\n'):
118 return t
119 s = t.index('\1\n', 2)
120 return t[s + 2:]
116 return storageutil.filtermetadata(self.revision(node))
121 117
122 118 def add(self, text, meta, transaction, link, p1=None, p2=None):
123 119 if meta or text.startswith('\1\n'):
124 120 text = storageutil.packmeta(meta, text)
125 121 return self.addrevision(text, transaction, link, p1, p2)
126 122
127 123 def renamed(self, node):
128 124 if self.parents(node)[0] != revlog.nullid:
129 125 return False
130 126 t = self.revision(node)
131 127 m = storageutil.parsemeta(t)[0]
132 128 # copy and copyrev occur in pairs. In rare cases due to bugs,
133 129 # one can occur without the other.
134 130 if m and "copy" in m and "copyrev" in m:
135 131 return (m["copy"], revlog.bin(m["copyrev"]))
136 132 return False
137 133
138 134 def size(self, rev):
139 135 """return the size of a given revision"""
140 136
141 137 # for revisions with renames, we have to go the slow way
142 138 node = self.node(rev)
143 139 if self.renamed(node):
144 140 return len(self.read(node))
145 141 if self.iscensored(rev):
146 142 return 0
147 143
148 144 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
149 145 return self._revlog.size(rev)
150 146
151 147 def cmp(self, node, text):
152 148 """compare text with a given file revision
153 149
154 150 returns True if text is different than what is stored.
155 151 """
156 152
157 153 t = text
158 154 if text.startswith('\1\n'):
159 155 t = '\1\n\1\n' + text
160 156
161 157 samehashes = not self._revlog.cmp(node, t)
162 158 if samehashes:
163 159 return False
164 160
165 161 # censored files compare against the empty file
166 162 if self.iscensored(self.rev(node)):
167 163 return text != ''
168 164
169 165 # renaming a file produces a different hash, even if the data
170 166 # remains unchanged. Check if it's the case (slow):
171 167 if self.renamed(node):
172 168 t2 = self.read(node)
173 169 return t2 != text
174 170
175 171 return True
176 172
177 173 def verifyintegrity(self, state):
178 174 return self._revlog.verifyintegrity(state)
179 175
180 176 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
181 177 revisionscount=False, trackedsize=False,
182 178 storedsize=False):
183 179 return self._revlog.storageinfo(
184 180 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
185 181 revisionscount=revisionscount, trackedsize=trackedsize,
186 182 storedsize=storedsize)
187 183
188 184 # TODO these aren't part of the interface and aren't internal methods.
189 185 # Callers should be fixed to not use them.
190 186
191 187 # Used by bundlefilelog, unionfilelog.
192 188 @property
193 189 def indexfile(self):
194 190 return self._revlog.indexfile
195 191
196 192 @indexfile.setter
197 193 def indexfile(self, value):
198 194 self._revlog.indexfile = value
199 195
200 196 # Used by repo upgrade.
201 197 def clone(self, tr, destrevlog, **kwargs):
202 198 if not isinstance(destrevlog, filelog):
203 199 raise error.ProgrammingError('expected filelog to clone()')
204 200
205 201 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
206 202
207 203 class narrowfilelog(filelog):
208 204 """Filelog variation to be used with narrow stores."""
209 205
210 206 def __init__(self, opener, path, narrowmatch):
211 207 super(narrowfilelog, self).__init__(opener, path)
212 208 self._narrowmatch = narrowmatch
213 209
214 210 def renamed(self, node):
215 211 res = super(narrowfilelog, self).renamed(node)
216 212
217 213 # Renames that come from outside the narrowspec are problematic
218 214 # because we may lack the base text for the rename. This can result
219 215 # in code attempting to walk the ancestry or compute a diff
220 216 # encountering a missing revision. We address this by silently
221 217 # removing rename metadata if the source file is outside the
222 218 # narrow spec.
223 219 #
224 220 # A better solution would be to see if the base revision is available,
225 221 # rather than assuming it isn't.
226 222 #
227 223 # An even better solution would be to teach all consumers of rename
228 224 # metadata that the base revision may not be available.
229 225 #
230 226 # TODO consider better ways of doing this.
231 227 if res and not self._narrowmatch(res[0]):
232 228 return None
233 229
234 230 return res
235 231
236 232 def size(self, rev):
237 233 # Because we have a custom renamed() that may lie, we need to call
238 234 # the base renamed() to report accurate results.
239 235 node = self.node(rev)
240 236 if super(narrowfilelog, self).renamed(node):
241 237 return len(self.read(node))
242 238 else:
243 239 return super(narrowfilelog, self).size(rev)
244 240
245 241 def cmp(self, node, text):
246 242 different = super(narrowfilelog, self).cmp(node, text)
247 243
248 244 # Because renamed() may lie, we may get false positives for
249 245 # different content. Check for this by comparing against the original
250 246 # renamed() implementation.
251 247 if different:
252 248 if super(narrowfilelog, self).renamed(node):
253 249 t2 = self.read(node)
254 250 return t2 != text
255 251
256 252 return different
@@ -1,71 +1,83
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import re
12 12
13 13 from ..node import (
14 14 nullid,
15 15 )
16 16
17 17 _nullhash = hashlib.sha1(nullid)
18 18
19 19 def hashrevisionsha1(text, p1, p2):
20 20 """Compute the SHA-1 for revision data and its parents.
21 21
22 22 This hash combines both the current file contents and its history
23 23 in a manner that makes it easy to distinguish nodes with the same
24 24 content in the revision graph.
25 25 """
26 26 # As of now, if one of the parent nodes is null, p2 is null
27 27 if p2 == nullid:
28 28 # deep copy of a hash is faster than creating one
29 29 s = _nullhash.copy()
30 30 s.update(p1)
31 31 else:
32 32 # none of the parent nodes are nullid
33 33 if p1 < p2:
34 34 a = p1
35 35 b = p2
36 36 else:
37 37 a = p2
38 38 b = p1
39 39 s = hashlib.sha1(a)
40 40 s.update(b)
41 41 s.update(text)
42 42 return s.digest()
43 43
44 44 METADATA_RE = re.compile(b'\x01\n')
45 45
46 46 def parsemeta(text):
47 47 """Parse metadata header from revision data.
48 48
49 49 Returns a 2-tuple of (metadata, offset), where both can be None if there
50 50 is no metadata.
51 51 """
52 52 # text can be buffer, so we can't use .startswith or .index
53 53 if text[:2] != b'\x01\n':
54 54 return None, None
55 55 s = METADATA_RE.search(text, 2).start()
56 56 mtext = text[2:s]
57 57 meta = {}
58 58 for l in mtext.splitlines():
59 59 k, v = l.split(b': ', 1)
60 60 meta[k] = v
61 61 return meta, s + 2
62 62
63 63 def packmeta(meta, text):
64 64 """Add metadata to fulltext to produce revision text."""
65 65 keys = sorted(meta)
66 66 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
67 67 return b'\x01\n%s\x01\n%s' % (metatext, text)
68 68
69 69 def iscensoredtext(text):
70 70 meta = parsemeta(text)[0]
71 71 return meta and b'censored' in meta
72
73 def filtermetadata(text):
74 """Extract just the revision data from source text.
75
76 Returns ``text`` unless it has a metadata header, in which case we return
77 a new buffer without the metadata.
78 """
79 if not text.startswith(b'\x01\n'):
80 return text
81
82 offset = text.index(b'\x01\n', 2)
83 return text[offset + 2:]
General Comments 0
You need to be logged in to leave comments. Login now