##// END OF EJS Templates
storageutil: invert logic of file data comparison...
Gregory Szorc -
r40043:14701830 default
parent child Browse files
Show More
@@ -1,219 +1,219 b''
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from . import (
11 11 error,
12 12 repository,
13 13 revlog,
14 14 )
15 15 from .utils import (
16 16 interfaceutil,
17 17 storageutil,
18 18 )
19 19
20 20 @interfaceutil.implementer(repository.ifilestorage)
21 21 class filelog(object):
22 22 def __init__(self, opener, path):
23 23 self._revlog = revlog.revlog(opener,
24 24 '/'.join(('data', path + '.i')),
25 25 censorable=True)
26 26 # Full name of the user visible file, relative to the repository root.
27 27 # Used by LFS.
28 28 self._revlog.filename = path
29 29
30 30 def __len__(self):
31 31 return len(self._revlog)
32 32
33 33 def __iter__(self):
34 34 return self._revlog.__iter__()
35 35
36 36 def revs(self, start=0, stop=None):
37 37 return self._revlog.revs(start=start, stop=stop)
38 38
39 39 def parents(self, node):
40 40 return self._revlog.parents(node)
41 41
42 42 def parentrevs(self, rev):
43 43 return self._revlog.parentrevs(rev)
44 44
45 45 def rev(self, node):
46 46 return self._revlog.rev(node)
47 47
48 48 def node(self, rev):
49 49 return self._revlog.node(rev)
50 50
51 51 def lookup(self, node):
52 52 return storageutil.fileidlookup(self._revlog, node,
53 53 self._revlog.indexfile)
54 54
55 55 def linkrev(self, rev):
56 56 return self._revlog.linkrev(rev)
57 57
58 58 def commonancestorsheads(self, node1, node2):
59 59 return self._revlog.commonancestorsheads(node1, node2)
60 60
61 61 # Used by dagop.blockdescendants().
62 62 def descendants(self, revs):
63 63 return self._revlog.descendants(revs)
64 64
65 65 def heads(self, start=None, stop=None):
66 66 return self._revlog.heads(start, stop)
67 67
68 68 # Used by hgweb, children extension.
69 69 def children(self, node):
70 70 return self._revlog.children(node)
71 71
72 72 def iscensored(self, rev):
73 73 return self._revlog.iscensored(rev)
74 74
75 75 def revision(self, node, _df=None, raw=False):
76 76 return self._revlog.revision(node, _df=_df, raw=raw)
77 77
78 78 def emitrevisions(self, nodes, nodesorder=None,
79 79 revisiondata=False, assumehaveparentrevisions=False,
80 80 deltaprevious=False):
81 81 return self._revlog.emitrevisions(
82 82 nodes, nodesorder=nodesorder, revisiondata=revisiondata,
83 83 assumehaveparentrevisions=assumehaveparentrevisions,
84 84 deltaprevious=deltaprevious)
85 85
86 86 def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
87 87 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
88 88 cachedelta=None):
89 89 return self._revlog.addrevision(revisiondata, transaction, linkrev,
90 90 p1, p2, node=node, flags=flags,
91 91 cachedelta=cachedelta)
92 92
93 93 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
94 94 return self._revlog.addgroup(deltas, linkmapper, transaction,
95 95 addrevisioncb=addrevisioncb)
96 96
97 97 def getstrippoint(self, minlink):
98 98 return self._revlog.getstrippoint(minlink)
99 99
100 100 def strip(self, minlink, transaction):
101 101 return self._revlog.strip(minlink, transaction)
102 102
103 103 def censorrevision(self, tr, node, tombstone=b''):
104 104 return self._revlog.censorrevision(node, tombstone=tombstone)
105 105
106 106 def files(self):
107 107 return self._revlog.files()
108 108
109 109 def read(self, node):
110 110 return storageutil.filtermetadata(self.revision(node))
111 111
112 112 def add(self, text, meta, transaction, link, p1=None, p2=None):
113 113 if meta or text.startswith('\1\n'):
114 114 text = storageutil.packmeta(meta, text)
115 115 return self.addrevision(text, transaction, link, p1, p2)
116 116
117 117 def renamed(self, node):
118 118 return storageutil.filerevisioncopied(self, node)
119 119
120 120 def size(self, rev):
121 121 """return the size of a given revision"""
122 122
123 123 # for revisions with renames, we have to go the slow way
124 124 node = self.node(rev)
125 125 if self.renamed(node):
126 126 return len(self.read(node))
127 127 if self.iscensored(rev):
128 128 return 0
129 129
130 130 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
131 131 return self._revlog.size(rev)
132 132
133 133 def cmp(self, node, text):
134 134 """compare text with a given file revision
135 135
136 136 returns True if text is different than what is stored.
137 137 """
138 return storageutil.filerevisiondifferent(self, node, text)
138 return not storageutil.filedataequivalent(self, node, text)
139 139
140 140 def verifyintegrity(self, state):
141 141 return self._revlog.verifyintegrity(state)
142 142
143 143 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
144 144 revisionscount=False, trackedsize=False,
145 145 storedsize=False):
146 146 return self._revlog.storageinfo(
147 147 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
148 148 revisionscount=revisionscount, trackedsize=trackedsize,
149 149 storedsize=storedsize)
150 150
151 151 # TODO these aren't part of the interface and aren't internal methods.
152 152 # Callers should be fixed to not use them.
153 153
154 154 # Used by bundlefilelog, unionfilelog.
155 155 @property
156 156 def indexfile(self):
157 157 return self._revlog.indexfile
158 158
159 159 @indexfile.setter
160 160 def indexfile(self, value):
161 161 self._revlog.indexfile = value
162 162
163 163 # Used by repo upgrade.
164 164 def clone(self, tr, destrevlog, **kwargs):
165 165 if not isinstance(destrevlog, filelog):
166 166 raise error.ProgrammingError('expected filelog to clone()')
167 167
168 168 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
169 169
170 170 class narrowfilelog(filelog):
171 171 """Filelog variation to be used with narrow stores."""
172 172
173 173 def __init__(self, opener, path, narrowmatch):
174 174 super(narrowfilelog, self).__init__(opener, path)
175 175 self._narrowmatch = narrowmatch
176 176
177 177 def renamed(self, node):
178 178 res = super(narrowfilelog, self).renamed(node)
179 179
180 180 # Renames that come from outside the narrowspec are problematic
181 181 # because we may lack the base text for the rename. This can result
182 182 # in code attempting to walk the ancestry or compute a diff
183 183 # encountering a missing revision. We address this by silently
184 184 # removing rename metadata if the source file is outside the
185 185 # narrow spec.
186 186 #
187 187 # A better solution would be to see if the base revision is available,
188 188 # rather than assuming it isn't.
189 189 #
190 190 # An even better solution would be to teach all consumers of rename
191 191 # metadata that the base revision may not be available.
192 192 #
193 193 # TODO consider better ways of doing this.
194 194 if res and not self._narrowmatch(res[0]):
195 195 return None
196 196
197 197 return res
198 198
199 199 def size(self, rev):
200 200 # Because we have a custom renamed() that may lie, we need to call
201 201 # the base renamed() to report accurate results.
202 202 node = self.node(rev)
203 203 if super(narrowfilelog, self).renamed(node):
204 204 return len(self.read(node))
205 205 else:
206 206 return super(narrowfilelog, self).size(rev)
207 207
208 208 def cmp(self, node, text):
209 209 different = super(narrowfilelog, self).cmp(node, text)
210 210
211 211 # Because renamed() may lie, we may get false positives for
212 212 # different content. Check for this by comparing against the original
213 213 # renamed() implementation.
214 214 if different:
215 215 if super(narrowfilelog, self).renamed(node):
216 216 t2 = self.read(node)
217 217 return t2 != text
218 218
219 219 return different
@@ -1,254 +1,264 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import re
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullid,
17 17 nullrev,
18 18 )
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 )
23 23
24 24 _nullhash = hashlib.sha1(nullid)
25 25
26 26 def hashrevisionsha1(text, p1, p2):
27 27 """Compute the SHA-1 for revision data and its parents.
28 28
29 29 This hash combines both the current file contents and its history
30 30 in a manner that makes it easy to distinguish nodes with the same
31 31 content in the revision graph.
32 32 """
33 33 # As of now, if one of the parent node is null, p2 is null
34 34 if p2 == nullid:
35 35 # deep copy of a hash is faster than creating one
36 36 s = _nullhash.copy()
37 37 s.update(p1)
38 38 else:
39 39 # none of the parent nodes are nullid
40 40 if p1 < p2:
41 41 a = p1
42 42 b = p2
43 43 else:
44 44 a = p2
45 45 b = p1
46 46 s = hashlib.sha1(a)
47 47 s.update(b)
48 48 s.update(text)
49 49 return s.digest()
50 50
51 51 METADATA_RE = re.compile(b'\x01\n')
52 52
53 53 def parsemeta(text):
54 54 """Parse metadata header from revision data.
55 55
56 56 Returns a 2-tuple of (metadata, offset), where both can be None if there
57 57 is no metadata.
58 58 """
59 59 # text can be buffer, so we can't use .startswith or .index
60 60 if text[:2] != b'\x01\n':
61 61 return None, None
62 62 s = METADATA_RE.search(text, 2).start()
63 63 mtext = text[2:s]
64 64 meta = {}
65 65 for l in mtext.splitlines():
66 66 k, v = l.split(b': ', 1)
67 67 meta[k] = v
68 68 return meta, s + 2
69 69
70 70 def packmeta(meta, text):
71 71 """Add metadata to fulltext to produce revision text."""
72 72 keys = sorted(meta)
73 73 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
74 74 return b'\x01\n%s\x01\n%s' % (metatext, text)
75 75
76 76 def iscensoredtext(text):
77 77 meta = parsemeta(text)[0]
78 78 return meta and b'censored' in meta
79 79
80 80 def filtermetadata(text):
81 81 """Extract just the revision data from source text.
82 82
83 83 Returns ``text`` unless it has a metadata header, in which case we return
84 84 a new buffer without hte metadata.
85 85 """
86 86 if not text.startswith(b'\x01\n'):
87 87 return text
88 88
89 89 offset = text.index(b'\x01\n', 2)
90 90 return text[offset + 2:]
91 91
92 92 def filerevisioncopied(store, node):
93 93 """Resolve file revision copy metadata.
94 94
95 95 Returns ``False`` if the file has no copy metadata. Otherwise a
96 96 2-tuple of the source filename and node.
97 97 """
98 98 if store.parents(node)[0] != nullid:
99 99 return False
100 100
101 101 meta = parsemeta(store.revision(node))[0]
102 102
103 103 # copy and copyrev occur in pairs. In rare cases due to old bugs,
104 104 # one can occur without the other. So ensure both are present to flag
105 105 # as a copy.
106 106 if meta and b'copy' in meta and b'copyrev' in meta:
107 107 return meta[b'copy'], bin(meta[b'copyrev'])
108 108
109 109 return False
110 110
111 def filerevisiondifferent(store, node, filedata):
112 """Determines whether file data is equivalent to a stored node."""
111 def filedataequivalent(store, node, filedata):
112 """Determines whether file data is equivalent to a stored node.
113
114 Returns True if the passed file data would hash to the same value
115 as a stored revision and False otherwise.
116
117 When a stored revision is censored, filedata must be empty to have
118 equivalence.
119
120 When a stored revision has copy metadata, it is ignored as part
121 of the compare.
122 """
113 123
114 124 if filedata.startswith(b'\x01\n'):
115 125 revisiontext = b'\x01\n\x01\n' + filedata
116 126 else:
117 127 revisiontext = filedata
118 128
119 129 p1, p2 = store.parents(node)
120 130
121 131 computednode = hashrevisionsha1(revisiontext, p1, p2)
122 132
123 133 if computednode == node:
124 return False
134 return True
125 135
126 136 # Censored files compare against the empty file.
127 137 if store.iscensored(store.rev(node)):
128 return filedata != b''
138 return filedata == b''
129 139
130 140 # Renaming a file produces a different hash, even if the data
131 141 # remains unchanged. Check if that's the case.
132 142 if store.renamed(node):
133 return store.read(node) != filedata
143 return store.read(node) == filedata
134 144
135 return True
145 return False
136 146
137 147 def iterrevs(storelen, start=0, stop=None):
138 148 """Iterate over revision numbers in a store."""
139 149 step = 1
140 150
141 151 if stop is not None:
142 152 if start > stop:
143 153 step = -1
144 154 stop += step
145 155 if stop > storelen:
146 156 stop = storelen
147 157 else:
148 158 stop = storelen
149 159
150 160 return pycompat.xrange(start, stop, step)
151 161
152 162 def fileidlookup(store, fileid, identifier):
153 163 """Resolve the file node for a value.
154 164
155 165 ``store`` is an object implementing the ``ifileindex`` interface.
156 166
157 167 ``fileid`` can be:
158 168
159 169 * A 20 byte binary node.
160 170 * An integer revision number
161 171 * A 40 byte hex node.
162 172 * A bytes that can be parsed as an integer representing a revision number.
163 173
164 174 ``identifier`` is used to populate ``error.LookupError`` with an identifier
165 175 for the store.
166 176
167 177 Raises ``error.LookupError`` on failure.
168 178 """
169 179 if isinstance(fileid, int):
170 180 try:
171 181 return store.node(fileid)
172 182 except IndexError:
173 183 raise error.LookupError(fileid, identifier, _('no match found'))
174 184
175 185 if len(fileid) == 20:
176 186 try:
177 187 store.rev(fileid)
178 188 return fileid
179 189 except error.LookupError:
180 190 pass
181 191
182 192 if len(fileid) == 40:
183 193 try:
184 194 rawnode = bin(fileid)
185 195 store.rev(rawnode)
186 196 return rawnode
187 197 except TypeError:
188 198 pass
189 199
190 200 try:
191 201 rev = int(fileid)
192 202
193 203 if b'%d' % rev != fileid:
194 204 raise ValueError
195 205
196 206 try:
197 207 return store.node(rev)
198 208 except (IndexError, TypeError):
199 209 pass
200 210 except (ValueError, OverflowError):
201 211 pass
202 212
203 213 raise error.LookupError(fileid, identifier, _('no match found'))
204 214
205 215 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
206 216 """Resolve information needed to strip revisions.
207 217
208 218 Finds the minimum revision number that must be stripped in order to
209 219 strip ``minlinkrev``.
210 220
211 221 Returns a 2-tuple of the minimum revision number to do that and a set
212 222 of all revision numbers that have linkrevs that would be broken
213 223 by that strip.
214 224
215 225 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
216 226 ``headrevs`` is an iterable of head revisions.
217 227 ``linkrevfn`` is a callable that receives a revision and returns a linked
218 228 revision.
219 229 ``parentrevsfn`` is a callable that receives a revision number and returns
220 230 an iterable of its parent revision numbers.
221 231 """
222 232 brokenrevs = set()
223 233 strippoint = tiprev + 1
224 234
225 235 heads = {}
226 236 futurelargelinkrevs = set()
227 237 for head in headrevs:
228 238 headlinkrev = linkrevfn(head)
229 239 heads[head] = headlinkrev
230 240 if headlinkrev >= minlinkrev:
231 241 futurelargelinkrevs.add(headlinkrev)
232 242
233 243 # This algorithm involves walking down the rev graph, starting at the
234 244 # heads. Since the revs are topologically sorted according to linkrev,
235 245 # once all head linkrevs are below the minlink, we know there are
236 246 # no more revs that could have a linkrev greater than minlink.
237 247 # So we can stop walking.
238 248 while futurelargelinkrevs:
239 249 strippoint -= 1
240 250 linkrev = heads.pop(strippoint)
241 251
242 252 if linkrev < minlinkrev:
243 253 brokenrevs.add(strippoint)
244 254 else:
245 255 futurelargelinkrevs.remove(linkrev)
246 256
247 257 for p in parentrevsfn(strippoint):
248 258 if p != nullrev:
249 259 plinkrev = linkrevfn(p)
250 260 heads[p] = plinkrev
251 261 if plinkrev >= minlinkrev:
252 262 futurelargelinkrevs.add(plinkrev)
253 263
254 264 return strippoint, brokenrevs
General Comments 0
You need to be logged in to leave comments. Login now