storageutil: extract filelog.cmp() to a standalone function...
Gregory Szorc
r40042:422beffd default
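This changeset moves the body of filelog.cmp() into a new, storage-agnostic helper, storageutil.filerevisiondifferent(), so the comparison logic no longer goes through revlog internals and can be reused by other ifilestorage implementations. After the change, filelog.cmp() is reduced to a thin delegation, as shown in the filelog.py hunk below:

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        return storageutil.filerevisiondifferent(self, node, text)
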
@@ -1,238 +1,219 @@
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from . import (
11 11 error,
12 12 repository,
13 13 revlog,
14 14 )
15 15 from .utils import (
16 16 interfaceutil,
17 17 storageutil,
18 18 )
19 19
20 20 @interfaceutil.implementer(repository.ifilestorage)
21 21 class filelog(object):
22 22 def __init__(self, opener, path):
23 23 self._revlog = revlog.revlog(opener,
24 24 '/'.join(('data', path + '.i')),
25 25 censorable=True)
26 26 # Full name of the user visible file, relative to the repository root.
27 27 # Used by LFS.
28 28 self._revlog.filename = path
29 29
30 30 def __len__(self):
31 31 return len(self._revlog)
32 32
33 33 def __iter__(self):
34 34 return self._revlog.__iter__()
35 35
36 36 def revs(self, start=0, stop=None):
37 37 return self._revlog.revs(start=start, stop=stop)
38 38
39 39 def parents(self, node):
40 40 return self._revlog.parents(node)
41 41
42 42 def parentrevs(self, rev):
43 43 return self._revlog.parentrevs(rev)
44 44
45 45 def rev(self, node):
46 46 return self._revlog.rev(node)
47 47
48 48 def node(self, rev):
49 49 return self._revlog.node(rev)
50 50
51 51 def lookup(self, node):
52 52 return storageutil.fileidlookup(self._revlog, node,
53 53 self._revlog.indexfile)
54 54
55 55 def linkrev(self, rev):
56 56 return self._revlog.linkrev(rev)
57 57
58 58 def commonancestorsheads(self, node1, node2):
59 59 return self._revlog.commonancestorsheads(node1, node2)
60 60
61 61 # Used by dagop.blockdescendants().
62 62 def descendants(self, revs):
63 63 return self._revlog.descendants(revs)
64 64
65 65 def heads(self, start=None, stop=None):
66 66 return self._revlog.heads(start, stop)
67 67
68 68 # Used by hgweb, children extension.
69 69 def children(self, node):
70 70 return self._revlog.children(node)
71 71
72 72 def iscensored(self, rev):
73 73 return self._revlog.iscensored(rev)
74 74
75 75 def revision(self, node, _df=None, raw=False):
76 76 return self._revlog.revision(node, _df=_df, raw=raw)
77 77
78 78 def emitrevisions(self, nodes, nodesorder=None,
79 79 revisiondata=False, assumehaveparentrevisions=False,
80 80 deltaprevious=False):
81 81 return self._revlog.emitrevisions(
82 82 nodes, nodesorder=nodesorder, revisiondata=revisiondata,
83 83 assumehaveparentrevisions=assumehaveparentrevisions,
84 84 deltaprevious=deltaprevious)
85 85
86 86 def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
87 87 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
88 88 cachedelta=None):
89 89 return self._revlog.addrevision(revisiondata, transaction, linkrev,
90 90 p1, p2, node=node, flags=flags,
91 91 cachedelta=cachedelta)
92 92
93 93 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
94 94 return self._revlog.addgroup(deltas, linkmapper, transaction,
95 95 addrevisioncb=addrevisioncb)
96 96
97 97 def getstrippoint(self, minlink):
98 98 return self._revlog.getstrippoint(minlink)
99 99
100 100 def strip(self, minlink, transaction):
101 101 return self._revlog.strip(minlink, transaction)
102 102
103 103 def censorrevision(self, tr, node, tombstone=b''):
104 104 return self._revlog.censorrevision(node, tombstone=tombstone)
105 105
106 106 def files(self):
107 107 return self._revlog.files()
108 108
109 109 def read(self, node):
110 110 return storageutil.filtermetadata(self.revision(node))
111 111
112 112 def add(self, text, meta, transaction, link, p1=None, p2=None):
113 113 if meta or text.startswith('\1\n'):
114 114 text = storageutil.packmeta(meta, text)
115 115 return self.addrevision(text, transaction, link, p1, p2)
116 116
117 117 def renamed(self, node):
118 118 return storageutil.filerevisioncopied(self, node)
119 119
120 120 def size(self, rev):
121 121 """return the size of a given revision"""
122 122
123 123 # for revisions with renames, we have to go the slow way
124 124 node = self.node(rev)
125 125 if self.renamed(node):
126 126 return len(self.read(node))
127 127 if self.iscensored(rev):
128 128 return 0
129 129
130 130 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
131 131 return self._revlog.size(rev)
132 132
133 133 def cmp(self, node, text):
134 134 """compare text with a given file revision
135 135
136 136 returns True if text is different than what is stored.
137 137 """
138
139 t = text
140 if text.startswith('\1\n'):
141 t = '\1\n\1\n' + text
142
143 samehashes = not self._revlog.cmp(node, t)
144 if samehashes:
145 return False
146
147 # censored files compare against the empty file
148 if self.iscensored(self.rev(node)):
149 return text != ''
150
151 # renaming a file produces a different hash, even if the data
152 # remains unchanged. Check if it's the case (slow):
153 if self.renamed(node):
154 t2 = self.read(node)
155 return t2 != text
156
157 return True
138 return storageutil.filerevisiondifferent(self, node, text)
158 139
159 140 def verifyintegrity(self, state):
160 141 return self._revlog.verifyintegrity(state)
161 142
162 143 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
163 144 revisionscount=False, trackedsize=False,
164 145 storedsize=False):
165 146 return self._revlog.storageinfo(
166 147 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
167 148 revisionscount=revisionscount, trackedsize=trackedsize,
168 149 storedsize=storedsize)
169 150
170 151 # TODO these aren't part of the interface and aren't internal methods.
171 152 # Callers should be fixed to not use them.
172 153
173 154 # Used by bundlefilelog, unionfilelog.
174 155 @property
175 156 def indexfile(self):
176 157 return self._revlog.indexfile
177 158
178 159 @indexfile.setter
179 160 def indexfile(self, value):
180 161 self._revlog.indexfile = value
181 162
182 163 # Used by repo upgrade.
183 164 def clone(self, tr, destrevlog, **kwargs):
184 165 if not isinstance(destrevlog, filelog):
185 166 raise error.ProgrammingError('expected filelog to clone()')
186 167
187 168 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
188 169
189 170 class narrowfilelog(filelog):
190 171 """Filelog variation to be used with narrow stores."""
191 172
192 173 def __init__(self, opener, path, narrowmatch):
193 174 super(narrowfilelog, self).__init__(opener, path)
194 175 self._narrowmatch = narrowmatch
195 176
196 177 def renamed(self, node):
197 178 res = super(narrowfilelog, self).renamed(node)
198 179
199 180 # Renames that come from outside the narrowspec are problematic
200 181 # because we may lack the base text for the rename. This can result
201 182 # in code attempting to walk the ancestry or compute a diff
202 183 # encountering a missing revision. We address this by silently
203 184 # removing rename metadata if the source file is outside the
204 185 # narrow spec.
205 186 #
206 187 # A better solution would be to see if the base revision is available,
207 188 # rather than assuming it isn't.
208 189 #
209 190 # An even better solution would be to teach all consumers of rename
210 191 # metadata that the base revision may not be available.
211 192 #
212 193 # TODO consider better ways of doing this.
213 194 if res and not self._narrowmatch(res[0]):
214 195 return None
215 196
216 197 return res
217 198
218 199 def size(self, rev):
219 200 # Because we have a custom renamed() that may lie, we need to call
220 201 # the base renamed() to report accurate results.
221 202 node = self.node(rev)
222 203 if super(narrowfilelog, self).renamed(node):
223 204 return len(self.read(node))
224 205 else:
225 206 return super(narrowfilelog, self).size(rev)
226 207
227 208 def cmp(self, node, text):
228 209 different = super(narrowfilelog, self).cmp(node, text)
229 210
230 211 # Because renamed() may lie, we may get false positives for
231 212 # different content. Check for this by comparing against the original
232 213 # renamed() implementation.
233 214 if different:
234 215 if super(narrowfilelog, self).renamed(node):
235 216 t2 = self.read(node)
236 217 return t2 != text
237 218
238 219 return different
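
Because the extracted helper only calls into the ifilestorage interface (parents(), rev(), iscensored(), renamed(), and read()), it can be exercised without a revlog at all. The sketch below is illustrative only; the fakestore class and its sample data are invented for this example and are not part of the changeset:

    from mercurial.node import nullid
    from mercurial.utils import storageutil

    class fakestore(object):
        """Tiny in-memory stand-in exposing just what the helper needs."""

        def __init__(self, data):
            self._data = data
            # Node hash over the fulltext and both (null) parents, the same
            # way hashrevisionsha1() is used elsewhere in storageutil.
            self._node = storageutil.hashrevisionsha1(data, nullid, nullid)

        def parents(self, node):
            return nullid, nullid

        def rev(self, node):
            return 0

        def iscensored(self, rev):
            return False

        def renamed(self, node):
            return False

        def read(self, node):
            # Only consulted when renamed() is truthy.
            return self._data

    store = fakestore(b'contents\n')

    # Same fulltext: the computed hash matches the node, so not "different".
    assert not storageutil.filerevisiondifferent(store, store._node, b'contents\n')

    # Changed fulltext: hashes differ and no rename or censoring applies.
    assert storageutil.filerevisiondifferent(store, store._node, b'changed\n')
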
@@ -1,228 +1,254 @@
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import re
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullid,
17 17 nullrev,
18 18 )
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 )
23 23
24 24 _nullhash = hashlib.sha1(nullid)
25 25
26 26 def hashrevisionsha1(text, p1, p2):
27 27 """Compute the SHA-1 for revision data and its parents.
28 28
29 29 This hash combines both the current file contents and its history
30 30 in a manner that makes it easy to distinguish nodes with the same
31 31 content in the revision graph.
32 32 """
33 33 # As of now, if one of the parent nodes is null, p2 is null
34 34 if p2 == nullid:
35 35 # deep copy of a hash is faster than creating one
36 36 s = _nullhash.copy()
37 37 s.update(p1)
38 38 else:
39 39 # none of the parent nodes are nullid
40 40 if p1 < p2:
41 41 a = p1
42 42 b = p2
43 43 else:
44 44 a = p2
45 45 b = p1
46 46 s = hashlib.sha1(a)
47 47 s.update(b)
48 48 s.update(text)
49 49 return s.digest()
50 50
51 51 METADATA_RE = re.compile(b'\x01\n')
52 52
53 53 def parsemeta(text):
54 54 """Parse metadata header from revision data.
55 55
56 56 Returns a 2-tuple of (metadata, offset), where both can be None if there
57 57 is no metadata.
58 58 """
59 59 # text can be buffer, so we can't use .startswith or .index
60 60 if text[:2] != b'\x01\n':
61 61 return None, None
62 62 s = METADATA_RE.search(text, 2).start()
63 63 mtext = text[2:s]
64 64 meta = {}
65 65 for l in mtext.splitlines():
66 66 k, v = l.split(b': ', 1)
67 67 meta[k] = v
68 68 return meta, s + 2
69 69
70 70 def packmeta(meta, text):
71 71 """Add metadata to fulltext to produce revision text."""
72 72 keys = sorted(meta)
73 73 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
74 74 return b'\x01\n%s\x01\n%s' % (metatext, text)
75 75
76 76 def iscensoredtext(text):
77 77 meta = parsemeta(text)[0]
78 78 return meta and b'censored' in meta
79 79
80 80 def filtermetadata(text):
81 81 """Extract just the revision data from source text.
82 82
83 83 Returns ``text`` unless it has a metadata header, in which case we return
84 84 a new buffer without the metadata.
85 85 """
86 86 if not text.startswith(b'\x01\n'):
87 87 return text
88 88
89 89 offset = text.index(b'\x01\n', 2)
90 90 return text[offset + 2:]
91 91
92 92 def filerevisioncopied(store, node):
93 93 """Resolve file revision copy metadata.
94 94
95 95 Returns ``False`` if the file has no copy metadata. Otherwise a
96 96 2-tuple of the source filename and node.
97 97 """
98 98 if store.parents(node)[0] != nullid:
99 99 return False
100 100
101 101 meta = parsemeta(store.revision(node))[0]
102 102
103 103 # copy and copyrev occur in pairs. In rare cases due to old bugs,
104 104 # one can occur without the other. So ensure both are present to flag
105 105 # as a copy.
106 106 if meta and b'copy' in meta and b'copyrev' in meta:
107 107 return meta[b'copy'], bin(meta[b'copyrev'])
108 108
109 109 return False
110 110
111 def filerevisiondifferent(store, node, filedata):
112 """Determines whether file data is equivalent to a stored node."""
113
114 if filedata.startswith(b'\x01\n'):
115 revisiontext = b'\x01\n\x01\n' + filedata
116 else:
117 revisiontext = filedata
118
119 p1, p2 = store.parents(node)
120
121 computednode = hashrevisionsha1(revisiontext, p1, p2)
122
123 if computednode == node:
124 return False
125
126 # Censored files compare against the empty file.
127 if store.iscensored(store.rev(node)):
128 return filedata != b''
129
130 # Renaming a file produces a different hash, even if the data
131 # remains unchanged. Check if that's the case.
132 if store.renamed(node):
133 return store.read(node) != filedata
134
135 return True
136
111 137 def iterrevs(storelen, start=0, stop=None):
112 138 """Iterate over revision numbers in a store."""
113 139 step = 1
114 140
115 141 if stop is not None:
116 142 if start > stop:
117 143 step = -1
118 144 stop += step
119 145 if stop > storelen:
120 146 stop = storelen
121 147 else:
122 148 stop = storelen
123 149
124 150 return pycompat.xrange(start, stop, step)
125 151
126 152 def fileidlookup(store, fileid, identifier):
127 153 """Resolve the file node for a value.
128 154
129 155 ``store`` is an object implementing the ``ifileindex`` interface.
130 156
131 157 ``fileid`` can be:
132 158
133 159 * A 20 byte binary node.
134 160 * An integer revision number
135 161 * A 40 byte hex node.
136 162 * A bytes that can be parsed as an integer representing a revision number.
137 163
138 164 ``identifier`` is used to populate ``error.LookupError`` with an identifier
139 165 for the store.
140 166
141 167 Raises ``error.LookupError`` on failure.
142 168 """
143 169 if isinstance(fileid, int):
144 170 try:
145 171 return store.node(fileid)
146 172 except IndexError:
147 173 raise error.LookupError(fileid, identifier, _('no match found'))
148 174
149 175 if len(fileid) == 20:
150 176 try:
151 177 store.rev(fileid)
152 178 return fileid
153 179 except error.LookupError:
154 180 pass
155 181
156 182 if len(fileid) == 40:
157 183 try:
158 184 rawnode = bin(fileid)
159 185 store.rev(rawnode)
160 186 return rawnode
161 187 except TypeError:
162 188 pass
163 189
164 190 try:
165 191 rev = int(fileid)
166 192
167 193 if b'%d' % rev != fileid:
168 194 raise ValueError
169 195
170 196 try:
171 197 return store.node(rev)
172 198 except (IndexError, TypeError):
173 199 pass
174 200 except (ValueError, OverflowError):
175 201 pass
176 202
177 203 raise error.LookupError(fileid, identifier, _('no match found'))
178 204
179 205 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
180 206 """Resolve information needed to strip revisions.
181 207
182 208 Finds the minimum revision number that must be stripped in order to
183 209 strip ``minlinkrev``.
184 210
185 211 Returns a 2-tuple of the minimum revision number to do that and a set
186 212 of all revision numbers that have linkrevs that would be broken
187 213 by that strip.
188 214
189 215 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
190 216 ``headrevs`` is an iterable of head revisions.
191 217 ``linkrevfn`` is a callable that receives a revision and returns a linked
192 218 revision.
193 219 ``parentrevsfn`` is a callable that receives a revision number and returns
194 220 an iterable of its parent revision numbers.
195 221 """
196 222 brokenrevs = set()
197 223 strippoint = tiprev + 1
198 224
199 225 heads = {}
200 226 futurelargelinkrevs = set()
201 227 for head in headrevs:
202 228 headlinkrev = linkrevfn(head)
203 229 heads[head] = headlinkrev
204 230 if headlinkrev >= minlinkrev:
205 231 futurelargelinkrevs.add(headlinkrev)
206 232
207 233 # This algorithm involves walking down the rev graph, starting at the
208 234 # heads. Since the revs are topologically sorted according to linkrev,
209 235 # once all head linkrevs are below the minlink, we know there are
210 236 # no more revs that could have a linkrev greater than minlink.
211 237 # So we can stop walking.
212 238 while futurelargelinkrevs:
213 239 strippoint -= 1
214 240 linkrev = heads.pop(strippoint)
215 241
216 242 if linkrev < minlinkrev:
217 243 brokenrevs.add(strippoint)
218 244 else:
219 245 futurelargelinkrevs.remove(linkrev)
220 246
221 247 for p in parentrevsfn(strippoint):
222 248 if p != nullrev:
223 249 plinkrev = linkrevfn(p)
224 250 heads[p] = plinkrev
225 251 if plinkrev >= minlinkrev:
226 252 futurelargelinkrevs.add(plinkrev)
227 253
228 254 return strippoint, brokenrevs
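
The pre-existing metadata helpers in this module are what make the b'\x01\n' handling in filerevisiondifferent() necessary: copy metadata is stored in-band, framed by b'\x01\n' markers. A small, self-contained round trip of those helpers (all values below are made up for illustration):

    from mercurial.utils import storageutil

    # Pack copy metadata in front of a fulltext, as filelog.add() does when
    # metadata is present.
    meta = {b'copy': b'old/name', b'copyrev': b'0' * 40}
    revtext = storageutil.packmeta(meta, b'file contents\n')

    # parsemeta() returns the metadata dict and the offset of the real data.
    parsed, offset = storageutil.parsemeta(revtext)
    assert parsed == meta
    assert revtext[offset:] == b'file contents\n'

    # filtermetadata() strips the header again, which is what filelog.read()
    # relies on.
    assert storageutil.filtermetadata(revtext) == b'file contents\n'
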