##// END OF EJS Templates
treemanifest: store submanifest revlog per directory...
Martin von Zweigbergk -
r25091:b5052fc7 default
parent child Browse files
Show More
@@ -0,0 +1,278
1
2 Set up repo
3
4 $ hg --config experimental.treemanifest=True init repo
5 $ cd repo
6
7 Requirements get set on init
8
9 $ grep treemanifest .hg/requires
10 treemanifest
11
12 Without directories, looks like any other repo
13
14 $ echo 0 > a
15 $ echo 0 > b
16 $ hg ci -Aqm initial
17 $ hg debugdata -m 0
18 a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
19 b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
20
21 Submanifest is stored in separate revlog
22
23 $ mkdir dir1
24 $ echo 1 > dir1/a
25 $ echo 1 > dir1/b
26 $ echo 1 > e
27 $ hg ci -Aqm 'add dir1'
28 $ hg debugdata -m 1
29 a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
30 b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
31 dir1\x008b3ffd73f901e83304c83d33132c8e774ceac44ed (esc)
32 e\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
33 $ hg debugdata .hg/store/meta/dir1/00manifest.i 0
34 a\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
35 b\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
36
37 Can add nested directories
38
39 $ mkdir dir1/dir1
40 $ echo 2 > dir1/dir1/a
41 $ echo 2 > dir1/dir1/b
42 $ mkdir dir1/dir2
43 $ echo 2 > dir1/dir2/a
44 $ echo 2 > dir1/dir2/b
45 $ hg ci -Aqm 'add dir1/dir1'
46 $ hg files -r .
47 a
48 b
49 dir1/a
50 dir1/b
51 dir1/dir1/a
52 dir1/dir1/b
53 dir1/dir2/a
54 dir1/dir2/b
55 e
56
57 Revision is not created for unchanged directory
58
59 $ mkdir dir2
60 $ echo 3 > dir2/a
61 $ hg add dir2
62 adding dir2/a
63 $ hg debugindex .hg/store/meta/dir1/00manifest.i > before
64 $ hg ci -qm 'add dir2'
65 $ hg debugindex .hg/store/meta/dir1/00manifest.i > after
66 $ diff before after
67 $ rm before after
68
69 Removing directory does not create a revlog entry
70
71 $ hg rm dir1/dir1
72 removing dir1/dir1/a
73 removing dir1/dir1/b
74 $ hg debugindex .hg/store/meta/dir1/dir1/00manifest.i > before
75 $ hg ci -qm 'remove dir1/dir1'
76 $ hg debugindex .hg/store/meta/dir1/dir1/00manifest.i > after
77 $ diff before after
78 $ rm before after
79
80 Check that hg files (calls treemanifest.walk()) works
81
82 $ hg co 'desc("add dir2")'
83 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
84 $ hg files -r . dir1
85 dir1/a
86 dir1/b
87 dir1/dir1/a
88 dir1/dir1/b
89 dir1/dir2/a
90 dir1/dir2/b
91
92 Check that status between revisions works (calls treemanifest.matches())
93
94 $ hg status --rev 'desc("add dir1")' --rev . dir1
95 A dir1/dir1/a
96 A dir1/dir1/b
97 A dir1/dir2/a
98 A dir1/dir2/b
99
100 Merge creates 2-parent revision of directory revlog
101
102 $ echo 5 > dir1/a
103 $ hg ci -Aqm 'modify dir1/a'
104 $ hg co '.^'
105 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
106 $ echo 6 > dir1/b
107 $ hg ci -Aqm 'modify dir1/b'
108 $ hg merge 'desc("modify dir1/a")'
109 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
110 (branch merge, don't forget to commit)
111 $ hg ci -m 'conflict-free merge involving dir1/'
112 $ cat dir1/a
113 5
114 $ cat dir1/b
115 6
116 $ hg debugindex .hg/store/meta/dir1/00manifest.i
117 rev offset length base linkrev nodeid p1 p2
118 0 0 54 0 1 8b3ffd73f901 000000000000 000000000000
119 1 54 68 0 2 b66d046c644f 8b3ffd73f901 000000000000
120 2 122 12 0 4 b87265673c8a b66d046c644f 000000000000
121 3 134 95 0 5 aa5d3adcec72 b66d046c644f 000000000000
122 4 229 81 0 6 e29b066b91ad b66d046c644f 000000000000
123 5 310 107 5 7 a120ce2b83f5 e29b066b91ad aa5d3adcec72
124
125 Merge keeping directory from parent 1 does not create revlog entry. (Note that
126 dir1's manifest does change, but only because dir1/a's filelog changes.)
127
128 $ hg co 'desc("add dir2")'
129 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
130 $ echo 8 > dir2/a
131 $ hg ci -m 'modify dir2/a'
132 created new head
133
134 $ hg debugindex .hg/store/meta/dir2/00manifest.i > before
135 $ hg merge 'desc("modify dir1/a")'
136 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
137 (branch merge, don't forget to commit)
138 $ hg revert -r 'desc("modify dir2/a")' .
139 reverting dir1/a (glob)
140 $ hg ci -m 'merge, keeping parent 1'
141 $ hg debugindex .hg/store/meta/dir2/00manifest.i > after
142 $ diff before after
143 $ rm before after
144
145 Merge keeping directory from parent 2 does not create revlog entry. (Note that
146 dir2's manifest does change, but only because dir2/a's filelog changes.)
147
148 $ hg co 'desc("modify dir2/a")'
149 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
150 $ hg debugindex .hg/store/meta/dir1/00manifest.i > before
151 $ hg merge 'desc("modify dir1/a")'
152 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
153 (branch merge, don't forget to commit)
154 $ hg revert -r 'desc("modify dir1/a")' .
155 reverting dir2/a (glob)
156 $ hg ci -m 'merge, keeping parent 2'
157 created new head
158 $ hg debugindex .hg/store/meta/dir1/00manifest.i > after
159 $ diff before after
160 $ rm before after
161
162 Create flat source repo for tests with mixed flat/tree manifests
163
164 $ cd ..
165 $ hg init repo-flat
166 $ cd repo-flat
167
168 Create a few commits with flat manifest
169
170 $ echo 0 > a
171 $ echo 0 > b
172 $ echo 0 > e
173 $ for d in dir1 dir1/dir1 dir1/dir2 dir2
174 > do
175 > mkdir $d
176 > echo 0 > $d/a
177 > echo 0 > $d/b
178 > done
179 $ hg ci -Aqm initial
180
181 $ echo 1 > a
182 $ echo 1 > dir1/a
183 $ echo 1 > dir1/dir1/a
184 $ hg ci -Aqm 'modify on branch 1'
185
186 $ hg co 0
187 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
188 $ echo 2 > b
189 $ echo 2 > dir1/b
190 $ echo 2 > dir1/dir1/b
191 $ hg ci -Aqm 'modify on branch 2'
192
193 $ hg merge 1
194 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
195 (branch merge, don't forget to commit)
196 $ hg ci -m 'merge of flat manifests to new flat manifest'
197
198 Create clone with tree manifests enabled
199
200 $ cd ..
201 $ hg clone --pull --config experimental.treemanifest=1 repo-flat repo-mixed
202 requesting all changes
203 adding changesets
204 adding manifests
205 adding file changes
206 added 4 changesets with 17 changes to 11 files
207 updating to branch default
208 11 files updated, 0 files merged, 0 files removed, 0 files unresolved
209 $ cd repo-mixed
210 $ test -f .hg/store/meta
211 [1]
212 $ grep treemanifest .hg/requires
213 treemanifest
214
215 Commit should store revlog per directory
216
217 $ hg co 1
218 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
219 $ echo 3 > a
220 $ echo 3 > dir1/a
221 $ echo 3 > dir1/dir1/a
222 $ hg ci -m 'first tree'
223 created new head
224 $ find .hg/store/meta | sort
225 .hg/store/meta
226 .hg/store/meta/dir1
227 .hg/store/meta/dir1/00manifest.i
228 .hg/store/meta/dir1/dir1
229 .hg/store/meta/dir1/dir1/00manifest.i
230 .hg/store/meta/dir1/dir2
231 .hg/store/meta/dir1/dir2/00manifest.i
232 .hg/store/meta/dir2
233 .hg/store/meta/dir2/00manifest.i
234
235 Merge of two trees
236
237 $ hg co 2
238 6 files updated, 0 files merged, 0 files removed, 0 files unresolved
239 $ hg merge 1
240 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
241 (branch merge, don't forget to commit)
242 $ hg ci -m 'merge of flat manifests to new tree manifest'
243 created new head
244 $ hg diff -r 3
245
246 Parent of tree root manifest should be flat manifest, and two for merge
247
248 $ hg debugindex -m
249 rev offset length base linkrev nodeid p1 p2
250 0 0 80 0 0 40536115ed9e 000000000000 000000000000
251 1 80 83 0 1 f3376063c255 40536115ed9e 000000000000
252 2 163 103 0 2 5d9b9da231a2 40536115ed9e 000000000000
253 3 266 83 0 3 d17d663cbd8a 5d9b9da231a2 f3376063c255
254 4 349 132 4 4 c05a51345f86 f3376063c255 000000000000
255 5 481 110 4 5 82594b1f557d 5d9b9da231a2 f3376063c255
256
257
258 Status across flat/tree boundary should work
259
260 $ hg status --rev '.^' --rev .
261 M a
262 M dir1/a
263 M dir1/dir1/a
264
265
266 Turning off treemanifest config has no effect
267
268 $ hg debugindex .hg/store/meta/dir1/00manifest.i
269 rev offset length base linkrev nodeid p1 p2
270 0 0 125 0 4 63c9c0557d24 000000000000 000000000000
271 1 125 109 0 5 23d12a1f6e0e 000000000000 000000000000
272 $ echo 2 > dir1/a
273 $ hg --config experimental.treemanifest=False ci -qm 'modify dir1/a'
274 $ hg debugindex .hg/store/meta/dir1/00manifest.i
275 rev offset length base linkrev nodeid p1 p2
276 0 0 125 0 4 63c9c0557d24 000000000000 000000000000
277 1 125 109 0 5 23d12a1f6e0e 000000000000 000000000000
278 2 234 55 0 6 3cb2d87b4250 23d12a1f6e0e 000000000000
@@ -1,860 +1,944
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11 import os
12 12
13 13 propertycache = util.propertycache
14 14
15 15 def _parsev1(data):
16 16 # This method does a little bit of excessive-looking
17 17 # precondition checking. This is so that the behavior of this
18 18 # class exactly matches its C counterpart to try and help
19 19 # prevent surprise breakage for anyone that develops against
20 20 # the pure version.
21 21 if data and data[-1] != '\n':
22 22 raise ValueError('Manifest did not end in a newline.')
23 23 prev = None
24 24 for l in data.splitlines():
25 25 if prev is not None and prev > l:
26 26 raise ValueError('Manifest lines not in sorted order.')
27 27 prev = l
28 28 f, n = l.split('\0')
29 29 if len(n) > 40:
30 30 yield f, revlog.bin(n[:40]), n[40:]
31 31 else:
32 32 yield f, revlog.bin(n), ''
33 33
34 34 def _parsev2(data):
35 35 metadataend = data.find('\n')
36 36 # Just ignore metadata for now
37 37 pos = metadataend + 1
38 38 prevf = ''
39 39 while pos < len(data):
40 40 end = data.find('\n', pos + 1) # +1 to skip stem length byte
41 41 if end == -1:
42 42 raise ValueError('Manifest ended with incomplete file entry.')
43 43 stemlen = ord(data[pos])
44 44 items = data[pos + 1:end].split('\0')
45 45 f = prevf[:stemlen] + items[0]
46 46 if prevf > f:
47 47 raise ValueError('Manifest entries not in sorted order.')
48 48 fl = items[1]
49 49 # Just ignore metadata (items[2:] for now)
50 50 n = data[end + 1:end + 21]
51 51 yield f, n, fl
52 52 pos = end + 22
53 53 prevf = f
54 54
def _parse(data):
    """Generates (path, node, flags) tuples from a manifest text

    Text starting with a NUL byte is handed to the version 2 parser,
    anything else to the version 1 parser.
    """
    parser = _parsev1
    if data.startswith('\0'):
        parser = _parsev2
    return iter(parser(data))
61 61
def _text(it, usemanifestv2):
    """Given an iterator over (path, node, flags) tuples, returns a manifest
    text in version 2 format if usemanifestv2 is true, else in version 1
    format"""
    serialize = _textv1
    if usemanifestv2:
        serialize = _textv2
    return serialize(it)
69 69
def _textv1(it):
    """Serialize (path, node, flags) tuples as a version 1 manifest text.

    Each entry becomes one "<path>\\0<hex node><flags>\\n" line. All paths
    are passed to _checkforbidden() before the text is returned.
    """
    tohex = revlog.hex
    names = []
    chunks = []
    for path, node, flags in it:
        names.append(path)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        chunks.append("%s\0%s%s\n" % (path, tohex(node), flags))

    _checkforbidden(names)
    return ''.join(chunks)
82 82
def _textv2(it):
    """Serialize (path, node, flags) tuples as a version 2 manifest text.

    The text starts with a "\\0\\n" header line. Each entry stores how many
    leading characters its path shares with the previous path (capped at
    255 so it fits in one byte), followed by the rest of the path, a NUL,
    the flags, a newline, the node and a newline. All paths are passed to
    _checkforbidden() before the text is returned.
    """
    names = []
    chunks = ['\0\n']
    lastpath = ''
    for path, node, flags in it:
        names.append(path)
        shared = len(os.path.commonprefix([lastpath, path]))
        if shared > 255:
            shared = 255
        chunks.append("%c%s\0%s\n%s\n" % (shared, path[shared:], flags, node))
        lastpath = path
    _checkforbidden(names)
    return ''.join(chunks)
95 95
class _lazymanifest(dict):
    """Pure Python implementation of lazymanifest.

    It is a dict mapping path to a (node, flags) pair. No optimization work
    has gone into it *at all*, and despite the name it is not lazy.
    """

    def __init__(self, data):
        """Fill the mapping from the entries of manifest text data."""
        dict.__init__(self)
        for path, node, flags in _parse(data):
            self[path] = node, flags

    def __setitem__(self, k, v):
        """Store the (node, flags) pair v under path k."""
        node, flag = v
        assert node is not None
        # match c implementation behavior: keep at most 21 bytes of node
        # (slicing leaves shorter nodes untouched)
        dict.__setitem__(self, k, (node[:21], flag))

    def __iter__(self):
        """Iterate over the paths in sorted order."""
        paths = dict.keys(self)
        return iter(sorted(paths))

    def iterkeys(self):
        """Iterate over the paths in sorted order."""
        paths = dict.keys(self)
        return iter(sorted(paths))

    def iterentries(self):
        """Iterate over (path, node, flags) tuples sorted by path."""
        ordered = sorted(self.iteritems())
        return ((path, entry[0], entry[1]) for path, entry in ordered)

    def copy(self):
        """Return a new _lazymanifest holding the same entries."""
        duplicate = _lazymanifest('')
        duplicate.update(self)
        return duplicate

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Returns a dict keyed by path. Changed paths map to a pair of
        (node, flags) entries (this manifest first), with (None, '')
        standing in for a missing side. If clean is true, unchanged paths
        are included too, mapped to None.
        '''
        changes = {}

        for fn, e1 in self.iteritems():
            if fn not in m2:
                changes[fn] = e1, (None, '')
                continue
            e2 = m2[fn]
            if e1 != e2:
                changes[fn] = e1, e2
            elif clean:
                changes[fn] = None

        # paths that only exist on the other side
        for fn, e2 in m2.iteritems():
            if fn not in self:
                changes[fn] = (None, ''), e2

        return changes

    def filtercopy(self, filterfn):
        """Return a copy holding only the paths for which filterfn is true."""
        kept = _lazymanifest('')
        for path, node, flags in self.iterentries():
            if filterfn(path):
                kept[path] = node, flags
        return kept

    def text(self):
        """Get the full data of this manifest as a bytestring."""
        entries = self.iterentries()
        return _textv1(entries)
158 158
# Prefer the implementation provided by the parsers module when it has one;
# otherwise keep the pure Python class defined above.
_lazymanifest = getattr(parsers, 'lazymanifest', _lazymanifest)
163 163
class manifestdict(object):
    """Dict-like view of a flat manifest.

    All entries live in self._lm, a _lazymanifest keyed by path whose
    values are (node, flags) pairs.
    """

    def __init__(self, data=''):
        """Build the manifest from a manifest text (v1 or v2 format)."""
        if not data.startswith('\0'):
            self._lm = _lazymanifest(data)
            return
        # _lazymanifest can not parse v2, so feed it entry by entry
        self._lm = _lazymanifest('')
        for path, node, flags in _parsev2(data):
            self._lm[path] = node, flags

    def __getitem__(self, key):
        """Return the node stored for path key."""
        entry = self._lm[key]
        return entry[0]

    def find(self, key):
        """Return the (node, flags) pair stored for path key."""
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __setitem__(self, key, node):
        """Set the node for path key, keeping any flags already stored."""
        currentflags = self.flags(key, '')
        self._lm[key] = node, currentflags

    def __contains__(self, key):
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return self._lm.__iter__()

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        """Return the paths as a list."""
        return list(self.iterkeys())

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        return set(self).difference(m2)

    @propertycache
    def _dirs(self):
        return util.dirs(self)

    def dirs(self):
        """Return the cached util.dirs object built from this manifest."""
        return self._dirs

    def hasdir(self, dir):
        """Tell whether dir is one of the directories in self._dirs."""
        return dir in self._dirs

    def _filesfastpath(self, match):
        '''Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files.'''
        files = match.files()
        if len(files) >= 100:
            return False
        exact = match.isexact()
        if exact:
            return exact
        if match.anypats():
            return False
        return util.all(fn in self for fn in files)

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for path in iter(self):
                yield path
            return

        pending = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for path in sorted(pending):
                yield path
            return

        for path in self:
            # a specified pattern that is the exact name has been seen
            pending.discard(path)
            if match(path):
                yield path

        # for dirstate.walk, files=['.'] means "walk the whole tree".
        # follow that here, too
        pending.discard('.')

        # whatever is left was never seen as a file; unless it names a
        # directory, report it as bad
        for path in sorted(pending):
            if not self.hasdir(path):
                match.bad(path, None)

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        filtered = manifestdict()
        if self._filesfastpath(match):
            # only a few explicit files: copy just those entries
            source = self._lm
            for fn in match.files():
                if fn in source:
                    filtered._lm[fn] = source[fn]
        else:
            filtered._lm = self._lm.filtercopy(match)
        return filtered

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        return self._lm.diff(m2._lm, clean)

    def setflag(self, key, flag):
        """Set the flags for path key, keeping its node."""
        node = self[key]
        self._lm[key] = node, flag

    def get(self, key, default=None):
        """Return the node for path key, or default if key is missing."""
        try:
            entry = self._lm[key]
        except KeyError:
            return default
        return entry[0]

    def flags(self, key, default=''):
        """Return the flags for path key, or default if key is missing."""
        try:
            entry = self._lm[key]
        except KeyError:
            return default
        return entry[1]

    def copy(self):
        """Return a new manifestdict backed by a copy of self._lm."""
        duplicate = manifestdict()
        duplicate._lm = self._lm.copy()
        return duplicate

    def iteritems(self):
        """Iterate over the first two fields, (path, node), of each entry."""
        return (entry[:2] for entry in self._lm.iterentries())

    def text(self, usemanifestv2=False):
        """Return the manifest text, in v2 format if usemanifestv2 is set."""
        if not usemanifestv2:
            # use (probably) native version for v1
            return self._lm.text()
        return _textv2(self._lm.iterentries())

    def fastdelta(self, base, changes):
        """Given a base manifest text as an array.array and a list of changes
        relative to that text, compute a delta that can be used by revlog.

        changes is iterated as (path, todelete) pairs. Returns a tuple
        (arraytext, deltatext) as produced by _addlistdelta().
        """
        hunks = []
        hunkstart = None
        hunkend = None
        hunklines = [""]
        start = 0
        # zero copy representation of base as a buffer
        basebuf = util.buffer(base)

        # start with a readonly loop that finds the offset of
        # each line and creates the deltas
        for path, todelete in changes:
            # start will either be the index of the item or the insert point
            start, end = _msearch(basebuf, path, start)
            if todelete:
                if start == end:
                    # item we want to delete was not found, error out
                    raise AssertionError(
                            _("failed to remove %s from manifest") % path)
                line = ""
            else:
                node, flags = self._lm[path]
                line = "%s\0%s%s\n" % (path, revlog.hex(node), flags)

            if hunkstart is not None and hunkstart <= start <= hunkend:
                # this change starts inside or right after the current
                # hunk: extend the hunk instead of opening a new one
                if hunkend < end:
                    hunkend = end
                if line:
                    hunklines.append(line)
            else:
                if hunkstart is not None:
                    hunks.append([hunkstart, hunkend, "".join(hunklines)])
                hunkstart = start
                hunkend = end
                hunklines = [line]

        # flush the last hunk still being built
        if hunkstart is not None:
            hunks.append([hunkstart, hunkend, "".join(hunklines)])
        # apply the delta to the base, and get a delta for addrevision
        deltatext, arraytext = _addlistdelta(base, hunks)
        return arraytext, deltatext
367 367
368 368 def _msearch(m, s, lo=0, hi=None):
369 369 '''return a tuple (start, end) that says where to find s within m.
370 370
371 371 If the string is found m[start:end] are the line containing
372 372 that string. If start == end the string was not found and
373 373 they indicate the proper sorted insertion point.
374 374
375 375 m should be a buffer or a string
376 376 s is a string'''
377 377 def advance(i, c):
378 378 while i < lenm and m[i] != c:
379 379 i += 1
380 380 return i
381 381 if not s:
382 382 return (lo, lo)
383 383 lenm = len(m)
384 384 if not hi:
385 385 hi = lenm
386 386 while lo < hi:
387 387 mid = (lo + hi) // 2
388 388 start = mid
389 389 while start > 0 and m[start - 1] != '\n':
390 390 start -= 1
391 391 end = advance(start, '\0')
392 392 if m[start:end] < s:
393 393 # we know that after the null there are 40 bytes of sha1
394 394 # this translates to the bisect lo = mid + 1
395 395 lo = advance(end + 40, '\n') + 1
396 396 else:
397 397 # this translates to the bisect hi = mid
398 398 hi = start
399 399 end = advance(lo, '\0')
400 400 found = m[lo:end]
401 401 if s == found:
402 402 # we know that after the null there are 40 bytes of sha1
403 403 end = advance(end + 40, '\n')
404 404 return (lo, end + 1)
405 405 else:
406 406 return (lo, lo)
407 407
408 408 def _checkforbidden(l):
409 409 """Check filenames for illegal characters."""
410 410 for f in l:
411 411 if '\n' in f or '\r' in f:
412 412 raise error.RevlogError(
413 413 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
414 414
415 415
# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    """Apply the (start, end, content) hunks in x to addlist.

    Returns (deltatext, newaddlist): deltatext is every hunk packed as
    three big-endian longs (start, end, content length) followed by the
    content; newaddlist is a new array holding the patched text.
    """
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    patched = array.array('c')
    consumed = 0

    for start, end, content in x:
        # copy the untouched text before this hunk, then the replacement
        patched += addlist[consumed:start]
        if content:
            patched += array.array('c', content)
        consumed = end

    # copy whatever follows the last hunk
    patched += addlist[consumed:]

    packed = [struct.pack(">lll", start, end, len(content)) + content
              for start, end, content in x]
    return "".join(packed), patched
436 436
437 437 def _splittopdir(f):
438 438 if '/' in f:
439 439 dir, subpath = f.split('/', 1)
440 440 return dir + '/', subpath
441 441 else:
442 442 return '', f
443 443
444 444 class treemanifest(object):
445 445 def __init__(self, dir='', text=''):
446 446 self._dir = dir
447 self._node = revlog.nullid
447 448 self._dirs = {}
448 449 # Using _lazymanifest here is a little slower than plain old dicts
449 450 self._files = {}
450 451 self._flags = {}
451 self.parse(text)
452 def readsubtree(subdir, subm):
453 raise AssertionError('treemanifest constructor only accepts '
454 'flat manifests')
455 self.parse(text, readsubtree)
452 456
453 457 def _subpath(self, path):
454 458 return self._dir + path
455 459
456 460 def __len__(self):
457 461 size = len(self._files)
458 462 for m in self._dirs.values():
459 463 size += m.__len__()
460 464 return size
461 465
462 466 def _isempty(self):
463 467 return (not self._files and (not self._dirs or
464 468 util.all(m._isempty() for m in self._dirs.values())))
465 469
466 470 def __str__(self):
467 return '<treemanifest dir=%s>' % self._dir
471 return ('<treemanifest dir=%s, node=%s>' %
472 (self._dir, revlog.hex(self._node)))
473
474 def dir(self):
475 '''The directory that this tree manifest represents, including a
476 trailing '/'. Empty string for the repo root directory.'''
477 return self._dir
478
479 def node(self):
480 '''This node of this instance. nullid for unsaved instances. Should
481 be updated when the instance is read or written from a revlog.
482 '''
483 return self._node
484
485 def setnode(self, node):
486 self._node = node
468 487
469 488 def iteritems(self):
470 489 for p, n in sorted(self._dirs.items() + self._files.items()):
471 490 if p in self._files:
472 491 yield self._subpath(p), n
473 492 else:
474 493 for f, sn in n.iteritems():
475 494 yield f, sn
476 495
477 496 def iterkeys(self):
478 497 for p in sorted(self._dirs.keys() + self._files.keys()):
479 498 if p in self._files:
480 499 yield self._subpath(p)
481 500 else:
482 501 for f in self._dirs[p].iterkeys():
483 502 yield f
484 503
485 504 def keys(self):
486 505 return list(self.iterkeys())
487 506
488 507 def __iter__(self):
489 508 return self.iterkeys()
490 509
491 510 def __contains__(self, f):
492 511 if f is None:
493 512 return False
494 513 dir, subpath = _splittopdir(f)
495 514 if dir:
496 515 if dir not in self._dirs:
497 516 return False
498 517 return self._dirs[dir].__contains__(subpath)
499 518 else:
500 519 return f in self._files
501 520
502 521 def get(self, f, default=None):
503 522 dir, subpath = _splittopdir(f)
504 523 if dir:
505 524 if dir not in self._dirs:
506 525 return default
507 526 return self._dirs[dir].get(subpath, default)
508 527 else:
509 528 return self._files.get(f, default)
510 529
511 530 def __getitem__(self, f):
512 531 dir, subpath = _splittopdir(f)
513 532 if dir:
514 533 return self._dirs[dir].__getitem__(subpath)
515 534 else:
516 535 return self._files[f]
517 536
518 537 def flags(self, f):
519 538 dir, subpath = _splittopdir(f)
520 539 if dir:
521 540 if dir not in self._dirs:
522 541 return ''
523 542 return self._dirs[dir].flags(subpath)
524 543 else:
525 544 if f in self._dirs:
526 545 return ''
527 546 return self._flags.get(f, '')
528 547
529 548 def find(self, f):
530 549 dir, subpath = _splittopdir(f)
531 550 if dir:
532 551 return self._dirs[dir].find(subpath)
533 552 else:
534 553 return self._files[f], self._flags.get(f, '')
535 554
536 555 def __delitem__(self, f):
537 556 dir, subpath = _splittopdir(f)
538 557 if dir:
539 558 self._dirs[dir].__delitem__(subpath)
540 559 # If the directory is now empty, remove it
541 560 if self._dirs[dir]._isempty():
542 561 del self._dirs[dir]
543 562 else:
544 563 del self._files[f]
545 564 if f in self._flags:
546 565 del self._flags[f]
547 566
548 567 def __setitem__(self, f, n):
549 568 assert n is not None
550 569 dir, subpath = _splittopdir(f)
551 570 if dir:
552 571 if dir not in self._dirs:
553 572 self._dirs[dir] = treemanifest(self._subpath(dir))
554 573 self._dirs[dir].__setitem__(subpath, n)
555 574 else:
556 575 self._files[f] = n[:21] # to match manifestdict's behavior
557 576
558 577 def setflag(self, f, flags):
559 578 """Set the flags (symlink, executable) for path f."""
579 assert 'd' not in flags
560 580 dir, subpath = _splittopdir(f)
561 581 if dir:
562 582 if dir not in self._dirs:
563 583 self._dirs[dir] = treemanifest(self._subpath(dir))
564 584 self._dirs[dir].setflag(subpath, flags)
565 585 else:
566 586 self._flags[f] = flags
567 587
568 588 def copy(self):
569 589 copy = treemanifest(self._dir)
590 copy._node = self._node
570 591 for d in self._dirs:
571 592 copy._dirs[d] = self._dirs[d].copy()
572 593 copy._files = dict.copy(self._files)
573 594 copy._flags = dict.copy(self._flags)
574 595 return copy
575 596
576 597 def filesnotin(self, m2):
577 598 '''Set of files in this manifest that are not in the other'''
578 599 files = set()
579 600 def _filesnotin(t1, t2):
580 601 for d, m1 in t1._dirs.iteritems():
581 602 if d in t2._dirs:
582 603 m2 = t2._dirs[d]
583 604 _filesnotin(m1, m2)
584 605 else:
585 606 files.update(m1.iterkeys())
586 607
587 608 for fn in t1._files.iterkeys():
588 609 if fn not in t2._files:
589 610 files.add(t1._subpath(fn))
590 611
591 612 _filesnotin(self, m2)
592 613 return files
593 614
594 615 @propertycache
595 616 def _alldirs(self):
596 617 return util.dirs(self)
597 618
598 619 def dirs(self):
599 620 return self._alldirs
600 621
601 622 def hasdir(self, dir):
602 623 topdir, subdir = _splittopdir(dir)
603 624 if topdir:
604 625 if topdir in self._dirs:
605 626 return self._dirs[topdir].hasdir(subdir)
606 627 return False
607 628 return (dir + '/') in self._dirs
608 629
609 630 def walk(self, match):
610 631 '''Generates matching file names.
611 632
612 633 Equivalent to manifest.matches(match).iterkeys(), but without creating
613 634 an entirely new manifest.
614 635
615 636 It also reports nonexistent files by marking them bad with match.bad().
616 637 '''
617 638 if match.always():
618 639 for f in iter(self):
619 640 yield f
620 641 return
621 642
622 643 fset = set(match.files())
623 644
624 645 for fn in self._walk(match):
625 646 if fn in fset:
626 647 # specified pattern is the exact name
627 648 fset.remove(fn)
628 649 yield fn
629 650
630 651 # for dirstate.walk, files=['.'] means "walk the whole tree".
631 652 # follow that here, too
632 653 fset.discard('.')
633 654
634 655 for fn in sorted(fset):
635 656 if not self.hasdir(fn):
636 657 match.bad(fn, None)
637 658
638 659 def _walk(self, match, alldirs=False):
639 660 '''Recursively generates matching file names for walk().
640 661
641 662 Will visit all subdirectories if alldirs is True, otherwise it will
642 663 only visit subdirectories for which match.visitdir is True.'''
643 664
644 665 if not alldirs:
645 666 # substring to strip trailing slash
646 667 visit = match.visitdir(self._dir[:-1] or '.')
647 668 if not visit:
648 669 return
649 670 alldirs = (visit == 'all')
650 671
651 672 # yield this dir's files and walk its submanifests
652 673 for p in sorted(self._dirs.keys() + self._files.keys()):
653 674 if p in self._files:
654 675 fullp = self._subpath(p)
655 676 if match(fullp):
656 677 yield fullp
657 678 else:
658 679 for f in self._dirs[p]._walk(match, alldirs):
659 680 yield f
660 681
661 682 def matches(self, match):
662 683 '''generate a new manifest filtered by the match argument'''
663 684 if match.always():
664 685 return self.copy()
665 686
666 687 return self._matches(match)
667 688
668 689 def _matches(self, match, alldirs=False):
669 690 '''recursively generate a new manifest filtered by the match argument.
670 691
671 692 Will visit all subdirectories if alldirs is True, otherwise it will
672 693 only visit subdirectories for which match.visitdir is True.'''
673 694
674 695 ret = treemanifest(self._dir)
675 696 if not alldirs:
676 697 # substring to strip trailing slash
677 698 visit = match.visitdir(self._dir[:-1] or '.')
678 699 if not visit:
679 700 return ret
680 701 alldirs = (visit == 'all')
681 702
682 703 for fn in self._files:
683 704 fullp = self._subpath(fn)
684 705 if not match(fullp):
685 706 continue
686 707 ret._files[fn] = self._files[fn]
687 708 if fn in self._flags:
688 709 ret._flags[fn] = self._flags[fn]
689 710
690 711 for dir, subm in self._dirs.iteritems():
691 712 m = subm._matches(match, alldirs)
692 713 if not m._isempty():
693 714 ret._dirs[dir] = m
694 715
695 716 return ret
696 717
def diff(self, m2, clean=False):
    '''Compare this manifest with m2 and report per-file differences.

    Args:
      m2: the other manifest.
      clean: when true, files that are identical on both sides are
             reported too, mapped to None.

    Returns a dict keyed by full file path. Each changed entry maps to
    ((n1, fl1), (n2, fl2)) where n1/fl1 are the nodeid and flag in this
    manifest and n2/fl2 those in m2. A side on which the file is absent
    is reported with nodeid None and flag ''.
    '''
    changes = {}
    nothing = treemanifest()

    def _walk(left, right):
        # Recurse into every directory known to either side; a directory
        # missing on one side is compared against an empty tree.
        for name, leftsub in left._dirs.iteritems():
            _walk(leftsub, right._dirs.get(name, nothing))

        for name, rightsub in right._dirs.iteritems():
            if name not in left._dirs:
                _walk(nothing, rightsub)

        # Files present on the left (changed, removed, or unchanged).
        for fn, node1 in left._files.iteritems():
            flag1 = left._flags.get(fn, '')
            node2 = right._files.get(fn, None)
            flag2 = right._flags.get(fn, '')
            if node1 == node2 and flag1 == flag2:
                if clean:
                    changes[left._subpath(fn)] = None
            else:
                changes[left._subpath(fn)] = ((node1, flag1), (node2, flag2))

        # Files that exist only on the right.
        for fn, node2 in right._files.iteritems():
            if fn in left._files:
                continue
            flag2 = right._flags.get(fn, '')
            changes[right._subpath(fn)] = ((None, ''), (node2, flag2))

    _walk(self, m2)
    return changes
739 760
def parse(self, text, readsubtree):
    '''Populate this manifest from serialized text.

    Entries flagged 'd' are directories: readsubtree is called with the
    directory's full path (with trailing slash) and the entry's nodeid,
    and its result is stored as the submanifest. All other entries are
    treated as files.
    '''
    for name, node, flag in _parse(text):
        if flag != 'd':
            # Use __setitem__ and setflag rather than assigning directly
            # to _files and _flags, thereby letting us parse flat manifests
            # as well as tree manifests.
            self[name] = node
            if flag:
                self.setflag(name, flag)
            continue
        dirname = name + '/'
        self._dirs[dirname] = readsubtree(self._subpath(dirname), node)
745 773
def text(self, usemanifestv2=False):
    """Serialize every file entry of this manifest into a bytestring."""
    getflags = self.flags
    names = self.keys()
    entries = ((name, self[name], getflags(name)) for name in names)
    return _text(entries, usemanifestv2)
751 779
def dirtext(self, usemanifestv2=False):
    """Serialize only this directory level into a bytestring.

    Subdirectories are emitted as entries flagged 'd' (name without the
    trailing slash) that carry the submanifest's stored nodeid, so the
    submanifests must have been written first for those ids to be correct.
    """
    getflags = self.flags
    entries = []
    for dirname, subm in self._dirs.iteritems():
        entries.append((dirname[:-1], subm._node, 'd'))
    for name, node in self._files.iteritems():
        entries.append((name, node, getflags(name)))
    entries.sort()
    return _text(entries, usemanifestv2)
788
def writesubtrees(self, m1, m2, writesubtree):
    '''Call writesubtree(subm, p1node, p2node) for each direct subdirectory.

    The parent nodes are taken from the same-named subdirectory of m1 and
    m2; a subdirectory missing from a parent contributes an empty tree's
    node. If the first parent node is null, the two parents are swapped.
    '''
    nothing = treemanifest()

    def parentnode(parent, dirname):
        return parent._dirs.get(dirname, nothing)._node

    for dirname, subm in self._dirs.iteritems():
        first = parentnode(m1, dirname)
        second = parentnode(m2, dirname)
        if first == revlog.nullid:
            first, second = second, first
        writesubtree(subm, first, second)
797
class manifest(revlog.revlog):
    '''Revlog of manifest revisions.

    With an empty dir the index file is "00manifest.i"; with a non-empty
    dir (only allowed when tree manifests are enabled) it is
    "meta/" + dir + "00manifest.i".
    '''
    def __init__(self, opener, dir=''):
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        opts = getattr(opener, 'options', None)
        if opts is None:
            opts = {}
        cachesize = opts.get('manifestcachesize', 4)
        usetreemanifest = opts.get('treemanifest', False)
        usemanifestv2 = opts.get('manifestv2', False)
        self._mancache = util.lrucachedict(cachesize)
        self._treeinmem = usetreemanifest
        self._treeondisk = usetreemanifest
        self._usemanifestv2 = usemanifestv2
        if dir:
            assert self._treeondisk
            indexfile = "meta/" + dir + "00manifest.i"
        else:
            indexfile = "00manifest.i"
        revlog.revlog.__init__(self, opener, indexfile)
        self._dir = dir

    def _newmanifest(self, data=''):
        '''build an in-memory manifest (tree or flat) from data'''
        if not self._treeinmem:
            return manifestdict(data)
        return treemanifest(self._dir, data)

    def _slowreaddelta(self, node):
        '''compute the delta against the delta parent by diffing both
        fully-read manifests; only entries present in node are kept'''
        baserev = self.deltaparent(self.rev(node))
        old = self.read(self.node(baserev))
        new = self.read(node)
        delta = self._newmanifest()
        for f, ((n0, fl0), (n1, fl1)) in old.diff(new).iteritems():
            if not n1:
                continue
            delta[f] = n1
            if fl1:
                delta.setflag(f, fl1)
        return delta

    def readdelta(self, node):
        '''return a manifest holding the entries of node that differ from
        its delta parent'''
        if self._usemanifestv2 or self._treeondisk:
            return self._slowreaddelta(node)
        rev = self.rev(node)
        patch = self.revdiff(self.deltaparent(rev), rev)
        return self._newmanifest(mdiff.patchtext(patch))

    def readfast(self, node):
        '''use the faster of readdelta or read

        The returned manifest holds either only the files added/modified
        relative to p1, or every file of the manifest; which of the two
        depends on the codepath used to retrieve the data.
        '''
        rev = self.rev(node)
        base = self.deltaparent(rev)
        if base == revlog.nullrev or base not in self.parentrevs(rev):
            return self.read(node)
        return self.readdelta(node)

    def read(self, node):
        '''return the manifest for node, going through the manifest cache'''
        if node == revlog.nullid:
            return self._newmanifest() # don't upset local cache
        if node in self._mancache:
            return self._mancache[node][0]
        text = self.revision(node)
        if not self._treeondisk:
            m = self._newmanifest(text)
            arraytext = array.array('c', text)
        else:
            # Each subdirectory is read from its own revlog.
            def readsubtree(dir, subm):
                return manifest(self.opener, dir).read(subm)
            m = self._newmanifest()
            m.parse(text, readsubtree)
            m.setnode(node)
            arraytext = None
        self._mancache[node] = (m, arraytext)
        return m

    def find(self, node, f):
        '''look up entry for a single file efficiently.
        return (node, flags) pair if found, (None, None) if not.'''
        m = self.read(node)
        try:
            found = m.find(f)
        except KeyError:
            return None, None
        return found

    def add(self, m, transaction, link, p1, p2, added, removed):
        '''store manifest m as a new revision and return its node'''
        usefastdelta = (p1 in self._mancache and not self._treeinmem
                        and not self._usemanifestv2)
        if usefastdelta:
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one list for sorting
            # this could use heapq.merge() (from Python 2.6+) or equivalent
            # since the lists are already sorted
            work = ([(x, False) for x in added] +
                    [(x, True) for x in removed])
            work.sort()

            arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
            cachedelta = self.rev(p1), deltatext
            text = util.buffer(arraytext)
            n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
        elif self._treeondisk:
            # Tree manifests are written one directory at a time.
            m1 = self.read(p1)
            m2 = self.read(p2)
            n = self._addtree(m, transaction, link, m1, m2)
            arraytext = None
        else:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            text = m.text(self._usemanifestv2)
            n = self.addrevision(text, transaction, link, p1, p2)
            arraytext = array.array('c', text)

        self._mancache[n] = (m, arraytext)

        return n

    def _addtree(self, m, transaction, link, m1, m2):
        '''write the subtrees of m, then this directory level; return the
        node that represents m in this revlog'''
        def writesubtree(subm, subp1, subp2):
            sublog = manifest(self.opener, subm.dir())
            sublog.add(subm, transaction, link, subp1, subp2, None, None)
        m.writesubtrees(m1, m2, writesubtree)
        text = m.dirtext(self._usemanifestv2)
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        n = None
        for parent in (m1, m2):
            if text == parent.dirtext(self._usemanifestv2):
                n = parent.node()
                break
        if n is None:
            n = self.addrevision(text, transaction, link, m1.node(), m2.node())
        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n
@@ -1,542 +1,542
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import scmutil, util, parsers
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
16 16 def _encodedir(path):
17 17 '''
18 18 >>> _encodedir('data/foo.i')
19 19 'data/foo.i'
20 20 >>> _encodedir('data/foo.i/bla.i')
21 21 'data/foo.i.hg/bla.i'
22 22 >>> _encodedir('data/foo.i.hg/bla.i')
23 23 'data/foo.i.hg.hg/bla.i'
24 24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
25 25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
26 26 '''
27 27 return (path
28 28 .replace(".hg/", ".hg.hg/")
29 29 .replace(".i/", ".i.hg/")
30 30 .replace(".d/", ".d.hg/"))
31 31
32 32 encodedir = getattr(parsers, 'encodedir', _encodedir)
33 33
def decodedir(path):
    '''Strip the ".hg" marker from directory components ending in
    .d.hg, .i.hg or .hg.hg.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # Fast path: nothing to decode without an ".hg/" component.
    if ".hg/" not in path:
        return path
    pairs = ((".d.hg/", ".d/"), (".i.hg/", ".i/"), (".hg.hg/", ".hg/"))
    for encoded, plain in pairs:
        path = path.replace(encoded, plain)
    return path
49 49
50 50 def _buildencodefun():
51 51 '''
52 52 >>> enc, dec = _buildencodefun()
53 53
54 54 >>> enc('nothing/special.txt')
55 55 'nothing/special.txt'
56 56 >>> dec('nothing/special.txt')
57 57 'nothing/special.txt'
58 58
59 59 >>> enc('HELLO')
60 60 '_h_e_l_l_o'
61 61 >>> dec('_h_e_l_l_o')
62 62 'HELLO'
63 63
64 64 >>> enc('hello:world?')
65 65 'hello~3aworld~3f'
66 66 >>> dec('hello~3aworld~3f')
67 67 'hello:world?'
68 68
69 69 >>> enc('the\x07quick\xADshot')
70 70 'the~07quick~adshot'
71 71 >>> dec('the~07quick~adshot')
72 72 'the\\x07quick\\xadshot'
73 73 '''
74 74 e = '_'
75 75 winreserved = [ord(x) for x in '\\:*?"<>|']
76 76 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
77 77 for x in (range(32) + range(126, 256) + winreserved):
78 78 cmap[chr(x)] = "~%02x" % x
79 79 for x in range(ord("A"), ord("Z") + 1) + [ord(e)]:
80 80 cmap[chr(x)] = e + chr(x).lower()
81 81 dmap = {}
82 82 for k, v in cmap.iteritems():
83 83 dmap[v] = k
84 84 def decode(s):
85 85 i = 0
86 86 while i < len(s):
87 87 for l in xrange(1, 4):
88 88 try:
89 89 yield dmap[s[i:i + l]]
90 90 i += l
91 91 break
92 92 except KeyError:
93 93 pass
94 94 else:
95 95 raise KeyError
96 96 return (lambda s: ''.join([cmap[c] for c in s]),
97 97 lambda s: ''.join(list(decode(s))))
98 98
99 99 _encodefname, _decodefname = _buildencodefun()
100 100
def encodefilename(s):
    '''Directory-encode s, then apply the reversible character encoding.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direncoded = encodedir(s)
    return _encodefname(direncoded)
107 107
def decodefilename(s):
    '''Undo the character encoding of s, then the directory encoding.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
114 114
115 115 def _buildlowerencodefun():
116 116 '''
117 117 >>> f = _buildlowerencodefun()
118 118 >>> f('nothing/special.txt')
119 119 'nothing/special.txt'
120 120 >>> f('HELLO')
121 121 'hello'
122 122 >>> f('hello:world?')
123 123 'hello~3aworld~3f'
124 124 >>> f('the\x07quick\xADshot')
125 125 'the~07quick~adshot'
126 126 '''
127 127 winreserved = [ord(x) for x in '\\:*?"<>|']
128 128 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
129 129 for x in (range(32) + range(126, 256) + winreserved):
130 130 cmap[chr(x)] = "~%02x" % x
131 131 for x in range(ord("A"), ord("Z") + 1):
132 132 cmap[chr(x)] = chr(x).lower()
133 133 return lambda s: "".join([cmap[c] for c in s])
134 134
135 135 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
136 136
137 137 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
138 138 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
139 139 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
140 140 def _auxencode(path, dotencode):
141 141 '''
142 142 Encodes filenames containing names reserved by Windows or which end in
143 143 period or space. Does not touch other single reserved characters c.
144 144 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
145 145 Additionally encodes space or period at the beginning, if dotencode is
146 146 True. Parameter path is assumed to be all lowercase.
147 147 A segment only needs encoding if a reserved name appears as a
148 148 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
149 149 doesn't need encoding.
150 150
151 151 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
152 152 >>> _auxencode(s.split('/'), True)
153 153 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
154 154 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
155 155 >>> _auxencode(s.split('/'), False)
156 156 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
157 157 >>> _auxencode(['foo. '], True)
158 158 ['foo.~20']
159 159 >>> _auxencode([' .foo'], True)
160 160 ['~20.foo']
161 161 '''
162 162 for i, n in enumerate(path):
163 163 if not n:
164 164 continue
165 165 if dotencode and n[0] in '. ':
166 166 n = "~%02x" % ord(n[0]) + n[1:]
167 167 path[i] = n
168 168 else:
169 169 l = n.find('.')
170 170 if l == -1:
171 171 l = len(n)
172 172 if ((l == 3 and n[:3] in _winres3) or
173 173 (l == 4 and n[3] <= '9' and n[3] >= '1'
174 174 and n[:3] in _winres4)):
175 175 # encode third letter ('aux' -> 'au~78')
176 176 ec = "~%02x" % ord(n[2])
177 177 n = n[0:2] + ec + n[3:]
178 178 path[i] = n
179 179 if n[-1] in '. ':
180 180 # encode last period or space ('foo...' -> 'foo..~2e')
181 181 path[i] = n[:-1] + "~%02x" % ord(n[-1])
182 182 return path
183 183
184 184 _maxstorepathlen = 120
185 185 _dirprefixlen = 8
186 186 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
187 187
def _hashencode(path, dotencode):
    '''Return the non-reversible hashed store name ("dh/...") for path.

    The result is made of shortened directory prefixes, as much of the
    basename as fits within _maxstorepathlen, the sha digest of the full
    path, and the original extension.
    '''
    digest = _sha(path).hexdigest()
    # skips prefix 'data/' or 'meta/'
    segments = lowerencode(path[5:]).split('/')
    parts = _auxencode(segments, dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]
    shortdirs = []
    shortlen = 0
    for part in parts[:-1]:
        short = part[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        # Length of the joined short dirs if this one were added.
        if shortdirs:
            total = shortlen + 1 + len(short)
        else:
            total = len(short)
        if total > _maxshortdirslen:
            break
        shortdirs.append(short)
        shortlen = total
    prefix = 'dh/'
    if shortdirs:
        prefix += '/'.join(shortdirs) + '/'
    tail = digest + ext
    res = prefix + tail
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # Fill the remaining room with the start of the basename.
        res = prefix + basename[:spaceleft] + tail
    return res
218 218
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Every uppercase letter 'X' becomes '_x'. Reserved or illegal
    characters become '~xx', where xx is the two digit hex code of the
    character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    When the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    That encoding keeps up to _dirprefixlen characters of each directory
    level of the lowerencoded path, but no more levels than fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    direncoded = encodedir(path)
    segments = _encodefname(direncoded).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    return _hashencode(direncoded, dotencode)
256 256
def _pathencode(path):
    '''Encode path with dotencode enabled, falling back to the hashed
    form when either the raw or the encoded path exceeds
    _maxstorepathlen.'''
    direncoded = encodedir(path)
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(direncoded).split('/')
        encoded = '/'.join(_auxencode(segments, True))
        if len(encoded) <= _maxstorepathlen:
            return encoded
    return _hashencode(direncoded, True)
266 266
267 267 _pathencode = getattr(parsers, 'pathencode', _pathencode)
268 268
def _plainhybridencode(f):
    '''hybrid-encode f without the leading dot/space encoding'''
    return _hybridencode(f, dotencode=False)
271 271
272 272 def _calcmode(vfs):
273 273 try:
274 274 # files in .hg/ will be created using this mode
275 275 mode = vfs.stat().st_mode
276 276 # avoid some useless chmods
277 277 if (0777 & ~util.umask) == (0777 & mode):
278 278 mode = None
279 279 except OSError:
280 280 mode = None
281 281 return mode
282 282
283 283 _data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
284 284 ' phaseroots obsstore')
285 285
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.vfs = scmutil.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the store path of f, directory-encoded'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''yields (unencoded, encoded, size)'''
        root = self.path
        if relpath:
            root += '/' + relpath
        if not self.rawvfs.isdir(root):
            return []
        striplen = len(self.path) + 1
        readdir = self.rawvfs.readdir
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in readdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    # only revlog index/data files are reported
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the revlog files found under data/'''
        return self._walk('data', True)

    def topfiles(self):
        '''return the revlog files directly in the store directory'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        for entry in self.topfiles():
            yield entry

    def copylist(self):
        return ['requires'] + _data.split()

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "data" + "/" + path
        # file?
        if self.vfs.exists(path + ".i"):
            return True
        # dir?
        if not path.endswith("/"):
            path += "/"
        return self.vfs.exists(path)
359 359
class encodedstore(basicstore):
    '''store living in <path>/store whose file names go through
    encodefilename'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.vfs = scmutil.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yield (unencoded, encoded, size); unencoded is None when the
        on-disk name cannot be decoded'''
        for encodedname, ondisk, size in self._walk('data', True):
            try:
                decoded = decodefilename(encodedname)
            except KeyError:
                decoded = None
            yield decoded, ondisk, size

    def join(self, f):
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
384 384
class fncache(object):
    '''Lazily loaded set of the file names listed in the 'fncache' file.

    entries stays None until the first access loads the file; _dirty
    records whether the in-memory set differs from what was loaded.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        Raises util.Abort when the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # Close the file on every exit path, including the Abort below.
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # Re-read line by line to report the offending line number.
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            fp.close()

    def write(self, tr):
        '''write the entries back through transaction tr if they changed'''
        if self._dirty:
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''record fn, marking the cache dirty if it was not yet known'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''forget fn; unknown names are ignored'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
445 445
class _fncachevfs(scmutil.abstractvfs, scmutil.auditvfs):
    '''vfs wrapper that encodes paths and records written data files in
    the fncache'''
    def __init__(self, vfs, fnc, encode):
        scmutil.auditvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        # Any non-read open of a file under data/ registers it.
        # NOTE(review): only 'data/' paths are recorded here; paths under
        # 'meta/' are not -- confirm whether that is intended.
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            self.fncache.add(path)
        return self.vfs(self.encode(path), mode, *args, **kw)

    def join(self, path):
        # An empty path is passed through without encoding.
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))
462 462
463 463 class fncachestore(basicstore):
464 464 def __init__(self, path, vfstype, dotencode):
465 465 if dotencode:
466 466 encode = _pathencode
467 467 else:
468 468 encode = _plainhybridencode
469 469 self.encode = encode
470 470 vfs = vfstype(path + '/store')
471 471 self.path = vfs.base
472 472 self.pathsep = self.path + '/'
473 473 self.createmode = _calcmode(vfs)
474 474 vfs.createmode = self.createmode
475 475 self.rawvfs = vfs
476 476 fnc = fncache(vfs)
477 477 self.fncache = fnc
478 478 self.vfs = _fncachevfs(vfs, fnc, encode)
479 479 self.opener = self.vfs
480 480
481 481 def join(self, f):
482 482 return self.pathsep + self.encode(f)
483 483
484 484 def getsize(self, path):
485 485 return self.rawvfs.stat(path).st_size
486 486
487 487 def datafiles(self):
488 488 for f in sorted(self.fncache):
489 489 ef = self.encode(f)
490 490 try:
491 491 yield f, ef, self.getsize(ef)
492 492 except OSError, err:
493 493 if err.errno != errno.ENOENT:
494 494 raise
495 495
496 496 def copylist(self):
497 497 d = ('data dh fncache phaseroots obsstore'
498 498 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
499 499 return (['requires', '00changelog.i'] +
500 500 ['store/' + f for f in d.split()])
501 501
502 502 def write(self, tr):
503 503 self.fncache.write(tr)
504 504
505 505 def invalidatecaches(self):
506 506 self.fncache.entries = None
507 507
508 508 def markremoved(self, fn):
509 509 self.fncache.remove(fn)
510 510
511 511 def _exists(self, f):
512 512 ef = self.encode(f)
513 513 try:
514 514 self.getsize(ef)
515 515 return True
516 516 except OSError, err:
517 517 if err.errno != errno.ENOENT:
518 518 raise
519 519 # nonexistent entry
520 520 return False
521 521
522 522 def __contains__(self, path):
523 523 '''Checks if the store contains path'''
524 524 path = "/".join(("data", path))
525 525 # check for files (exact match)
526 526 e = path + '.i'
527 527 if e in self.fncache and self._exists(e):
528 528 return True
529 529 # now check for directories (prefix match)
530 530 if not path.endswith('/'):
531 531 path += '/'
532 532 for e in self.fncache:
533 533 if e.startswith(path) and self._exists(e):
534 534 return True
535 535 return False
536 536
def store(requirements, path, vfstype):
    '''Create the store object matching the repository requirements.'''
    if 'store' not in requirements:
        return basicstore(path, vfstype)
    if 'fncache' not in requirements:
        return encodedstore(path, vfstype)
    return fncachestore(path, vfstype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now