##// END OF EJS Templates
convert --filemap: reduce memory usage
Alexis S. L. Carvalho -
r5401:4c555dd1 default
parent child Browse files
Show More
@@ -1,330 +1,352
1 1 # Copyright 2007 Bryan O'Sullivan <bos@serpentine.com>
2 2 # Copyright 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
3 3 #
4 4 # This software may be used and distributed according to the terms of
5 5 # the GNU General Public License, incorporated herein by reference.
6 6
7 7 import shlex
8 8 from mercurial.i18n import _
9 9 from mercurial import util
10 10 from common import SKIPREV
11 11
def rpairs(name):
    '''Yield (prefix, suffix) splits of name, longest prefix first.

    The first pair is always (name, ''). Each following pair splits
    name at the next '/' found when scanning from the right; the '/'
    itself belongs to neither half. Iteration stops once no further
    '/' is found.
    '''
    e = len(name)
    while e != -1:
        yield name[:e], name[e+1:]
        # look for the next separator strictly to the left of the last cut
        e = name.rfind('/', 0, e)
17 17
class filemapper(object):
    '''Map and filter filenames when importing.
    A name can be mapped to itself, a new name, or None (omit from new
    repository).

    The rules are read from a filemap file made of 'include NAME',
    'exclude NAME', 'rename SRC DEST' and 'source FILE' directives.'''

    def __init__(self, ui, path=None):
        '''Create a mapper, loading rules from path when one is given.

        Raises util.Abort if the filemap contains errors.'''
        self.ui = ui
        self.include = {}
        self.exclude = {}
        self.rename = {}
        if path:
            if self.parse(path):
                raise util.Abort(_('errors in filemap'))

    def parse(self, path):
        '''Read the filemap at path into the include/exclude/rename tables.

        Problems are reported through self.ui.warn; the return value is
        the number of errors found (0 means the file was accepted).'''
        errs = 0
        # number of arguments each known directive consumes
        nargs = {'include': 1, 'exclude': 1, 'rename': 2, 'source': 1}

        def check(name, mapping, listname):
            # a name may not appear in two conflicting lists
            if name in mapping:
                self.ui.warn(_('%s:%d: %r already in %s list\n') %
                             (lex.infile, lex.lineno, name, listname))
                return 1
            return 0

        fp = open(path)
        try:
            # posix mode: get_token() returns None at end of file
            lex = shlex.shlex(fp, path, True)
            lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'
            cmd = lex.get_token()
            while cmd:
                if cmd not in nargs:
                    self.ui.warn(_('%s:%d: unknown directive %r\n') %
                                 (lex.infile, lex.lineno, cmd))
                    errs += 1
                    cmd = lex.get_token()
                    continue
                args = [lex.get_token() for i in range(nargs[cmd])]
                if None in args:
                    # the file ended in the middle of a directive; do not
                    # record None as a file name
                    self.ui.warn(_('%s:%d: %r is missing an argument\n') %
                                 (lex.infile, lex.lineno, cmd))
                    errs += 1
                    break
                if cmd == 'include':
                    name = args[0]
                    errs += check(name, self.exclude, 'exclude')
                    self.include[name] = name
                elif cmd == 'exclude':
                    name = args[0]
                    errs += check(name, self.include, 'include')
                    errs += check(name, self.rename, 'rename')
                    self.exclude[name] = name
                elif cmd == 'rename':
                    src, dest = args
                    errs += check(src, self.exclude, 'exclude')
                    self.rename[src] = dest
                else:
                    # 'source': read further rules from another file
                    errs += self.parse(args[0])
                cmd = lex.get_token()
        finally:
            fp.close()
        return errs

    def lookup(self, name, mapping):
        '''Find the longest prefix of name that is a key of mapping.

        Returns (mapping[prefix], prefix, suffix) for the first match, or
        ('', name, '') when no prefix of name is in mapping.'''
        for pre, suf in rpairs(name):
            try:
                return mapping[pre], pre, suf
            except KeyError:
                pass
        return '', name, ''

    def __call__(self, name):
        '''Return the new name for name, or None if it is filtered out.

        A name is dropped when an include list exists and no prefix of
        the name is in it, or when a prefix of the name is excluded.
        Otherwise the longest matching rename is applied; a rename target
        of '.' strips the matched prefix.'''
        if self.include:
            inc = self.lookup(name, self.include)[0]
        else:
            inc = name
        if self.exclude:
            exc = self.lookup(name, self.exclude)[0]
        else:
            exc = ''
        if not inc or exc:
            return None
        newpre, pre, suf = self.lookup(name, self.rename)
        if newpre:
            if newpre == '.':
                return suf
            if suf:
                return newpre + '/' + suf
            return newpre
        return name

    def active(self):
        '''Return True if at least one rule has been loaded.'''
        return bool(self.include or self.exclude or self.rename)
97 97
# This class does two additional things compared to a regular source:
#
# - Filter and rename files.  This is mostly wrapped by the filemapper
#   class above. We hide the original filename in the revision that is
#   returned by getchanges to be able to find things later in getfile
#   and getmode.
#
# - Return only revisions that matter for the files we're interested in.
#   This involves rewriting the parents of the original revision to
#   create a graph that is restricted to those revisions.
#
#   This set of revisions includes not only revisions that directly
#   touch files we're interested in, but also merges that merge two
#   or more interesting revisions.
112 112
class filemap_source(object):
    '''Wrap a converter source so that files are filtered/renamed through
    a filemapper and only revisions relevant to the kept files are
    reported.'''

    def __init__(self, ui, baseconverter, filemap):
        self.ui = ui
        self.base = baseconverter
        self.filemapper = filemapper(ui, filemap)
        self.commits = {}
        # if a revision rev has parent p in the original revision graph, then
        # rev will have parent self.parentmap[p] in the restricted graph.
        self.parentmap = {}
        # self.wantedancestors[rev] is the set of all ancestors of rev that
        # are in the restricted graph.
        self.wantedancestors = {}
        self.convertedorder = None
        self._rebuilt = False
        self.origparents = {}
        # self.children[rev] counts the references to rev registered so
        # far (by getcommit() for each parent, and by rebuild() for each
        # convertedorder entry that names rev).
        self.children = {}
        # self.seenchildren[rev] counts how many of those references have
        # already been handed to _discard().
        self.seenchildren = {}

    def setrevmap(self, revmap, order):
        # rebuild our state to make things restartable
        #
        # To avoid calling getcommit for every revision that has already
        # been converted, we rebuild only the parentmap, delaying the
        # rebuild of wantedancestors until we need it (i.e. until a
        # merge).
        #
        # We assume the order argument lists the revisions in
        # topological order, so that we can infer which revisions were
        # wanted by previous runs.
        self._rebuilt = not revmap
        seen = {SKIPREV: SKIPREV}
        dummyset = util.set()
        converted = []
        for rev in order:
            mapped = revmap[rev]
            wanted = mapped not in seen
            if wanted:
                seen[mapped] = rev
                self.parentmap[rev] = rev
            else:
                self.parentmap[rev] = seen[mapped]
            self.wantedancestors[rev] = dummyset
            arg = seen[mapped]
            if arg == SKIPREV:
                arg = None
            converted.append((rev, wanted, arg))
        self.convertedorder = converted
        return self.base.setrevmap(revmap, order)

    def rebuild(self):
        '''Recompute parentmap and wantedancestors from convertedorder.

        Does the work only on the first call; always returns True.'''
        if self._rebuilt:
            return True
        self._rebuilt = True
        self.parentmap.clear()
        self.wantedancestors.clear()
        self.seenchildren.clear()
        # First pass: make sure the original parents of every converted
        # revision are known and count the references made through arg.
        for rev, wanted, arg in self.convertedorder:
            if rev not in self.origparents:
                self.origparents[rev] = self.getcommit(rev).parents
            if arg is not None:
                self.children[arg] = self.children.get(arg, 0) + 1

        # Second pass: replay the wanted/not-wanted decisions in order,
        # releasing per-revision data as soon as it is fully consumed.
        for rev, wanted, arg in self.convertedorder:
            parents = self.origparents[rev]
            if wanted:
                self.mark_wanted(rev, parents)
            else:
                self.mark_not_wanted(rev, arg)
            self._discard(arg, *parents)

        return True

    def getheads(self):
        return self.base.getheads()

    def getcommit(self, rev):
        # We want to save a reference to the commit objects to be able
        # to rewrite their parents later on.
        c = self.commits[rev] = self.base.getcommit(rev)
        # Every fetched commit counts as one child of each of its parents.
        for p in c.parents:
            self.children[p] = self.children.get(p, 0) + 1
        return c

    def _discard(self, *revs):
        '''Note one more processed reference to each of revs (None entries
        are ignored) and drop the per-revision data of any revision whose
        processed count has reached its registered count.'''
        for r in revs:
            if r is None:
                continue
            self.seenchildren[r] = self.seenchildren.get(r, 0) + 1
            if self.seenchildren[r] == self.children[r]:
                del self.wantedancestors[r]
                del self.parentmap[r]
                del self.seenchildren[r]
                # before rebuild() has run, the counts are kept
                if self._rebuilt:
                    del self.children[r]

    def wanted(self, rev, i):
        # Return True if we're directly interested in rev.
        #
        # i is an index selecting one of the parents of rev (if rev
        # has no parents, i is None).  getchangedfiles will give us
        # the list of files that are different in rev and in the parent
        # indicated by i.  If we're interested in any of these files,
        # we're interested in rev.
        try:
            files = self.base.getchangedfiles(rev, i)
        except NotImplementedError:
            raise util.Abort(_("source repository doesn't support --filemap"))
        for f in files:
            if self.filemapper(f):
                return True
        return False

    def mark_not_wanted(self, rev, p):
        # Mark rev as not interesting and update data structures.

        if p is None:
            # A root revision. Use SKIPREV to indicate that it doesn't
            # map to any revision in the restricted graph.  Put SKIPREV
            # in the set of wanted ancestors to simplify code elsewhere
            self.parentmap[rev] = SKIPREV
            self.wantedancestors[rev] = util.set((SKIPREV,))
            return

        # Reuse the data from our parent.
        self.parentmap[rev] = self.parentmap[p]
        self.wantedancestors[rev] = self.wantedancestors[p]

    def mark_wanted(self, rev, parents):
        # Mark rev as wanted and update data structures.

        # rev will be in the restricted graph, so children of rev in
        # the original graph should still have rev as a parent in the
        # restricted graph.
        self.parentmap[rev] = rev

        # The set of wanted ancestors of rev is the union of the sets
        # of wanted ancestors of its parents. Plus rev itself.
        wrev = util.set()
        for p in parents:
            wrev.update(self.wantedancestors[p])
        wrev.add(rev)
        self.wantedancestors[rev] = wrev

    def getchanges(self, rev):
        '''Decide whether rev is kept and report its changes.

        For a revision that is not wanted, return the revision its
        parent was mapped to (self.parentmap[rev]). For a wanted
        revision, rewrite the parents of the stored commit object and
        return (files, copies) with names run through the filemapper.

        Expects getcommit(rev) to have stored the commit in self.commits
        beforehand.'''
        parents = self.commits[rev].parents
        if len(parents) > 1:
            self.rebuild()

        # To decide whether we're interested in rev we:
        #
        # - calculate what parents rev will have if it turns out we're
        #   interested in it.  If it's going to have more than 1 parent,
        #   we're interested in it.
        #
        # - otherwise, we'll compare it with the single parent we found.
        #   If any of the files we're interested in is different in the
        #   two revisions, we're interested in rev.

        # A parent p is interesting if its mapped version (self.parentmap[p]):
        # - is not SKIPREV
        # - is still not in the list of parents (we don't want duplicates)
        # - is not an ancestor of the mapped versions of the other parents
        mparents = []
        wp = None
        for i, p1 in enumerate(parents):
            mp1 = self.parentmap[p1]
            if mp1 == SKIPREV or mp1 in mparents:
                continue
            for p2 in parents:
                if p1 == p2 or mp1 == self.parentmap[p2]:
                    continue
                if mp1 in self.wantedancestors[p2]:
                    break
            else:
                mparents.append(mp1)
                wp = i

        if wp is None and parents:
            wp = 0

        self.origparents[rev] = parents

        if len(mparents) < 2 and not self.wanted(rev, wp):
            # We don't want this revision.
            # Update our state and tell the convert process to map this
            # revision to the same revision its parent was mapped to.
            p = None
            if parents:
                p = parents[wp]
            self.mark_not_wanted(rev, p)
            self.convertedorder.append((rev, False, p))
            self._discard(*parents)
            return self.parentmap[rev]

        # We want this revision.
        # Rewrite the parents of the commit object
        self.commits[rev].parents = mparents
        self.mark_wanted(rev, parents)
        self.convertedorder.append((rev, True, None))
        self._discard(*parents)

        # Get the real changes and do the filtering/mapping.
        # To be able to get the files later on in getfile and getmode,
        # we hide the original filename in the rev part of the return
        # value.
        changes, copies = self.base.getchanges(rev)
        newnames = {}
        files = []
        for f, r in changes:
            newf = self.filemapper(f)
            if newf:
                files.append((newf, (f, r)))
                newnames[f] = newf

        ncopies = {}
        for c in copies:
            newc = self.filemapper(c)
            if newc:
                newsource = self.filemapper(copies[c])
                if newsource:
                    ncopies[newc] = newsource

        return files, ncopies

    def getfile(self, name, rev):
        # rev is the (original name, original rev) pair built by getchanges
        realname, realrev = rev
        return self.base.getfile(realname, realrev)

    def getmode(self, name, rev):
        # rev is the (original name, original rev) pair built by getchanges
        realname, realrev = rev
        return self.base.getmode(realname, realrev)

    def gettags(self):
        return self.base.gettags()

    def before(self):
        pass

    def after(self):
        pass
General Comments 0
You need to be logged in to leave comments. Login now