##// END OF EJS Templates
convert --filemap: reduce memory usage
Alexis S. L. Carvalho -
r5401:4c555dd1 default
parent child Browse files
Show More
@@ -1,330 +1,352
1 # Copyright 2007 Bryan O'Sullivan <bos@serpentine.com>
1 # Copyright 2007 Bryan O'Sullivan <bos@serpentine.com>
2 # Copyright 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
2 # Copyright 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
3 #
3 #
4 # This software may be used and distributed according to the terms of
4 # This software may be used and distributed according to the terms of
5 # the GNU General Public License, incorporated herein by reference.
5 # the GNU General Public License, incorporated herein by reference.
6
6
7 import shlex
7 import shlex
8 from mercurial.i18n import _
8 from mercurial.i18n import _
9 from mercurial import util
9 from mercurial import util
10 from common import SKIPREV
10 from common import SKIPREV
11
11
def rpairs(name):
    '''Yield (prefix, suffix) splits of name, longest prefix first.

    The first pair is (name, ''); each following pair splits name at the
    next '/' found when scanning from the right.  The '/' itself belongs
    to neither half.  For example, 'a/b/c' yields ('a/b/c', ''),
    ('a/b', 'c') and ('a', 'b/c').
    '''
    e = len(name)
    while e != -1:
        yield name[:e], name[e+1:]
        # rfind returns -1 once no '/' is left before position e, which
        # terminates the loop.
        e = name.rfind('/', 0, e)
17
17
class filemapper(object):
    '''Map and filter filenames when importing.
    A name can be mapped to itself, a new name, or None (omit from new
    repository).'''

    def __init__(self, ui, path=None):
        '''Create a mapper, optionally loading the filemap file at path.

        Raises util.Abort if the filemap contains errors.
        '''
        self.ui = ui
        self.include = {}
        self.exclude = {}
        self.rename = {}
        if path:
            if self.parse(path):
                raise util.Abort(_('errors in filemap'))

    def parse(self, path):
        '''Read the filemap at path and return the number of errors found.

        Recognized directives are 'include NAME', 'exclude NAME',
        'rename SRC DEST' and 'source FILE' (which parses another filemap
        recursively).  Every problem is reported through self.ui.warn.
        '''
        errs = 0

        def check(name, mapping, listname):
            # A name may not appear in two conflicting lists.
            if name in mapping:
                self.ui.warn(_('%s:%d: %r already in %s list\n') %
                             (lex.infile, lex.lineno, name, listname))
                return 1
            return 0

        def missing(directive):
            # get_token() returned nothing: the file ended before the
            # directive got all of its arguments.
            self.ui.warn(_('%s:%d: missing argument for %r directive\n') %
                         (lex.infile, lex.lineno, directive))
            return 1

        fp = open(path)
        try:
            lex = shlex.shlex(fp, path, True)
            # Let these characters be part of a word so that paths
            # containing them are read as a single token.
            lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'
            cmd = lex.get_token()
            while cmd:
                if cmd == 'include':
                    name = lex.get_token()
                    if not name:
                        errs += missing(cmd)
                    else:
                        errs += check(name, self.exclude, 'exclude')
                        self.include[name] = name
                elif cmd == 'exclude':
                    name = lex.get_token()
                    if not name:
                        errs += missing(cmd)
                    else:
                        errs += check(name, self.include, 'include')
                        errs += check(name, self.rename, 'rename')
                        self.exclude[name] = name
                elif cmd == 'rename':
                    src = lex.get_token()
                    dest = lex.get_token()
                    if not src or not dest:
                        errs += missing(cmd)
                    else:
                        errs += check(src, self.exclude, 'exclude')
                        self.rename[src] = dest
                elif cmd == 'source':
                    name = lex.get_token()
                    if not name:
                        errs += missing(cmd)
                    else:
                        errs += self.parse(name)
                else:
                    self.ui.warn(_('%s:%d: unknown directive %r\n') %
                                 (lex.infile, lex.lineno, cmd))
                    errs += 1
                cmd = lex.get_token()
        finally:
            # Always release the file handle, even if parsing raised.
            fp.close()
        return errs

    def lookup(self, name, mapping):
        '''Find the longest prefix of name that is a key of mapping.

        Returns (mapping[prefix], prefix, suffix) for the first match, or
        ('', name, '') when no prefix of name is in mapping.
        '''
        for pre, suf in rpairs(name):
            try:
                return mapping[pre], pre, suf
            except KeyError:
                pass
        return '', name, ''

    def __call__(self, name):
        '''Return the mapped name for name, or None if it is filtered out.'''
        # With no include list everything is included by default.
        if self.include:
            inc = self.lookup(name, self.include)[0]
        else:
            inc = name
        if self.exclude:
            exc = self.lookup(name, self.exclude)[0]
        else:
            exc = ''
        if not inc or exc:
            return None
        newpre, pre, suf = self.lookup(name, self.rename)
        if newpre:
            # Renaming a prefix to '.' drops the prefix entirely.
            if newpre == '.':
                return suf
            if suf:
                return newpre + '/' + suf
            return newpre
        return name

    def active(self):
        '''Return True if any include, exclude or rename rule is defined.'''
        return bool(self.include or self.exclude or self.rename)
97
97
# This class does two additional things compared to a regular source:
#
# - Filter and rename files.  This is mostly wrapped by the filemapper
#   class above. We hide the original filename in the revision that is
#   returned by getchanges to be able to find things later in getfile
#   and getmode.
#
# - Return only revisions that matter for the files we're interested in.
#   This involves rewriting the parents of the original revision to
#   create a graph that is restricted to those revisions.
#
#   This set of revisions includes not only revisions that directly
#   touch files we're interested in, but also merges that merge two
#   or more interesting revisions.
112
112
class filemap_source(object):
    '''Wrap a converter source, applying a filemap to files and revisions.

    File names are filtered and renamed through a filemapper, and only
    the revisions that matter for the remaining files are reported as
    wanted; parents are rewritten accordingly.
    '''

    def __init__(self, ui, baseconverter, filemap):
        self.ui = ui
        self.base = baseconverter
        self.filemapper = filemapper(ui, filemap)
        self.commits = {}
        # if a revision rev has parent p in the original revision graph, then
        # rev will have parent self.parentmap[p] in the restricted graph.
        self.parentmap = {}
        # self.wantedancestors[rev] is the set of all ancestors of rev that
        # are in the restricted graph.
        self.wantedancestors = {}
        self.convertedorder = None
        self._rebuilt = False
        self.origparents = {}
        # self.children[rev] counts the references to rev that have been
        # registered (see getcommit and rebuild); self.seenchildren[rev]
        # counts how many of them have been processed by _discard.  When
        # the two match, the per-revision state of rev is dropped.
        self.children = {}
        self.seenchildren = {}

    def setrevmap(self, revmap, order):
        # rebuild our state to make things restartable
        #
        # To avoid calling getcommit for every revision that has already
        # been converted, we rebuild only the parentmap, delaying the
        # rebuild of wantedancestors until we need it (i.e. until a
        # merge).
        #
        # We assume the order argument lists the revisions in
        # topological order, so that we can infer which revisions were
        # wanted by previous runs.
        self._rebuilt = not revmap
        seen = {SKIPREV: SKIPREV}
        # Placeholder shared by all revisions until rebuild() computes
        # the real sets.
        dummyset = util.set()
        converted = []
        for rev in order:
            mapped = revmap[rev]
            # The first revision mapped to a given target is the wanted
            # one; later revisions mapped to the same target were skipped.
            wanted = mapped not in seen
            if wanted:
                seen[mapped] = rev
                self.parentmap[rev] = rev
            else:
                self.parentmap[rev] = seen[mapped]
            self.wantedancestors[rev] = dummyset
            arg = seen[mapped]
            if arg == SKIPREV:
                arg = None
            converted.append((rev, wanted, arg))
        self.convertedorder = converted
        return self.base.setrevmap(revmap, order)

    def rebuild(self):
        '''Recompute parentmap and wantedancestors from convertedorder.

        Does nothing if the state has already been rebuilt.  Always
        returns True.
        '''
        if self._rebuilt:
            return True
        self._rebuilt = True
        self.parentmap.clear()
        self.wantedancestors.clear()
        self.seenchildren.clear()
        # First pass: make sure the original parents of every converted
        # revision are known and register the extra reference to arg, so
        # that the _discard calls in the second pass balance out.
        for rev, wanted, arg in self.convertedorder:
            if rev not in self.origparents:
                self.origparents[rev] = self.getcommit(rev).parents
            if arg is not None:
                self.children[arg] = self.children.get(arg, 0) + 1

        # Second pass: replay the wanted/not-wanted decisions in order.
        for rev, wanted, arg in self.convertedorder:
            parents = self.origparents[rev]
            if wanted:
                self.mark_wanted(rev, parents)
            else:
                self.mark_not_wanted(rev, arg)
            self._discard(arg, *parents)

        return True

    def getheads(self):
        return self.base.getheads()

    def getcommit(self, rev):
        # We want to save a reference to the commit objects to be able
        # to rewrite their parents later on.
        c = self.commits[rev] = self.base.getcommit(rev)
        # Count rev as a child of each of its parents; _discard relies on
        # these counts to know when a parent's state can be dropped.
        for p in c.parents:
            self.children[p] = self.children.get(p, 0) + 1
        return c

    def _discard(self, *revs):
        '''Note one processed reference to each rev and free finished ones.

        None entries are ignored.  Once a revision has been seen as many
        times as recorded in self.children, its entries in
        wantedancestors, parentmap and seenchildren are deleted; the
        children entry is deleted too, but only after a rebuild.
        '''
        for r in revs:
            if r is None:
                continue
            self.seenchildren[r] = self.seenchildren.get(r, 0) + 1
            if self.seenchildren[r] == self.children[r]:
                del self.wantedancestors[r]
                del self.parentmap[r]
                del self.seenchildren[r]
                if self._rebuilt:
                    del self.children[r]

    def wanted(self, rev, i):
        # Return True if we're directly interested in rev.
        #
        # i is an index selecting one of the parents of rev (if rev
        # has no parents, i is None).  getchangedfiles will give us
        # the list of files that are different in rev and in the parent
        # indicated by i.  If we're interested in any of these files,
        # we're interested in rev.
        try:
            files = self.base.getchangedfiles(rev, i)
        except NotImplementedError:
            raise util.Abort(_("source repository doesn't support --filemap"))
        for f in files:
            if self.filemapper(f):
                return True
        return False

    def mark_not_wanted(self, rev, p):
        # Mark rev as not interesting and update data structures.

        if p is None:
            # A root revision. Use SKIPREV to indicate that it doesn't
            # map to any revision in the restricted graph.  Put SKIPREV
            # in the set of wanted ancestors to simplify code elsewhere
            self.parentmap[rev] = SKIPREV
            self.wantedancestors[rev] = util.set((SKIPREV,))
            return

        # Reuse the data from our parent.
        self.parentmap[rev] = self.parentmap[p]
        self.wantedancestors[rev] = self.wantedancestors[p]

    def mark_wanted(self, rev, parents):
        # Mark rev as wanted and update data structures.

        # rev will be in the restricted graph, so children of rev in
        # the original graph should still have rev as a parent in the
        # restricted graph.
        self.parentmap[rev] = rev

        # The set of wanted ancestors of rev is the union of the sets
        # of wanted ancestors of its parents. Plus rev itself.
        wrev = util.set()
        for p in parents:
            wrev.update(self.wantedancestors[p])
        wrev.add(rev)
        self.wantedancestors[rev] = wrev

    def getchanges(self, rev):
        '''Return the filtered changes of rev, or the revision it maps to.

        If rev is not wanted, the return value is self.parentmap[rev].
        Otherwise it is a (files, copies) pair in which every file entry
        is (newname, (origname, origrev)), so that getfile and getmode can
        recover the original name.
        '''
        parents = self.commits[rev].parents
        if len(parents) > 1:
            self.rebuild()

        # To decide whether we're interested in rev we:
        #
        # - calculate what parents rev will have if it turns out we're
        #   interested in it.  If it's going to have more than 1 parent,
        #   we're interested in it.
        #
        # - otherwise, we'll compare it with the single parent we found.
        #   If any of the files we're interested in is different in the
        #   two revisions, we're interested in rev.

        # A parent p is interesting if its mapped version (self.parentmap[p]):
        # - is not SKIPREV
        # - is still not in the list of parents (we don't want duplicates)
        # - is not an ancestor of the mapped versions of the other parents
        mparents = []
        wp = None
        for i, p1 in enumerate(parents):
            mp1 = self.parentmap[p1]
            if mp1 == SKIPREV or mp1 in mparents:
                continue
            for p2 in parents:
                if p1 == p2 or mp1 == self.parentmap[p2]:
                    continue
                if mp1 in self.wantedancestors[p2]:
                    break
            else:
                mparents.append(mp1)
                wp = i

        if wp is None and parents:
            wp = 0

        self.origparents[rev] = parents

        if len(mparents) < 2 and not self.wanted(rev, wp):
            # We don't want this revision.
            # Update our state and tell the convert process to map this
            # revision to the same revision its parent was mapped to.
            p = None
            if parents:
                p = parents[wp]
            self.mark_not_wanted(rev, p)
            self.convertedorder.append((rev, False, p))
            self._discard(*parents)
            return self.parentmap[rev]

        # We want this revision.
        # Rewrite the parents of the commit object
        self.commits[rev].parents = mparents
        self.mark_wanted(rev, parents)
        self.convertedorder.append((rev, True, None))
        self._discard(*parents)

        # Get the real changes and do the filtering/mapping.
        # To be able to get the files later on in getfile and getmode,
        # we hide the original filename in the rev part of the return
        # value.
        changes, copies = self.base.getchanges(rev)
        files = []
        for f, r in changes:
            newf = self.filemapper(f)
            if newf:
                files.append((newf, (f, r)))

        # Keep a copy record only if both ends survive the filemap.
        ncopies = {}
        for c in copies:
            newc = self.filemapper(c)
            if newc:
                newsource = self.filemapper(copies[c])
                if newsource:
                    ncopies[newc] = newsource

        return files, ncopies

    def getfile(self, name, rev):
        # rev is the (origname, origrev) pair built by getchanges.
        realname, realrev = rev
        return self.base.getfile(realname, realrev)

    def getmode(self, name, rev):
        # rev is the (origname, origrev) pair built by getchanges.
        realname, realrev = rev
        return self.base.getmode(realname, realrev)

    def gettags(self):
        return self.base.gettags()

    def before(self):
        pass

    def after(self):
        pass
General Comments 0
You need to be logged in to leave comments. Login now