##// END OF EJS Templates
setdiscovery: limit lines to 80 characters
Steven Brown -
r14206:2bf60f15 default
parent child Browse files
Show More
@@ -1,242 +1,248 b''
1 1 # dagutil.py - dag utilities for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from node import nullrev
10 10
11 11
12 12 class basedag(object):
13 13 '''generic interface for DAGs
14 14
15 15 terms:
16 16 "ix" (short for index) identifies a nodes internally,
17 17 "id" identifies one externally.
18 18
19 19 All params are ixs unless explicitly suffixed otherwise.
20 20 Pluralized params are lists or sets.
21 21 '''
22 22
23 23 def __init__(self):
24 24 self._inverse = None
25 25
26 26 def nodeset(self):
27 27 '''set of all node idxs'''
28 28 raise NotImplementedError()
29 29
30 30 def heads(self):
31 31 '''list of head ixs'''
32 32 raise NotImplementedError()
33 33
34 34 def parents(self, ix):
35 35 '''list of parents ixs of ix'''
36 36 raise NotImplementedError()
37 37
38 38 def inverse(self):
39 39 '''inverse DAG, where parents becomes children, etc.'''
40 40 raise NotImplementedError()
41 41
42 42 def ancestorset(self, starts, stops=None):
43 '''set of all ancestors of starts (incl), but stop walk at stops (excl)'''
43 '''
44 set of all ancestors of starts (incl), but stop walk at stops (excl)
45 '''
44 46 raise NotImplementedError()
45 47
46 48 def descendantset(self, starts, stops=None):
47 '''set of all descendants of starts (incl), but stop walk at stops (excl)'''
49 '''
50 set of all descendants of starts (incl), but stop walk at stops (excl)
51 '''
48 52 return self.inverse().ancestorset(starts, stops)
49 53
50 54 def headsetofconnecteds(self, ixs):
51 '''subset of connected list of ixs so that no node has a descendant in it
55 '''
56 subset of connected list of ixs so that no node has a descendant in it
52 57
53 58 By "connected list" we mean that if an ancestor and a descendant are in
54 the list, then so is at least one path connecting them.'''
59 the list, then so is at least one path connecting them.
60 '''
55 61 raise NotImplementedError()
56 62
57 63 def externalize(self, ix):
58 64 '''return a list of (or set if given a set) of node ids'''
59 65 return self._externalize(ix)
60 66
61 67 def externalizeall(self, ixs):
62 68 '''return a list of (or set if given a set) of node ids'''
63 69 ids = self._externalizeall(ixs)
64 70 if isinstance(ixs, set):
65 71 return set(ids)
66 72 return list(ids)
67 73
68 74 def internalize(self, id):
69 75 '''return a list of (or set if given a set) of node ixs'''
70 76 return self._internalize(id)
71 77
72 78 def internalizeall(self, ids, filterunknown=False):
73 79 '''return a list of (or set if given a set) of node ids'''
74 80 ixs = self._internalizeall(ids, filterunknown)
75 81 if isinstance(ids, set):
76 82 return set(ixs)
77 83 return list(ixs)
78 84
79 85
80 86 class genericdag(basedag):
81 87 '''generic implementations for DAGs'''
82 88
83 89 def ancestorset(self, starts, stops=None):
84 90 stops = stops and set(stops) or set()
85 91 seen = set()
86 92 pending = list(starts)
87 93 while pending:
88 94 n = pending.pop()
89 95 if n not in seen and n not in stops:
90 96 seen.add(n)
91 97 pending.extend(self.parents(n))
92 98 return seen
93 99
94 100 def headsetofconnecteds(self, ixs):
95 101 hds = set(ixs)
96 102 if not hds:
97 103 return hds
98 104 for n in ixs:
99 105 for p in self.parents(n):
100 106 hds.discard(p)
101 107 assert hds
102 108 return hds
103 109
104 110
105 111 class revlogbaseddag(basedag):
106 112 '''generic dag interface to a revlog'''
107 113
108 114 def __init__(self, revlog, nodeset):
109 115 basedag.__init__(self)
110 116 self._revlog = revlog
111 117 self._heads = None
112 118 self._nodeset = nodeset
113 119
114 120 def nodeset(self):
115 121 return self._nodeset
116 122
117 123 def heads(self):
118 124 if self._heads is None:
119 125 self._heads = self._getheads()
120 126 return self._heads
121 127
122 128 def _externalize(self, ix):
123 129 return self._revlog.index[ix][7]
124 130 def _externalizeall(self, ixs):
125 131 idx = self._revlog.index
126 132 return [idx[i][7] for i in ixs]
127 133
128 134 def _internalize(self, id):
129 135 ix = self._revlog.rev(id)
130 136 if ix == nullrev:
131 137 raise LookupError(id, self._revlog.indexfile, _('nullid'))
132 138 return ix
133 139 def _internalizeall(self, ids, filterunknown):
134 140 rl = self._revlog
135 141 if filterunknown:
136 142 return [r for r in map(rl.nodemap.get, ids)
137 143 if r is not None and r != nullrev]
138 144 return map(self._internalize, ids)
139 145
140 146
141 147 class revlogdag(revlogbaseddag):
142 148 '''dag interface to a revlog'''
143 149
144 150 def __init__(self, revlog):
145 151 revlogbaseddag.__init__(self, revlog, set(xrange(len(revlog))))
146 152
147 153 def _getheads(self):
148 154 return [r for r in self._revlog.headrevs() if r != nullrev]
149 155
150 156 def parents(self, ix):
151 157 rlog = self._revlog
152 158 idx = rlog.index
153 159 revdata = idx[ix]
154 160 prev = revdata[5]
155 161 if prev != nullrev:
156 162 prev2 = revdata[6]
157 163 if prev2 == nullrev:
158 164 return [prev]
159 165 return [prev, prev2]
160 166 prev2 = revdata[6]
161 167 if prev2 != nullrev:
162 168 return [prev2]
163 169 return []
164 170
165 171 def inverse(self):
166 172 if self._inverse is None:
167 173 self._inverse = inverserevlogdag(self)
168 174 return self._inverse
169 175
170 176 def ancestorset(self, starts, stops=None):
171 177 rlog = self._revlog
172 178 idx = rlog.index
173 179 stops = stops and set(stops) or set()
174 180 seen = set()
175 181 pending = list(starts)
176 182 while pending:
177 183 rev = pending.pop()
178 184 if rev not in seen and rev not in stops:
179 185 seen.add(rev)
180 186 revdata = idx[rev]
181 187 for i in [5, 6]:
182 188 prev = revdata[i]
183 189 if prev != nullrev:
184 190 pending.append(prev)
185 191 return seen
186 192
187 193 def headsetofconnecteds(self, ixs):
188 194 if not ixs:
189 195 return set()
190 196 rlog = self._revlog
191 197 idx = rlog.index
192 198 headrevs = set(ixs)
193 199 for rev in ixs:
194 200 revdata = idx[rev]
195 201 for i in [5, 6]:
196 202 prev = revdata[i]
197 203 if prev != nullrev:
198 204 headrevs.discard(prev)
199 205 assert headrevs
200 206 return headrevs
201 207
202 208
203 209 class inverserevlogdag(revlogbaseddag, genericdag):
204 210 '''inverse of an existing revlog dag; see revlogdag.inverse()'''
205 211
206 212 def __init__(self, orig):
207 213 revlogbaseddag.__init__(self, orig._revlog, orig._nodeset)
208 214 self._orig = orig
209 215 self._children = {}
210 216 self._roots = []
211 217 self._walkfrom = len(self._revlog) - 1
212 218
213 219 def _walkto(self, walkto):
214 220 rev = self._walkfrom
215 221 cs = self._children
216 222 roots = self._roots
217 223 idx = self._revlog.index
218 224 while rev >= walkto:
219 225 data = idx[rev]
220 226 isroot = True
221 227 for prev in [data[5], data[6]]: # parent revs
222 228 if prev != nullrev:
223 229 cs.setdefault(prev, []).append(rev)
224 230 isroot = False
225 231 if isroot:
226 232 roots.append(rev)
227 233 rev -= 1
228 234 self._walkfrom = rev - 1
229 235
230 236 def _getheads(self):
231 237 self._walkto(nullrev)
232 238 return self._roots
233 239
234 240 def parents(self, ix):
235 241 if ix is None:
236 242 return []
237 243 if ix <= self._walkfrom:
238 244 self._walkto(ix)
239 245 return self._children.get(ix, [])
240 246
241 247 def inverse(self):
242 248 return self._orig
@@ -1,178 +1,178 b''
1 1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from node import nullid
10 10 from i18n import _
11 11 import random, collections, util, dagutil
12 12
13 13 def _updatesample(dag, nodes, sample, always, quicksamplesize=0):
14 14 # if nodes is empty we scan the entire graph
15 15 if nodes:
16 16 heads = dag.headsetofconnecteds(nodes)
17 17 else:
18 18 heads = dag.heads()
19 19 dist = {}
20 20 visit = collections.deque(heads)
21 21 seen = set()
22 22 factor = 1
23 23 while visit:
24 24 curr = visit.popleft()
25 25 if curr in seen:
26 26 continue
27 27 d = dist.setdefault(curr, 1)
28 28 if d > factor:
29 29 factor *= 2
30 30 if d == factor:
31 31 if curr not in always: # need this check for the early exit below
32 32 sample.add(curr)
33 33 if quicksamplesize and (len(sample) >= quicksamplesize):
34 34 return
35 35 seen.add(curr)
36 36 for p in dag.parents(curr):
37 37 if not nodes or p in nodes:
38 38 dist.setdefault(p, d + 1)
39 39 visit.append(p)
40 40
41 41 def _setupsample(dag, nodes, size):
42 42 if len(nodes) <= size:
43 43 return set(nodes), None, 0
44 44 always = set(dag.heads())
45 45 desiredlen = size - len(always)
46 46 if desiredlen <= 0:
47 47 # This could be bad if there are very many heads, all unknown to the
48 48 # server. We're counting on long request support here.
49 49 return always, None, desiredlen
50 50 return always, set(), desiredlen
51 51
52 52 def _takequicksample(dag, nodes, size, initial):
53 53 always, sample, desiredlen = _setupsample(dag, nodes, size)
54 54 if sample is None:
55 55 return always
56 56 if initial:
57 57 fromset = None
58 58 else:
59 59 fromset = nodes
60 60 _updatesample(dag, fromset, sample, always, quicksamplesize=desiredlen)
61 61 sample.update(always)
62 62 return sample
63 63
64 64 def _takefullsample(dag, nodes, size):
65 65 always, sample, desiredlen = _setupsample(dag, nodes, size)
66 66 if sample is None:
67 67 return always
68 68 # update from heads
69 69 _updatesample(dag, nodes, sample, always)
70 70 # update from roots
71 71 _updatesample(dag.inverse(), nodes, sample, always)
72 72 assert sample
73 73 if len(sample) > desiredlen:
74 74 sample = set(random.sample(sample, desiredlen))
75 75 elif len(sample) < desiredlen:
76 76 more = desiredlen - len(sample)
77 77 sample.update(random.sample(list(nodes - sample - always), more))
78 78 sample.update(always)
79 79 return sample
80 80
81 81 def findcommonheads(ui, local, remote,
82 82 initialsamplesize=100,
83 83 fullsamplesize=200,
84 84 abortwhenunrelated=True):
85 '''Return a tuple (common, anyincoming, remoteheads) used to identify missing
86 nodes from or in remote.
85 '''Return a tuple (common, anyincoming, remoteheads) used to identify
86 missing nodes from or in remote.
87 87
88 88 shortcutlocal determines whether we try use direct access to localrepo if
89 89 remote is actually local.
90 90 '''
91 91 roundtrips = 0
92 92 cl = local.changelog
93 93 dag = dagutil.revlogdag(cl)
94 94 nodes = dag.nodeset()
95 95
96 96 # early exit if we know all the specified server heads already
97 97 ui.debug("query 1; heads\n")
98 98 roundtrips += 1
99 99 srvheadhashes = remote.heads()
100 100
101 101 ## TODO We might want to request an additional random sample of the server's
102 102 ## nodes batched with the heads query here.
103 103
104 104 if cl.tip() == nullid:
105 105 if srvheadhashes != [nullid]:
106 106 return [nullid], True, srvheadhashes
107 107 return [nullid], False, []
108 108
109 # start actual discovery (we note this before the next "if" for compatibility
110 # reasons)
109 # start actual discovery (we note this before the next "if" for
110 # compatibility reasons)
111 111 ui.status(_("searching for changes\n"))
112 112
113 113 srvheads = dag.internalizeall(srvheadhashes, filterunknown=True)
114 114 if len(srvheads) == len(srvheadhashes):
115 115 ui.note("all remote heads known locally\n")
116 116 return (srvheadhashes, False, srvheadhashes,)
117 117
118 118 # full blown discovery
119 119 undecided = nodes # own nodes where I don't know if the server knows them
120 120 common = set() # own nodes I know we both know
121 121 missing = set() # own nodes I know the server lacks
122 122
123 123 # treat remote heads as a first implicit sample response
124 124 common.update(dag.ancestorset(srvheads))
125 125 undecided.difference_update(common)
126 126 # use cheapish initial sample
127 127 if common:
128 128 ui.debug("taking initial sample\n")
129 129 sample = _takefullsample(dag, undecided, size=fullsamplesize)
130 130 else:
131 131 ui.debug("taking quick initial sample\n")
132 132 sample = _takequicksample(dag, nodes, size=initialsamplesize,
133 133 initial=True)
134 134
135 135 roundtrips += 1
136 136 ui.progress(_('searching'), roundtrips, unit=_('queries'))
137 137 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
138 138 % (roundtrips, len(undecided), len(sample)))
139 139 # indices between sample and externalized version must match
140 140 sample = list(sample)
141 141 yesno = remote.known(dag.externalizeall(sample))
142 142
143 143 while undecided:
144 144 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
145 145 common.update(dag.ancestorset(commoninsample, common))
146 146
147 147 missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
148 148 missing.update(dag.descendantset(missinginsample, missing))
149 149
150 150 undecided.difference_update(missing)
151 151 undecided.difference_update(common)
152 152
153 153 if not undecided:
154 154 break
155 155
156 156 ui.note("sampling from both directions\n")
157 157 sample = _takefullsample(dag, undecided, size=fullsamplesize)
158 158
159 159 roundtrips += 1
160 160 ui.progress(_('searching'), roundtrips, unit=_('queries'))
161 161 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
162 162 % (roundtrips, len(undecided), len(sample)))
163 163 # indices between sample and externalized version must match
164 164 sample = list(sample)
165 165 yesno = remote.known(dag.externalizeall(sample))
166 166
167 167 result = dag.headsetofconnecteds(common)
168 168 ui.progress(_('searching'), None)
169 169 ui.debug("%d total queries\n" % roundtrips)
170 170
171 171 if not result and srvheadhashes != [nullid]:
172 172 if abortwhenunrelated:
173 173 raise util.Abort(_("repository is unrelated"))
174 174 else:
175 175 ui.warn(_("warning: repository is unrelated\n"))
176 176 return (set([nullid]), True, srvheadhashes,)
177 177
178 178 return (dag.externalizeall(result), True, srvheadhashes,)
General Comments 0
You need to be logged in to leave comments. Login now