rust-index: find_gca_candidates bit sets genericization...
Georges Racinet on incendie.racinet.fr
r52117:43241f31 default
@@ -1044,7 +1044,7 @@ impl Index {
1044 1044 /// greatest common ancestor. In revset notation, this is the set
1045 1045 /// `heads(::a and ::b and ...)`
1046 1046 #[allow(dead_code)]
1047 fn find_gca_candidates(
1047 fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
1048 1048 &self,
1049 1049 revs: &[Revision],
1050 1050 ) -> Result<Vec<Revision>, GraphError> {
@@ -1053,13 +1053,12 @@ impl Index {
1053 1053 }
1054 1054 let revcount = revs.len();
1055 1055 let mut candidates = vec![];
1056 let all_seen = (1u64 << revcount) - 1;
1057 let poison = 1u64 << revs.len();
1058 1056 let max_rev = revs.iter().max().unwrap();
1059 let mut seen = vec![0u64; (max_rev.0 + 1) as usize];
1057
1058 let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);
1060 1059
1061 1060 for (idx, rev) in revs.iter().enumerate() {
1062 seen[rev.0 as usize] = 1 << idx;
1061 seen[rev.0 as usize].add(idx);
1063 1062 }
1064 1063 let mut current_rev = *max_rev;
1065 1064 // Number of revisions whose inspection in the main loop
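With `BS = u64`, the new generic construction boils down to what the removed lines did by hand: `vec_of_empty` ignores the set size and returns the same zeroed vector, and `add(idx)` is the former `|= 1 << idx`. A minimal sketch of that equivalence, using the `PoisonableBitSet` trait introduced further down (illustration only, not part of the changeset; `n_revs` and `capacity` are made-up names standing for `revs.len()` and `(max_rev.0 + 1) as usize`):

    let n_revs = 3usize;      // stands for revs.len()
    let capacity = 8usize;    // stands for (max_rev.0 + 1) as usize
    let mut seen: Vec<u64> = PoisonableBitSet::vec_of_empty(n_revs, capacity);
    assert_eq!(seen, vec![0u64; capacity]);
    seen[5].add(1);           // same effect as the former seen[5] |= 1 << 1
    assert_eq!(seen[5], 0b10);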
@@ -1089,17 +1088,18 @@ impl Index {
1089 1088 // - Early return in case it is detected that one of the incoming revs
1090 1089 // is a common ancestor of all of them.
1091 1090 while current_rev.0 >= 0 && interesting > 0 {
1092 let mut current_seen = seen[current_rev.0 as usize];
1091 let mut current_seen = seen[current_rev.0 as usize].clone();
1093 1092
1094 if current_seen == 0 {
1093 if current_seen.is_empty() {
1095 1094 current_rev = Revision(current_rev.0 - 1);
1096 1095 continue;
1097 1096 }
1098 if current_seen < poison {
1097 if !current_seen.is_poisoned() {
1099 1098 interesting -= 1;
1100 if current_seen == all_seen {
1099 if current_seen.is_full_range(revcount) {
1101 1100 candidates.push(current_rev);
1102 current_seen |= poison;
1101 seen[current_rev.0 as usize].poison();
1102 current_seen.poison(); // avoid recloning
1103 1103
1104 1104 // Being a common ancestor, if `current_rev` is among
1105 1105 // the input revisions, it is *the* answer.
@@ -1114,26 +1114,28 @@ impl Index {
1114 1114 if parent == NULL_REVISION {
1115 1115 continue;
1116 1116 }
1117 let parent_seen = seen[parent.0 as usize];
1118 if current_seen < poison {
1117 let parent_seen = &seen[parent.0 as usize];
1118 if !current_seen.is_poisoned() {
1119 1119 // Without the `interesting` accounting, this block would
1120 1120 // be logically equivalent to: parent_seen |= current_seen
1121 1121 // The parent counts as interesting if it was not already
1122 1122 // known to be an ancestor (would already have counted)
1123 if parent_seen == 0 {
1124 seen[parent.0 as usize] = current_seen;
1123 if parent_seen.is_empty() {
1124 seen[parent.0 as usize] = current_seen.clone();
1125 1125 interesting += 1;
1126 } else if parent_seen != current_seen {
1127 seen[parent.0 as usize] |= current_seen;
1126 } else if *parent_seen != current_seen {
1127 seen[parent.0 as usize].union(&current_seen);
1128 1128 }
1129 1129 } else {
1130 1130 // this block is logically equivalent to poisoning parent
1131 1131 // and counting it as non interesting if it
1132 1132 // has been seen before (hence counted then as interesting)
1133 if parent_seen != 0 && parent_seen < poison {
1133 if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
1134 1134 interesting -= 1;
1135 1135 }
1136 seen[parent.0 as usize] = current_seen;
1136 // Equivalent to poisoning the parent; doing that directly
1137 // would avoid the cloning.
1138 seen[parent.0 as usize] = current_seen.clone();
1137 1139 }
1138 1140 }
1139 1141
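Because the loop body now goes only through the trait operations (plus `Clone`), a caller can pick the concrete representation from the number of input revisions. A hypothetical dispatch helper, not part of this changeset, assuming it lives in the same module and relying on the u64 capacity of 63 documented below:

    // Hypothetical: use the cheap fixed-size set when one bit per input
    // revision fits below the u64 poison bit, else the heap-allocated one.
    fn find_gca_candidates_auto(
        index: &Index,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        if revs.len() <= 63 {
            index.find_gca_candidates::<u64>(revs)
        } else {
            index.find_gca_candidates::<NonStaticPoisonableBitSet>(revs)
        }
    }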
@@ -1242,6 +1244,189 @@ impl Index {
1242 1244 }
1243 1245 }
1244 1246
1247 /// The kind of functionality needed by find_gca_candidates
1248 ///
1249 /// This is a bit set that can be declared "poisoned"; callers interpret
1250 /// that state as a signal to break out of some loops.
1251 ///
1252 /// The maximum capacity of the bit set depends on the actual implementation
1253 trait PoisonableBitSet: Sized + PartialEq {
1254 /// Return a vector of exactly `vec_len` elements, each an empty set.
1255 ///
1256 /// Optimization can vastly depend on implementation. Those being `Copy`
1257 /// and having constant capacity typically can have a very simple
1258 /// implementation.
1259 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1260
1261 /// The size of the bit set in memory, in bytes
1262 fn size(&self) -> usize;
1263
1264 /// The number of elements that can be represented in the set.
1265 ///
1266 /// Another way to put it is that it is the highest integer `C` such that
1267 /// the set is guaranteed to always be a subset of the integer range
1268 /// `[0, C)`
1269 fn capacity(&self) -> usize;
1270
1271 /// Declare `n` to belong to the set
1272 fn add(&mut self, n: usize);
1273
1274 /// Declare `n` not to belong to the set
1275 fn discard(&mut self, n: usize);
1276
1277 /// Replace this bit set by its union with other
1278 fn union(&mut self, other: &Self);
1279
1280 /// Poison the bit set
1281 ///
1282 /// Interpretation is up to the caller
1283 fn poison(&mut self);
1284
1285 /// Is the bit set poisoned?
1286 ///
1287 /// Interpretation is up to the caller
1288 fn is_poisoned(&self) -> bool;
1289
1290 /// Is the bit set empty?
1291 fn is_empty(&self) -> bool;
1292
1293 /// Return `true` if and only if the bit set is the full range `[0, n)`
1294 /// of integers
1295 fn is_full_range(&self, n: usize) -> bool;
1296 }
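A consumer of the trait needs nothing beyond these operations. As a quick illustration (hypothetical helper, not in the patch), the queries made by `find_gca_candidates` can be expressed generically over any implementation:

    /// Hypothetical: count how many entries of a `seen` vector are already
    /// common to all `n_inputs` input revisions, and how many are poisoned.
    fn summarize_seen<BS: PoisonableBitSet>(
        seen: &[BS],
        n_inputs: usize,
    ) -> (usize, usize) {
        let common = seen.iter().filter(|s| s.is_full_range(n_inputs)).count();
        let poisoned = seen.iter().filter(|s| s.is_poisoned()).count();
        (common, poisoned)
    }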
1297
1298 const U64_POISON: u64 = 1 << 63;
1299
1300 impl PoisonableBitSet for u64 {
1301 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1302 vec![0u64; vec_len]
1303 }
1304
1305 fn size(&self) -> usize {
1306 8
1307 }
1308
1309 fn capacity(&self) -> usize {
1310 63
1311 }
1312
1313 fn add(&mut self, n: usize) {
1314 (*self) |= 1u64 << n;
1315 }
1316
1317 fn discard(&mut self, n: usize) {
1318 (*self) &= u64::MAX - (1u64 << n);
1319 }
1320
1321 fn union(&mut self, other: &Self) {
1322 (*self) |= *other;
1323 }
1324
1325 fn is_full_range(&self, n: usize) -> bool {
1326 *self + 1 == (1u64 << n)
1327 }
1328
1329 fn is_empty(&self) -> bool {
1330 *self == 0
1331 }
1332
1333 fn poison(&mut self) {
1334 *self = U64_POISON;
1335 }
1336
1337 fn is_poisoned(&self) -> bool {
1338 // An equality comparison would be tempting but would not survive
1339 // further operations after poisoning (even though these should be bogus).
1340 *self >= U64_POISON
1341 }
1342 }
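A sanity check of these semantics could look like the following (hypothetical test, not part of the commit): three inputs fill the range `[0, 3)`, and poisoning is detected independently of the membership bits.

    #[test]
    fn test_u64_poisonable_bit_set() {
        let mut seen = <u64 as PoisonableBitSet>::vec_of_empty(3, 1);
        assert!(seen[0].is_empty());
        assert_eq!(seen[0].capacity(), 63);
        seen[0].add(0);
        seen[0].add(1);
        seen[0].add(2);
        assert!(seen[0].is_full_range(3));
        assert!(!seen[0].is_poisoned());
        seen[0].poison();
        assert!(seen[0].is_poisoned());
    }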
1343
1344 /// A poisonable bit set whose capacity is not known at compile time but
1345 /// is constant after initial construction
1346 ///
1347 /// This can be optimized much further if performance assessments (speed
1348 /// and/or RAM) require it.
1349 /// As far as RAM is concerned, for large vectors of these, the main problem
1350 /// would be the repetition of set_size in each item. We would need a trait
1351 /// to abstract over the idea of a vector of such bit sets to do better.
1352 #[derive(Clone, PartialEq)]
1353 struct NonStaticPoisonableBitSet {
1354 set_size: usize,
1355 bit_set: Vec<u64>,
1356 }
1357
1358 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1359 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1360 1 + (set_size + 1) / 64
1361 }
1362
1363 impl NonStaticPoisonableBitSet {
1364 /// The index of the sub-bit set for the given n, and the index inside
1365 /// the latter
1366 fn index(&self, n: usize) -> (usize, usize) {
1367 (n / 64, n % 64)
1368 }
1369 }
1370
1371 /// Mock implementation to ensure that the trait makes sense
1372 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1373 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1374 let tmpl = Self {
1375 set_size,
1376 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1377 };
1378 vec![tmpl; vec_len]
1379 }
1380
1381 fn size(&self) -> usize {
1382 8 + self.bit_set.len() * 8
1383 }
1384
1385 fn capacity(&self) -> usize {
1386 self.set_size
1387 }
1388
1389 fn add(&mut self, n: usize) {
1390 let (sub_bs, bit_pos) = self.index(n);
1391 self.bit_set[sub_bs] |= 1 << bit_pos
1392 }
1393
1394 fn discard(&mut self, n: usize) {
1395 let (sub_bs, bit_pos) = self.index(n);
1396 self.bit_set[sub_bs] &= u64::MAX - (1 << bit_pos)
1397 }
1398
1399 fn union(&mut self, other: &Self) {
1400 assert!(
1401 self.set_size == other.set_size,
1402 "Binary operations on bit sets can only be done on same size"
1403 );
1404 for i in 0..self.bit_set.len() {
1405 self.bit_set[i] |= other.bit_set[i]
1406 }
1407 }
1408
1409 fn is_full_range(&self, n: usize) -> bool {
1410 let (sub_bs, bit_pos) = self.index(n);
1411 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1412 && self.bit_set[sub_bs] == (1 << bit_pos) - 1
1413 }
1414
1415 fn is_empty(&self) -> bool {
1416 self.bit_set.iter().all(|bs| *bs == 0u64)
1417 }
1418
1419 fn poison(&mut self) {
1420 let (sub_bs, bit_pos) = self.index(self.set_size);
1421 self.bit_set[sub_bs] = 1 << bit_pos;
1422 }
1423
1424 fn is_poisoned(&self) -> bool {
1425 let (sub_bs, bit_pos) = self.index(self.set_size);
1426 self.bit_set[sub_bs] >= 1 << bit_pos
1427 }
1428 }
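The dynamic variant can be exercised the same way, in particular across the 64-bit word boundary (hypothetical test, not in the commit; the fixture sizes are arbitrary):

    #[test]
    fn test_non_static_poisonable_bit_set() {
        // A set sized for 100 inputs occupies two u64 words; the poison bit
        // is bit 100, held in the second word.
        let mut sets = NonStaticPoisonableBitSet::vec_of_empty(100, 2);
        assert!(sets[0].is_empty());
        assert_eq!(sets[0].capacity(), 100);
        sets[0].add(64); // lands in the second u64 word
        sets[1].add(3);
        let src = sets[0].clone();
        sets[1].union(&src);
        assert!(!sets[1].is_empty());
        assert!(!sets[1].is_poisoned());
        sets[1].poison();
        assert!(sets[1].is_poisoned());
    }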
1429
1245 1430 /// Set of roots of all non-public phases
1246 1431 pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];
1247 1432