##// END OF EJS Templates
sparse-revlog: introduce native (C) implementation of slicechunktodensity...
Boris Feld -
r40743:cc76ca9f default
parent child Browse files
Show More
@@ -12,6 +12,7 b''
12 #include <ctype.h>
12 #include <ctype.h>
13 #include <limits.h>
13 #include <limits.h>
14 #include <stddef.h>
14 #include <stddef.h>
15 #include <stdlib.h>
15 #include <string.h>
16 #include <string.h>
16
17
17 #include "bitmanipulation.h"
18 #include "bitmanipulation.h"
@@ -1096,6 +1097,229 b' static Py_ssize_t trim_endidx(indexObjec'
1096 return endidx;
1097 return endidx;
1097 }
1098 }
1098
1099
1100 struct Gap {
1101 int64_t size;
1102 Py_ssize_t idx;
1103 };
1104
1105 static int gap_compare(const void *left, const void *right)
1106 {
1107 const struct Gap *l_left = ((const struct Gap *)left);
1108 const struct Gap *l_right = ((const struct Gap *)right);
1109 if (l_left->size < l_right->size) {
1110 return -1;
1111 } else if (l_left->size > l_right->size) {
1112 return 1;
1113 }
1114 return 0;
1115 }
1116 static int Py_ssize_t_compare(const void *left, const void *right)
1117 {
1118 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1119 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1120 if (l_left < l_right) {
1121 return -1;
1122 } else if (l_left > l_right) {
1123 return 1;
1124 }
1125 return 0;
1126 }
1127
1128 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1129 {
1130 /* method arguments */
1131 PyObject *list_revs = NULL; /* revisions in the chain */
1132 double targetdensity = 0; /* min density to achieve */
1133 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1134
1135 /* other core variables */
1136 Py_ssize_t idxlen = index_length(self);
1137 Py_ssize_t i; /* used for various iteration */
1138 PyObject *result = NULL; /* the final return of the function */
1139
1140 /* generic information about the delta chain being slice */
1141 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1142 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1143 int64_t chainpayload = 0; /* sum of all delta in the chain */
1144 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1145
1146 /* variable used for slicing the delta chain */
1147 int64_t readdata = 0; /* amount of data currently planned to be read */
1148 double density = 0; /* ration of payload data compared to read ones */
1149 int64_t previous_end;
1150 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1151 Py_ssize_t num_gaps =
1152 0; /* total number of notable gap recorded so far */
1153 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1154 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1155 PyObject *chunk = NULL; /* individual slice */
1156 PyObject *allchunks = NULL; /* all slices */
1157 Py_ssize_t previdx;
1158
1159 /* parsing argument */
1160 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1161 &targetdensity, &mingapsize)) {
1162 goto bail;
1163 }
1164
1165 /* If the delta chain contains a single element, we do not need slicing
1166 */
1167 num_revs = PyList_GET_SIZE(list_revs);
1168 if (num_revs <= 1) {
1169 result = PyTuple_Pack(1, list_revs);
1170 goto done;
1171 }
1172
1173 /* Turn the python list into a native integer array (for efficiency) */
1174 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1175 if (revs == NULL) {
1176 PyErr_NoMemory();
1177 goto bail;
1178 }
1179 for (i = 0; i < num_revs; i++) {
1180 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1181 if (revnum == -1 && PyErr_Occurred()) {
1182 goto bail;
1183 }
1184 if (revnum < 0 || revnum >= idxlen) {
1185 PyErr_SetString(PyExc_IndexError, "index out of range");
1186 goto bail;
1187 }
1188 revs[i] = revnum;
1189 }
1190
1191 /* Compute and check various property of the unsliced delta chain */
1192 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1193 if (deltachainspan < 0) {
1194 goto bail;
1195 }
1196
1197 if (deltachainspan <= mingapsize) {
1198 result = PyTuple_Pack(1, list_revs);
1199 goto done;
1200 }
1201 chainpayload = 0;
1202 for (i = 0; i < num_revs; i++) {
1203 int tmp = index_get_length(self, revs[i]);
1204 if (tmp < 0) {
1205 goto bail;
1206 }
1207 chainpayload += tmp;
1208 }
1209
1210 readdata = deltachainspan;
1211 density = 1.0;
1212
1213 if (0 < deltachainspan) {
1214 density = (double)chainpayload / (double)deltachainspan;
1215 }
1216
1217 if (density >= targetdensity) {
1218 result = PyTuple_Pack(1, list_revs);
1219 goto done;
1220 }
1221
1222 /* if chain is too sparse, look for relevant gaps */
1223 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1224 if (gaps == NULL) {
1225 PyErr_NoMemory();
1226 goto bail;
1227 }
1228
1229 previous_end = -1;
1230 for (i = 0; i < num_revs; i++) {
1231 int64_t revstart;
1232 int revsize;
1233 revstart = index_get_start(self, revs[i]);
1234 if (revstart < 0) {
1235 goto bail;
1236 };
1237 revsize = index_get_length(self, revs[i]);
1238 if (revsize < 0) {
1239 goto bail;
1240 };
1241 if (revsize == 0) {
1242 continue;
1243 }
1244 if (previous_end >= 0) {
1245 int64_t gapsize = revstart - previous_end;
1246 if (gapsize > mingapsize) {
1247 gaps[num_gaps].size = gapsize;
1248 gaps[num_gaps].idx = i;
1249 num_gaps += 1;
1250 }
1251 }
1252 previous_end = revstart + revsize;
1253 }
1254 if (num_gaps == 0) {
1255 result = PyTuple_Pack(1, list_revs);
1256 goto done;
1257 }
1258 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1259
1260 /* Slice the largest gap first, they improve the density the most */
1261 selected_indices =
1262 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1263 if (selected_indices == NULL) {
1264 PyErr_NoMemory();
1265 goto bail;
1266 }
1267
1268 for (i = num_gaps - 1; i >= 0; i--) {
1269 selected_indices[num_selected] = gaps[i].idx;
1270 readdata -= gaps[i].size;
1271 num_selected += 1;
1272 if (readdata <= 0) {
1273 density = 1.0;
1274 } else {
1275 density = (double)chainpayload / (double)readdata;
1276 }
1277 if (density >= targetdensity) {
1278 break;
1279 }
1280 }
1281 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1282 &Py_ssize_t_compare);
1283
1284 /* create the resulting slice */
1285 allchunks = PyList_New(0);
1286 if (allchunks == NULL) {
1287 goto bail;
1288 }
1289 previdx = 0;
1290 selected_indices[num_selected] = num_revs;
1291 for (i = 0; i <= num_selected; i++) {
1292 Py_ssize_t idx = selected_indices[i];
1293 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1294 if (endidx < 0) {
1295 goto bail;
1296 }
1297 if (previdx < endidx) {
1298 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1299 if (chunk == NULL) {
1300 goto bail;
1301 }
1302 if (PyList_Append(allchunks, chunk) == -1) {
1303 goto bail;
1304 }
1305 Py_DECREF(chunk);
1306 chunk = NULL;
1307 }
1308 previdx = idx;
1309 }
1310 result = allchunks;
1311 goto done;
1312
1313 bail:
1314 Py_XDECREF(allchunks);
1315 Py_XDECREF(chunk);
1316 done:
1317 free(revs);
1318 free(gaps);
1319 free(selected_indices);
1320 return result;
1321 }
1322
1099 static inline int nt_level(const char *node, Py_ssize_t level)
1323 static inline int nt_level(const char *node, Py_ssize_t level)
1100 {
1324 {
1101 int v = node[level >> 1];
1325 int v = node[level >> 1];
@@ -2328,6 +2552,8 b' static PyMethodDef index_methods[] = {'
2328 "get filtered head revisions"}, /* Can always do filtering */
2552 "get filtered head revisions"}, /* Can always do filtering */
2329 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2553 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2330 "determine revisions with deltas to reconstruct fulltext"},
2554 "determine revisions with deltas to reconstruct fulltext"},
2555 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2556 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2331 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2557 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2332 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2558 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2333 "match a potentially ambiguous node ID"},
2559 "match a potentially ambiguous node ID"},
General Comments 0
You need to be logged in to leave comments. Login now