Show More
@@ -12,6 +12,7 | |||||
12 | #include <ctype.h> |
|
12 | #include <ctype.h> | |
13 | #include <limits.h> |
|
13 | #include <limits.h> | |
14 | #include <stddef.h> |
|
14 | #include <stddef.h> | |
|
15 | #include <stdlib.h> | |||
15 | #include <string.h> |
|
16 | #include <string.h> | |
16 |
|
17 | |||
17 | #include "bitmanipulation.h" |
|
18 | #include "bitmanipulation.h" | |
@@ -1096,6 +1097,229 static Py_ssize_t trim_endidx(indexObjec | |||||
1096 | return endidx; |
|
1097 | return endidx; | |
1097 | } |
|
1098 | } | |
1098 |
|
1099 | |||
|
1100 | struct Gap { | |||
|
1101 | int64_t size; | |||
|
1102 | Py_ssize_t idx; | |||
|
1103 | }; | |||
|
1104 | ||||
|
1105 | static int gap_compare(const void *left, const void *right) | |||
|
1106 | { | |||
|
1107 | const struct Gap *l_left = ((const struct Gap *)left); | |||
|
1108 | const struct Gap *l_right = ((const struct Gap *)right); | |||
|
1109 | if (l_left->size < l_right->size) { | |||
|
1110 | return -1; | |||
|
1111 | } else if (l_left->size > l_right->size) { | |||
|
1112 | return 1; | |||
|
1113 | } | |||
|
1114 | return 0; | |||
|
1115 | } | |||
|
1116 | static int Py_ssize_t_compare(const void *left, const void *right) | |||
|
1117 | { | |||
|
1118 | const Py_ssize_t l_left = *(const Py_ssize_t *)left; | |||
|
1119 | const Py_ssize_t l_right = *(const Py_ssize_t *)right; | |||
|
1120 | if (l_left < l_right) { | |||
|
1121 | return -1; | |||
|
1122 | } else if (l_left > l_right) { | |||
|
1123 | return 1; | |||
|
1124 | } | |||
|
1125 | return 0; | |||
|
1126 | } | |||
|
1127 | ||||
|
1128 | static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args) | |||
|
1129 | { | |||
|
1130 | /* method arguments */ | |||
|
1131 | PyObject *list_revs = NULL; /* revisions in the chain */ | |||
|
1132 | double targetdensity = 0; /* min density to achieve */ | |||
|
1133 | Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */ | |||
|
1134 | ||||
|
1135 | /* other core variables */ | |||
|
1136 | Py_ssize_t idxlen = index_length(self); | |||
|
1137 | Py_ssize_t i; /* used for various iteration */ | |||
|
1138 | PyObject *result = NULL; /* the final return of the function */ | |||
|
1139 | ||||
|
1140 | /* generic information about the delta chain being slice */ | |||
|
1141 | Py_ssize_t num_revs = 0; /* size of the full delta chain */ | |||
|
1142 | Py_ssize_t *revs = NULL; /* native array of revision in the chain */ | |||
|
1143 | int64_t chainpayload = 0; /* sum of all delta in the chain */ | |||
|
1144 | int64_t deltachainspan = 0; /* distance from first byte to last byte */ | |||
|
1145 | ||||
|
1146 | /* variable used for slicing the delta chain */ | |||
|
1147 | int64_t readdata = 0; /* amount of data currently planned to be read */ | |||
|
1148 | double density = 0; /* ration of payload data compared to read ones */ | |||
|
1149 | int64_t previous_end; | |||
|
1150 | struct Gap *gaps = NULL; /* array of notable gap in the chain */ | |||
|
1151 | Py_ssize_t num_gaps = | |||
|
1152 | 0; /* total number of notable gap recorded so far */ | |||
|
1153 | Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */ | |||
|
1154 | Py_ssize_t num_selected = 0; /* number of gaps skipped */ | |||
|
1155 | PyObject *chunk = NULL; /* individual slice */ | |||
|
1156 | PyObject *allchunks = NULL; /* all slices */ | |||
|
1157 | Py_ssize_t previdx; | |||
|
1158 | ||||
|
1159 | /* parsing argument */ | |||
|
1160 | if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs, | |||
|
1161 | &targetdensity, &mingapsize)) { | |||
|
1162 | goto bail; | |||
|
1163 | } | |||
|
1164 | ||||
|
1165 | /* If the delta chain contains a single element, we do not need slicing | |||
|
1166 | */ | |||
|
1167 | num_revs = PyList_GET_SIZE(list_revs); | |||
|
1168 | if (num_revs <= 1) { | |||
|
1169 | result = PyTuple_Pack(1, list_revs); | |||
|
1170 | goto done; | |||
|
1171 | } | |||
|
1172 | ||||
|
1173 | /* Turn the python list into a native integer array (for efficiency) */ | |||
|
1174 | revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t)); | |||
|
1175 | if (revs == NULL) { | |||
|
1176 | PyErr_NoMemory(); | |||
|
1177 | goto bail; | |||
|
1178 | } | |||
|
1179 | for (i = 0; i < num_revs; i++) { | |||
|
1180 | Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i)); | |||
|
1181 | if (revnum == -1 && PyErr_Occurred()) { | |||
|
1182 | goto bail; | |||
|
1183 | } | |||
|
1184 | if (revnum < 0 || revnum >= idxlen) { | |||
|
1185 | PyErr_SetString(PyExc_IndexError, "index out of range"); | |||
|
1186 | goto bail; | |||
|
1187 | } | |||
|
1188 | revs[i] = revnum; | |||
|
1189 | } | |||
|
1190 | ||||
|
1191 | /* Compute and check various property of the unsliced delta chain */ | |||
|
1192 | deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]); | |||
|
1193 | if (deltachainspan < 0) { | |||
|
1194 | goto bail; | |||
|
1195 | } | |||
|
1196 | ||||
|
1197 | if (deltachainspan <= mingapsize) { | |||
|
1198 | result = PyTuple_Pack(1, list_revs); | |||
|
1199 | goto done; | |||
|
1200 | } | |||
|
1201 | chainpayload = 0; | |||
|
1202 | for (i = 0; i < num_revs; i++) { | |||
|
1203 | int tmp = index_get_length(self, revs[i]); | |||
|
1204 | if (tmp < 0) { | |||
|
1205 | goto bail; | |||
|
1206 | } | |||
|
1207 | chainpayload += tmp; | |||
|
1208 | } | |||
|
1209 | ||||
|
1210 | readdata = deltachainspan; | |||
|
1211 | density = 1.0; | |||
|
1212 | ||||
|
1213 | if (0 < deltachainspan) { | |||
|
1214 | density = (double)chainpayload / (double)deltachainspan; | |||
|
1215 | } | |||
|
1216 | ||||
|
1217 | if (density >= targetdensity) { | |||
|
1218 | result = PyTuple_Pack(1, list_revs); | |||
|
1219 | goto done; | |||
|
1220 | } | |||
|
1221 | ||||
|
1222 | /* if chain is too sparse, look for relevant gaps */ | |||
|
1223 | gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap)); | |||
|
1224 | if (gaps == NULL) { | |||
|
1225 | PyErr_NoMemory(); | |||
|
1226 | goto bail; | |||
|
1227 | } | |||
|
1228 | ||||
|
1229 | previous_end = -1; | |||
|
1230 | for (i = 0; i < num_revs; i++) { | |||
|
1231 | int64_t revstart; | |||
|
1232 | int revsize; | |||
|
1233 | revstart = index_get_start(self, revs[i]); | |||
|
1234 | if (revstart < 0) { | |||
|
1235 | goto bail; | |||
|
1236 | }; | |||
|
1237 | revsize = index_get_length(self, revs[i]); | |||
|
1238 | if (revsize < 0) { | |||
|
1239 | goto bail; | |||
|
1240 | }; | |||
|
1241 | if (revsize == 0) { | |||
|
1242 | continue; | |||
|
1243 | } | |||
|
1244 | if (previous_end >= 0) { | |||
|
1245 | int64_t gapsize = revstart - previous_end; | |||
|
1246 | if (gapsize > mingapsize) { | |||
|
1247 | gaps[num_gaps].size = gapsize; | |||
|
1248 | gaps[num_gaps].idx = i; | |||
|
1249 | num_gaps += 1; | |||
|
1250 | } | |||
|
1251 | } | |||
|
1252 | previous_end = revstart + revsize; | |||
|
1253 | } | |||
|
1254 | if (num_gaps == 0) { | |||
|
1255 | result = PyTuple_Pack(1, list_revs); | |||
|
1256 | goto done; | |||
|
1257 | } | |||
|
1258 | qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare); | |||
|
1259 | ||||
|
1260 | /* Slice the largest gap first, they improve the density the most */ | |||
|
1261 | selected_indices = | |||
|
1262 | (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t)); | |||
|
1263 | if (selected_indices == NULL) { | |||
|
1264 | PyErr_NoMemory(); | |||
|
1265 | goto bail; | |||
|
1266 | } | |||
|
1267 | ||||
|
1268 | for (i = num_gaps - 1; i >= 0; i--) { | |||
|
1269 | selected_indices[num_selected] = gaps[i].idx; | |||
|
1270 | readdata -= gaps[i].size; | |||
|
1271 | num_selected += 1; | |||
|
1272 | if (readdata <= 0) { | |||
|
1273 | density = 1.0; | |||
|
1274 | } else { | |||
|
1275 | density = (double)chainpayload / (double)readdata; | |||
|
1276 | } | |||
|
1277 | if (density >= targetdensity) { | |||
|
1278 | break; | |||
|
1279 | } | |||
|
1280 | } | |||
|
1281 | qsort(selected_indices, num_selected, sizeof(Py_ssize_t), | |||
|
1282 | &Py_ssize_t_compare); | |||
|
1283 | ||||
|
1284 | /* create the resulting slice */ | |||
|
1285 | allchunks = PyList_New(0); | |||
|
1286 | if (allchunks == NULL) { | |||
|
1287 | goto bail; | |||
|
1288 | } | |||
|
1289 | previdx = 0; | |||
|
1290 | selected_indices[num_selected] = num_revs; | |||
|
1291 | for (i = 0; i <= num_selected; i++) { | |||
|
1292 | Py_ssize_t idx = selected_indices[i]; | |||
|
1293 | Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx); | |||
|
1294 | if (endidx < 0) { | |||
|
1295 | goto bail; | |||
|
1296 | } | |||
|
1297 | if (previdx < endidx) { | |||
|
1298 | chunk = PyList_GetSlice(list_revs, previdx, endidx); | |||
|
1299 | if (chunk == NULL) { | |||
|
1300 | goto bail; | |||
|
1301 | } | |||
|
1302 | if (PyList_Append(allchunks, chunk) == -1) { | |||
|
1303 | goto bail; | |||
|
1304 | } | |||
|
1305 | Py_DECREF(chunk); | |||
|
1306 | chunk = NULL; | |||
|
1307 | } | |||
|
1308 | previdx = idx; | |||
|
1309 | } | |||
|
1310 | result = allchunks; | |||
|
1311 | goto done; | |||
|
1312 | ||||
|
1313 | bail: | |||
|
1314 | Py_XDECREF(allchunks); | |||
|
1315 | Py_XDECREF(chunk); | |||
|
1316 | done: | |||
|
1317 | free(revs); | |||
|
1318 | free(gaps); | |||
|
1319 | free(selected_indices); | |||
|
1320 | return result; | |||
|
1321 | } | |||
|
1322 | ||||
1099 | static inline int nt_level(const char *node, Py_ssize_t level) |
|
1323 | static inline int nt_level(const char *node, Py_ssize_t level) | |
1100 | { |
|
1324 | { | |
1101 | int v = node[level >> 1]; |
|
1325 | int v = node[level >> 1]; | |
@@ -2328,6 +2552,8 static PyMethodDef index_methods[] = { | |||||
2328 | "get filtered head revisions"}, /* Can always do filtering */ |
|
2552 | "get filtered head revisions"}, /* Can always do filtering */ | |
2329 | {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS, |
|
2553 | {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS, | |
2330 | "determine revisions with deltas to reconstruct fulltext"}, |
|
2554 | "determine revisions with deltas to reconstruct fulltext"}, | |
|
2555 | {"slicechunktodensity", (PyCFunction)index_slicechunktodensity, | |||
|
2556 | METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"}, | |||
2331 | {"append", (PyCFunction)index_append, METH_O, "append an index entry"}, |
|
2557 | {"append", (PyCFunction)index_append, METH_O, "append an index entry"}, | |
2332 | {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, |
|
2558 | {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, | |
2333 | "match a potentially ambiguous node ID"}, |
|
2559 | "match a potentially ambiguous node ID"}, |
General Comments 0
You need to be logged in to leave comments.
Login now