# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 2016-03-08 08:20:08
# Node ID 1c658391b22fb4d98ccfb60c0e57315b55634117
# Parent  3f9e25a42e690171fc4f6b1f9e9241f527263b22

parsers: optimize filtered headrevs logic

The old native head revs logic would iterate over every node, starting from 0,
and check whether each node was filtered (by testing it against the
filteredrevs Python set). On large repos with hundreds of thousands of commits,
this could take 150ms.

This new logic iterates over the nodes in reverse order, and skips the filtered
check if we've already seen an unfiltered child of the node, since a node with
an unfiltered child is itself not filtered. This avoids the vast majority of
the filteredrevs set checks, which shaves the time down from 150ms to 20ms
during every branch cache write.

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -1446,20 +1446,26 @@ static PyObject *index_headrevs(indexObj
 		goto bail;
 	}
 
-	for (i = 0; i < len; i++) {
+	for (i = len - 1; i >= 0; i--) {
 		int isfiltered;
 		int parents[2];
 
-		isfiltered = check_filter(filter, i);
-		if (isfiltered == -1) {
-			PyErr_SetString(PyExc_TypeError,
-				"unable to check filter");
-			goto bail;
-		}
-
-		if (isfiltered) {
-			nothead[i] = 1;
-			continue;
+		/* If nothead[i] == 1, it means we've seen an unfiltered child of this
+		 * node already, and therefore this node is not filtered. So we can skip
+		 * the expensive check_filter step.
+		 */
+		if (nothead[i] != 1) {
+			isfiltered = check_filter(filter, i);
+			if (isfiltered == -1) {
+				PyErr_SetString(PyExc_TypeError,
+					"unable to check filter");
+				goto bail;
+			}
+
+			if (isfiltered) {
+				nothead[i] = 1;
+				continue;
+			}
 		}
 
 		if (index_get_parents(self, i, parents, (int)len - 1) < 0)