# HG changeset patch # User Yuya Nishihara # Date 2015-07-16 14:36:08 # Node ID 82d6a35cf432b66c4c7bdcbdc111a6eceb84b66f # Parent ebb5bb9bc32e1e75166cf134d2c1d6095143f710 parsers: fix buffer overflow by invalid parent revision read from revlog If revlog file is corrupted, it can have parent pointing to invalid revision. So we should validate it before updating nothead[], phases[], seen[], etc. Otherwise it would cause segfault at best. We could use "rev" instead of "maxrev" as upper bound, but I think the explicit "maxrev" can clarify that we just want to avoid possible buffer overflow vulnerability. diff --git a/mercurial/parsers.c b/mercurial/parsers.c --- a/mercurial/parsers.c +++ b/mercurial/parsers.c @@ -752,8 +752,8 @@ static const char *index_deref(indexObje return PyString_AS_STRING(self->data) + pos * v1_hdrsize; } -static inline void index_get_parents(indexObject *self, Py_ssize_t rev, - int *ps) +static inline int index_get_parents(indexObject *self, Py_ssize_t rev, + int *ps, int maxrev) { if (rev >= self->length - 1) { PyObject *tuple = PyList_GET_ITEM(self->added, @@ -765,6 +765,13 @@ static inline void index_get_parents(ind ps[0] = getbe32(data + 24); ps[1] = getbe32(data + 28); } + /* If index file is corrupted, ps[] may point to invalid revisions. So + * there is a risk of buffer overflow to trust them unconditionally. */ + if (ps[0] > maxrev || ps[1] > maxrev) { + PyErr_SetString(PyExc_ValueError, "parent out of range"); + return -1; + } + return 0; } @@ -1149,7 +1156,8 @@ static PyObject *compute_phases_map_sets if (minrevallphases != -1) { int parents[2]; for (i = minrevallphases; i < len; i++) { - index_get_parents(self, i, parents); + if (index_get_parents(self, i, parents, len - 1) < 0) + goto release_phasesetlist; set_phase_from_parents(phases, parents[0], parents[1], i); } } @@ -1248,7 +1256,8 @@ static PyObject *index_headrevs(indexObj continue; } - index_get_parents(self, i, parents); + if (index_get_parents(self, i, parents, len - 1) < 0) + goto bail; for (j = 0; j < 2; j++) { if (parents[j] >= 0) nothead[parents[j]] = 1; @@ -1716,7 +1725,8 @@ static PyObject *find_gca_candidates(ind } } } - index_get_parents(self, v, parents); + if (index_get_parents(self, v, parents, maxrev) < 0) + goto bail; for (i = 0; i < 2; i++) { int p = parents[i]; @@ -1813,7 +1823,8 @@ static PyObject *find_deepest(indexObjec continue; sv = seen[v]; - index_get_parents(self, v, parents); + if (index_get_parents(self, v, parents, maxrev) < 0) + goto bail; for (i = 0; i < 2; i++) { int p = parents[i]; diff --git a/tests/test-parseindex.t b/tests/test-parseindex.t --- a/tests/test-parseindex.t +++ b/tests/test-parseindex.t @@ -59,3 +59,62 @@ We approximate that by reducing the read 26333235a41c $ cd .. + +Test corrupted p1/p2 fields that could cause SEGV at parsers.c: + + $ mkdir invalidparent + $ cd invalidparent + + $ hg clone --pull -q --config phases.publish=False ../a limit + $ hg clone --pull -q --config phases.publish=False ../a segv + $ rm -R limit/.hg/cache segv/.hg/cache + + $ python < data = open("limit/.hg/store/00changelog.i", "rb").read() + > for n, p in [('limit', '\0\0\0\x02'), ('segv', '\0\x01\0\0')]: + > # corrupt p1 at rev0 and p2 at rev1 + > d = data[:24] + p + data[28:127 + 28] + p + data[127 + 32:] + > open(n + "/.hg/store/00changelog.i", "wb").write(d) + > EOF + + $ hg debugindex -f1 limit/.hg/store/00changelog.i + rev flag offset length size base link p1 p2 nodeid + 0 0000 0 63 62 0 0 2 -1 7c31755bf9b5 + 1 0000 63 66 65 1 1 0 2 26333235a41c + $ hg debugindex -f1 segv/.hg/store/00changelog.i + rev flag offset length size base link p1 p2 nodeid + 0 0000 0 63 62 0 0 65536 -1 7c31755bf9b5 + 1 0000 63 66 65 1 1 0 65536 26333235a41c + + $ cat < test.py + > import sys + > from mercurial import changelog, scmutil + > cl = changelog.changelog(scmutil.vfs(sys.argv[1])) + > n0, n1 = cl.node(0), cl.node(1) + > ops = [ + > ('compute_phases_map_sets', lambda: cl.computephases([[0], []])), + > ('index_headrevs', lambda: cl.headrevs()), + > ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)), + > ('find_deepest', lambda: cl.ancestor(n0, n1)), + > ] + > for l, f in ops: + > print l + ':', + > try: + > f() + > print 'uncaught buffer overflow?' + > except ValueError, inst: + > print inst + > EOF + + $ python test.py limit/.hg/store + compute_phases_map_sets: parent out of range + index_headrevs: parent out of range + find_gca_candidates: parent out of range + find_deepest: parent out of range + $ python test.py segv/.hg/store + compute_phases_map_sets: parent out of range + index_headrevs: parent out of range + find_gca_candidates: parent out of range + find_deepest: parent out of range + + $ cd ..