##// END OF EJS Templates
merge-lists: make it possible to specify pattern to match...
merge-lists: make it possible to specify pattern to match The `merge-lists` tool doesn't know anything about Python other than its regex that attempts to match import lines. Let's make it possible to pass in a custom regex so it's easy to use the tool for e.g. C/C++ `#include` lines or Rust `use` lines (given the limited). Differential Revision: https://phab.mercurial-scm.org/D12392

File last commit:

r47575:d4ba4d51 default
r49875:b999edb1 default
Show More
mpatch.c
393 lines | 8.2 KiB | text/x-c | CLexer
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 /*
mpatch.c - efficient binary patching for Mercurial
This implements a patch algorithm that's O(m + nlog n) where m is the
size of the output and n is the number of patches.
Given a list of binary patches, it unpacks each into a hunk list,
then combines the hunk lists with a treewise recursion to form a
single hunk list. This hunk list is then applied to the original
text.
The text (or binary) fragments are copied directly from their source
Python objects into a preallocated output string to avoid the
allocation of intermediate Python objects. Working memory is about 2x
the total number of hunks.
Raphaël Gomès
contributor: change mentions of mpm to olivia...
r47575 Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
This software may be used and distributed according to the terms
of the GNU General Public License, incorporated herein by reference.
*/
Augie Fackler
mpatch: introduce a safeadd() helper to work around UB int overflow...
r38248 #include <limits.h>
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 #include <stdlib.h>
#include <string.h>
Vadim Gelfer
mac os x: fixes for 10.2 from chris monson <monpublic@gmail.com>
r2468
Maciej Fijalkowski
internals: move the bitmanipulation routines into its own file...
r29444 #include "bitmanipulation.h"
Maciej Fijalkowski
mpatch: change Py_ssize_t to ssize_t in places that will be later copied
r29691 #include "compat.h"
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 #include "mpatch.h"
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
Augie Fackler
mpatch: introduce a safeadd() helper to work around UB int overflow...
r38248 /* VC9 doesn't include bool and lacks stdbool.h based on cext/util.h */
#if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
#define true 1
#define false 0
typedef unsigned char bool;
#else
#include <stdbool.h>
#endif
Yuya Nishihara
mpatch: change lalloc() to local function...
r29741 static struct mpatch_flist *lalloc(ssize_t size)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 struct mpatch_flist *a = NULL;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (size < 1) {
Matt Mackall
mpatch: Fix for malloc corner case on AIX
r3138 size = 1;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Matt Mackall
mpatch: Fix for malloc corner case on AIX
r3138
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 a = (struct mpatch_flist *)malloc(sizeof(struct mpatch_flist));
mpm@selenic.com
Add safety checking to mpatch
r128 if (a) {
Augie Fackler
mpatch: re-wrap wide line with clang-format...
r34634 a->base = (struct mpatch_frag *)malloc(
sizeof(struct mpatch_frag) * size);
Thomas Arendsen Hein
Set correct exception for another possible malloc error in mpatch.c
r2048 if (a->base) {
mpm@selenic.com
Add safety checking to mpatch
r128 a->head = a->tail = a->base;
Thomas Arendsen Hein
Set correct exception for another possible malloc error in mpatch.c
r2048 return a;
}
free(a);
mpm@selenic.com
Add safety checking to mpatch
r128 }
Benoit Boissinot
catch errors and throw exception with invalid binary patch data
r1722 return NULL;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }
Maciej Fijalkowski
mpatch: split mpatch into two files
r29693 void mpatch_lfree(struct mpatch_flist *a)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
mpm@selenic.com
Add safety checking to mpatch
r128 if (a) {
free(a->base);
free(a);
}
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 static ssize_t lsize(struct mpatch_flist *a)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
return a->tail - a->head;
}
Augie Fackler
mpatch: introduce a safeadd() helper to work around UB int overflow...
r38248 /* add helper to add src and *dest iff it won't overflow */
static inline bool safeadd(int src, int *dest)
{
if ((src > 0) == (*dest > 0)) {
if (*dest > 0) {
if (src > (INT_MAX - *dest)) {
return false;
}
} else {
if (src < (INT_MIN - *dest)) {
return false;
}
}
}
*dest += src;
return true;
}
Augie Fackler
mpatch: introduce a safesub() helper as well...
r38249 /* subtract src from dest and store result in dest */
static inline bool safesub(int src, int *dest)
{
if (((src > 0) && (*dest < INT_MIN + src)) ||
((src < 0) && (*dest > INT_MAX + src))) {
return false;
}
*dest -= src;
return true;
}
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 /* move hunks in source that are less cut to dest, compensating
for changes in offset. the last hunk may be split if necessary.
*/
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 static int gather(struct mpatch_flist *dest, struct mpatch_flist *src, int cut,
Augie Fackler
mpatch: reformat function prototypes with clang-format...
r34801 int offset)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 struct mpatch_frag *d = dest->tail, *s = src->head;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 int postend, c, l;
while (s != src->tail) {
Augie Fackler
mpatch: fix UB in int overflows in gather() (SEC)
r38250 int soffset = s->start;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!safeadd(offset, &soffset)) {
Augie Fackler
mpatch: fix UB in int overflows in gather() (SEC)
r38250 break; /* add would overflow, oh well */
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
if (soffset >= cut) {
mpm@selenic.com
Gotos are embarrassing.
r82 break; /* we've gone far enough */
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
Augie Fackler
mpatch: fix UB in int overflows in gather() (SEC)
r38250 postend = offset;
if (!safeadd(s->start, &postend) ||
!safeadd(s->len, &postend)) {
break;
}
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 if (postend <= cut) {
/* save this hunk */
Augie Fackler
mpatch: fix UB in int overflows in gather() (SEC)
r38250 int tmp = s->start;
if (!safesub(s->end, &tmp)) {
break;
}
if (!safeadd(s->len, &tmp)) {
break;
}
if (!safeadd(tmp, &offset)) {
break; /* add would overflow, oh well */
}
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 *d++ = *s++;
Augie Fackler
mpatch: reflow two oddly formatted else blocks with clang-format...
r34635 } else {
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 /* break up this hunk */
Augie Fackler
mpatch: fix UB in int overflows in gather() (SEC)
r38250 c = cut;
if (!safesub(offset, &c)) {
break;
}
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (s->end < c) {
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 c = s->end;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 l = cut - offset - s->start;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (s->len < l) {
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 l = s->len;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
offset += s->start + l - c;
d->start = s->start;
d->end = c;
d->len = l;
d->data = s->data;
d++;
s->start = c;
s->len = s->len - l;
s->data = s->data + l;
mpm@selenic.com
Gotos are embarrassing.
r82 break;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }
}
dest->tail = d;
src->head = s;
return offset;
}
/* like gather, but with no output list */
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 static int discard(struct mpatch_flist *src, int cut, int offset)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 struct mpatch_frag *s = src->head;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 int postend, c, l;
while (s != src->tail) {
Augie Fackler
mpatch: fix UB integer overflows in discard() (SEC)
r38251 int cmpcut = s->start;
if (!safeadd(offset, &cmpcut)) {
break;
}
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (cmpcut >= cut) {
mpm@selenic.com
Gotos are embarrassing.
r82 break;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
Augie Fackler
mpatch: fix UB integer overflows in discard() (SEC)
r38251 postend = offset;
if (!safeadd(s->start, &postend)) {
break;
}
if (!safeadd(s->len, &postend)) {
break;
}
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 if (postend <= cut) {
Augie Fackler
mpatch: fix UB integer overflows in discard() (SEC)
r38251 /* do the subtraction first to avoid UB integer overflow
*/
int tmp = s->start;
if (!safesub(s->end, &tmp)) {
break;
}
if (!safeadd(s->len, &tmp)) {
break;
}
if (!safeadd(tmp, &offset)) {
break;
}
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 s++;
Augie Fackler
mpatch: reflow two oddly formatted else blocks with clang-format...
r34635 } else {
Augie Fackler
mpatch: fix UB integer overflows in discard() (SEC)
r38251 c = cut;
if (!safesub(offset, &c)) {
break;
}
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (s->end < c) {
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 c = s->end;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 l = cut - offset - s->start;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (s->len < l) {
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 l = s->len;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
offset += s->start + l - c;
s->start = c;
s->len = s->len - l;
s->data = s->data + l;
mpm@selenic.com
Gotos are embarrassing.
r82 break;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }
}
src->head = s;
return offset;
}
/* combine hunk lists a and b, while adjusting b for offset changes in a/
this deletes a and b and returns the resultant list. */
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 static struct mpatch_flist *combine(struct mpatch_flist *a,
Augie Fackler
mpatch: reformat function prototypes with clang-format...
r34801 struct mpatch_flist *b)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 struct mpatch_flist *c = NULL;
struct mpatch_frag *bh, *ct;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 int offset = 0, post;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (a && b) {
mpm@selenic.com
Add safety checking to mpatch
r128 c = lalloc((lsize(a) + lsize(b)) * 2);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
mpm@selenic.com
Add safety checking to mpatch
r128
if (c) {
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
mpm@selenic.com
Add safety checking to mpatch
r128 for (bh = b->head; bh != b->tail; bh++) {
/* save old hunks */
offset = gather(c, a, bh->start, offset);
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
mpm@selenic.com
Add safety checking to mpatch
r128 /* discard replaced hunks */
post = discard(a, bh->end, offset);
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
mpm@selenic.com
Add safety checking to mpatch
r128 /* insert new hunk */
ct = c->tail;
Augie Fackler
mpatch: avoid integer overflow in combine() (SEC)...
r38253 ct->start = bh->start;
ct->end = bh->end;
if (!safesub(offset, &(ct->start)) ||
!safesub(post, &(ct->end))) {
/* It was already possible to exit
* this function with a return value
* of NULL before the safesub()s were
* added, so this should be fine. */
mpatch_lfree(c);
c = NULL;
goto done;
}
mpm@selenic.com
Add safety checking to mpatch
r128 ct->len = bh->len;
ct->data = bh->data;
c->tail++;
offset = post;
}
/* hold on to tail from a */
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 memcpy(c->tail, a->head, sizeof(struct mpatch_frag) * lsize(a));
mpm@selenic.com
Add safety checking to mpatch
r128 c->tail += lsize(a);
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }
Augie Fackler
mpatch: avoid integer overflow in combine() (SEC)...
r38253 done:
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 mpatch_lfree(a);
mpatch_lfree(b);
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 return c;
}
/* decode a binary patch into a hunk list */
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 int mpatch_decode(const char *bin, ssize_t len, struct mpatch_flist **res)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 struct mpatch_flist *l;
struct mpatch_frag *lt;
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 int pos = 0;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
/* assume worst case size, we won't have many of these lists */
Matt Mackall
parsers: fix list sizing rounding error (SEC)...
r28656 l = lalloc(len / 12 + 1);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!l) {
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 return MPATCH_ERR_NO_MEM;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Benoit Boissinot
catch errors and throw exception with invalid binary patch data
r1722
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 lt = l->tail;
Augie Fackler
mpatch: be more careful about parsing binary patch data (SEC)...
r38245 /* We check against len-11 to ensure we have at least 12 bytes
left in the patch so we can read our three be32s out of it. */
while (pos >= 0 && pos < (len - 11)) {
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 lt->start = getbe32(bin + pos);
lt->end = getbe32(bin + pos + 4);
lt->len = getbe32(bin + pos + 8);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (lt->start < 0 || lt->start > lt->end || lt->len < 0) {
Matt Mackall
parsers: detect short records (SEC)...
r28657 break; /* sanity check */
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Augie Fackler
mpatch: avoid integer overflow in mpatch_decode (SEC)
r38252 if (!safeadd(12, &pos)) {
break;
}
lt->data = bin + pos;
if (!safeadd(lt->len, &pos)) {
break;
}
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 lt++;
}
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 if (pos != len) {
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 mpatch_lfree(l);
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 return MPATCH_ERR_CANNOT_BE_DECODED;
Benoit Boissinot
catch errors and throw exception with invalid binary patch data
r1722 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 l->tail = lt;
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 *res = l;
return 0;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }
/* calculate the size of resultant text */
Maciej Fijalkowski
mpatch: split mpatch into two files
r29693 ssize_t mpatch_calcsize(ssize_t len, struct mpatch_flist *l)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
Maciej Fijalkowski
mpatch: change Py_ssize_t to ssize_t in places that will be later copied
r29691 ssize_t outlen = 0, last = 0;
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 struct mpatch_frag *f = l->head;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
while (f != l->tail) {
Benoit Boissinot
catch errors and throw exception with invalid binary patch data
r1722 if (f->start < last || f->end > len) {
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 return MPATCH_ERR_INVALID_PATCH;
Benoit Boissinot
catch errors and throw exception with invalid binary patch data
r1722 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 outlen += f->start - last;
last = f->end;
outlen += f->len;
f++;
}
outlen += len - last;
return outlen;
}
Maciej Fijalkowski
mpatch: split mpatch into two files
r29693 int mpatch_apply(char *buf, const char *orig, ssize_t len,
Augie Fackler
mpatch: reformat function prototypes with clang-format...
r34801 struct mpatch_flist *l)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
Maciej Fijalkowski
mpatch: provide things that will be exported later with a prefixed name...
r29692 struct mpatch_frag *f = l->head;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 int last = 0;
char *p = buf;
while (f != l->tail) {
Augie Fackler
mpatch: ensure fragment start isn't past the end of orig (SEC)...
r38247 if (f->start < last || f->start > len || f->end > len ||
last < 0) {
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 return MPATCH_ERR_INVALID_PATCH;
Benoit Boissinot
catch errors and throw exception with invalid binary patch data
r1722 }
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 memcpy(p, orig + last, f->start - last);
p += f->start - last;
memcpy(p, f->data, f->len);
last = f->end;
p += f->len;
f++;
}
Augie Fackler
mpatch: protect against underflow in mpatch_apply (SEC)...
r38246 if (last < 0) {
return MPATCH_ERR_INVALID_PATCH;
}
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 memcpy(p, orig + last, len - last);
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 return 0;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }
/* recursively generate a patch of all bins between start and end */
Augie Fackler
mpatch: reformat function prototypes with clang-format...
r34801 struct mpatch_flist *
mpatch_fold(void *bins, struct mpatch_flist *(*get_next_item)(void *, ssize_t),
ssize_t start, ssize_t end)
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 {
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 ssize_t len;
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72
if (start + 1 == end) {
/* trivial case, output a decoded list */
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 return get_next_item(bins, start);
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }
/* divide and conquer, memory management is elsewhere */
len = (end - start) / 2;
Maciej Fijalkowski
mpatch: remove dependency on Python.h in mpatch.c...
r29694 return combine(mpatch_fold(bins, get_next_item, start, start + len),
Augie Fackler
mpatch: switch alignment of wrapped line from tab to spaces with clang-format...
r34802 mpatch_fold(bins, get_next_item, start + len, end));
mpm@selenic.com
Add an O(m + nlog n) patching extension
r72 }