mpatch.c
367 lines
| 7.8 KiB
| text/x-c
|
CLexer
/ mercurial / mpatch.c
mpm@selenic.com
|
r72 | /* | ||
mpatch.c - efficient binary patching for Mercurial | ||||
This implements a patch algorithm that's O(m + nlog n) where m is the | ||||
size of the output and n is the number of patches. | ||||
Given a list of binary patches, it unpacks each into a hunk list, | ||||
then combines the hunk lists with a treewise recursion to form a | ||||
single hunk list. This hunk list is then applied to the original | ||||
text. | ||||
The text (or binary) fragments are copied directly from their source | ||||
Python objects into a preallocated output string to avoid the | ||||
allocation of intermediate Python objects. Working memory is about 2x | ||||
the total number of hunks. | ||||
Vadim Gelfer
|
r2859 | Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | ||
mpm@selenic.com
|
r72 | |||
This software may be used and distributed according to the terms | ||||
of the GNU General Public License, incorporated herein by reference. | ||||
*/ | ||||
Augie Fackler
|
r38248 | #include <limits.h> | ||
mpm@selenic.com
|
r72 | #include <stdlib.h> | ||
#include <string.h> | ||||
Vadim Gelfer
|
r2468 | |||
Maciej Fijalkowski
|
r29444 | #include "bitmanipulation.h" | ||
Maciej Fijalkowski
|
r29691 | #include "compat.h" | ||
Maciej Fijalkowski
|
r29694 | #include "mpatch.h" | ||
mpm@selenic.com
|
r72 | |||
Augie Fackler
|
r38248 | /* VC9 doesn't include bool and lacks stdbool.h based on cext/util.h */ | ||
#if defined(_MSC_VER) || __STDC_VERSION__ < 199901L | ||||
#define true 1 | ||||
#define false 0 | ||||
typedef unsigned char bool; | ||||
#else | ||||
#include <stdbool.h> | ||||
#endif | ||||
Yuya Nishihara
|
r29741 | static struct mpatch_flist *lalloc(ssize_t size) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_flist *a = NULL; | ||
mpm@selenic.com
|
r72 | |||
Matt Mackall
|
r3138 | if (size < 1) | ||
size = 1; | ||||
Maciej Fijalkowski
|
r29692 | a = (struct mpatch_flist *)malloc(sizeof(struct mpatch_flist)); | ||
mpm@selenic.com
|
r128 | if (a) { | ||
Augie Fackler
|
r34634 | a->base = (struct mpatch_frag *)malloc( | ||
sizeof(struct mpatch_frag) * size); | ||||
Thomas Arendsen Hein
|
r2048 | if (a->base) { | ||
mpm@selenic.com
|
r128 | a->head = a->tail = a->base; | ||
Thomas Arendsen Hein
|
r2048 | return a; | ||
} | ||||
free(a); | ||||
mpm@selenic.com
|
r128 | } | ||
Benoit Boissinot
|
r1722 | return NULL; | ||
mpm@selenic.com
|
r72 | } | ||
Maciej Fijalkowski
|
r29693 | void mpatch_lfree(struct mpatch_flist *a) | ||
mpm@selenic.com
|
r72 | { | ||
mpm@selenic.com
|
r128 | if (a) { | ||
free(a->base); | ||||
free(a); | ||||
} | ||||
mpm@selenic.com
|
r72 | } | ||
Maciej Fijalkowski
|
r29692 | static ssize_t lsize(struct mpatch_flist *a) | ||
mpm@selenic.com
|
r72 | { | ||
return a->tail - a->head; | ||||
} | ||||
Augie Fackler
|
r38248 | /* add helper to add src and *dest iff it won't overflow */ | ||
static inline bool safeadd(int src, int *dest) | ||||
{ | ||||
if ((src > 0) == (*dest > 0)) { | ||||
if (*dest > 0) { | ||||
if (src > (INT_MAX - *dest)) { | ||||
return false; | ||||
} | ||||
} else { | ||||
if (src < (INT_MIN - *dest)) { | ||||
return false; | ||||
} | ||||
} | ||||
} | ||||
*dest += src; | ||||
return true; | ||||
} | ||||
Augie Fackler
|
r38249 | /* subtract src from dest and store result in dest */ | ||
static inline bool safesub(int src, int *dest) | ||||
{ | ||||
if (((src > 0) && (*dest < INT_MIN + src)) || | ||||
((src < 0) && (*dest > INT_MAX + src))) { | ||||
return false; | ||||
} | ||||
*dest -= src; | ||||
return true; | ||||
} | ||||
mpm@selenic.com
|
r72 | /* move hunks in source that are less cut to dest, compensating | ||
for changes in offset. the last hunk may be split if necessary. | ||||
*/ | ||||
Maciej Fijalkowski
|
r29692 | static int gather(struct mpatch_flist *dest, struct mpatch_flist *src, int cut, | ||
Augie Fackler
|
r34801 | int offset) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_frag *d = dest->tail, *s = src->head; | ||
mpm@selenic.com
|
r72 | int postend, c, l; | ||
while (s != src->tail) { | ||||
Augie Fackler
|
r38250 | int soffset = s->start; | ||
if (!safeadd(offset, &soffset)) | ||||
break; /* add would overflow, oh well */ | ||||
if (soffset >= cut) | ||||
mpm@selenic.com
|
r82 | break; /* we've gone far enough */ | ||
mpm@selenic.com
|
r72 | |||
Augie Fackler
|
r38250 | postend = offset; | ||
if (!safeadd(s->start, &postend) || | ||||
!safeadd(s->len, &postend)) { | ||||
break; | ||||
} | ||||
mpm@selenic.com
|
r72 | if (postend <= cut) { | ||
/* save this hunk */ | ||||
Augie Fackler
|
r38250 | int tmp = s->start; | ||
if (!safesub(s->end, &tmp)) { | ||||
break; | ||||
} | ||||
if (!safeadd(s->len, &tmp)) { | ||||
break; | ||||
} | ||||
if (!safeadd(tmp, &offset)) { | ||||
break; /* add would overflow, oh well */ | ||||
} | ||||
mpm@selenic.com
|
r72 | *d++ = *s++; | ||
Augie Fackler
|
r34635 | } else { | ||
mpm@selenic.com
|
r72 | /* break up this hunk */ | ||
Augie Fackler
|
r38250 | c = cut; | ||
if (!safesub(offset, &c)) { | ||||
break; | ||||
} | ||||
mpm@selenic.com
|
r72 | if (s->end < c) | ||
c = s->end; | ||||
l = cut - offset - s->start; | ||||
if (s->len < l) | ||||
l = s->len; | ||||
offset += s->start + l - c; | ||||
d->start = s->start; | ||||
d->end = c; | ||||
d->len = l; | ||||
d->data = s->data; | ||||
d++; | ||||
s->start = c; | ||||
s->len = s->len - l; | ||||
s->data = s->data + l; | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | } | ||
} | ||||
dest->tail = d; | ||||
src->head = s; | ||||
return offset; | ||||
} | ||||
/* like gather, but with no output list */ | ||||
Maciej Fijalkowski
|
r29692 | static int discard(struct mpatch_flist *src, int cut, int offset) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_frag *s = src->head; | ||
mpm@selenic.com
|
r72 | int postend, c, l; | ||
while (s != src->tail) { | ||||
Augie Fackler
|
r38251 | int cmpcut = s->start; | ||
if (!safeadd(offset, &cmpcut)) { | ||||
break; | ||||
} | ||||
if (cmpcut >= cut) | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | |||
Augie Fackler
|
r38251 | postend = offset; | ||
if (!safeadd(s->start, &postend)) { | ||||
break; | ||||
} | ||||
if (!safeadd(s->len, &postend)) { | ||||
break; | ||||
} | ||||
mpm@selenic.com
|
r72 | if (postend <= cut) { | ||
Augie Fackler
|
r38251 | /* do the subtraction first to avoid UB integer overflow | ||
*/ | ||||
int tmp = s->start; | ||||
if (!safesub(s->end, &tmp)) { | ||||
break; | ||||
} | ||||
if (!safeadd(s->len, &tmp)) { | ||||
break; | ||||
} | ||||
if (!safeadd(tmp, &offset)) { | ||||
break; | ||||
} | ||||
mpm@selenic.com
|
r72 | s++; | ||
Augie Fackler
|
r34635 | } else { | ||
Augie Fackler
|
r38251 | c = cut; | ||
if (!safesub(offset, &c)) { | ||||
break; | ||||
} | ||||
mpm@selenic.com
|
r72 | if (s->end < c) | ||
c = s->end; | ||||
l = cut - offset - s->start; | ||||
if (s->len < l) | ||||
l = s->len; | ||||
offset += s->start + l - c; | ||||
s->start = c; | ||||
s->len = s->len - l; | ||||
s->data = s->data + l; | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | } | ||
} | ||||
src->head = s; | ||||
return offset; | ||||
} | ||||
/* combine hunk lists a and b, while adjusting b for offset changes in a/ | ||||
this deletes a and b and returns the resultant list. */ | ||||
Maciej Fijalkowski
|
r29692 | static struct mpatch_flist *combine(struct mpatch_flist *a, | ||
Augie Fackler
|
r34801 | struct mpatch_flist *b) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_flist *c = NULL; | ||
struct mpatch_frag *bh, *ct; | ||||
mpm@selenic.com
|
r72 | int offset = 0, post; | ||
mpm@selenic.com
|
r128 | if (a && b) | ||
c = lalloc((lsize(a) + lsize(b)) * 2); | ||||
if (c) { | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | for (bh = b->head; bh != b->tail; bh++) { | ||
/* save old hunks */ | ||||
offset = gather(c, a, bh->start, offset); | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | /* discard replaced hunks */ | ||
post = discard(a, bh->end, offset); | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | /* insert new hunk */ | ||
ct = c->tail; | ||||
ct->start = bh->start - offset; | ||||
ct->end = bh->end - post; | ||||
ct->len = bh->len; | ||||
ct->data = bh->data; | ||||
c->tail++; | ||||
offset = post; | ||||
} | ||||
/* hold on to tail from a */ | ||||
Maciej Fijalkowski
|
r29692 | memcpy(c->tail, a->head, sizeof(struct mpatch_frag) * lsize(a)); | ||
mpm@selenic.com
|
r128 | c->tail += lsize(a); | ||
mpm@selenic.com
|
r72 | } | ||
Maciej Fijalkowski
|
r29692 | mpatch_lfree(a); | ||
mpatch_lfree(b); | ||||
mpm@selenic.com
|
r72 | return c; | ||
} | ||||
/* decode a binary patch into a hunk list */ | ||||
Maciej Fijalkowski
|
r29694 | int mpatch_decode(const char *bin, ssize_t len, struct mpatch_flist **res) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_flist *l; | ||
struct mpatch_frag *lt; | ||||
Matt Mackall
|
r20167 | int pos = 0; | ||
mpm@selenic.com
|
r72 | |||
/* assume worst case size, we won't have many of these lists */ | ||||
Matt Mackall
|
r28656 | l = lalloc(len / 12 + 1); | ||
Benoit Boissinot
|
r1722 | if (!l) | ||
Maciej Fijalkowski
|
r29694 | return MPATCH_ERR_NO_MEM; | ||
Benoit Boissinot
|
r1722 | |||
mpm@selenic.com
|
r72 | lt = l->tail; | ||
Augie Fackler
|
r38245 | /* We check against len-11 to ensure we have at least 12 bytes | ||
left in the patch so we can read our three be32s out of it. */ | ||||
while (pos >= 0 && pos < (len - 11)) { | ||||
Matt Mackall
|
r20167 | lt->start = getbe32(bin + pos); | ||
lt->end = getbe32(bin + pos + 4); | ||||
lt->len = getbe32(bin + pos + 8); | ||||
lt->data = bin + pos + 12; | ||||
pos += 12 + lt->len; | ||||
Matt Mackall
|
r28657 | if (lt->start > lt->end || lt->len < 0) | ||
break; /* sanity check */ | ||||
mpm@selenic.com
|
r72 | lt++; | ||
} | ||||
Matt Mackall
|
r20167 | if (pos != len) { | ||
Maciej Fijalkowski
|
r29692 | mpatch_lfree(l); | ||
Maciej Fijalkowski
|
r29694 | return MPATCH_ERR_CANNOT_BE_DECODED; | ||
Benoit Boissinot
|
r1722 | } | ||
mpm@selenic.com
|
r72 | l->tail = lt; | ||
Maciej Fijalkowski
|
r29694 | *res = l; | ||
return 0; | ||||
mpm@selenic.com
|
r72 | } | ||
/* calculate the size of resultant text */ | ||||
Maciej Fijalkowski
|
r29693 | ssize_t mpatch_calcsize(ssize_t len, struct mpatch_flist *l) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29691 | ssize_t outlen = 0, last = 0; | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_frag *f = l->head; | ||
mpm@selenic.com
|
r72 | |||
while (f != l->tail) { | ||||
Benoit Boissinot
|
r1722 | if (f->start < last || f->end > len) { | ||
Maciej Fijalkowski
|
r29694 | return MPATCH_ERR_INVALID_PATCH; | ||
Benoit Boissinot
|
r1722 | } | ||
mpm@selenic.com
|
r72 | outlen += f->start - last; | ||
last = f->end; | ||||
outlen += f->len; | ||||
f++; | ||||
} | ||||
outlen += len - last; | ||||
return outlen; | ||||
} | ||||
Maciej Fijalkowski
|
r29693 | int mpatch_apply(char *buf, const char *orig, ssize_t len, | ||
Augie Fackler
|
r34801 | struct mpatch_flist *l) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_frag *f = l->head; | ||
mpm@selenic.com
|
r72 | int last = 0; | ||
char *p = buf; | ||||
while (f != l->tail) { | ||||
Augie Fackler
|
r38247 | if (f->start < last || f->start > len || f->end > len || | ||
last < 0) { | ||||
Maciej Fijalkowski
|
r29694 | return MPATCH_ERR_INVALID_PATCH; | ||
Benoit Boissinot
|
r1722 | } | ||
mpm@selenic.com
|
r72 | memcpy(p, orig + last, f->start - last); | ||
p += f->start - last; | ||||
memcpy(p, f->data, f->len); | ||||
last = f->end; | ||||
p += f->len; | ||||
f++; | ||||
} | ||||
Augie Fackler
|
r38246 | if (last < 0) { | ||
return MPATCH_ERR_INVALID_PATCH; | ||||
} | ||||
mpm@selenic.com
|
r72 | memcpy(p, orig + last, len - last); | ||
Maciej Fijalkowski
|
r29694 | return 0; | ||
mpm@selenic.com
|
r72 | } | ||
/* recursively generate a patch of all bins between start and end */ | ||||
Augie Fackler
|
r34801 | struct mpatch_flist * | ||
mpatch_fold(void *bins, struct mpatch_flist *(*get_next_item)(void *, ssize_t), | ||||
ssize_t start, ssize_t end) | ||||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29694 | ssize_t len; | ||
mpm@selenic.com
|
r72 | |||
if (start + 1 == end) { | ||||
/* trivial case, output a decoded list */ | ||||
Maciej Fijalkowski
|
r29694 | return get_next_item(bins, start); | ||
mpm@selenic.com
|
r72 | } | ||
/* divide and conquer, memory management is elsewhere */ | ||||
len = (end - start) / 2; | ||||
Maciej Fijalkowski
|
r29694 | return combine(mpatch_fold(bins, get_next_item, start, start + len), | ||
Augie Fackler
|
r34802 | mpatch_fold(bins, get_next_item, start + len, end)); | ||
mpm@selenic.com
|
r72 | } | ||