mpatch.c
280 lines
| 5.9 KiB
| text/x-c
|
CLexer
/ mercurial / mpatch.c
mpm@selenic.com
|
r72 | /* | ||
mpatch.c - efficient binary patching for Mercurial | ||||
This implements a patch algorithm that's O(m + nlog n) where m is the | ||||
size of the output and n is the number of patches. | ||||
Given a list of binary patches, it unpacks each into a hunk list, | ||||
then combines the hunk lists with a treewise recursion to form a | ||||
single hunk list. This hunk list is then applied to the original | ||||
text. | ||||
The text (or binary) fragments are copied directly from their source | ||||
Python objects into a preallocated output string to avoid the | ||||
allocation of intermediate Python objects. Working memory is about 2x | ||||
the total number of hunks. | ||||
Vadim Gelfer
|
r2859 | Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | ||
mpm@selenic.com
|
r72 | |||
This software may be used and distributed according to the terms | ||||
of the GNU General Public License, incorporated herein by reference. | ||||
*/ | ||||
#include <stdlib.h> | ||||
#include <string.h> | ||||
Vadim Gelfer
|
r2468 | |||
Maciej Fijalkowski
|
r29444 | #include "bitmanipulation.h" | ||
Maciej Fijalkowski
|
r29691 | #include "compat.h" | ||
Maciej Fijalkowski
|
r29694 | #include "mpatch.h" | ||
mpm@selenic.com
|
r72 | |||
Yuya Nishihara
|
r29741 | static struct mpatch_flist *lalloc(ssize_t size) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_flist *a = NULL; | ||
mpm@selenic.com
|
r72 | |||
Matt Mackall
|
r3138 | if (size < 1) | ||
size = 1; | ||||
Maciej Fijalkowski
|
r29692 | a = (struct mpatch_flist *)malloc(sizeof(struct mpatch_flist)); | ||
mpm@selenic.com
|
r128 | if (a) { | ||
Maciej Fijalkowski
|
r29692 | a->base = (struct mpatch_frag *)malloc(sizeof(struct mpatch_frag) * size); | ||
Thomas Arendsen Hein
|
r2048 | if (a->base) { | ||
mpm@selenic.com
|
r128 | a->head = a->tail = a->base; | ||
Thomas Arendsen Hein
|
r2048 | return a; | ||
} | ||||
free(a); | ||||
mpm@selenic.com
|
r128 | } | ||
Benoit Boissinot
|
r1722 | return NULL; | ||
mpm@selenic.com
|
r72 | } | ||
Maciej Fijalkowski
|
r29693 | void mpatch_lfree(struct mpatch_flist *a) | ||
mpm@selenic.com
|
r72 | { | ||
mpm@selenic.com
|
r128 | if (a) { | ||
free(a->base); | ||||
free(a); | ||||
} | ||||
mpm@selenic.com
|
r72 | } | ||
Maciej Fijalkowski
|
r29692 | static ssize_t lsize(struct mpatch_flist *a) | ||
mpm@selenic.com
|
r72 | { | ||
return a->tail - a->head; | ||||
} | ||||
/* move hunks in source that are less cut to dest, compensating | ||||
for changes in offset. the last hunk may be split if necessary. | ||||
*/ | ||||
Maciej Fijalkowski
|
r29692 | static int gather(struct mpatch_flist *dest, struct mpatch_flist *src, int cut, | ||
int offset) | ||||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_frag *d = dest->tail, *s = src->head; | ||
mpm@selenic.com
|
r72 | int postend, c, l; | ||
while (s != src->tail) { | ||||
if (s->start + offset >= cut) | ||||
mpm@selenic.com
|
r82 | break; /* we've gone far enough */ | ||
mpm@selenic.com
|
r72 | |||
postend = offset + s->start + s->len; | ||||
if (postend <= cut) { | ||||
/* save this hunk */ | ||||
offset += s->start + s->len - s->end; | ||||
*d++ = *s++; | ||||
} | ||||
else { | ||||
/* break up this hunk */ | ||||
c = cut - offset; | ||||
if (s->end < c) | ||||
c = s->end; | ||||
l = cut - offset - s->start; | ||||
if (s->len < l) | ||||
l = s->len; | ||||
offset += s->start + l - c; | ||||
d->start = s->start; | ||||
d->end = c; | ||||
d->len = l; | ||||
d->data = s->data; | ||||
d++; | ||||
s->start = c; | ||||
s->len = s->len - l; | ||||
s->data = s->data + l; | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | } | ||
} | ||||
dest->tail = d; | ||||
src->head = s; | ||||
return offset; | ||||
} | ||||
/* like gather, but with no output list */ | ||||
Maciej Fijalkowski
|
r29692 | static int discard(struct mpatch_flist *src, int cut, int offset) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_frag *s = src->head; | ||
mpm@selenic.com
|
r72 | int postend, c, l; | ||
while (s != src->tail) { | ||||
if (s->start + offset >= cut) | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | |||
postend = offset + s->start + s->len; | ||||
if (postend <= cut) { | ||||
offset += s->start + s->len - s->end; | ||||
s++; | ||||
} | ||||
else { | ||||
c = cut - offset; | ||||
if (s->end < c) | ||||
c = s->end; | ||||
l = cut - offset - s->start; | ||||
if (s->len < l) | ||||
l = s->len; | ||||
offset += s->start + l - c; | ||||
s->start = c; | ||||
s->len = s->len - l; | ||||
s->data = s->data + l; | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | } | ||
} | ||||
src->head = s; | ||||
return offset; | ||||
} | ||||
/* combine hunk lists a and b, while adjusting b for offset changes in a/ | ||||
this deletes a and b and returns the resultant list. */ | ||||
Maciej Fijalkowski
|
r29692 | static struct mpatch_flist *combine(struct mpatch_flist *a, | ||
struct mpatch_flist *b) | ||||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_flist *c = NULL; | ||
struct mpatch_frag *bh, *ct; | ||||
mpm@selenic.com
|
r72 | int offset = 0, post; | ||
mpm@selenic.com
|
r128 | if (a && b) | ||
c = lalloc((lsize(a) + lsize(b)) * 2); | ||||
if (c) { | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | for (bh = b->head; bh != b->tail; bh++) { | ||
/* save old hunks */ | ||||
offset = gather(c, a, bh->start, offset); | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | /* discard replaced hunks */ | ||
post = discard(a, bh->end, offset); | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | /* insert new hunk */ | ||
ct = c->tail; | ||||
ct->start = bh->start - offset; | ||||
ct->end = bh->end - post; | ||||
ct->len = bh->len; | ||||
ct->data = bh->data; | ||||
c->tail++; | ||||
offset = post; | ||||
} | ||||
/* hold on to tail from a */ | ||||
Maciej Fijalkowski
|
r29692 | memcpy(c->tail, a->head, sizeof(struct mpatch_frag) * lsize(a)); | ||
mpm@selenic.com
|
r128 | c->tail += lsize(a); | ||
mpm@selenic.com
|
r72 | } | ||
Maciej Fijalkowski
|
r29692 | mpatch_lfree(a); | ||
mpatch_lfree(b); | ||||
mpm@selenic.com
|
r72 | return c; | ||
} | ||||
/* decode a binary patch into a hunk list */ | ||||
Maciej Fijalkowski
|
r29694 | int mpatch_decode(const char *bin, ssize_t len, struct mpatch_flist **res) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_flist *l; | ||
struct mpatch_frag *lt; | ||||
Matt Mackall
|
r20167 | int pos = 0; | ||
mpm@selenic.com
|
r72 | |||
/* assume worst case size, we won't have many of these lists */ | ||||
Matt Mackall
|
r28656 | l = lalloc(len / 12 + 1); | ||
Benoit Boissinot
|
r1722 | if (!l) | ||
Maciej Fijalkowski
|
r29694 | return MPATCH_ERR_NO_MEM; | ||
Benoit Boissinot
|
r1722 | |||
mpm@selenic.com
|
r72 | lt = l->tail; | ||
Matt Mackall
|
r20167 | while (pos >= 0 && pos < len) { | ||
lt->start = getbe32(bin + pos); | ||||
lt->end = getbe32(bin + pos + 4); | ||||
lt->len = getbe32(bin + pos + 8); | ||||
lt->data = bin + pos + 12; | ||||
pos += 12 + lt->len; | ||||
Matt Mackall
|
r28657 | if (lt->start > lt->end || lt->len < 0) | ||
break; /* sanity check */ | ||||
mpm@selenic.com
|
r72 | lt++; | ||
} | ||||
Matt Mackall
|
r20167 | if (pos != len) { | ||
Maciej Fijalkowski
|
r29692 | mpatch_lfree(l); | ||
Maciej Fijalkowski
|
r29694 | return MPATCH_ERR_CANNOT_BE_DECODED; | ||
Benoit Boissinot
|
r1722 | } | ||
mpm@selenic.com
|
r72 | l->tail = lt; | ||
Maciej Fijalkowski
|
r29694 | *res = l; | ||
return 0; | ||||
mpm@selenic.com
|
r72 | } | ||
/* calculate the size of resultant text */ | ||||
Maciej Fijalkowski
|
r29693 | ssize_t mpatch_calcsize(ssize_t len, struct mpatch_flist *l) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29691 | ssize_t outlen = 0, last = 0; | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_frag *f = l->head; | ||
mpm@selenic.com
|
r72 | |||
while (f != l->tail) { | ||||
Benoit Boissinot
|
r1722 | if (f->start < last || f->end > len) { | ||
Maciej Fijalkowski
|
r29694 | return MPATCH_ERR_INVALID_PATCH; | ||
Benoit Boissinot
|
r1722 | } | ||
mpm@selenic.com
|
r72 | outlen += f->start - last; | ||
last = f->end; | ||||
outlen += f->len; | ||||
f++; | ||||
} | ||||
outlen += len - last; | ||||
return outlen; | ||||
} | ||||
Maciej Fijalkowski
|
r29693 | int mpatch_apply(char *buf, const char *orig, ssize_t len, | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_flist *l) | ||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29692 | struct mpatch_frag *f = l->head; | ||
mpm@selenic.com
|
r72 | int last = 0; | ||
char *p = buf; | ||||
while (f != l->tail) { | ||||
Benoit Boissinot
|
r1722 | if (f->start < last || f->end > len) { | ||
Maciej Fijalkowski
|
r29694 | return MPATCH_ERR_INVALID_PATCH; | ||
Benoit Boissinot
|
r1722 | } | ||
mpm@selenic.com
|
r72 | memcpy(p, orig + last, f->start - last); | ||
p += f->start - last; | ||||
memcpy(p, f->data, f->len); | ||||
last = f->end; | ||||
p += f->len; | ||||
f++; | ||||
} | ||||
memcpy(p, orig + last, len - last); | ||||
Maciej Fijalkowski
|
r29694 | return 0; | ||
mpm@selenic.com
|
r72 | } | ||
/* recursively generate a patch of all bins between start and end */ | ||||
Maciej Fijalkowski
|
r29694 | struct mpatch_flist *mpatch_fold(void *bins, | ||
struct mpatch_flist* (*get_next_item)(void*, ssize_t), | ||||
ssize_t start, ssize_t end) | ||||
mpm@selenic.com
|
r72 | { | ||
Maciej Fijalkowski
|
r29694 | ssize_t len; | ||
mpm@selenic.com
|
r72 | |||
if (start + 1 == end) { | ||||
/* trivial case, output a decoded list */ | ||||
Maciej Fijalkowski
|
r29694 | return get_next_item(bins, start); | ||
mpm@selenic.com
|
r72 | } | ||
/* divide and conquer, memory management is elsewhere */ | ||||
len = (end - start) / 2; | ||||
Maciej Fijalkowski
|
r29694 | return combine(mpatch_fold(bins, get_next_item, start, start + len), | ||
mpatch_fold(bins, get_next_item, start + len, end)); | ||||
mpm@selenic.com
|
r72 | } | ||