##// END OF EJS Templates
templatefuncs: add mailmap template function...
templatefuncs: add mailmap template function This commit adds a template function to support the .mailmap file in Mercurial repositories. The .mailmap file comes from git, and can be used to map new emails and names for old commits. The general use case is that someone may change their name or author commits under different emails and aliases, which would make these commits appear as though they came from different persons. The file allows you to specify the correct name that should be used in place of the author field specified in the commit. The mailmap file has 4 possible formats used to map old "commit" names to new "proper" names: 1. <proper@email.com> <commit@email.com> 2. Proper Name <commit@email.com> 3. Proper Name <proper@email.com> <commit@email.com> 4. Proper Name <proper@email.com> Commit Name <commit@email.com> Essentially there is a commit email present in each mailmap entry, that maps to either an updated name, email, or both. The final possible format allows commits authored by a person who used both an old name and an old email to map to a new name and email. To parse the file, we split by spaces and build a name out of every element that does not start with "<". Once we find an element that does start with "<" we concatenate all the name elements that preceded and add that as a parsed name. We then add the email as the first parsed email. We repeat the process until the end of the line, or a comment is found. We will be left with all parsed names in a list, and all parsed emails in a list, with the 0 index being the proper values and the 1 index being the commit values (if they were specified in the entry). The commit values are added as the keys to a dict, and with the proper fields as the values. The mapname function takes the mapping object and the commit author field and attempts to look for a corresponding entry. To do so we try (commit name, commit email) first, and if no results are returned then (None, commit email) is also looked up. 
The two-step lookup is needed because of format 4 above: a mailmap entry may be keyed on both the commit name and the commit email, and when no such entry exists there may still be an entry keyed on the commit email alone. Differential Revision: https://phab.mercurial-scm.org/D2904

File last commit:

r35741:50868145 default
r37227:2a2ce93e default
Show More
bdiff.c
320 lines | 6.9 KiB | text/x-c | CLexer
/*
bdiff.c - efficient binary diff extension for Mercurial
Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
Based roughly on Python difflib
*/
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "bdiff.h"
#include "bitmanipulation.h"
#include "compat.h"
/*
 * Rolling hash, taken from diffutils.
 *
 * ROL(word, bits) rotates `word` left by `bits` within the width of its
 * own type; `bits` must be greater than zero and less than that width.
 * HASH(acc, byte) rotates the accumulator left by 7 and adds the next
 * byte to it.
 */
#define ROL(word, bits)                                                        \
	(((word) << (bits)) | ((word) >> (sizeof(word) * CHAR_BIT - (bits))))
#define HASH(acc, byte) ((byte) + ROL((acc), 7))
/*
 * Pair of integers used both as a hash-table bucket in equatelines()
 * (head index of a chain of equal lines, and how many lines are in it)
 * and as per-line match bookkeeping in longest_match() (index matched in
 * the other file, and the length of the run ending there).
 */
struct pos {
	int pos; /* line index; equatelines() uses -1 for an empty bucket */
	int len; /* chain population or match run length */
};
/*
 * Split the buffer a[0..len) into lines and hash each one.
 *
 * A line ends at every '\n' and at the last byte of the buffer, so a
 * final line without a trailing newline is still recorded.
 *
 * On success *lr points to a calloc()ed array holding one entry per line
 * followed by a sentinel entry (hash 0, len 0, l == a + len), and the
 * number of lines (sentinel excluded) is returned. The caller owns the
 * array and releases it with free().
 *
 * On allocation failure *lr is set to NULL and -1 is returned.
 */
int bdiff_splitlines(const char *a, ssize_t len, struct bdiff_line **lr)
{
	unsigned hash;
	int i;
	const char *p, *b = a;
	/*
	 * Work with the one-past-the-end pointer. The "last byte" pointer
	 * a + len - 1 would point before the buffer when len == 0, which
	 * is undefined behaviour even if it is never dereferenced.
	 */
	const char *const end = a + len;
	struct bdiff_line *l;

	/* count the lines */
	i = 1; /* extra line for sentinel */
	for (p = a; p < end; p++) {
		if (*p == '\n' || p + 1 == end) {
			i++;
		}
	}

	*lr = l = (struct bdiff_line *)calloc(i, sizeof(struct bdiff_line));
	if (!l) {
		return -1;
	}

	/* build the line array and calculate hashes */
	hash = 0;
	for (p = a; p < end; p++) {
		hash = HASH(hash, *p);

		/* a newline or the very last byte terminates a line */
		if (*p == '\n' || p + 1 == end) {
			l->hash = hash;
			hash = 0;
			l->len = p - b + 1;
			l->l = b;
			l->n = INT_MAX;
			l++;
			b = p + 1;
		}
	}

	/* set up a sentinel */
	l->hash = 0;
	l->len = 0;
	l->l = end;
	return i - 1;
}
/*
 * Compare two lines for equality.
 *
 * Returns 0 when the hash, the length and the bytes all match, and 1
 * otherwise.
 */
static inline int cmp(struct bdiff_line *a, struct bdiff_line *b)
{
	/* cheap checks first; memcmp only runs when both of them agree */
	if (a->hash != b->hash) {
		return 1;
	}
	if (a->len != b->len) {
		return 1;
	}
	return memcmp(a->l, b->l, a->len) != 0;
}
/*
 * Group the lines of b into equivalence classes and link the lines of a
 * to them.
 *
 * For every line of b: .e receives the hash-table slot of its class and
 * .n the index of the previously inserted line of the same class (-1 if
 * there was none), so each class forms a chain through .n.
 *
 * For every line of a: .e receives the slot its content probes to, and
 * .n the head of the matching chain in b, or -1 when there is no match
 * or the class holds more lines than the popularity threshold.
 *
 * Returns 1 on success and 0 if the hash table could not be allocated.
 */
static int equatelines(struct bdiff_line *a, int an, struct bdiff_line *b,
                       int bn)
{
	struct pos *table = NULL;
	int nbuckets = 1;
	int factor, mask, idx, slot, threshold;

	/* round the table size up to a power of 2 larger than bn */
	while (nbuckets < bn + 1) {
		nbuckets *= 2;
	}

	/* prefer a sparse table (4x), halving the request on failure */
	factor = 4;
	while (factor) {
		table = (struct pos *)calloc(nbuckets,
		                             factor * sizeof(struct pos));
		if (table) {
			break;
		}
		factor /= 2;
	}
	if (!table) {
		return 0;
	}

	/* the slot count is a power of 2, so this doubles as a bit mask */
	mask = nbuckets * factor - 1;

	/* mark every slot as empty */
	for (idx = 0; idx <= mask; idx++) {
		table[idx].pos = -1;
		table[idx].len = 0;
	}

	/* insert each line of b at the head of its class chain */
	for (idx = 0; idx < bn; idx++) {
		/* linear probing until an empty slot or an equal line */
		slot = b[idx].hash & mask;
		while (table[slot].pos != -1 &&
		       cmp(b + idx, b + table[slot].pos)) {
			slot = (slot + 1) & mask;
		}

		b[idx].n = table[slot].pos;
		b[idx].e = slot;
		table[slot].pos = idx;
		table[slot].len++; /* popularity of this class */
	}

	/* classes with more members than this are treated as too popular */
	if (bn >= 31000) {
		threshold = bn / 1000;
	} else {
		threshold = 1000000 / (bn + 1);
	}

	/* attach each line of a to its class in b */
	for (idx = 0; idx < an; idx++) {
		slot = a[idx].hash & mask;
		while (table[slot].pos != -1 &&
		       cmp(a + idx, b + table[slot].pos)) {
			slot = (slot + 1) & mask;
		}

		a[idx].e = slot; /* lets later passes compare classes cheaply */
		a[idx].n = (table[slot].len <= threshold) ? table[slot].pos : -1;
	}

	/* the table is only needed while building the links */
	free(table);
	return 1;
}
/*
 * Find the longest run of matching lines between a[a1..a2) and b[b1..b2).
 *
 * a, b:     line arrays; the .n links and .e equivalence classes read
 *           here are the ones filled in by equatelines().
 * pos:      scratch array indexed by position in b. For each b index j
 *           visited, pos[j].pos is set to the a index matched against it
 *           and pos[j].len to the length of the run ending there.
 * omi, omj: receive the start of the chosen run in a and in b.
 *
 * Returns the length of the run. When 0 is returned, *omi and *omj hold
 * the a1 and b1 values that were passed in.
 */
static int longest_match(struct bdiff_line *a, struct bdiff_line *b,
struct pos *pos, int a1, int a2, int b1, int b2,
int *omi, int *omj)
{
/* mi/mj: last line of the best run so far, mk: its length */
int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf;
/* window our search on large regions to better bound
worst-case performance. by choosing a window at the end, we
reduce skipping overhead on the b chains. */
if (a2 - a1 > 30000)
a1 = a2 - 30000;
/* midpoints of both ranges, used below to break ties */
half = (a1 + a2 - 1) / 2;
bhalf = (b1 + b2 - 1) / 2;
for (i = a1; i < a2; i++) {
/* skip all lines in b after the current block */
for (j = a[i].n; j >= b2; j = b[j].n)
;
/* loop through all lines match a[i] in b */
for (; j >= b1; j = b[j].n) {
/* does this extend an earlier match? */
for (k = 1; j - k >= b1 && i - k >= a1; k++) {
/* reached an earlier match? */
if (pos[j - k].pos == i - k) {
/* reuse the run length recorded for that earlier match */
k += pos[j - k].len;
break;
}
/* previous line mismatch? */
if (a[i - k].e != b[j - k].e)
break;
}
/* remember the run ending at (i, j) for later iterations */
pos[j].pos = i;
pos[j].len = k;
/* best match so far? we prefer matches closer
to the middle to balance recursion */
if (k > mk) {
/* a longer match */
mi = i;
mj = j;
mk = k;
} else if (k == mk) {
if (i > mi && i <= half && j > b1) {
/* same match but closer to half */
mi = i;
mj = j;
} else if (i == mi && (mj > bhalf || i == a1)) {
/* same i but best earlier j */
mj = j;
}
}
}
}
/* mi/mj were tracked as the last line of the run; convert to its start */
if (mk) {
mi = mi - mk + 1;
mj = mj - mk + 1;
}
/* expand match to include subsequent popular lines */
while (mi + mk < a2 && mj + mk < b2 && a[mi + mk].e == b[mj + mk].e)
mk++;
*omi = mi;
*omj = mj;
return mk;
}
/*
 * Append the matching blocks of a[a1..a2) and b[b1..b2) to the hunk list
 * whose current tail is l.
 *
 * The longest match in the region is located first; the region before
 * it is handled by a recursive call and the region after it by looping,
 * so hunks are appended in increasing order of position.
 *
 * Returns the new tail of the list, or NULL if a hunk could not be
 * allocated. Hunks linked before the failure stay attached to the list.
 */
static struct bdiff_hunk *recurse(struct bdiff_line *a, struct bdiff_line *b,
                                  struct pos *pos, int a1, int a2, int b1,
                                  int b2, struct bdiff_hunk *l)
{
	for (;;) {
		struct bdiff_hunk *node;
		int starta, startb, runlen;

		/* find the longest match in this chunk */
		runlen = longest_match(a, b, pos, a1, a2, b1, b2, &starta,
		                       &startb);
		if (runlen == 0) {
			return l;
		}

		/* everything before the match goes first */
		l = recurse(a, b, pos, a1, starta, b1, startb, l);
		if (l == NULL) {
			return NULL;
		}

		/* then the match itself becomes the new tail */
		node = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
		l->next = node;
		if (node == NULL) {
			return NULL;
		}
		node->a1 = starta;
		node->a2 = starta + runlen;
		node->b1 = startb;
		node->b2 = startb + runlen;
		node->next = NULL;
		l = node;

		/* continue with the part after the match instead of recursing */
		a1 = starta + runlen;
		b1 = startb + runlen;
	}
}
/*
 * Compute the list of matching blocks between the line arrays a (an
 * lines) and b (bn lines).
 *
 * The hunks are appended after `base`, which acts as a list head: the
 * first real hunk is base->next. The list always ends with a zero-length
 * sentinel hunk at (an, bn).
 *
 * Returns the number of hunks linked from base->next (sentinel included),
 * or -1 if any allocation fails. On failure, hunks that were already
 * linked stay attached to base so the caller can release them.
 */
int bdiff_diff(struct bdiff_line *a, int an, struct bdiff_line *b, int bn,
               struct bdiff_hunk *base)
{
	struct bdiff_hunk *curr;
	struct pos *pos;
	int count = 0;

	/*
	 * Allocation failures are reported to the caller instead of
	 * falling through, where they would look like "no matching
	 * blocks" rather than an error.
	 */
	if (!equatelines(a, an, b, bn)) {
		return -1;
	}
	pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
	if (!pos) {
		return -1;
	}

	/* generate the matching block list */
	curr = recurse(a, b, pos, 0, an, 0, bn, base);

	/* pos is scratch space for recurse() only; release it on every
	   path, including the error returns below */
	free(pos);
	if (!curr) {
		return -1;
	}

	/* sentinel end hunk */
	curr->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
	if (!curr->next) {
		return -1;
	}
	curr = curr->next;
	curr->a1 = curr->a2 = an;
	curr->b1 = curr->b2 = bn;
	curr->next = NULL;

	/* normalize the hunk list, try to push each hunk towards the end */
	for (curr = base->next; curr; curr = curr->next) {
		struct bdiff_hunk *next = curr->next;

		if (!next) {
			break;
		}

		/* only hunks that touch their successor on one side can be
		   shifted; move lines over one at a time while they match */
		if (curr->a2 == next->a1 || curr->b2 == next->b1) {
			while (curr->a2 < an && curr->b2 < bn &&
			       next->a1 < next->a2 && next->b1 < next->b2 &&
			       !cmp(a + curr->a2, b + curr->b2)) {
				curr->a2++;
				next->a1++;
				curr->b2++;
				next->b1++;
			}
		}
	}

	for (curr = base->next; curr; curr = curr->next) {
		count++;
	}
	return count;
}
/*
 * Release every hunk in the singly linked list starting at l.
 * Passing NULL is allowed and does nothing.
 */
void bdiff_freehunks(struct bdiff_hunk *l)
{
	while (l != NULL) {
		/* read the successor before the node is released */
		struct bdiff_hunk *following = l->next;

		free(l);
		l = following;
	}
}