# HG changeset patch
# User mpm@selenic.com
# Date 2005-05-03 21:16:10
# Node ID 9117c6561b0bd7792fa13b50d28239d51b78e51f
# Parent  0000000000000000000000000000000000000000
Add back links from file revisions to changeset revisions
Add simple transaction support
Add hg verify
Improve caching in revlog
Fix a bunch of bugs
Self-hosting now that the metadata is close to finalized

diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: mercurial
+Version: 0.4c
+Summary: scalable distributed SCM
+Home-page: http://selenic.com/mercurial
+Author: Matt Mackall
+Author-email: mpm@selenic.com
+License: GNU GPL
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/README b/README
new file mode 100644
--- /dev/null
+++ b/README
@@ -0,0 +1,80 @@
+Setting up Mercurial in your home directory:
+
+ Note: Debian fails to include bits of distutils, so you'll need
+ python-dev to install. Alternately, shove everything somewhere in
+ your path.
+
+ $ tar xvzf mercurial-<ver>.tar.gz
+ $ cd mercurial-<ver>
+ $ python setup.py install --home ~
+ $ export PYTHONPATH=${HOME}/lib/python  # add this to your .bashrc
+ $ export HGMERGE=tkmerge                # customize this
+ $ hg                                    # test installation, show help
+
+ If you get complaints about missing modules, you probably haven't set
+ PYTHONPATH correctly.
+
+ You may also want to install psyco, the python specializing compiler.
+ It makes commits more than twice as fast. The relevant Debian package
+ is python-psyco.
+
+Setting up a Mercurial project:
+
+ $ cd linux/
+ $ hg init       # creates .hg
+ $ hg status     # show changes between repo and working dir
+ $ hg diff       # generate a unidiff
+ $ hg addremove  # add all unknown files and remove all missing files
+ $ hg commit     # commit all changes, edit changelog entry
+
+ Mercurial will look for a file named .hgignore in the root of your
+ repository that contains a set of regular expressions to ignore in
+ file paths.
+
+Mercurial commands:
+
+ $ hg history          # show changesets
+ $ hg log Makefile     # show commits per file
+ $ hg checkout         # check out the tip revision
+ $ hg checkout <hash>  # check out a specified changeset
+ $ hg add foo          # add a new file for the next commit
+ $ hg remove bar       # mark a file as removed
+ $ hg verify           # check repo integrity
+
+Branching and merging:
+
+ $ cd ..
+ $ mkdir linux-work
+ $ cd linux-work
+ $ hg branch ../linux      # create a new branch
+ $ hg checkout             # populate the working directory
+ $ <make changes>
+ $ hg commit
+ $ cd ../linux
+ $ hg merge ../linux-work  # pull changesets from linux-work
+
+Importing patches:
+
+ Fast:
+   $ patch < ../p/foo.patch
+   $ hg addremove
+   $ hg commit
+
+ Faster:
+   $ patch < ../p/foo.patch
+   $ hg commit `lsdiff -p1 ../p/foo.patch`
+
+ Fastest:
+   $ cat ../p/patchlist | xargs hg import -p1 -b ../p
+
+Network support (highly experimental):
+
+ # export your .hg directory as a directory on your webserver
+ foo$ ln -s .hg ~/public_html/hg-linux
+
+ # merge changes from a remote machine
+ bar$ hg merge http://foo/~user/hg-linux
+
+ This is just a proof of concept of grabbing byte ranges, and is not
+ expected to perform well.
+
diff --git a/hg b/hg
new file mode 100644
--- /dev/null
+++ b/hg
@@ -0,0 +1,255 @@
+#!/usr/bin/env python
+#
+# mercurial - a minimal scalable distributed SCM
+# v0.4c "oedipa maas"
+#
+# Copyright 2005 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
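+#
+# This script is a thin command dispatcher: fancyopts parses the
+# global options, the repository is opened (or created for "init"),
+# and the command name is handled by the if/elif chain below.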
+
+# the psyco compiler makes commits about twice as fast
+try:
+    import psyco
+    psyco.full()
+except ImportError:
+    pass
+
+import sys, os
+from mercurial import hg, mdiff, fancyopts
+
+options = {}
+opts = [('v', 'verbose', None, 'verbose'),
+        ('d', 'debug', None, 'debug')]
+
+args = fancyopts.fancyopts(sys.argv[1:], opts, options,
+                           'hg [options] <command> [command options] [files]')
+
+try:
+    cmd = args[0]
+    args = args[1:]
+except IndexError:
+    cmd = ""
+
+ui = hg.ui(options["verbose"], options["debug"])
+
+if cmd == "init":
+    repo = hg.repository(ui, ".", create=1)
+    sys.exit(0)
+elif cmd == "branch" or cmd == "clone":
+    os.system("cp -al %s/.hg .hg" % args[0])
+    sys.exit(0)
+else:
+    repo = hg.repository(ui=ui)
+
+if cmd == "checkout" or cmd == "co":
+    node = repo.changelog.tip()
+    if len(args):
+        node = repo.changelog.node(int(args[0]))
+    repo.checkout(node)
+
+elif cmd == "add":
+    repo.add(args)
+
+elif cmd == "remove" or cmd == "rm" or cmd == "del" or cmd == "delete":
+    repo.remove(args)
+
+elif cmd == "commit" or cmd == "checkin" or cmd == "ci":
+    if len(args) > 0:
+        repo.commit(args)
+    else:
+        repo.commit()
+
+elif cmd == "import" or cmd == "patch":
+    ioptions = {}
+    opts = [('p', 'strip', 1, 'path strip'),
+            ('b', 'base', "", 'base path')]
+
+    args = fancyopts.fancyopts(args, opts, ioptions,
+                               'hg import [options] <patches>')
+    d = ioptions["base"]
+    strip = ioptions["strip"]
+
+    for patch in args:
+        ui.status("applying %s\n" % patch)
+        pf = d + patch
+        os.system("patch -p%d < %s > /dev/null" % (strip, pf))
+        f = os.popen("lsdiff --strip %d %s" % (strip, pf))
+        files = f.read().splitlines()
+        f.close()
+        repo.commit(files)
+
+elif cmd == "status":
+    (c, a, d) = repo.diffdir(repo.root)
+    for f in c: print "C", f
+    for f in a: print "?", f
+    for f in d: print "R", f
+
+elif cmd == "diff":
+    mmap = {}
+    if repo.current:
+        change = repo.changelog.read(repo.current)
+        mmap = repo.manifest.read(change[0])
+
+    (c, a, d) = repo.diffdir(repo.root)
+    for f in c:
+        to = repo.file(f).read(mmap[f])
+        tn = file(f).read()
+        sys.stdout.write(mdiff.unidiff(to, tn, f))
+    for f in a:
+        to = ""
+        tn = file(f).read()
+        sys.stdout.write(mdiff.unidiff(to, tn, f))
+    for f in d:
+        to = repo.file(f).read(mmap[f])
+        tn = ""
+        sys.stdout.write(mdiff.unidiff(to, tn, f))
+
+elif cmd == "addremove":
+    (c, a, d) = repo.diffdir(repo.root)
+    repo.add(a)
+    repo.remove(d)
+
+elif cmd == "history":
+    for i in range(repo.changelog.count()):
+        n = repo.changelog.node(i)
+        changes = repo.changelog.read(n)
+        (p1, p2) = repo.changelog.parents(n)
+        (h, h1, h2) = map(hg.hex, (n, p1, p2))
+        (i1, i2) = map(repo.changelog.rev, (p1, p2))
+        print "rev:      %4d:%s" % (i, h)
+        print "parents:  %4d:%s" % (i1, h1)
+        if i2: print "          %4d:%s" % (i2, h2)
+        print "manifest: %4d:%s" % (repo.manifest.rev(changes[0]),
+                                    hg.hex(changes[0]))
+        print "user:", changes[1]
+        print "files:", len(changes[3])
+        print "description:"
+        print changes[4]
+
+elif cmd == "log":
+    if args:
+        r = repo.file(args[0])
+        for i in range(r.count()):
+            n = r.node(i)
+            (p1, p2) = r.parents(n)
+            (h, h1, h2) = map(hg.hex, (n, p1, p2))
+            (i1, i2) = map(r.rev, (p1, p2))
+            cr = r.linkrev(n)
+            cn = hg.hex(repo.changelog.node(cr))
+            print "rev:       %4d:%s" % (i, h)
+            print "changeset: %4d:%s" % (cr, cn)
+            print "parents:   %4d:%s" % (i1, h1)
+            if i2: print "           %4d:%s" % (i2, h2)
+    else:
+        print "missing filename"
+
+elif cmd == "dump":
+    if args:
+        r = repo.file(args[0])
+        n = r.tip()
+        if len(args) > 1: n = hg.bin(args[1])
+        sys.stdout.write(r.read(n))
+    else:
+        print "missing filename"
+
+elif cmd == "dumpmanifest":
+    n = repo.manifest.tip()
+    if len(args) > 0:
+        n = hg.bin(args[0])
+    m = repo.manifest.read(n)
+    files = m.keys()
+    files.sort()
+
+    for f in files:
+        print hg.hex(m[f]), f
+
+elif cmd == "merge":
+    if args:
+        other = hg.repository(ui, args[0])
+        repo.merge(other)
+    else:
+        print "missing source repository"
+
+elif cmd == "verify":
+    filelinkrevs = {}
+    filenodes = {}
+    manifestchangeset = {}
+    changesets = revisions = files = 0
+
+    print "checking changesets"
+    for i in range(repo.changelog.count()):
+        changesets += 1
+        n = repo.changelog.node(i)
+        changes = repo.changelog.read(n)
+        manifestchangeset[changes[0]] = n
+        for f in changes[3]:
+            revisions += 1
+            filelinkrevs.setdefault(f, []).append(i)
+
+    print "checking manifests"
+    for i in range(repo.manifest.count()):
+        n = repo.manifest.node(i)
+        ca = repo.changelog.node(repo.manifest.linkrev(n))
+        cc = manifestchangeset[n]
+        if ca != cc:
+            print "manifest %s points to %s, not %s" % \
+                  (hg.hex(n), hg.hex(ca), hg.hex(cc))
+        m = repo.manifest.read(n)
+        for f, fn in m.items():
+            filenodes.setdefault(f, {})[fn] = 1
+
+    print "crosschecking files in changesets and manifests"
+    for f in filenodes:
+        if f not in filelinkrevs:
+            print "file %s in manifest but not in changesets" % f
+
+    for f in filelinkrevs:
+        if f not in filenodes:
+            print "file %s in changeset but not in manifest" % f
+
+    print "checking files"
+    for f in filenodes:
+        files += 1
+        fl = repo.file(f)
+        nodes = {"\0"*20: 1}
+        for i in range(fl.count()):
+            n = fl.node(i)
+            if n not in filenodes[f]:
+                print "%s:%s not in manifests" % (f, hg.hex(n))
+            if fl.linkrev(n) not in filelinkrevs[f]:
+                print "%s:%s points to unexpected changeset rev %d" \
+                      % (f, hg.hex(n), fl.linkrev(n))
+            t = fl.read(n)
+            (p1, p2) = fl.parents(n)
+            if p1 not in nodes:
+                print "%s:%s unknown parent 1 %s" % (f, hg.hex(n), hg.hex(p1))
+            if p2 not in nodes:
+                print "%s:%s unknown parent 2 %s" % (f, hg.hex(n), hg.hex(p2))
+
+            nodes[n] = 1
+
+    print "%d files, %d changesets, %d total revisions" % (files, changesets,
+                                                           revisions)
+
+else:
+    print """\
+unknown command
+
+ commands:
+
+ init                  create a new repository in this directory
+ branch <path>         create a branch of <path> in this directory
+ merge <path>          merge changes from <path> into local repository
+ checkout [changeset]  checkout the latest or given changeset
+ status                show new, missing, and changed files in working dir
+ add [files...]        add the given files in the next commit
+ remove [files...]     remove the given files in the next commit
+ addremove             add all new files, delete all missing files
+ commit                commit all changes to the repository
+ history               show changeset history
+ log <file>            show revision history of a single file
+ dump <file> [rev]     dump the latest or given revision of a file
+ dumpmanifest [rev]    dump the latest or given revision of the manifest
+"""
+    sys.exit(1)
diff --git a/mercurial/__init__.py b/mercurial/__init__.py
new file mode 100644
diff --git a/mercurial/byterange.py b/mercurial/byterange.py
new file mode 100644
--- /dev/null
+++ b/mercurial/byterange.py
@@ -0,0 +1,452 @@
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+#    Free Software Foundation, Inc.,
+#    59 Temple Place, Suite 330,
+#    Boston, MA 02111-1307 USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+
+# $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
+
+import os
+import stat
+import urllib
+import urllib2
+import rfc822
+
+try:
+    from cStringIO import StringIO
+except ImportError, msg:
+    from StringIO import StringIO
+
+class RangeError(IOError):
+    """Error raised when an unsatisfiable range is requested."""
+    pass
+
+class HTTPRangeHandler(urllib2.BaseHandler):
+    """Handler that enables HTTP Range headers.
+
+    This was extremely simple. The Range header is an HTTP feature to
+    begin with, so all this class does is tell urllib2 that the
+    "206 Partial Content" response from the HTTP server is what we
+    expected.
+
+    Example:
+        import urllib2
+        import byterange
+
+        range_handler = byterange.HTTPRangeHandler()
+        opener = urllib2.build_opener(range_handler)
+
+        # install it
+        urllib2.install_opener(opener)
+
+        # create Request and set Range header
+        req = urllib2.Request('http://www.python.org/')
+        req.add_header('Range', 'bytes=30-50')
+        f = urllib2.urlopen(req)
+    """
+
+    def http_error_206(self, req, fp, code, msg, hdrs):
+        # 206 Partial Content Response
+        r = urllib.addinfourl(fp, hdrs, req.get_full_url())
+        r.code = code
+        r.msg = msg
+        return r
+
+    def http_error_416(self, req, fp, code, msg, hdrs):
+        # HTTP's Range Not Satisfiable error
+        raise RangeError('Requested Range Not Satisfiable')
+
+class RangeableFileObject:
+    """File object wrapper to enable raw range handling.
+    This was implemented primarily for handling range
+    specifications for file:// urls. This object effectively makes
+    a file object look like it consists only of a range of bytes in
+    the stream.
+
+    Examples:
+        # expose 10 bytes, starting at byte position 20, from
+        # /etc/passwd.
+        >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
+        # seek seeks within the range (to position 23 in this case)
+        >>> fo.seek(3)
+        # tell tells where you are _within the range_ (position 3 in
+        # this case)
+        >>> fo.tell()
+        # read EOFs if an attempt is made to read past the last
+        # byte in the range. the following will return only 7 bytes.
+        >>> fo.read(30)
+    """
+
+    def __init__(self, fo, rangetup):
+        """Create a RangeableFileObject.
+        fo       -- a file like object. only the read() method need be
+                    supported but supporting an optimized seek() is
+                    preferable.
+        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
+                    to work over.
+        The file object provided is assumed to be at byte offset 0.
+        """
+        self.fo = fo
+        (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
+        self.realpos = 0
+        self._do_seek(self.firstbyte)
+
+    def __getattr__(self, name):
+        """This effectively allows us to wrap at the instance level.
+        Any attribute not found in _this_ object will be searched for
+        in self.fo. This includes methods."""
+        if hasattr(self.fo, name):
+            return getattr(self.fo, name)
+        raise AttributeError, name
+
+    def tell(self):
+        """Return the position within the range.
+        This is different from fo.seek in that position 0 is the
+        first byte position of the range tuple. For example, if
+        this object was created with a range tuple of (500,899),
+        tell() will return 0 when at byte position 500 of the file.
+ """ + return (self.realpos - self.firstbyte) + + def seek(self,offset,whence=0): + """Seek within the byte range. + Positioning is identical to that described under tell(). + """ + assert whence in (0, 1, 2) + if whence == 0: # absolute seek + realoffset = self.firstbyte + offset + elif whence == 1: # relative seek + realoffset = self.realpos + offset + elif whence == 2: # absolute from end of file + # XXX: are we raising the right Error here? + raise IOError('seek from end of file not supported.') + + # do not allow seek past lastbyte in range + if self.lastbyte and (realoffset >= self.lastbyte): + realoffset = self.lastbyte + + self._do_seek(realoffset - self.realpos) + + def read(self, size=-1): + """Read within the range. + This method will limit the size read based on the range. + """ + size = self._calc_read_size(size) + rslt = self.fo.read(size) + self.realpos += len(rslt) + return rslt + + def readline(self, size=-1): + """Read lines within the range. + This method will limit the size read based on the range. + """ + size = self._calc_read_size(size) + rslt = self.fo.readline(size) + self.realpos += len(rslt) + return rslt + + def _calc_read_size(self, size): + """Handles calculating the amount of data to read based on + the range. + """ + if self.lastbyte: + if size > -1: + if ((self.realpos + size) >= self.lastbyte): + size = (self.lastbyte - self.realpos) + else: + size = (self.lastbyte - self.realpos) + return size + + def _do_seek(self,offset): + """Seek based on whether wrapped object supports seek(). + offset is relative to the current position (self.realpos). + """ + assert offset >= 0 + if not hasattr(self.fo, 'seek'): + self._poor_mans_seek(offset) + else: + self.fo.seek(self.realpos + offset) + self.realpos+= offset + + def _poor_mans_seek(self,offset): + """Seek by calling the wrapped file objects read() method. + This is used for file like objects that do not have native + seek support. The wrapped objects read() method is called + to manually seek to the desired position. + offset -- read this number of bytes from the wrapped + file object. + raise RangeError if we encounter EOF before reaching the + specified offset. + """ + pos = 0 + bufsize = 1024 + while pos < offset: + if (pos + bufsize) > offset: + bufsize = offset - pos + buf = self.fo.read(bufsize) + if len(buf) != bufsize: + raise RangeError('Requested Range Not Satisfiable') + pos+= bufsize + +class FileRangeHandler(urllib2.FileHandler): + """FileHandler subclass that adds Range support. + This class handles Range headers exactly like an HTTP + server would. 
+    """
+    def open_local_file(self, req):
+        import mimetypes
+        import mimetools
+        host = req.get_host()
+        file = req.get_selector()
+        localfile = urllib.url2pathname(file)
+        stats = os.stat(localfile)
+        size = stats[stat.ST_SIZE]
+        modified = rfc822.formatdate(stats[stat.ST_MTIME])
+        mtype = mimetypes.guess_type(file)[0]
+        if host:
+            host, port = urllib.splitport(host)
+            if port or socket.gethostbyname(host) not in self.get_names():
+                raise URLError('file not on local host')
+        fo = open(localfile,'rb')
+        brange = req.headers.get('Range',None)
+        brange = range_header_to_tuple(brange)
+        assert brange != ()
+        if brange:
+            (fb,lb) = brange
+            if lb == '': lb = size
+            if fb < 0 or fb > size or lb > size:
+                raise RangeError('Requested Range Not Satisfiable')
+            size = (lb - fb)
+            fo = RangeableFileObject(fo, (fb,lb))
+        headers = mimetools.Message(StringIO(
+            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
+            (mtype or 'text/plain', size, modified)))
+        return urllib.addinfourl(fo, headers, 'file:'+file)
+
+
+# FTP Range Support
+# Unfortunately, a large amount of base FTP code had to be copied
+# from urllib and urllib2 in order to insert the FTP REST command.
+# Code modifications for range support have been commented as
+# follows:
+# -- range support modifications start/end here
+
+from urllib import splitport, splituser, splitpasswd, splitattr, \
+     unquote, addclosehook, addinfourl
+from urllib2 import URLError
+import ftplib
+import socket
+import sys
+import mimetypes
+import mimetools
+
+class FTPRangeHandler(urllib2.FTPHandler):
+    def ftp_open(self, req):
+        host = req.get_host()
+        if not host:
+            raise IOError, ('ftp error', 'no host given')
+        host, port = splitport(host)
+        if port is None:
+            port = ftplib.FTP_PORT
+
+        # username/password handling
+        user, host = splituser(host)
+        if user:
+            user, passwd = splitpasswd(user)
+        else:
+            passwd = None
+        host = unquote(host)
+        user = unquote(user or '')
+        passwd = unquote(passwd or '')
+
+        try:
+            host = socket.gethostbyname(host)
+        except socket.error, msg:
+            raise URLError(msg)
+        path, attrs = splitattr(req.get_selector())
+        dirs = path.split('/')
+        dirs = map(unquote, dirs)
+        dirs, file = dirs[:-1], dirs[-1]
+        if dirs and not dirs[0]:
+            dirs = dirs[1:]
+        try:
+            fw = self.connect_ftp(user, passwd, host, port, dirs)
+            type = file and 'I' or 'D'
+            for attr in attrs:
+                attr, value = splitattr(attr)
+                if attr.lower() == 'type' and \
+                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
+                    type = value.upper()
+
+            # -- range support modifications start here
+            rest = None
+            range_tup = range_header_to_tuple(req.headers.get('Range',None))
+            assert range_tup != ()
+            if range_tup:
+                (fb,lb) = range_tup
+                if fb > 0: rest = fb
+            # -- range support modifications end here
+
+            fp, retrlen = fw.retrfile(file, type, rest)
+
+            # -- range support modifications start here
+            if range_tup:
+                (fb,lb) = range_tup
+                if lb == '':
+                    if retrlen is None or retrlen == 0:
+                        raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
+                    lb = retrlen
+                    retrlen = lb - fb
+                    if retrlen < 0:
+                        # beginning of range is larger than file
+                        raise RangeError('Requested Range Not Satisfiable')
+                else:
+                    retrlen = lb - fb
+                    fp = RangeableFileObject(fp, (0,retrlen))
+            # -- range support modifications end here
+
+            headers = ""
+            mtype = mimetypes.guess_type(req.get_full_url())[0]
+            if mtype:
+                headers += "Content-Type: %s\n" % mtype
+            if retrlen is not None and retrlen >= 0:
+                headers += "Content-Length: %d\n" % retrlen
+            sf = StringIO(headers)
+            headers = mimetools.Message(sf)
+            return addinfourl(fp, headers, req.get_full_url())
+        except ftplib.all_errors, msg:
+            raise IOError, ('ftp error', msg), sys.exc_info()[2]
+
+    def connect_ftp(self, user, passwd, host, port, dirs):
+        fw = ftpwrapper(user, passwd, host, port, dirs)
+        return fw
+
+class ftpwrapper(urllib.ftpwrapper):
+    # range support note:
+    # this ftpwrapper code is copied directly from
+    # urllib. The only enhancement is to add the rest
+    # argument and pass it on to ftp.ntransfercmd
+    def retrfile(self, file, type, rest=None):
+        self.endtransfer()
+        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
+        else: cmd = 'TYPE ' + type; isdir = 0
+        try:
+            self.ftp.voidcmd(cmd)
+        except ftplib.all_errors:
+            self.init()
+            self.ftp.voidcmd(cmd)
+        conn = None
+        if file and not isdir:
+            # Use nlst to see if the file exists at all
+            try:
+                self.ftp.nlst(file)
+            except ftplib.error_perm, reason:
+                raise IOError, ('ftp error', reason), sys.exc_info()[2]
+            # Restore the transfer mode!
+            self.ftp.voidcmd(cmd)
+            # Try to retrieve as a file
+            try:
+                cmd = 'RETR ' + file
+                conn = self.ftp.ntransfercmd(cmd, rest)
+            except ftplib.error_perm, reason:
+                if str(reason)[:3] == '501':
+                    # workaround for REST not supported error
+                    fp, retrlen = self.retrfile(file, type)
+                    fp = RangeableFileObject(fp, (rest,''))
+                    return (fp, retrlen)
+                elif str(reason)[:3] != '550':
+                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
+        if not conn:
+            # Set transfer mode to ASCII!
+            self.ftp.voidcmd('TYPE A')
+            # Try a directory listing
+            if file: cmd = 'LIST ' + file
+            else: cmd = 'LIST'
+            conn = self.ftp.ntransfercmd(cmd)
+        self.busy = 1
+        # Pass back both a suitably decorated object and a retrieval length
+        return (addclosehook(conn[0].makefile('rb'),
+                             self.endtransfer), conn[1])
+
+
+####################################################################
+# Range Tuple Functions
+# XXX: These range tuple functions might go better in a class.
+
+_rangere = None
+def range_header_to_tuple(range_header):
+    """Get a (firstbyte,lastbyte) tuple from a Range header value.
+
+    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
+    function pulls the firstbyte and lastbyte values and returns
+    a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
+    the header value, it is returned as an empty string in the
+    tuple.
+
+    Return None if range_header is None
+    Return () if range_header does not conform to the range spec
+    pattern.
+
+    """
+    global _rangere
+    if range_header is None: return None
+    if _rangere is None:
+        import re
+        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
+    match = _rangere.match(range_header)
+    if match:
+        tup = range_tuple_normalize(match.group(1,2))
+        if tup and tup[1]:
+            tup = (tup[0],tup[1]+1)
+        return tup
+    return ()
+
+def range_tuple_to_header(range_tup):
+    """Convert a range tuple to a Range header value.
+    Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
+    if no range is needed.
+    """
+    if range_tup is None: return None
+    range_tup = range_tuple_normalize(range_tup)
+    if range_tup:
+        if range_tup[1]:
+            range_tup = (range_tup[0],range_tup[1] - 1)
+        return 'bytes=%s-%s' % range_tup
+
+def range_tuple_normalize(range_tup):
+    """Normalize a (first_byte,last_byte) range tuple.
+    Return a tuple whose first element is guaranteed to be an int
+    and whose second element will be '' (meaning: the last byte) or
+    an int. Finally, return None if the normalized tuple == (0,'')
+    as that is equivalent to retrieving the entire file.
+ """ + if range_tup is None: return None + # handle first byte + fb = range_tup[0] + if fb in (None,''): fb = 0 + else: fb = int(fb) + # handle last byte + try: lb = range_tup[1] + except IndexError: lb = '' + else: + if lb is None: lb = '' + elif lb != '': lb = int(lb) + # check if range is over the entire file + if (fb,lb) == (0,''): return None + # check that the range is valid + if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb)) + return (fb,lb) + diff --git a/mercurial/fancyopts.py b/mercurial/fancyopts.py new file mode 100644 --- /dev/null +++ b/mercurial/fancyopts.py @@ -0,0 +1,51 @@ +import sys, os, getopt + +def fancyopts(args, options, state, syntax=''): + long=[] + short='' + map={} + dt={} + + def help(state, opt, arg, options=options, syntax=syntax): + print "Usage: ", syntax + + for s, l, d, c in options: + opt=' ' + if s: opt = opt + '-' + s + ' ' + if l: opt = opt + '--' + l + ' ' + if d: opt = opt + '(' + str(d) + ')' + print opt + if c: print ' %s' % c + sys.exit(0) + + if len(args) == 0: + help(state, None, args) + + options=[('h', 'help', help, 'Show usage info')] + options + + for s, l, d, c in options: + map['-'+s] = map['--'+l]=l + state[l] = d + dt[l] = type(d) + if not d is None and not type(d) is type(help): s, l=s+':', l+'=' + if s: short = short + s + if l: long.append(l) + + if os.environ.has_key("HG_OPTS"): + args = os.environ["HG_OPTS"].split() + args + + try: + opts, args = getopt.getopt(args, short, long) + except getopt.GetoptError: + help(state, None, args) + sys.exit(-1) + + for opt, arg in opts: + if dt[map[opt]] is type(help): state[map[opt]](state,map[opt],arg) + elif dt[map[opt]] is type(1): state[map[opt]] = int(arg) + elif dt[map[opt]] is type(''): state[map[opt]] = arg + elif dt[map[opt]] is type([]): state[map[opt]].append(arg) + elif dt[map[opt]] is type(None): state[map[opt]] = 1 + + return args + diff --git a/mercurial/hg.py b/mercurial/hg.py new file mode 100644 --- /dev/null +++ b/mercurial/hg.py @@ -0,0 +1,573 @@ +# hg.py - repository classes for mercurial +# +# Copyright 2005 Matt Mackall +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. 
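+#
+# Layout note: every tracked file gets its own revlog under .hg/data/,
+# named with a base64'd SHA-1 of its path (see filelog.encodepath
+# below), alongside the shared 00manifest and 00changelog revlogs.
+# A rough sketch of the name encoding ("foo/bar.c" is illustrative):
+#
+#   s = base64.encodestring(sha.sha("foo/bar.c").digest())[:-3]
+#   s = s.replace("+", "%").replace("/", "_")
+#   # index at ".hg/data/" + s + "i", data at ".hg/data/" + s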
+ +import sys, struct, sha, socket, os, time, base64, re, urllib2 +from mercurial import byterange +from mercurial.transaction import * +from mercurial.revlog import * + +def hex(node): return binascii.hexlify(node) +def bin(node): return binascii.unhexlify(node) + +class filelog(revlog): + def __init__(self, opener, path): + s = self.encodepath(path) + revlog.__init__(self, opener, os.path.join("data", s + "i"), + os.path.join("data", s)) + + def encodepath(self, path): + s = sha.sha(path).digest() + s = base64.encodestring(s)[:-3] + s = re.sub("\+", "%", s) + s = re.sub("/", "_", s) + return s + + def read(self, node): + return self.revision(node) + def add(self, text, transaction, link, p1=None, p2=None): + return self.addrevision(text, transaction, link, p1, p2) + + def resolvedag(self, old, new, transaction, link): + """resolve unmerged heads in our DAG""" + if old == new: return None + a = self.ancestor(old, new) + if old == a: return new + return self.merge3(old, new, a, transaction, link) + + def merge3(self, my, other, base, transaction, link): + """perform a 3-way merge and append the result""" + def temp(prefix, node): + (fd, name) = tempfile.mkstemp(prefix) + f = os.fdopen(fd, "w") + f.write(self.revision(node)) + f.close() + return name + + a = temp("local", my) + b = temp("remote", other) + c = temp("parent", base) + + cmd = os.environ["HGMERGE"] + r = os.system("%s %s %s %s" % (cmd, a, b, c)) + if r: + raise "Merge failed, implement rollback!" + + t = open(a).read() + os.unlink(a) + os.unlink(b) + os.unlink(c) + return self.addrevision(t, transaction, link, my, other) + + def merge(self, other, transaction, linkseq, link): + """perform a merge and resolve resulting heads""" + (o, n) = self.mergedag(other, transaction, linkseq) + return self.resolvedag(o, n, transaction, link) + +class manifest(revlog): + def __init__(self, opener): + self.mapcache = None + self.listcache = None + self.addlist = None + revlog.__init__(self, opener, "00manifest.i", "00manifest.d") + + def read(self, node): + if self.mapcache and self.mapcache[0] == node: + return self.mapcache[1] + text = self.revision(node) + map = {} + self.listcache = text.splitlines(1) + for l in self.listcache: + (f, n) = l.split('\0') + map[f] = bin(n[:40]) + self.mapcache = (node, map) + return map + + def diff(self, a, b): + # this is sneaky, as we're not actually using a and b + if self.listcache: + return mdiff.diff(self.listcache, self.addlist, 1) + else: + return mdiff.diff(a, b) + + def add(self, map, transaction, link, p1=None, p2=None): + files = map.keys() + files.sort() + + self.addlist = ["%s\000%s\n" % (f, hex(map[f])) for f in files] + text = "".join(self.addlist) + + n = self.addrevision(text, transaction, link, p1, p2) + self.mapcache = (n, map) + self.listcache = self.addlist + + return n + +class changelog(revlog): + def __init__(self, opener): + revlog.__init__(self, opener, "00changelog.i", "00changelog.d") + + def extract(self, text): + last = text.index("\n\n") + desc = text[last + 2:] + l = text[:last].splitlines() + manifest = bin(l[0]) + user = l[1] + date = l[2] + files = l[3:] + return (manifest, user, date, files, desc) + + def read(self, node): + return self.extract(self.revision(node)) + + def add(self, manifest, list, desc, transaction, p1=None, p2=None): + try: user = os.environ["HGUSER"] + except: user = os.environ["LOGNAME"] + '@' + socket.getfqdn() + date = "%d %d" % (time.time(), time.timezone) + list.sort() + l = [hex(manifest), user, date] + list + ["", desc] + text = "\n".join(l) + 
return self.addrevision(text, transaction, self.count(), p1, p2) + + def merge3(self, my, other, base): + pass + +class dircache: + def __init__(self, opener): + self.opener = opener + self.dirty = 0 + self.map = None + def __del__(self): + if self.dirty: self.write() + def __getitem__(self, key): + try: + return self.map[key] + except TypeError: + self.read() + return self[key] + + def read(self): + if self.map is not None: return self.map + + self.map = {} + try: + st = self.opener("dircache").read() + except: return + + pos = 0 + while pos < len(st): + e = struct.unpack(">llll", st[pos:pos+16]) + l = e[3] + pos += 16 + f = st[pos:pos + l] + self.map[f] = e[:3] + pos += l + + def update(self, files): + if not files: return + self.read() + self.dirty = 1 + for f in files: + try: + s = os.stat(f) + self.map[f] = (s.st_mode, s.st_size, s.st_mtime) + except IOError: + self.remove(f) + + def taint(self, files): + if not files: return + self.read() + self.dirty = 1 + for f in files: + self.map[f] = (0, -1, 0) + + def remove(self, files): + if not files: return + self.read() + self.dirty = 1 + for f in files: + try: del self[f] + except: pass + + def clear(self): + self.map = {} + self.dirty = 1 + + def write(self): + st = self.opener("dircache", "w") + for f, e in self.map.items(): + e = struct.pack(">llll", e[0], e[1], e[2], len(f)) + st.write(e + f) + self.dirty = 0 + + def copy(self): + self.read() + return self.map.copy() + +# used to avoid circular references so destructors work +def opener(base): + p = base + def o(path, mode="r"): + f = os.path.join(p, path) + if p[:7] == "http://": + return httprangereader(f) + + if mode != "r" and os.path.isfile(f): + s = os.stat(f) + if s.st_nlink > 1: + file(f + ".tmp", "w").write(file(f).read()) + os.rename(f+".tmp", f) + + return file(f, mode) + + return o + +class repository: + def __init__(self, ui, path=None, create=0): + self.remote = 0 + if path and path[:7] == "http://": + self.remote = 1 + self.path = path + else: + if not path: + p = os.getcwd() + while not os.path.isdir(os.path.join(p, ".hg")): + p = os.path.dirname(p) + if p == "/": raise "No repo found" + path = p + self.path = os.path.join(path, ".hg") + + self.root = path + self.ui = ui + + if create: + os.mkdir(self.path) + os.mkdir(self.join("data")) + + self.opener = opener(self.path) + self.manifest = manifest(self.opener) + self.changelog = changelog(self.opener) + self.ignorelist = None + + if not self.remote: + self.dircache = dircache(self.opener) + try: + self.current = bin(self.open("current").read()) + except: + self.current = None + + def setcurrent(self, node): + self.current = node + self.opener("current", "w").write(hex(node)) + + def ignore(self, f): + if self.ignorelist is None: + self.ignorelist = [] + try: + l = open(os.path.join(self.root, ".hgignore")).readlines() + for pat in l: + self.ignorelist.append(re.compile(pat[:-1])) + except IOError: pass + for pat in self.ignorelist: + if pat.search(f): return True + return False + + def join(self, f): + return os.path.join(self.path, f) + + def file(self, f): + return filelog(self.opener, f) + + def transaction(self): + return transaction(self.opener, self.join("journal")) + + def merge(self, other): + tr = self.transaction() + changed = {} + new = {} + nextrev = seqrev = self.changelog.count() + + # helpers for back-linking file revisions to local changeset + # revisions so we can immediately get to changeset from annotate + def accumulate(text): + n = nextrev + # track which files are added in which changeset and the 
+ # corresponding _local_ changeset revision + files = self.changelog.extract(text)[3] + for f in files: + changed.setdefault(f, []).append(n) + n += 1 + + def seq(start): + while 1: + yield start + start += 1 + + def lseq(l): + for r in l: + yield r + + # begin the import/merge of changesets + self.ui.status("merging new changesets\n") + (co, cn) = self.changelog.mergedag(other.changelog, tr, + seq(seqrev), accumulate) + resolverev = self.changelog.count() + + # is there anything to do? + if co == cn: + tr.close() + return + + # do we need to resolve? + simple = (co == self.changelog.ancestor(co, cn)) + + # merge all files changed by the changesets, + # keeping track of the new tips + changelist = changed.keys() + changelist.sort() + for f in changelist: + sys.stdout.write(".") + sys.stdout.flush() + r = self.file(f) + node = r.merge(other.file(f), tr, lseq(changed[f]), resolverev) + if node: + new[f] = node + sys.stdout.write("\n") + + # begin the merge of the manifest + self.ui.status("merging manifests\n") + (mm, mo) = self.manifest.mergedag(other.manifest, tr, seq(seqrev)) + + # For simple merges, we don't need to resolve manifests or changesets + if simple: + tr.close() + return + + ma = self.manifest.ancestor(mm, mo) + + # resolve the manifest to point to all the merged files + self.ui.status("resolving manifests\n") + mmap = self.manifest.read(mm) # mine + omap = self.manifest.read(mo) # other + amap = self.manifest.read(ma) # ancestor + nmap = {} + + for f, mid in mmap.iteritems(): + if f in omap: + if mid != omap[f]: + nmap[f] = new.get(f, mid) # use merged version + else: + nmap[f] = new.get(f, mid) # they're the same + del omap[f] + elif f in amap: + if mid != amap[f]: + pass # we should prompt here + else: + pass # other deleted it + else: + nmap[f] = new.get(f, mid) # we created it + + del mmap + + for f, oid in omap.iteritems(): + if f in amap: + if oid != amap[f]: + pass # this is the nasty case, we should prompt + else: + pass # probably safe + else: + nmap[f] = new.get(f, oid) # remote created it + + del omap + del amap + + node = self.manifest.add(nmap, tr, resolverev, mm, mo) + + # Now all files and manifests are merged, we add the changed files + # and manifest id to the changelog + self.ui.status("committing merge changeset\n") + new = new.keys() + new.sort() + if co == cn: cn = -1 + + edittext = "\n"+"".join(["HG: changed %s\n" % f for f in new]) + edittext = self.ui.edit(edittext) + n = self.changelog.add(node, new, edittext, tr, co, cn) + + tr.close() + + def commit(self, update = None, text = ""): + tr = self.transaction() + + try: + remove = [ l[:-1] for l in self.opener("to-remove") ] + os.unlink(self.join("to-remove")) + + except IOError: + remove = [] + + if update == None: + update = self.diffdir(self.root)[0] + + # check in files + new = {} + linkrev = self.changelog.count() + for f in update: + try: + t = file(f).read() + except IOError: + remove.append(f) + continue + r = self.file(f) + new[f] = r.add(t, tr, linkrev) + + # update manifest + mmap = self.manifest.read(self.manifest.tip()) + mmap.update(new) + for f in remove: + del mmap[f] + mnode = self.manifest.add(mmap, tr, linkrev) + + # add changeset + new = new.keys() + new.sort() + + edittext = text + "\n"+"".join(["HG: changed %s\n" % f for f in new]) + edittext = self.ui.edit(edittext) + + n = self.changelog.add(mnode, new, edittext, tr) + tr.close() + + self.setcurrent(n) + self.dircache.update(new) + self.dircache.remove(remove) + + def checkdir(self, path): + d = os.path.dirname(path) + if not d: 
return + if not os.path.isdir(d): + self.checkdir(d) + os.mkdir(d) + + def checkout(self, node): + # checkout is really dumb at the moment + # it ought to basically merge + change = self.changelog.read(node) + mmap = self.manifest.read(change[0]) + + l = mmap.keys() + l.sort() + stats = [] + for f in l: + r = self.file(f) + t = r.revision(mmap[f]) + try: + file(f, "w").write(t) + except: + self.checkdir(f) + file(f, "w").write(t) + + self.setcurrent(node) + self.dircache.clear() + self.dircache.update(l) + + def diffdir(self, path): + dc = self.dircache.copy() + changed = [] + added = [] + + mmap = {} + if self.current: + change = self.changelog.read(self.current) + mmap = self.manifest.read(change[0]) + + for dir, subdirs, files in os.walk(self.root): + d = dir[len(self.root)+1:] + if ".hg" in subdirs: subdirs.remove(".hg") + + for f in files: + fn = os.path.join(d, f) + try: s = os.stat(fn) + except: continue + if fn in dc: + c = dc[fn] + del dc[fn] + if c[1] != s.st_size: + changed.append(fn) + elif c[0] != s.st_mode or c[2] != s.st_mtime: + t1 = file(fn).read() + t2 = self.file(fn).revision(mmap[fn]) + if t1 != t2: + changed.append(fn) + else: + if self.ignore(fn): continue + added.append(fn) + + deleted = dc.keys() + deleted.sort() + + return (changed, added, deleted) + + def add(self, list): + self.dircache.taint(list) + + def remove(self, list): + dl = self.opener("to-remove", "a") + for f in list: + dl.write(f + "\n") + +class ui: + def __init__(self, verbose=False, debug=False): + self.verbose = verbose + def write(self, *args): + for a in args: + sys.stdout.write(str(a)) + def prompt(self, msg, pat): + while 1: + sys.stdout.write(msg) + r = sys.stdin.readline()[:-1] + if re.match(pat, r): + return r + def status(self, *msg): + self.write(*msg) + def warn(self, msg): + self.write(*msg) + def note(self, msg): + if self.verbose: self.write(*msg) + def debug(self, msg): + if self.debug: self.write(*msg) + def edit(self, text): + (fd, name) = tempfile.mkstemp("hg") + f = os.fdopen(fd, "w") + f.write(text) + f.close() + + editor = os.environ.get("EDITOR", "vi") + r = os.system("%s %s" % (editor, name)) + if r: + raise "Edit failed!" 
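+
+        # read the edited text back in and strip the "HG:" status
+        # lines added above before using it as the commit message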
+ + t = open(name).read() + t = re.sub("(?m)^HG:.*\n", "", t) + + return t + + +class httprangereader: + def __init__(self, url): + self.url = url + self.pos = 0 + def seek(self, pos): + self.pos = pos + def read(self, bytes=None): + opener = urllib2.build_opener(byterange.HTTPRangeHandler()) + urllib2.install_opener(opener) + req = urllib2.Request(self.url) + end = '' + if bytes: end = self.pos + bytes + req.add_header('Range', 'bytes=%d-%s' % (self.pos, end)) + f = urllib2.urlopen(req) + return f.read() diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py new file mode 100644 --- /dev/null +++ b/mercurial/mdiff.py @@ -0,0 +1,76 @@ +#!/usr/bin/python +import difflib, struct +from cStringIO import StringIO + +def unidiff(a, b, fn): + a = a.splitlines(1) + b = b.splitlines(1) + l = difflib.unified_diff(a, b, fn, fn) + return "".join(l) + +def textdiff(a, b): + return diff(a.splitlines(1), b.splitlines(1)) + +def sortdiff(a, b): + la = lb = 0 + + while 1: + if la >= len(a) or lb >= len(b): break + if b[lb] < a[la]: + si = lb + while lb < len(b) and b[lb] < a[la] : lb += 1 + yield "insert", la, la, si, lb + elif a[la] < b[lb]: + si = la + while la < len(a) and a[la] < b[lb]: la += 1 + yield "delete", si, la, lb, lb + else: + la += 1 + lb += 1 + + si = lb + while lb < len(b): + lb += 1 + yield "insert", la, la, si, lb + + si = la + while la < len(a): + la += 1 + yield "delete", si, la, lb, lb + +def diff(a, b, sorted=0): + bin = [] + p = [0] + for i in a: p.append(p[-1] + len(i)) + + if sorted: + d = sortdiff(a, b) + else: + d = difflib.SequenceMatcher(None, a, b).get_opcodes() + + for o, m, n, s, t in d: + if o == 'equal': continue + s = "".join(b[s:t]) + bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) + + return "".join(bin) + +def patch(a, bin): + last = pos = 0 + r = [] + + while pos < len(bin): + p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) + pos += 12 + r.append(a[last:p1]) + r.append(bin[pos:pos + l]) + pos += l + last = p2 + r.append(a[last:]) + + return "".join(r) + + + + + diff --git a/mercurial/revlog.py b/mercurial/revlog.py new file mode 100644 --- /dev/null +++ b/mercurial/revlog.py @@ -0,0 +1,199 @@ +# revlog.py - storage back-end for mercurial +# +# This provides efficient delta storage with O(1) retrieve and append +# and O(changes) merge between branches +# +# Copyright 2005 Matt Mackall +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. 
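+#
+# Index format note: each index entry is a fixed-size record,
+#   >4l20s20s20s = (offset, size, base, linkrev, p1, p2, nodeid)
+# where offset/size locate the compressed delta in the data file,
+# base is the revision holding the last full snapshot, linkrev points
+# back to the changeset that introduced the revision, and p1/p2/nodeid
+# are 20-byte SHA-1 ids. A hypothetical round trip through the format:
+#
+#   e = (0, 11, 0, 0, nullid, nullid, node)
+#   assert struct.unpack(indexformat, struct.pack(indexformat, *e)) == e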
+ +import zlib, struct, sha, binascii, os, tempfile +from mercurial import mdiff + +def compress(text): + return zlib.compress(text) + +def decompress(bin): + return zlib.decompress(bin) + +def hash(text, p1, p2): + l = [p1, p2] + l.sort() + return sha.sha(l[0] + l[1] + text).digest() + +nullid = "\0" * 20 +indexformat = ">4l20s20s20s" + +class revlog: + def __init__(self, opener, indexfile, datafile): + self.indexfile = indexfile + self.datafile = datafile + self.index = [] + self.opener = opener + self.cache = None + self.nodemap = { -1: nullid, nullid: -1 } + # read the whole index for now, handle on-demand later + try: + n = 0 + i = self.opener(self.indexfile).read() + s = struct.calcsize(indexformat) + for f in range(0, len(i), s): + # offset, size, base, linkrev, p1, p2, nodeid, changeset + e = struct.unpack(indexformat, i[f:f + s]) + self.nodemap[e[6]] = n + self.index.append(e) + n += 1 + except IOError: pass + + def tip(self): return self.node(len(self.index) - 1) + def count(self): return len(self.index) + def node(self, rev): return rev < 0 and nullid or self.index[rev][6] + def rev(self, node): return self.nodemap[node] + def linkrev(self, node): return self.index[self.nodemap[node]][3] + def parents(self, node): return self.index[self.nodemap[node]][4:6] + + def start(self, rev): return self.index[rev][0] + def length(self, rev): return self.index[rev][1] + def end(self, rev): return self.start(rev) + self.length(rev) + def base(self, rev): return self.index[rev][2] + + def revisions(self, list): + # this can be optimized to do spans, etc + # be stupid for now + for r in list: + yield self.revision(r) + + def diff(self, a, b): + return mdiff.textdiff(a, b) + + def patch(self, text, patch): + return mdiff.patch(text, patch) + + def revision(self, node): + if node is nullid: return "" + if self.cache and self.cache[0] == node: return self.cache[2] + + text = None + rev = self.rev(node) + base = self.base(rev) + start = self.start(base) + end = self.end(rev) + + if self.cache and self.cache[1] >= base and self.cache[1] < rev: + base = self.cache[1] + start = self.start(base + 1) + text = self.cache[2] + last = 0 + + f = self.opener(self.datafile) + f.seek(start) + data = f.read(end - start) + + if not text: + last = self.length(base) + text = decompress(data[:last]) + + for r in range(base + 1, rev + 1): + s = self.length(r) + b = decompress(data[last:last + s]) + text = self.patch(text, b) + last = last + s + + (p1, p2) = self.parents(node) + if self.node(rev) != hash(text, p1, p2): + raise "Consistency check failed on %s:%d" % (self.datafile, rev) + + self.cache = (node, rev, text) + return text + + def addrevision(self, text, transaction, link, p1=None, p2=None): + if text is None: text = "" + if p1 is None: p1 = self.tip() + if p2 is None: p2 = nullid + + node = hash(text, p1, p2) + + n = self.count() + t = n - 1 + + if n: + start = self.start(self.base(t)) + end = self.end(t) + prev = self.revision(self.tip()) + if 0: + dd = self.diff(prev, text) + tt = self.patch(prev, dd) + if tt != text: + print prev + print text + print tt + raise "diff+patch failed" + data = compress(self.diff(prev, text)) + + # full versions are inserted when the needed deltas + # become comparable to the uncompressed text + if not n or (end + len(data) - start) > len(text) * 2: + data = compress(text) + base = n + else: + base = self.base(t) + + offset = 0 + if t >= 0: + offset = self.end(t) + + e = (offset, len(data), base, link, p1, p2, node) + + self.index.append(e) + self.nodemap[node] = n + entry 
= struct.pack(indexformat, *e) + + transaction.add(self.datafile, e[0]) + self.opener(self.datafile, "a").write(data) + transaction.add(self.indexfile, n * len(entry)) + self.opener(self.indexfile, "a").write(entry) + + self.cache = (node, n, text) + return node + + def ancestor(self, a, b): + def expand(e1, e2, a1, a2): + ne = [] + for n in e1: + (p1, p2) = self.parents(n) + if p1 in a2: return p1 + if p2 in a2: return p2 + if p1 != nullid and p1 not in a1: + a1[p1] = 1 + ne.append(p1) + if p2 != nullid and p2 not in a1: + a1[p2] = 1 + ne.append(p2) + return expand(e2, ne, a2, a1) + return expand([a], [b], {a:1}, {b:1}) + + def mergedag(self, other, transaction, linkseq, accumulate = None): + """combine the nodes from other's DAG into ours""" + old = self.tip() + i = self.count() + l = [] + + # merge the other revision log into our DAG + for r in range(other.count()): + id = other.node(r) + if id not in self.nodemap: + (xn, yn) = other.parents(id) + l.append((id, xn, yn)) + self.nodemap[id] = i + i += 1 + + # merge node date for new nodes + r = other.revisions([e[0] for e in l]) + for e in l: + t = r.next() + if accumulate: accumulate(t) + self.addrevision(t, transaction, linkseq.next(), e[1], e[2]) + + # return the unmerged heads for later resolving + return (old, self.tip()) diff --git a/mercurial/transaction.py b/mercurial/transaction.py new file mode 100644 --- /dev/null +++ b/mercurial/transaction.py @@ -0,0 +1,62 @@ +# transaction.py - simple journalling scheme for mercurial +# +# This transaction scheme is intended to gracefully handle program +# errors and interruptions. More serious failures like system crashes +# can be recovered with an fsck-like tool. As the whole repository is +# effectively log-structured, this should amount to simply truncating +# anything that isn't referenced in the changelog. +# +# Copyright 2005 Matt Mackall +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. + +import os + +class transaction: + def __init__(self, opener, journal): + self.opener = opener + self.entries = [] + self.journal = journal + + # abort here if the journal already exists + if os.path.exists(self.journal): + raise "Journal already exists!" + self.file = open(self.journal, "w") + + def __del__(self): + if self.entries: self.abort() + + def add(self, file, offset): + self.entries.append((file, offset)) + # add enough data to the journal to do the truncate + self.file.write("%s\0%d\n" % (file, offset)) + self.file.flush() + + def close(self): + self.file.close() + self.entries = [] + os.unlink(self.journal) + + def abort(self): + if not self.entries: return + + print "transaction abort!" + + for f, o in self.entries: + self.opener(f, "a").truncate(o) + + self.entries = [] + + try: + os.unlink(self.journal) + self.file.close() + except: pass + + print "rollback completed" + + def recover(self): + for l in open(self.journal).readlines(): + f, o = l.split('\0') + self.opener(f, "a").truncate(int(o)) + diff --git a/notes.txt b/notes.txt new file mode 100644 --- /dev/null +++ b/notes.txt @@ -0,0 +1,159 @@ +Some notes about Mercurial's design + +Revlogs: + +The fundamental storage type in Mercurial is a "revlog". A revlog is +the set of all revisions to a file. Each revision is either stored +compressed in its entirety or as a compressed binary delta against the +previous version. The decision of when to store a full version is made +based on how much data would be needed to reconstruct the file. 
+This lets us ensure that we never need to read huge amounts of data
+to reconstruct a file, regardless of how many revisions of it we
+store.
+
+In fact, we should always be able to do it with a single read,
+provided we know when and where to read. This is where the index comes
+in. Each revlog has an index containing a special hash (nodeid) of the
+text, hashes for its parents, and where and how much of the revlog
+data we need to read to reconstruct it. Thus, with one read of the
+index and one read of the data, we can reconstruct any version in time
+proportional to the file size.
+
+Similarly, revlogs and their indices are append-only. This means that
+adding a new version is also O(1) seeks.
+
+Generally revlogs are used to represent revisions of files, but they
+also are used to represent manifests and changesets.
+
+Manifests:
+
+A manifest is simply a list of all files in a given revision of a
+project along with the nodeids of the corresponding file revisions. So
+grabbing a given version of the project means simply looking up its
+manifest and reconstructing all the file revisions pointed to by it.
+
+Changesets:
+
+A changeset is a list of all files changed in a check-in along with a
+change description and some metadata like user and date. It also
+contains a nodeid to the relevant revision of the manifest. Changesets
+and manifests are one-to-one, but contain different data for
+convenience.
+
+Nodeids:
+
+Nodeids are unique ids that are used to represent the contents of a
+file AND its position in the project history. That is, if you change a
+file and then change it back, the result will have a different nodeid
+because it has different history. This is accomplished by including
+the parents of a given revision's nodeids with the revision's text
+when calculating the hash.
+
+Graph merging:
+
+Nodeids are implemented as they are to simplify merging. Merging a
+pair of directed acyclic graphs (aka "the family tree" of the file
+history) requires some method of determining if nodes in different
+graphs correspond. Simply comparing the contents of the node (by
+comparing text of given revisions or their hashes) can get confused by
+identical revisions in the tree.
+
+The nodeid approach makes it trivial - the hash uniquely describes a
+revision's contents and its graph position relative to the root, so
+merge is simply checking whether each nodeid in graph A is in the hash
+table of graph B. If not, we pull them in, adding them sequentially to
+the revlog.
+
+Graph resolving:
+
+Mercurial does branching by copying (or COWing) a repository and thus
+keeps everything nice and linear within a repository. However, when a
+merge of repositories (a "pull") is done, we may often have two head
+revisions in a given graph. To keep things simple, Mercurial forces
+the head revisions to be merged.
+
+It first finds the closest common ancestor of the two heads. If one is
+a child of the other, it becomes the new head. Otherwise, we call out
+to a user-specified 3-way merge tool.
+
+Merging files, manifests, and changesets:
+
+We begin by comparing changeset DAGs, pulling all nodes we don't have
+in our DAG from the other repository. As we do so, we collect a list
+of changed files to merge.
+
+Then for each file, we perform a graph merge and resolve as above.
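+
+In code terms, the merge of one DAG into another reduces to a short
+loop (a simplified sketch of revlog.mergedag from this patch; the
+link-revision bookkeeping and batched reads are omitted):
+
+    for r in range(other.count()):
+        n = other.node(r)
+        if n not in self.nodemap:            # node is new to us
+            (p1, p2) = other.parents(n)
+            self.addrevision(other.revision(n), transaction,
+                             linkseq.next(), p1, p2)
+
+Because nodeids are position-dependent hashes, the membership test
+against nodemap is all that is needed to decide what to pull.
+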
+It's important to merge files using per-file DAGs rather than just
+changeset level DAGs as this diagram illustrates:
+
+M   M1   M2
+
+AB
+ |`-------v     M2 clones M
+aB   AB         file A is changed in mainline
+ |`---v   AB'   file B is changed in M2
+ |   aB  /|     M1 clones M
+ |   ab / |     M1 changes B
+ |   ab'  |     M1 merges from M2, changes to B conflict
+ |   |   A'B'   M2 changes A
+  `---+--.|
+      |   a'B'  M2 merges from mainline, changes to A conflict
+      `--.|
+          ???   depending on which ancestor we choose, we will have
+                to redo A hand-merge, B hand-merge, or both
+                but if we look at the files independently, everything
+                is fine
+
+After we've merged files, we merge the manifest log DAG and resolve
+additions and deletions. Then we are ready to resolve the changeset
+DAG - if our merge required any changes (the new head is not a
+descendant of our tip), we must create a new changeset describing all
+of the changes needed to merge it into the tip.
+
+Merge performance:
+
+The I/O operations for performing a merge are O(changed files), not
+O(total changes) and in many cases, we needn't even unpack the deltas
+to add them to our repository (though this optimization isn't
+necessary).
+
+Rollback:
+
+Rollback is not yet implemented, but will be easy to add. When
+performing a commit or a merge, we order things so that the changeset
+entry gets added last. We keep a transaction log of the name of each
+file and its length prior to the transaction. On abort, we simply
+truncate each file to its prior length. This is one of the nice
+properties of the append-only structure of the revlogs.
+
+Remote access:
+
+Mercurial currently supports pulling from "serverless" repositories.
+Simply making the repo directory accessible via the web and pointing
+hg at it can accomplish a pull. This is relatively bandwidth efficient
+but no effort has been spent on pipelining, so it won't work
+especially well over a LAN yet.
+
+It's also quite amenable to rsync, if you don't mind keeping an intact
+copy of the master around locally.
+
+Also note that the append-only and ordering properties of the commit
+guarantee that readers will always see a repository in a consistent
+state and no special locking is necessary. As there is generally only
+one writer to an hg repository, there is in fact no exclusion
+implemented yet.
+
+
+Some comparisons to git:
+
+Most notably, Mercurial uses delta compression and repositories
+created with it will grow much more slowly over time. This also allows
+it to be much more bandwidth efficient. I expect repo sizes and sync
+speeds to be similar to or better than BK, given the use of binary
+diffs.
+
+Mercurial has roughly the same performance as git in most operations
+and is faster in others, as it keeps around more metadata. One example
+is listing and retrieving past versions of a file, which it can do
+without reading all the changesets. This metadata will also allow it
+to perform better merges as described above.
+
+
diff --git a/setup.py b/setup.py
new file mode 100644
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+
+# This is the mercurial setup script.
+# +# './setup.py install', or +# './setup.py --help' for more options + +from distutils.core import setup + +setup(name='mercurial', + version='0.4c', + author='Matt Mackall', + author_email='mpm@selenic.com', + url='http://selenic.com/mercurial', + description='scalable distributed SCM', + license='GNU GPL', + packages=['mercurial'], + scripts=['hg']) diff --git a/tkmerge b/tkmerge new file mode 100644 --- /dev/null +++ b/tkmerge @@ -0,0 +1,2 @@ +merge $1 $3 $2 || tkdiff -conflict $1 -o $1 +