##// END OF EJS Templates
hgweb: support constructing URLs from an alternate base URL...
hgweb: support constructing URLs from an alternate base URL The web.baseurl config option allows server operators to define a custom URL for hosted content. The way it works today is that hgwebdir parses this config option into URL components then updates the appropriate WSGI environment variables so the request "lies" about its details. For example, SERVER_NAME is updated to reflect the alternate base URL's hostname. The WSGI environment should not be modified because WSGI applications may want to know the original request details (for debugging, etc). This commit teaches our request parser about the existence of an alternate base URL. If defined, the advertised URL and other self-reflected paths will take the alternate base URL into account. The hgweb WSGI application didn't use web.baseurl. But hgwebdir did. We update hgwebdir to alter the environment parsing accordingly. The old code around environment manipulation has been removed. With this change, parserequestfromenv() has grown to a bit unwieldy. Now that practically everyone is using it, it is obvious that there is some unused features that can be trimmed. So look for this in follow-up commits. Differential Revision: https://phab.mercurial-scm.org/D2822

File last commit:

r36345:58c1368a default
r36916:219b2335 default
Show More
test-pathencode.py
209 lines | 6.4 KiB | text/x-python | PythonLexer
/ tests / test-pathencode.py
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 # This is a randomized test that generates different pathnames every
# time it is invoked, and tests the encoding of those pathnames.
#
# It uses a simple probabilistic model to generate valid pathnames
timeless@mozdev.org
spelling: behaviour -> behavior
r26098 # that have proven likely to expose bugs and divergent behavior in
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 # different encoding implementations.
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 from __future__ import absolute_import, print_function
Pulkit Goyal
tests: make test-pathencode use print_function
r28918
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 import binascii
Bryan O'Sullivan
test-pathencode: make a 2.4-safe import of collections
r17935 import collections
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 import itertools
import math
import os
import random
import sys
import time
from mercurial import (
store,
)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
Augie Fackler
tests: add xrange alias for test-pathencode.py
r34221 try:
xrange
except NameError:
xrange = range
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 validchars = set(map(chr, range(0, 256)))
alphanum = range(ord('A'), ord('Z'))
for c in '\0/':
validchars.remove(c)
winreserved = ('aux con prn nul'.split() +
['com%d' % i for i in xrange(1, 10)] +
['lpt%d' % i for i in xrange(1, 10)])
def casecombinations(names):
'''Build all case-diddled combinations of names.'''
combos = set()
for r in names:
for i in xrange(len(r) + 1):
for c in itertools.combinations(xrange(len(r)), i):
d = r
for j in c:
d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
combos.add(d)
return sorted(combos)
def buildprobtable(fp, cmd='hg manifest tip'):
'''Construct and print a table of probabilities for path name
components. The numbers are percentages.'''
Bryan O'Sullivan
test-pathencode: make a 2.4-safe import of collections
r17935 counts = collections.defaultdict(lambda: 0)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 for line in os.popen(cmd).read().splitlines():
if line[-2:] in ('.i', '.d'):
line = line[:-2]
if line.startswith('data/'):
line = line[5:]
for c in line:
counts[c] += 1
for c in '\r/\n':
counts.pop(c, None)
t = sum(counts.itervalues()) / 100.0
fp.write('probtable = (')
Pulkit Goyal
py3: use dict.items() instead of dict.iteritems() in tests...
r36345 for i, (k, v) in enumerate(sorted(counts.items(), key=lambda x: x[1],
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 reverse=True)):
if (i % 5) == 0:
fp.write('\n ')
vt = v / t
if vt < 0.0005:
break
fp.write('(%r, %.03f), ' % (k, vt))
fp.write('\n )\n')
# A table of character frequencies (as percentages), gleaned by
# looking at filelog names from a real-world, very large repo.
probtable = (
('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
)
for c, _ in probtable:
validchars.remove(c)
validchars = list(validchars)
def pickfrom(rng, table):
c = 0
r = rng.random() * sum(i[1] for i in table)
for i, p in table:
c += p
if c >= r:
return i
reservedcombos = casecombinations(winreserved)
# The first component of a name following a slash.
firsttable = (
(lambda rng: pickfrom(rng, probtable), 90),
(lambda rng: rng.choice(validchars), 5),
(lambda rng: rng.choice(reservedcombos), 5),
)
# Components of a name following the first.
resttable = firsttable[:-1]
# Special suffixes.
internalsuffixcombos = casecombinations('.hg .i .d'.split())
# The last component of a path, before a slash or at the end of a name.
lasttable = resttable + (
(lambda rng: '', 95),
(lambda rng: rng.choice(internalsuffixcombos), 5),
)
def makepart(rng, k):
'''Construct a part of a pathname, without slashes.'''
p = pickfrom(rng, firsttable)(rng)
l = len(p)
ps = [p]
Siddharth Agarwal
test-pathencode: randomize length of each path component...
r19319 maxl = rng.randint(1, k)
while l < maxl:
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 p = pickfrom(rng, resttable)(rng)
l += len(p)
ps.append(p)
ps.append(pickfrom(rng, lasttable)(rng))
return ''.join(ps)
def makepath(rng, j, k):
'''Construct a complete pathname.'''
return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
rng.choice(['.d', '.i']))
def genpath(rng, count):
'''Generate random pathnames with gradually increasing lengths.'''
mink, maxk = 1, 4096
def steps():
for i in xrange(count):
yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
for k in steps():
x = rng.randint(1, k)
y = rng.randint(1, k)
yield makepath(rng, x, y)
def runtests(rng, seed, count):
nerrs = 0
for p in genpath(rng, count):
Bryan O'Sullivan
store: switch to C-based hashed path encoding
r18435 h = store._pathencode(p) # uses C implementation, if available
Adrian Buehlmann
test-pathencode: compare current pathencoding implementations...
r18094 r = store._hybridencode(p, True) # reference implementation in Python
if h != r:
if nerrs == 0:
Pulkit Goyal
tests: make test-pathencode use print_function
r28918 print('seed:', hex(seed)[:-1], file=sys.stderr)
print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr)
print("h: '%s'" % h.encode("string_escape"), file=sys.stderr)
print("r: '%s'" % r.encode("string_escape"), file=sys.stderr)
Adrian Buehlmann
test-pathencode: compare current pathencoding implementations...
r18094 nerrs += 1
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 return nerrs
def main():
import getopt
# Empirically observed to take about a second to run
count = 100
seed = None
opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
['build', 'count=', 'seed='])
for o, a in opts:
if o in ('-c', '--count'):
count = int(a)
elif o in ('-s', '--seed'):
Augie Fackler
tests: use int() instead of long() in test-pathencode.py
r34222 seed = int(a, base=0) # accepts base 10 or 16 strings
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 elif o == '--build':
buildprobtable(sys.stdout,
'find .hg/store/data -type f && '
'cat .hg/store/fncache 2>/dev/null')
sys.exit(0)
if seed is None:
try:
Augie Fackler
tests: use int() instead of long() in test-pathencode.py
r34222 seed = int(binascii.hexlify(os.urandom(16)), 16)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 except AttributeError:
Augie Fackler
tests: use int() instead of long() in test-pathencode.py
r34222 seed = int(time.time() * 1000)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
rng = random.Random(seed)
if runtests(rng, seed, count):
sys.exit(1)
Bryan O'Sullivan
test-pathencode: more aggressively check for python < 2.6
r17947 if __name__ == '__main__':
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 main()