##// END OF EJS Templates
manifest: avoid corruption by dropping removed files with pure (issue5801)...
manifest: avoid corruption by dropping removed files with pure (issue5801) Previously, removed files would simply be marked by overwriting the first byte with NUL and dropping their entry in `self.positions`. But no effort was made to ignore them when compacting the dictionary into text form. This allowed them to slip into the manifest revision, since the code seems to be trying to minimize the string operations by copying as large a chunk as possible. As part of this, `_compact()` walks the existing text based on entries in the `positions` list, and consumes everything up to the next position entry. This typically resulted in a ValueError complaining about unsorted manifest entries. Sometimes it seems that files do get dropped in large repos; this seems to correspond to there being a new entry that would take the same slot. A much more trivial problem is that if the only changes were removals, `_compact()` didn't even run because `__delitem__` doesn't add anything to `self.extradata`. Now there's an explicit variable to flag this, both to allow `_compact()` to run, and to avoid searching the manifest in cases where there are no removals. In practice, this behavior was mostly obscured by the check in fastdelta() which takes a different path that explicitly drops removed files if there are fewer than 1000 changes. However, timeless has a repo where after rebasing tens of commits, a totally different path[1] is taken that bypasses the change count check and hits this problem. [1] https://www.mercurial-scm.org/repo/hg/file/2338bdea4474/mercurial/manifest.py#l1511

File last commit:

r40195:5774fc62 default
r42569:0546ead3 stable
Show More
urllibcompat.py
196 lines | 5.3 KiB | text/x-python | PythonLexer
Augie Fackler
urllibcompat: new library to help abstract out some python3 urllib2 stuff...
r34466 # urllibcompat.py - adapters to ease using urllib2 on Py2 and urllib on Py3
#
# Copyright 2017 Google, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
from . import pycompat
Augie Fackler
urllibcompat: move some adapters from pycompat to urllibcompat...
# Shorthand for pycompat.sysstr; it is applied to alias and attribute names
# before they are stored in a stub's alias table.
r34468 _sysstr = pycompat.sysstr
class _pycompatstub(object):
def __init__(self):
self._aliases = {}
def _registeraliases(self, origin, items):
"""Add items that will be populated at the first access"""
items = map(_sysstr, items)
self._aliases.update(
Yuya Nishihara
py3: use r'' instead of sysstr('') to get around code transformer...
r36853 (item.replace(r'_', r'').lower(), (origin, item))
Augie Fackler
urllibcompat: move some adapters from pycompat to urllibcompat...
r34468 for item in items)
def _registeralias(self, origin, attr, name):
"""Alias ``origin``.``attr`` as ``name``"""
self._aliases[_sysstr(name)] = (origin, _sysstr(attr))
def __getattr__(self, name):
try:
origin, item = self._aliases[name]
except KeyError:
raise AttributeError(name)
self.__dict__[name] = obj = getattr(origin, item)
return obj
# Lazy namespaces. Aliases are registered on them further down, and each
# target is only resolved when the alias is first read.
httpserver = _pycompatstub()
urlreq = _pycompatstub()
urlerr = _pycompatstub()
Augie Fackler
urllibcompat: new library to help abstract out some python3 urllib2 stuff...
# Python 3 branch: the URL parsing helpers are taken from urllib.parse.
# NOTE(review): the author / summary / "rNNNNN" fragments interleaved below
# look like annotate-view residue rather than source -- confirm and strip.
r34466 if pycompat.ispy3:
Augie Fackler
urllibcompat: move some adapters from pycompat to urllibcompat...
r34468 import urllib.parse
# Exposed on urlreq under their lower-cased, underscore-free names.
urlreq._registeraliases(urllib.parse, (
"splitattr",
"splitpasswd",
"splitport",
"splituser",
"urlparse",
"urlunparse",
))
Gregory Szorc
wireprotoserver: define and use parse_qs from urllib...
# Explicit aliases: urlreq.parseqs -> parse_qs, urlreq.parseqsl -> parse_qsl.
r36094 urlreq._registeralias(urllib.parse, "parse_qs", "parseqs")
Gregory Szorc
hgweb: teach WSGI parser about query strings...
r36827 urlreq._registeralias(urllib.parse, "parse_qsl", "parseqsl")
Augie Fackler
urllibcompat: move some adapters from pycompat to urllibcompat...
# On this branch urlreq.unquote resolves to urllib.parse.unquote_to_bytes.
r34468 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
# Handler classes, opener helpers and path/URL converters from
# urllib.request, exposed on urlreq under lower-cased, underscore-free names.
import urllib.request
urlreq._registeraliases(urllib.request, (
"AbstractHTTPHandler",
"BaseHandler",
"build_opener",
"FileHandler",
"FTPHandler",
"ftpwrapper",
"HTTPHandler",
"HTTPSHandler",
"install_opener",
"pathname2url",
"HTTPBasicAuthHandler",
"HTTPDigestAuthHandler",
"HTTPPasswordMgrWithDefaultRealm",
"ProxyHandler",
"Request",
"url2pathname",
"urlopen",
))
# Response wrappers from urllib.response, also exposed on urlreq.
import urllib.response
urlreq._registeraliases(urllib.response, (
"addclosehook",
"addinfourl",
))
# Exception classes from urllib.error, exposed on urlerr.
import urllib.error
urlerr._registeraliases(urllib.error, (
"HTTPError",
"URLError",
))
# HTTP server classes from http.server, exposed on httpserver.
import http.server
httpserver._registeraliases(http.server, (
"HTTPServer",
"BaseHTTPRequestHandler",
"SimpleHTTPRequestHandler",
"CGIHTTPRequestHandler",
))
# urllib.parse.quote() accepts both str and bytes, decodes bytes
# (if necessary), and returns str. This is wonky. We provide a custom
# implementation that only accepts bytes and emits bytes.
def quote(s, safe=r'/'):
    """Percent-encode the bytes ``s`` and return the result as bytes.

    ``safe`` is forwarded unchanged to ``urllib.parse.quote_from_bytes()``.
    The str that call returns is encoded as strict ASCII before being
    handed back.
    """
    # bytestr has an __iter__ that emits characters. quote_from_bytes()
    # does an iteration and expects ints. We coerce to bytes to appease it.
    if isinstance(s, pycompat.bytestr):
        s = bytes(s)
    s = urllib.parse.quote_from_bytes(s, safe=safe)
    return s.encode('ascii', 'strict')
# urllib.parse.urlencode() returns str. We use this function to make
# sure we return bytes.
def urlencode(query, doseq=False):
    """Encode ``query`` with ``urllib.parse.urlencode()`` and return bytes.

    ``doseq`` is forwarded unchanged; the str result is encoded as ASCII.
    """
    s = urllib.parse.urlencode(query, doseq=doseq)
    return s.encode('ascii')
# Install the bytes-returning quote() and urlencode() defined above
# directly as attributes of urlreq.
urlreq.quote = quote
urlreq.urlencode = urlencode
Augie Fackler
urllibcompat: new library to help abstract out some python3 urllib2 stuff...
r34466
def getfullurl(req):
    """Return the full URL of ``req``, read from its ``full_url`` attribute."""
    return req.full_url
def gethost(req):
    """Return the host of ``req``, read from its ``host`` attribute."""
    return req.host
def getselector(req):
    """Return the selector of ``req``, read from its ``selector`` attribute."""
    return req.selector
def getdata(req):
    """Return the payload of ``req``, read from its ``data`` attribute."""
    return req.data
def hasdata(req):
    """Return True when ``req.data`` is anything other than ``None``.

    An empty payload such as ``b''`` still counts as data.
    """
    return req.data is not None
# Fallback branch (pycompat.ispy3 is false): the same aliases are built
# from the Python 2 module layout.
# NOTE(review): the author / summary / "rNNNNN" fragments interleaved below
# look like annotate-view residue rather than source -- confirm and strip.
else:
Augie Fackler
urllibcompat: move some adapters from pycompat to urllibcompat...
r34468 import BaseHTTPServer
import CGIHTTPServer
import SimpleHTTPServer
import urllib2
import urllib
import urlparse
# Names taken from urllib. On this branch quote, unquote and urlencode are
# plain aliases of the urllib functions.
urlreq._registeraliases(urllib, (
"addclosehook",
"addinfourl",
"ftpwrapper",
"pathname2url",
"quote",
"splitattr",
"splitpasswd",
"splitport",
"splituser",
"unquote",
"url2pathname",
"urlencode",
))
# Handler classes and opener helpers taken from urllib2.
urlreq._registeraliases(urllib2, (
"AbstractHTTPHandler",
"BaseHandler",
"build_opener",
"FileHandler",
"FTPHandler",
"HTTPBasicAuthHandler",
"HTTPDigestAuthHandler",
"HTTPHandler",
"HTTPPasswordMgrWithDefaultRealm",
"HTTPSHandler",
"install_opener",
"ProxyHandler",
"Request",
"urlopen",
))
# URL (un)parsing functions taken from urlparse.
urlreq._registeraliases(urlparse, (
"urlparse",
"urlunparse",
))
Gregory Szorc
wireprotoserver: define and use parse_qs from urllib...
# Explicit aliases: urlreq.parseqs -> parse_qs, urlreq.parseqsl -> parse_qsl.
# NOTE(review): the author / summary / "rNNNNN" fragments interleaved below
# look like annotate-view residue rather than source -- confirm and strip.
r36094 urlreq._registeralias(urlparse, "parse_qs", "parseqs")
Gregory Szorc
hgweb: teach WSGI parser about query strings...
r36827 urlreq._registeralias(urlparse, "parse_qsl", "parseqsl")
Augie Fackler
urllibcompat: move some adapters from pycompat to urllibcompat...
# Exception classes taken from urllib2, exposed on urlerr.
r34468 urlerr._registeraliases(urllib2, (
"HTTPError",
"URLError",
))
# HTTP server classes, spread over three modules on this branch, all
# exposed on httpserver.
httpserver._registeraliases(BaseHTTPServer, (
"HTTPServer",
"BaseHTTPRequestHandler",
))
httpserver._registeraliases(SimpleHTTPServer, (
"SimpleHTTPRequestHandler",
))
httpserver._registeraliases(CGIHTTPServer, (
"CGIHTTPRequestHandler",
))
Augie Fackler
urllibcompat: new library to help abstract out some python3 urllib2 stuff...
r34466
def gethost(req):
    """Return the host of ``req`` by calling its ``get_host()`` method."""
    return req.get_host()
def getselector(req):
    """Return the selector of ``req`` by calling ``get_selector()``."""
    return req.get_selector()
def getfullurl(req):
    """Return the full URL of ``req`` by calling ``get_full_url()``."""
    return req.get_full_url()
def getdata(req):
    """Return the payload of ``req`` by calling its ``get_data()`` method."""
    return req.get_data()
def hasdata(req):
    """Return whatever ``req.has_data()`` reports for the request."""
    return req.has_data()