# url.py - HTTP handling for mercurial
#
# Copyright 2005, 2006, 2007, 2008 Matt Mackall <[email protected]>
# Copyright 2006, 2007 Alexis S. L. Carvalho <[email protected]>
# Copyright 2006 Vadim Gelfer <[email protected]>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.

import urllib, urllib2, urlparse, httplib, os, re, socket, cStringIO
from i18n import _
import keepalive, util

def hidepassword(url):
   '''mask the password embedded in a url string with ***

   Only a "user:password@" prefix of the netloc is rewritten; the
   user name and every other part of the url are kept.
   '''
   parts = list(urlparse.urlparse(url))
   # parts[1] is the netloc: keep the user, replace what follows ':'
   parts[1] = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', parts[1])
   return urlparse.urlunparse(tuple(parts))

def removeauth(url):
   '''drop the "user[:password]@" prefix from the netloc of a url string'''
   pieces = list(urlparse.urlparse(url))
   netloc = pieces[1]
   # find() gives -1 when there is no '@', so the slice then starts
   # at 0 and the netloc is kept whole
   at = netloc.find('@')
   pieces[1] = netloc[at + 1:]
   return urlparse.urlunparse(tuple(pieces))

def netlocsplit(netloc):
   '''split [user[:passwd]@]host[:port] into 4-tuple.

   Returns (host, port, user, passwd). user and passwd are
   URL-unquoted, port is returned as a string, and every part that
   is not present is None.

   A bracketed IPv6 literal is kept whole as the host (brackets
   included), so the colons inside it are not taken for the port
   separator:

   >>> netlocsplit('[::1]:8080')
   ('[::1]', '8080', None, None)
   '''
   user, passwd = None, None
   a = netloc.find('@')
   if a != -1:
       userpass, netloc = netloc[:a], netloc[a+1:]
       c = userpass.find(':')
       if c == -1:
           user = urllib.unquote(userpass)
       else:
           user = urllib.unquote(userpass[:c])
           passwd = urllib.unquote(userpass[c+1:])

   # only a ':' found after the closing bracket of an IPv6 literal can
   # introduce the port; without any ']' the search starts at index 0
   c = netloc.find(':', netloc.rfind(']') + 1)
   if c == -1:
       host, port = netloc, None
   else:
       host, port = netloc[:c], netloc[c+1:]
   return host, port, user, passwd

def netlocunsplit(host, port, user=None, passwd=None):
   '''join host, port, user, passwd into [user[:passwd]@]host[:port].

   user and passwd are URL-quoted. A port, user or passwd that is
   empty or None is left out of the result.
   '''
   hostport = host
   if port:
       hostport = host + ':' + port
   if not user:
       return hostport
   userpass = urllib.quote(user)
   if passwd:
       userpass += ':' + urllib.quote(passwd)
   return userpass + '@' + hostport

# characters that quotepath() never escapes
_safe = ('abcdefghijklmnopqrstuvwxyz'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        '0123456789' '_.-/')
# lookup sets, filled in by the first call to quotepath()
_safeset = None
_hex = None
def quotepath(path):
   '''quote the path part of a URL

   Works like urllib.quote, except that a '%' already followed by
   two hex digits is taken to be an existing escape and is left
   alone, so quoting the same path twice is harmless (inspired by
   wget):

   >>> quotepath('abc def')
   'abc%20def'
   >>> quotepath('abc%20def')
   'abc%20def'
   >>> quotepath('abc%20 def')
   'abc%20%20def'
   >>> quotepath('abc def%20')
   'abc%20def%20'
   >>> quotepath('abc def%2')
   'abc%20def%252'
   >>> quotepath('abc def%')
   'abc%20def%25'
   '''
   global _safeset, _hex
   if _safeset is None:
       _safeset = set(_safe)
       _hex = set('abcdefABCDEF0123456789')
   end = len(path)
   quoted = []
   for pos, ch in enumerate(path):
       if ch == '%':
           # an escape needs two hex digits after the '%'
           keep = (pos + 2 < end and
                   path[pos+1] in _hex and path[pos+2] in _hex)
       else:
           keep = ch in _safeset
       if not keep:
           ch = '%%%02X' % ord(ch)
       quoted.append(ch)
   return ''.join(quoted)

class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
   '''password manager backed by the [auth] configuration section

   Credentials already registered with the base password manager are
   used as is. Otherwise they are looked up in the [auth] section
   and, for interactive sessions, the user is asked for whatever is
   still missing.
   '''
   def __init__(self, ui):
       urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
       self.ui = ui

   def find_user_password(self, realm, authuri):
       '''return the (user, passwd) pair to use for realm at authuri

       Raises util.Abort when the credentials are incomplete and the
       ui is not interactive.
       '''
       authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
           self, realm, authuri)
       user, passwd = authinfo
       if user and passwd:
           self._writedebug(user, passwd)
           return (user, passwd)

       if not user:
           auth = self.readauthtoken(authuri)
           if auth:
               user, passwd = auth.get('username'), auth.get('password')
       if not user or not passwd:
           if not self.ui.interactive():
               raise util.Abort(_('http authorization required'))

           self.ui.write(_("http authorization required\n"))
           self.ui.status(_("realm: %s\n") % realm)
           if user:
               self.ui.status(_("user: %s\n") % user)
           else:
               user = self.ui.prompt(_("user:"), default=None)

           if not passwd:
               passwd = self.ui.getpass()

       # register the credentials with the base password manager
       self.add_password(realm, authuri, user, passwd)
       self._writedebug(user, passwd)
       return (user, passwd)

   def _writedebug(self, user, passwd):
       '''log the user name and a masked password at debug level'''
       msg = _('http auth: user %s, password %s\n')
       if passwd:
           masked = '*' * len(passwd)
       else:
           masked = 'not set'
       self.ui.debug(msg % (user, masked))

   def readauthtoken(self, uri):
       '''return the settings of the [auth] group that best matches uri

       [auth] keys have the form "group.setting". A group matches
       when its prefix is '*' or a leading part of the host/path of
       uri and the scheme of uri is one of the group's schemes (taken
       from the prefix itself, else from "schemes", else 'https').
       The group with the longest prefix wins. Returns a dict of
       settings, or None when nothing matches.

       Keys without a '.' are reported and skipped; a uri without
       '://' has no scheme to check and therefore matches nothing.
       '''
       # Read configuration
       config = dict()
       for key, val in self.ui.configitems('auth'):
           if '.' not in key:
               # no "group." part to file the setting under
               self.ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
               continue
           group, setting = key.split('.', 1)
           gdict = config.setdefault(group, dict())
           gdict[setting] = val

       # Find the best match
       if '://' in uri:
           scheme, hostpath = uri.split('://', 1)
       else:
           scheme, hostpath = None, uri
       bestlen = 0
       bestauth = None
       for auth in config.values():
           prefix = auth.get('prefix')
           if not prefix:
               continue
           p = prefix.split('://', 1)
           if len(p) > 1:
               schemes, prefix = [p[0]], p[1]
           else:
               schemes = (auth.get('schemes') or 'https').split()
           if (prefix == '*' or hostpath.startswith(prefix)) and \
               len(prefix) > bestlen and scheme in schemes:
               bestlen = len(prefix)
               bestauth = auth
       return bestauth

class proxyhandler(urllib2.ProxyHandler):
   '''proxy handler set up from the [http_proxy] configuration section
   and the http_proxy/no_proxy environment variables'''
   def __init__(self, ui):
       '''work out the proxy url and the list of hosts bypassing it

       The proxy comes from http_proxy.host or, failing that, from
       the http_proxy environment variable. Credentials embedded in
       it win over http_proxy.user/http_proxy.passwd. The proxy
       related environment variables are removed afterwards.
       '''
       proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
       # XXX proxyauthinfo = None

       # hosts that must not go through the proxy; defined even when
       # no proxy is configured so proxy_open() can always consult it
       self.no_list = []

       if proxyurl:
           # proxy can be proper url or host[:port]
           if not (proxyurl.startswith('http:') or
                   proxyurl.startswith('https:')):
               proxyurl = 'http://' + proxyurl + '/'
           snpqf = urlparse.urlsplit(proxyurl)
           proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
           hpup = netlocsplit(proxynetloc)

           proxyhost, proxyport, proxyuser, proxypasswd = hpup
           if not proxyuser:
               proxyuser = ui.config("http_proxy", "user")
               proxypasswd = ui.config("http_proxy", "passwd")

           # see if we should use a proxy for this url
           no_list = [ "localhost", "127.0.0.1" ]
           no_list.extend([p.lower() for
                           p in ui.configlist("http_proxy", "no")])
           no_list.extend([p.strip().lower() for
                           p in os.getenv("no_proxy", '').split(',')
                           if p.strip()])
           # "http_proxy.always" config is for running tests on localhost
           if not ui.configbool("http_proxy", "always"):
               self.no_list = no_list

           proxyurl = urlparse.urlunsplit((
               proxyscheme, netlocunsplit(proxyhost, proxyport,
                                               proxyuser, proxypasswd or ''),
               proxypath, proxyquery, proxyfrag))
           proxies = {'http': proxyurl, 'https': proxyurl}
           ui.debug(_('proxying through http://%s:%s\n') %
                     (proxyhost, proxyport))
       else:
           proxies = {}

       # urllib2 takes proxy values from the environment and those
       # will take precedence if found, so drop them
       for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
           try:
               if env in os.environ:
                   del os.environ[env]
           except OSError:
               pass

       urllib2.ProxyHandler.__init__(self, proxies)
       self.ui = ui

   def proxy_open(self, req, proxy, type_):
       '''send req through proxy unless its host is on the bypass list

       Returns None for a bypassed host, which leaves the request to
       the other handlers.
       '''
       # the bypass entries are lower-cased, so the host has to be
       # lower-cased as well for the lookup to be case-insensitive
       host = req.get_host().split(':')[0].lower()
       if host in self.no_list:
           return None

       # work around a bug in Python < 2.4.2
       # (it leaves a "\n" at the end of Proxy-authorization headers)
       baseclass = req.__class__
       class _request(baseclass):
           def add_header(self, key, val):
               if key.lower() == 'proxy-authorization':
                   val = val.strip()
               return baseclass.add_header(self, key, val)
       req.__class__ = _request

       return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)

class httpsendfile(file):
   '''file object whose len() is the size reported by fstat'''
   def __len__(self):
       stat = os.fstat(self.fileno())
       return stat.st_size

def _gen_sendfile(connection):
   '''build a send() method layered on connection.send

   The returned method passes ordinary data straight to
   connection.send and feeds an httpsendfile to it chunk by chunk.
   '''
   def _sendfile(self, data):
       if not isinstance(data, httpsendfile):
           connection.send(self, data)
           return
       # if auth required, some data sent twice, so rewind here
       data.seek(0)
       for chunk in util.filechunkiter(data):
           connection.send(self, chunk)
   return _sendfile

# True when urllib2 exposes HTTPSHandler. _ssl_wrap_socket(sock, key_file,
# cert_file) is only defined in that case.
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
   try:
       # avoid using deprecated/broken FakeSocket in python 2.6
       import ssl
       _ssl_wrap_socket = ssl.wrap_socket
   except ImportError:
       # no ssl module: wrap the socket with socket.ssl and
       # httplib.FakeSocket instead
       def _ssl_wrap_socket(sock, key_file, cert_file):
           ssl = socket.ssl(sock, key_file, cert_file)
           return httplib.FakeSocket(sock, ssl)

class httpconnection(keepalive.HTTPConnection):
   '''keepalive HTTP connection that can stream file bodies and open
   a CONNECT tunnel through a proxy

   The attributes realhost, realport and headers are expected to be
   set on the instance before connect() is called.
   '''
   # must be able to send big bundle as stream.
   send = _gen_sendfile(keepalive.HTTPConnection)

   def _proxytunnel(self):
       '''ask the proxy for a tunnel to realhost:realport

       Returns True when the proxy answers 200. Any other answer is
       parsed and stored in self.proxyres for getresponse() to hand
       out, and False is returned. For an HTTP/0.9 answer False is
       returned without storing a response.
       '''
       proxyheaders = dict(
               [(x, self.headers[x]) for x in self.headers
                if x.lower().startswith('proxy-')])
       self._set_hostport(self.host, self.port)
       self.send('CONNECT %s:%d HTTP/1.0\r\n' % (self.realhost, self.realport))
       for header in proxyheaders.iteritems():
           self.send('%s: %s\r\n' % header)
       self.send('\r\n')

       # majority of the following code is duplicated from
       # httplib.HTTPConnection as there are no adequate places to
       # override functions to provide the needed functionality
       res = self.response_class(self.sock,
                                 strict=self.strict,
                                 method=self._method)

       while True:
           version, status, reason = res._read_status()
           if status != httplib.CONTINUE:
               break
           # skip the header lines of the 100 Continue response
           while True:
               skip = res.fp.readline().strip()
               if not skip:
                   break
       res.status = status
       res.reason = reason.strip()

       if res.status == 200:
           # skip the remaining header lines of the proxy's answer
           while True:
               line = res.fp.readline()
               # '' means the proxy closed the connection; stopping
               # only on the blank line would then loop forever
               if line in ('\r\n', '\n', ''):
                   break
           return True

       if version == 'HTTP/1.0':
           res.version = 10
       elif version.startswith('HTTP/1.'):
           res.version = 11
       elif version == 'HTTP/0.9':
           res.version = 9
       else:
           raise httplib.UnknownProtocol(version)

       if res.version == 9:
           res.length = None
           res.chunked = 0
           res.will_close = 1
           res.msg = httplib.HTTPMessage(cStringIO.StringIO())
           return False

       res.msg = httplib.HTTPMessage(res.fp)
       res.msg.fp = None

       # are we using the chunked-style of transfer encoding?
       trenc = res.msg.getheader('transfer-encoding')
       if trenc and trenc.lower() == "chunked":
           res.chunked = 1
           res.chunk_left = None
       else:
           res.chunked = 0

       # will the connection close at the end of the response?
       res.will_close = res._check_close()

       # do we have a Content-Length?
       # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
       length = res.msg.getheader('content-length')
       if length and not res.chunked:
           try:
               res.length = int(length)
           except ValueError:
               res.length = None
           else:
               if res.length < 0:  # ignore nonsensical negative lengths
                   res.length = None
       else:
           res.length = None

       # does the body have a fixed length? (of zero)
       if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
           100 <= status < 200 or # 1xx codes
           res._method == 'HEAD'):
           res.length = 0

       # if the connection remains open, and we aren't using chunked, and
       # a content-length was not provided, then assume that the connection
       # WILL close.
       if (not res.will_close and
          not res.chunked and
          res.length is None):
           res.will_close = 1

       self.proxyres = res

       return False

   def connect(self):
       '''connect to the host, tunnelling through the proxy when
       realhost is set and HTTPS is available'''
       if has_https and self.realhost: # use CONNECT proxy
           self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
           self.sock.connect((self.host, self.port))
           if self._proxytunnel():
               # we do not support client x509 certificates
               self.sock = _ssl_wrap_socket(self.sock, None, None)
       else:
           keepalive.HTTPConnection.connect(self)

   def getresponse(self):
       '''return the response stored by a refused CONNECT, if any,
       else the regular response'''
       proxyres = getattr(self, 'proxyres', None)
       if proxyres:
           if proxyres.will_close:
               self.close()
           self.proxyres = None
           return proxyres
       return keepalive.HTTPConnection.getresponse(self)

class httphandler(keepalive.HTTPHandler):
   '''keepalive HTTP handler that opens httpconnection objects and
   tells them when to tunnel through a proxy'''
   def http_open(self, req):
       return self.do_open(httpconnection, req)

   def _start_transaction(self, h, req):
       '''record the CONNECT target on connection h, then start the
       transaction

       For an https url requested through a proxy, h.realhost and
       h.realport name the destination (port 443 unless the url gives
       one) and h.headers holds the request headers plus the opener's
       default headers. In every other case all three are None.
       '''
       realhost = realport = headers = None
       selector = req.get_selector()
       if selector == req.get_full_url(): # has proxy
           urlparts = urlparse.urlparse(selector)
           if urlparts[0] == 'https': # only use CONNECT for HTTPS
               netloc = urlparts[1]
               if ':' in netloc:
                   realhost, realport = netloc.split(':')
                   realport = int(realport)
               else:
                   realhost, realport = netloc, 443
               headers = req.headers.copy()
               headers.update(self.parent.addheaders)

       h.realhost = realhost
       h.realport = realport
       h.headers = headers
       return keepalive.HTTPHandler._start_transaction(self, h, req)

   def __del__(self):
       self.close_all()

if has_https:
   class httpsconnection(httplib.HTTPSConnection):
       '''HTTPS connection that produces keepalive responses and can
       stream file bodies'''
       response_class = keepalive.HTTPResponse
       # must be able to send big bundle as stream.
       send = _gen_sendfile(httplib.HTTPSConnection)

   class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
       '''keepalive HTTPS handler that takes client key/cert files
       from the matching [auth] group'''
       def __init__(self, ui):
           keepalive.KeepAliveHandler.__init__(self)
           urllib2.HTTPSHandler.__init__(self)
           self.ui = ui
           self.pwmgr = passwordmgr(self.ui)

       def https_open(self, req):
           # remember the [auth] group for this url; _makeconnection
           # reads it when the connection is created
           self.auth = self.pwmgr.readauthtoken(req.get_full_url())
           return self.do_open(self._makeconnection, req)

       def _makeconnection(self, host, port=443, *args, **kwargs):
           '''create the httpsconnection for host

           The first two extra positional arguments, when given, are
           the key file and the certificate file. "key" and "cert"
           from the current [auth] group override them when both are
           set. A port embedded in host overrides the port argument.
           '''
           keyfile = None
           certfile = None

           # *args is a tuple and has no pop(), so pick the optional
           # key_file/cert_file by position and slice them off
           if len(args) >= 1: # key_file
               keyfile = args[0]
           if len(args) >= 2: # cert_file
               certfile = args[1]
           args = args[2:]

           # if the user has specified different key/cert files in
           # hgrc, we prefer these
           if self.auth and 'key' in self.auth and 'cert' in self.auth:
               keyfile = self.auth['key']
               certfile = self.auth['cert']

           # let host port take precedence
           if ':' in host and '[' not in host or ']:' in host:
               host, port = host.rsplit(':', 1)
               port = int(port)
               if '[' in host:
                   # strip the brackets of an IPv6 literal
                   host = host[1:-1]

           return httpsconnection(host, port, keyfile, certfile, *args, **kwargs)

# In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
# it doesn't know about the auth type requested.  This can happen if
# somebody is using BasicAuth and types a bad password.
class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
   def http_error_auth_reqed(self, auth_header, host, req, headers):
       try:
           return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
                       self, auth_header, host, req, headers)
       except ValueError, inst:
           arg = inst.args[0]
           if arg.startswith("AbstractDigestAuthHandler doesn't know "):
               return
           raise

def getauthinfo(path):
   '''separate the credentials from a url

   Returns (url, authinfo). url is path with user and password
   removed from its netloc, an empty path replaced by '/' and, for
   every scheme but file, the path run through quotepath(). authinfo
   is None when path names no user, else the tuple
   (None, (url, netloc), user, passwd) with '' for a missing password.
   '''
   scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
   urlpath = urlpath or '/'
   if scheme != 'file':
       # XXX: why are we quoting the path again with some smart
       # heuristic here? Anyway, it cannot be done with file://
       # urls since path encoding is os/fs dependent (see
       # urllib.pathname2url() for details).
       urlpath = quotepath(urlpath)
   host, port, user, passwd = netlocsplit(netloc)

   # urllib cannot handle URLs with embedded user or passwd
   hostport = netlocunsplit(host, port)
   url = urlparse.urlunsplit((scheme, hostport, urlpath, query, frag))
   if not user:
       return url, None
   # Python < 2.4.3 uses only the netloc to search for a password
   return url, (None, (url, hostport), user, passwd or '')

# factories called as f(ui, passmgr); each result is added to the
# handlers of every opener built by opener()
handlerfuncs = []

def opener(ui, authinfo=None):
   '''
   build a urllib2 opener with the http, https (when available),
   proxy and authentication handlers of this module

   authinfo, when given, is the argument tuple for the password
   manager's add_password()
   '''
   handlers = [httphandler()]
   if has_https:
       handlers.append(httpshandler(ui))
   handlers.append(proxyhandler(ui))

   passmgr = passwordmgr(ui)
   if authinfo is not None:
       passmgr.add_password(*authinfo)
       user, passwd = authinfo[2:4]
       if passwd:
           masked = '*' * len(passwd)
       else:
           masked = 'not set'
       ui.debug(_('http auth: user %s, password %s\n') % (user, masked))

   handlers += [urllib2.HTTPBasicAuthHandler(passmgr),
                httpdigestauthhandler(passmgr)]
   handlers += [h(ui, passmgr) for h in handlerfuncs]
   urlopener = urllib2.build_opener(*handlers)

   # 1.0 here is the _protocol_ version
   urlopener.addheaders = [('User-agent', 'mercurial/proto-1.0'),
                           ('Accept', 'application/mercurial-0.1')]
   return urlopener

# '-' is kept last in the character class so that it is a literal
# and does not form a range with its neighbours
scheme_re = re.compile(r'^([a-zA-Z0-9+.-]+)://')

def open(ui, url, data=None):
   '''open url with an opener built by opener() and return the result

   A url that does not start with "scheme://" is taken to be a local
   path and turned into a file:// url. Otherwise credentials embedded
   in the url are split off and given to the opener. data is passed
   through to the opener's open().
   '''
   scheme = None
   m = scheme_re.search(url)
   if m:
       scheme = m.group(1).lower()
   if not scheme:
       path = util.normpath(os.path.abspath(url))
       url = 'file://' + urllib.pathname2url(path)
       authinfo = None
   else:
       url, authinfo = getauthinfo(url)
   return opener(ui, authinfo).open(url, data)