Viewing file: BaseRequestHandler.py (32.63 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
######################################################################## # $Header: /var/local/cvsroot/4Suite/Ft/Server/Server/Http/BaseRequestHandler.py,v 1.71 2005/04/06 06:19:15 mbrown Exp $ """ Basic functionality common to all HTTP requests and responses
Copyright 2005 Fourthought, Inc. (USA). Detailed license and copyright information: http://4suite.org/COPYRIGHT Project home, documentation, distributions: http://4suite.org/ """
__all__ = ['BaseRequestHandler']
import re, sys, os, time, socket, cgi, cStringIO try: # Python 2.3+ from types import basestring as StringTypes except ImportError: from types import StringTypes
# for request parsing import errno, traceback, mimetools
# for authentication import base64, sha
from Ft.Lib import Uri from Ft.Server import __version__, FTSS_URI_SCHEME from Ft.Server.Common.Util import RepoPathToUri from Ft.Server.Server import FtServerServerException from Ft.Server.Server import Error as ServerError from Ft.Server.Server import RequestHandler from Ft.Server.Server.Http import FtServerHttpException, Error from Ft.Server.Server.SCore import GetRepository
import Status import HeaderDict
# username to do a deliberately anonymous repo login #ANONYMOUS_USER_NAME = 'anonymous' from Ft.Server.Common.AclConstants import ANONYMOUS_USER_NAME
# Product token sent in the Server response header and in error documents
SERVER_VERSION = 'FtServer/' + __version__

# Captures the value of a charset parameter, e.g. in a Content-Type header
# value. The value may be a bare token (charset=utf-8) or quoted
# (charset="utf-8"); parameter names are case-insensitive. The capture
# stops at the first quote, semicolon or whitespace so that a following
# parameter is never swallowed.
CHARSET_PAT = re.compile(r"""charset=["']?([^"';\s]+)""", re.IGNORECASE)

# Captures the last segment of a path (everything after the final '/')
PATH_END_PAT = re.compile('([^/]*)$')

# Captures the params (the text after ';') at the end of a path segment
TRAILING_PARAMS_PAT = re.compile(';([^/]*)$')
class BaseRequestHandler(RequestHandler.RequestHandler): """ This is the base class for all HTTP requests. It encapsulates the request read stream, the response write stream, the data from the request, and the data for the response. It also provides the functions to parse the and store the components of the request, determine the real path (in the repo) for the requested path, start handling the request including catching any exceptions, write out a response, and log the request and response.
Requires in its constructor a HttpConnection instance and a ServerConfig instance.
Instance variables: - connection: a HttpConnection instance - server: the ServerConfig instance passed in the constructor -- used for logging, document root, authentication, etc. - rfile: request read stream - wfile: response write stream - request: the original Request-Line from the request [RFC 2616 sect. 5.1] - method: the Method for this request (like 'GET', 'POST', etc.) [RFC 2616 sect. 5.1.1] - unparsed_uri: the original Request-URI (typically URL-encoded) [RFC 2616 sect. 5.1.2] - unparsed_path: the path portion of the Request-URI (assumes URI consists of an abs_path [RFC 2616 sect. 5.1.2] and, because urllib is buggy, that the path stops at the first ';' or '?') - unparsed_params: because urllib is buggy, the part of path component of the Request-URI after the first ';'. This is a param [RFC 2396 sect. 3.3] - unparsed_args: the query component of the Request-URI [RFC 2396 sect. 3.4] - protocol: the protocol string as sent by the client, or 'HTTP/0.9' - version: a float representing the version portion of the protocol - headers_in: dictionary-like object of request headers - path: the URL-decoded form of the unparsed_uri_path - filename: the real path to the requested resource in the repo - args: dictionary of unparsed_uri_args as returned by cgi.parse_qs() - auth_type: authorization type from Authorization header of request - user: username decoded from Authorization header of request - password: password decoded from Authorization header of request - body: request body as a string (bytes) - headers_out: dictionary-like object of response headers """
# The Python system version, truncated to its first argument (the version) sys_version = 'Python/' + sys.version.split()[0]
def __init__(self, connection):
    """
    Initialize the per-request state to its defaults.

    connection is handed to the RequestHandler base class unchanged.
    """
    RequestHandler.RequestHandler.__init__(self, connection)

    # Nothing is known about the request yet; these are filled in while
    # the Request-Line and the credentials are processed.
    for unset in ('filename', 'path', 'request', 'method',
                  'unparsed_uri', 'unparsed_path', 'unparsed_params',
                  'unparsed_args', 'protocol', 'form_encoding',
                  'auth_type', 'user', 'password'):
        setattr(self, unset, None)

    # Assume a successful HTTP/0.9 exchange until we are told otherwise
    self.status = Status.HTTP_OK
    self.version = 0.9
    self.headers_only = False

    # Header collections, and the buffer the response body is written to
    self.headers_in = HeaderDict.HeaderDict()
    self.headers_out = HeaderDict.HeaderDict()
    self.wfile = cStringIO.StringIO()
    return
def unhandledException(self, function):
    """
    Log the traceback of the exception currently being handled.

    function is the name of the method that failed; it only appears in
    the log message. The formatted traceback is returned so the caller
    can include it in an error response.
    """
    buf = cStringIO.StringIO()
    traceback.print_exc(file=buf)
    trace_text = buf.getvalue()

    client = self.connection.remote_ip
    message = "[client %s] in %s(): %s" % (client, function, trace_text)
    self.server.errorLog.error(message)
    return trace_text
def handle(self):
    """
    Process one request from start to finish.

    The request is read and interpreted by processRequest(), then the
    response is written by processResponse(). Failures in either phase
    are logged and flagged via self.aborted instead of being propagated.
    The time spent in each phase is written to the debug log.
    """
    # Phase 1: read and interpret the request
    t_begin = time.time()
    try:
        self.processRequest()
    except:
        # Catch-all on purpose: whatever went wrong, answer with a 500
        # that carries the traceback
        self.send_error(Status.HTTP_INTERNAL_SERVER_ERROR,
                        error=self.unhandledException('processRequest'),
                        admin=self.server.admin)
        self.aborted = True
    t_request_done = time.time()

    log = self.server.errorLog
    log.debug('Time to read & interpret request: %lfs'
              % (t_request_done - t_begin))

    # Phase 2: generate and send the response
    t_response_begin = time.time()
    try:
        self.processResponse()
    except (IOError, socket.error):
        # Windows and BeOS use socket.error for socket.makefile() objects
        log.info('Client apparently stopped connection before '
                 'response completed')
        self.aborted = True
    except:
        self.unhandledException('processResponse')
        self.aborted = True
    t_end = time.time()

    log.debug('Time to generate & send response: %lfs'
              % (t_end - t_response_begin))
    log.debug('Total time to handle request: %lfs' % (t_end - t_begin))
    return
# -- main processing ---------------------------------------------
def processRequest(self):
    """
    Handle a single HTTP request.

    Runs the request pipeline: parse the request, map the URI to a repo
    path, check credentials, read the body, collect the arguments and
    finally dispatch to the do_<METHOD> handler. Every step that fails
    has already prepared an error response, so this method just stops.
    """
    if not self.readRequest():
        # Bad request, response already created
        return

    # using print forces debug output through the Controller's logger,
    # whereas self.server.errorLog is the HTTP server's logger
    print("HTTP request from %s for %s %s" % (self.connection.remote_ip,
                                              self.method,
                                              self.unparsed_uri))

    # Convert URI to repo path string
    try:
        self.generateRepoPath()
    except ValueError:
        # Bypass searching in the repo if the path is obviously bad
        problem = sys.exc_info()[1]
        self.server.errorLog.debug(str(problem))
        self.send_error(Status.HTTP_NOT_FOUND, uri=self.path)
        return

    # Get any authentication information
    if not self.authChecker():
        # Authentication required, response already created
        return

    # Get the body; in HTTP/1.1, any request method can have a body
    if not self.getRequestBody():
        # error retrieving content
        return

    # Get the arguments for this request; both query string and body
    self.getRequestArgs()

    # A HEAD request is identical to GET without the response body, so
    # it is served by the GET handler
    handler_name = 'do_' + self.method
    if self.headers_only:
        handler_name = 'do_GET'

    # Should we capture standard errors like not found, and invalid session?
    handler = getattr(self, handler_name, self._invalid_method)
    handler()
    return
def _invalid_method(self):
    """
    Fallback used by processRequest() when there is no do_<METHOD>
    handler for the request method: log the problem and prepare a
    "not implemented" error response.
    """
    complaint = "Invalid method in request: %s" % self.request
    self.server.errorLog.error(complaint)
    self.send_error(Status.HTTP_NOT_IMPLEMENTED,
                    method=self.method,
                    uri=self.unparsed_uri,
                    error=complaint)
    return
def processResponse(self):
    """
    Log the transaction, then write the response to the connection.

    For anything newer than HTTP/0.9 the status line and headers are
    sent first; Date, Server and Content-Length are always generated
    here and override whatever a method handler put in headers_out.
    The buffered body (self.wfile) follows unless this is a HEAD request.
    """
    self.logTransaction()

    send = self.connection.wfile.write
    if self.version != 0.9:
        reason = Status.Responses[self.status][0]
        status_line = "%s %d %s" % (self.protocol, self.status, reason)
        request_summary = '%s %s' % (self.method, self.unparsed_uri)
        self.server.errorLog.info("Response: %s (req: %s)"
                                  % (status_line, request_summary))
        send("%s\r\n" % status_line)

        # These should always be first, hence writing directly
        date_value = self.rfcDateTimeString()
        server_value = '%s %s' % (SERVER_VERSION, self.sys_version)
        send('Date: %s\r\n' % date_value)
        send('Server: %s\r\n' % server_value)

        # Avoid duplication
        for generated in ('Date', 'Server'):
            if generated in self.headers_out:
                del self.headers_out[generated]

        # if HTTP/1.0, supplement Cache-Control: no-cache
        # with Pragma: no-cache, as per RFC 2616 sec. 14.32
        if (self.version == 1.0 and
            self.headers_out.get('Cache-Control') == 'no-cache'):
            self.headers_out['Pragma'] = 'no-cache'

        if 'Content-Length' in self.headers_out:
            # our value is authoritative
            self.server.errorLog.info('Replacing Content-Length header '
                                      'set by method handler')
            del self.headers_out['Content-Length']

        body_size = self.wfile.tell()
        send('Content-Length: %d\r\n' % body_size)

        debug = self.server.errorLog.debug
        debug('Response headers:')
        debug(' Date: %s' % date_value)
        debug(' Server: %s' % server_value)
        debug(' Content-Length: %d' % body_size)

        # Remaining headers; a name may carry several values
        for name in self.headers_out:
            for value in self.headers_out[name]:
                send('%s: %s\r\n' % (name, value))
                debug(' %s: %s' % (name, value))

        # mark end of headers
        send('\r\n')

    if not self.headers_only:
        send(self.wfile.getvalue())
    return
# -- request parsing ---------------------------------------------
def readRequest(self):
    """
    Read and validate the Request-Line and the request headers.

    Returns True if the request is acceptable so far. Returns False if
    it is not; on every such path visible here an error response has
    been prepared via send_error(), unless readRequestLine() itself
    failed to read from the connection.

    NOTE(review): the indentation of this method was lost in the copy
    this was reconstructed from. Only the getMimeHeaders() call is
    placed inside the "version >= 1.0" branch here; the statements that
    follow behave the same for HTTP/0.9 either way, apart from one debug
    log line -- confirm against the original file.
    """
    if not self.readRequestLine():
        return False

    self.server.errorLog.debug(
        "Interpreted request as a %s request for %s" % (self.method, self.unparsed_uri)
        )
    # HTTP/0.9 requests consist of the Request-Line only
    if self.version >= 1.0:
        if not self.getMimeHeaders():
            return False

    lines = str(self.headers_in).strip().split('\r\n')
    self.server.errorLog.debug(
        "Request headers:%s" % (lines and '\n' + '\n '.join(lines) or ' (none)')
        )

    # The will be case-sensitive and Camel-Cased
    headers = self.headers_in.terse()

    # update what we think the virtual host is based on the headers
    # we've now read.
    if headers.get('Host') and not self.connection.updateVirtualHost(self, headers.get('Host')):
        error = 'malformed Host header'
        self.send_error(Status.HTTP_BAD_REQUEST, error=error)
        self.server.errorLog.error('request failed: %s' % error)
        return False

    if (not self.hostname and self.version >= 1.1) or \
       (self.version == 1.1 and 'Host' not in self.headers_in):
        # Client sent us an HTTP/1.1 or later request without telling us the
        # hostname, either with a full URL or a Host: header. We therefore
        # need to (as per the 1.1 spec) send an error. As a special case,
        # HTTP/1.1 mentions twice (S9, S14.23) that a request MUST contain
        # a Host: header, and the server MUST respond with 400 if it doesn't.
        error = "HTTP/1.1 request without hostname (see RFC 2616 section 14.23)"
        self.send_error(Status.HTTP_BAD_REQUEST, error=error)
        self.server.errorLog.error('request failed: %s' % error)
        return False

    if 'Expect' in headers:
        # The Expect header field was added to HTTP/1.1 after RFC 2068
        # as a means to signal when a 100 response is desired and,
        # unfortunately, to signal a poor man's mandatory extension that
        # the server must understand or return 417 Expectation Failed.
        expect = headers['Expect']
        if expect.lower() == '100-continue':
            # Only recorded here; nothing in this method sends the
            # 100 (Continue) response itself
            self.expecting = True
        else:
            self.server.errorLog.error('client sent an unrecognized '
                                       'expectation value of Expect: '
                                       '%s' % expect)
            self.send_error(Status.HTTP_EXPECTATION_FAILED, expect=expect)
            self.discardRequestBody()
            return False

    return True
def readRequestLine(self):
    """
    Parse the Request-Line of an HTTP request coming in via the
    connection's rfile stream. The Request-Line is the first line of
    the request. (It should look like "GET /foo HTTP/1.1").

    On success, self.request, self.method, self.unparsed_uri,
    self.protocol, self.headers_only and self.version are set, the URI
    is parsed with parseUri(), and true is returned.

    If the request line is in some way malformed -- including a
    protocol version that is not of the form <digits>.<digits> -- an
    error response is prepared with send_error() and false is returned.
    If the line cannot be read at all, false is returned without a
    response being prepared.

    Progress and errors are logged to server.errorLog.
    """
    try:
        line = self.connection.rfile.readline()
    except:
        # Best effort: whatever the stream raised, there is no request
        # to answer
        self.server.errorLog.error('request failed: '
                                   'unable to read request line')
        return False

    if line.endswith('\r\n'):
        line = line[:-2]
    elif line.endswith('\n'):
        line = line[:-1]

    self.server.errorLog.debug("read request: %s" % line)
    self.request = line

    error = None
    words = line.split()
    if len(words) == 3:
        # A HTTP 1.0 or later request
        self.method, self.unparsed_uri, self.protocol = words
        self.headers_only = (self.method == 'HEAD')
        if not self.protocol.startswith('HTTP/'):
            error = "Bad request version (%s)" % self.protocol
    elif len(words) == 2:
        # The HTTP 0.9 protocol only supports GETs
        self.method, self.unparsed_uri = words
        self.protocol = 'HTTP/0.9'
        if self.method != 'GET':
            error = "Bad HTTP/0.9 request method (%s)" % self.method
    else:
        error = "Bad request syntax (%s)" % line

    version = None
    if error is None:
        # Validate the version before converting it: float() would raise
        # ValueError on junk such as "HTTP/x" (turning a client error
        # into a 500), and would accept non-versions such as "1e1"
        number = self.protocol[5:]
        if re.match(r'\d+\.\d+$', number):
            version = float(number)
        else:
            error = "Bad request version (%s)" % self.protocol

    if error is not None:
        self.send_error(Status.HTTP_BAD_REQUEST, error=error)
        self.server.errorLog.error('request failed: %s' % error)
        return False

    self.parseUri(self.unparsed_uri)
    self.version = version
    return True
def parseUri(self, uri):
    """
    Parse the Request-URI, storing its components (path, query, etc.)
    in the instance: self.port, self.hostname (only for absolute http
    or ftss URIs that name a host), self.unparsed_params,
    self.unparsed_path, self.path, self.unparsed_args and self.args.
    """
    # Ordinarily, there will not be a scheme or hostinfo, because
    # when requesting a resource from a non-proxy server, HTTP/1.1
    # clients are required to use a relative URI and the Host
    # header. However, all HTTP/1.1 servers must accept an absolute
    # URI, and we want to accept it anyway because this class could
    # very well be the basis of a proxy server.
    scheme, authority, path, query, fragment = Uri.SplitUriRef(uri)

    # Split the authority component only. This used to be handed the
    # whole URI reference, so the scheme and path were mistaken for the
    # host and port. A URI without an authority has neither.
    if authority is not None:
        userinfo, host, port = Uri.SplitAuthority(authority)
    else:
        userinfo, host, port = None, None, None
    self.port = port
    if host and scheme and scheme.lower() in ('http', FTSS_URI_SCHEME):
        self.hostname = Uri.PercentDecode(host)

    #FIXME: userinfo is currently unused
    #username, password = userinfo.split(':', 2)
    #self.username = Uri.PercentDecode(username)
    #self.password = Uri.PercentDecode(password)

    # using regexps is faster than urlparse.urlparse()
    # FIXME:
    # We are going through contortions here to produce the same
    # params as urlparse would. We should support any # of params
    # appearing on any path segment, not just the last one.)
    path = path or ''
    pathend = PATH_END_PAT.findall(path)[0]
    params = TRAILING_PARAMS_PAT.findall(pathend)
    if params:
        self.unparsed_params = params[-1]
    else:
        self.unparsed_params = ''

    # An empty path means the root
    self.unparsed_path = path or '/'
    self.path = Uri.PercentDecode(path or '/')

    query = query or ''
    self.unparsed_args = query
    self.args = cgi.parse_qs(query, keep_blank_values=True)
    return
def getMimeHeaders(self):
    """
    Read the request headers from the connection into self.headers_in.

    Returns True on success. If the headers cannot be parsed, an error
    response is prepared with send_error() and False is returned.
    """
    # The dictionary interface on the Message class does not deal well
    # with multiple headers, but the class does store the raw headers
    # in a list. We let mimetools do the dirty work of reading the
    # headers, then we pull them out of its list in order to populate
    # our HeaderDict object
    m = mimetools.Message(self.connection.rfile, 0)
    if m.status:
        error = "Error reading the headers: %s" % m.status
        self.send_error(Status.HTTP_BAD_REQUEST, error=error)
        self.server.errorLog.error('request failed: %s' % error)
        return False

    # The raw list holds one entry per physical line, so a folded header
    # shows up as a header line followed by continuation lines that
    # start with whitespace. Unfold those first; a continuation line
    # must not be mistaken for a header of its own.
    unfolded = []
    for raw in m.headers:
        if unfolded and raw[:1] in (' ', '\t'):
            # RFC 2616 sect. 4.2: the fold may be replaced by a single SP
            previous = unfolded[-1]
            previous[1] = ('%s %s' % (previous[1], raw.strip())).strip()
            continue
        i = raw.find(':')
        if i < 0:
            # Not a "name: value" line; nothing sensible to store
            continue
        unfolded.append([raw[:i], raw[(i+1):].strip()])

    for name, value in unfolded:
        self.headers_in[name] = value
    return True
# -- request processing ------------------------------------------
def generateRepoPath(self):
    """
    Attempts to convert the request URI to a repo path string and
    ftss URI, and stores those values in self.filename and
    self.ftss_uri.

    Raises FtServerHttpException (BAD_REQUEST_PATH) if the request path
    does not start with '/'.
    NOTE(review): this docstring used to promise a ValueError, and
    processRequest() only catches ValueError around this call; whether
    FtServerHttpException derives from ValueError is not visible here
    -- confirm.
    """
    if self.unparsed_path[:1] != '/':
        raise FtServerHttpException(Error.BAD_REQUEST_PATH,
                                    path=self.unparsed_path)

    # Resolve the path against the root of the ftss scheme, then drop
    # the leading "<scheme>://" again to get the normalized path
    base_uri = '%s:///' % FTSS_URI_SCHEME
    resolved = Uri.Absolutize(self.unparsed_path, base_uri)
    path = resolved[(len(FTSS_URI_SCHEME) + 3):]

    # Decode segment by segment
    decoded_segments = []
    for segment in path.split('/'):
        decoded_segments.append(Uri.PercentDecode(segment))
    self.filename = self.server.documentRoot + '/'.join(decoded_segments)

    self.server.errorLog.debug("Requested repo path interpreted as %s"
                               % self.filename)
    self.ftss_uri = RepoPathToUri(path)
    return
def authChecker(self, mandatory=False):
    """
    Extract the credentials from the Authorization request header.

    Credentials are only looked at when the server configuration has a
    true mandatoryAuth or authName attribute, or when mandatory is
    true; a true mandatoryAuth forces mandatory on. On success,
    self.auth_type and self.user are set, and self.password is set to
    the SHA-1 hex digest of the supplied password.

    Returns True if processing may continue: credentials were decoded,
    or none were supplied and they are not mandatory, or the server
    does not use HTTP authentication. Returns False, with an
    "unauthorized" error response prepared, if mandatory credentials
    are missing or the header cannot be decoded.
    """
    # binascii.Error is what base64 raises on malformed input
    import binascii

    auth_name = getattr(self.server, 'mandatoryAuth', None)
    if auth_name:
        mandatory = True
    else:
        auth_name = getattr(self.server, 'authName', None)

    if not (auth_name or mandatory):
        # auth not used by this server config
        return True

    try:
        auth = self.headers_in.terse()['Authorization']
    except KeyError:
        # Authorization not in request headers
        if mandatory:
            self.server.errorLog.error("Server config requires HTTP Authorization; no credentials in request")
            self.send_error(Status.HTTP_UNAUTHORIZED)
            return False
        return True

    try:
        auth_type, encoded = auth.split()
        decoded = base64.decodestring(encoded)
        # Split on the first colon only: the user id cannot contain
        # one, but the password may (RFC 2617 sect. 2)
        user, passwd = decoded.split(':', 1)
    except (ValueError, binascii.Error):
        # Authorization header present in request, but not decodable
        # The specs don't say what kind of response to send; 401 sounds reasonable
        self.server.errorLog.error("Authorization credentials in request could not be decoded")
        self.send_error(Status.HTTP_UNAUTHORIZED)
        return False

    # Store everything together, so a failed decode leaves no partial state
    self.auth_type = auth_type
    self.user = user
    self.password = sha.new(passwd).hexdigest()
    return True
def getRequestBody(self):
    """
    Read the request body into self.body.

    The number of bytes to read is taken from the Content-Length
    request header; without that header the body is the empty string.

    Returns True on success. If Content-Length is not a non-negative
    integer, an error response is prepared with send_error() and False
    is returned.
    """
    declared = self.headers_in.terse().get('Content-Length', 0)
    try:
        length = int(declared)
    except (TypeError, ValueError):
        length = -1

    if length < 0:
        # A negative length must never reach rfile.read(): read(-n)
        # means "read until EOF", which blocks until the client hangs up
        error = "Invalid Content-Length: %s" % declared
        self.server.errorLog.error(error)
        self.send_error(Status.HTTP_BAD_REQUEST, error=error)
        return False

    if length:
        self.body = self.connection.rfile.read(length) or ''
    else:
        self.body = ''

    if self.body:
        indented_body = '\n '.join(str(self.body).split('\n'))
        self.server.errorLog.debug('Request body:\n %s' % indented_body)
    return True
def getRequestArgs(self):
    """
    Complete self.args with the arguments POSTed in the request body.

    self.args already holds the query string arguments. For a POST
    request the body is parsed as form data
    (application/x-www-form-urlencoded or multipart/*), the names and
    values are decoded to unicode using self.form_encoding, and the
    result is merged into self.args with joinQueryArgs(). The final
    set of arguments is written to the debug log.
    """
    if self.method == 'POST':
        # Mimic a CGI environment
        # (not all fields, just those needed for FieldStorage)
        environ = {'REQUEST_METHOD': self.method,
                   'CONTENT_LENGTH': len(self.body),
                   }
        if 'Content-Type' in self.headers_in:
            #e.g. 'Content-Type: application/x-www-form-urlencoded; charset="iso-8859-1"'
            content_type = self.headers_in.terse()['Content-Type']
            environ['CONTENT_TYPE'] = content_type
            # search(), not match(): the charset parameter follows the
            # media type, so a pattern anchored at the start of the
            # value can never find it
            m = CHARSET_PAT.search(content_type)
            if m:
                # Cut at the first quote, in case the pattern's group
                # ran on into a later quoted parameter
                charset = re.split('[\'"]', m.group(1))[0].strip()
                if charset:
                    self.form_encoding = charset

        self.get_form_encoding(self.body)
        form = cgi.FieldStorage(cStringIO.StringIO(self.body),
                                environ=environ,
                                keep_blank_values=True)

        # Create a dictionary that matches how cgi.parse_qs() returns the
        # name/value pairs (each value is a list, possibly of length one)
        post_args = {}
        if form.list:
            for name in form.keys():
                value = form.getvalue(name) # could be string or list of strings
                name = unicode(name, self.form_encoding)
                if isinstance(value, list):
                    value = [unicode(v, self.form_encoding) for v in value]
                else:
                    value = [unicode(value, self.form_encoding)]
                post_args[name] = value
        self.args = self.joinQueryArgs(self.args, post_args)

    debug = self.server.errorLog.debug
    if self.args:
        debug("Request arguments:")
        keys = self.args.keys()
        keys.sort()
        for key in keys:
            for value in self.args[key]:
                debug(" %s = %s" % (key, repr(value)))
    else:
        debug("No request arguments found")
    return
def send_error(self, code, **keywords):
    """
    Prepare and log an error reply.

    code is the HTTP status code. The keyword arguments are the values
    substituted into the explanation template that Status.Responses
    holds for that code; string values are HTML-escaped first.

    This replaces the response status, headers and any body buffered
    so far with an HTML document explaining the error, and logs the
    error. Nothing is sent to the client here; processResponse() does
    that.
    """
    # This is done here so that other modules don't need to worry
    # about authentication
    if (code == Status.HTTP_FORBIDDEN and
        (hasattr(self.server, 'authName') or
         hasattr(self.server, 'mandatoryAuth'))):
        code = Status.HTTP_UNAUTHORIZED

    message, error = Status.Responses[code]
    reqinfo = '?'
    if self.method and self.unparsed_uri:
        reqinfo = '%s %s' % (self.method, self.unparsed_uri)
    self.server.errorLog.error("HTTP error response: [%d] %s (req: %s)"
                               % (code, message, reqinfo))

    # The values end up inside an HTML document and can contain text
    # taken straight from the request, so markup-significant characters
    # must be escaped
    substitutions = {}
    for keyword, value in keywords.items():
        if isinstance(value, StringTypes):
            value = cgi.escape(value)
        substitutions[keyword] = value
    body = error % substitutions

    content = Status.ERROR_DOCUMENT % {'code': code,
                                       'title': message,
                                       'body': body,
                                       'version': SERVER_VERSION,
                                       'admin': self.server.admin,
                                       'name': self.server.hostname,
                                       'port': self.server.port,
                                       }
    self.status = code

    # Reset the headers for the new error document
    self.headers_out.clear()
    self.headers_out['Content-Type'] = 'text/html'
    if code == Status.HTTP_UNAUTHORIZED:
        # The realm is the authName, or else the mandatoryAuth name.
        # (The fallback lookup used to be performed but its result was
        # thrown away.)
        realm = getattr(self.server, "authName", None)
        if not realm:
            realm = getattr(self.server, "mandatoryAuth", None)
        self.headers_out['WWW-Authenticate'] = 'Basic realm="%s"' % realm

    # Replace any existing content with the error document
    self.wfile.reset()
    self.wfile.truncate()
    self.wfile.write(content)
    return
def logTransaction(self):
    """
    Append an entry for this request to the access log.

    Does nothing unless self.server.accessLog is set. That value is
    unpacked as (filename, lock, format, fields): format is a
    %-template filled from a dictionary of request/response values, and
    fields maps a one-letter source key ('e' for the process
    environment, 'i' for request headers, 'o' for response headers) to
    the names to look up; they are made available to the template as
    e0, e1, ..., i0, ... and so on.
    """
    if not self.server.accessLog:
        return

    # A '-' indicates unknown / not given
    args = {'bytes-sent': self.wfile.tell() or '-',
            'filename': self.filename or '-',
            'remote-ip': self.connection.remote_ip,
            'remote-host': self.getRemoteHostname(),
            'remote-user': '-',  # user who created remote socket
            'pid': os.getpid(),
            'server-port': self.server.port,
            'request-line': self.request or '-',
            'status': self.status,
            'time': self.localDateTimeString(),
            'url': self.path or '-',
            'username': self.user or '-',
            'virtual-host': self.server.hostname or '-',
            }
    key_to_dict = {'e': os.environ,
                   'i': self.headers_in.terse(),
                   'o': self.headers_out.terse(),
                   }

    filename, lock, line_format, fields = self.server.accessLog
    for key, names in fields.items():
        pos = 0
        for name in names:
            args['%s%d' % (key, pos)] = key_to_dict[key].get(name, '-')
            pos = pos + 1

    # By locking, the log should stay pretty readable
    lock.acquire()
    try:
        fd = open(filename, 'a')
        try:
            fd.write(line_format % args)
        finally:
            # Close the file even if formatting or writing fails
            fd.close()
    finally:
        lock.release()
    return
# -- helper methods ----------------------------------------------
# Name tables for building an RFC 1123 date:
#   weekday, DD month YYYY HH:MM:SS GMT
# These are intentionally NOT localized!
# weekdayname is indexed by the weekday number of a time tuple
# (Monday is 0).
weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()

# monthname is indexed by month number; the empty first entry makes
# January index 1.
monthname = [''] + 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
def rfcDateTimeString(self):
    """
    Return the request time formatted for a message header.
    """
    # This is used when generating the Date response header.
    # Per RFC 2616 sec. 14.18, the Date header should reflect
    # when the response was generated, but can in fact
    # represent any time during the response message creation.
    #
    # If we ever use something other than self.request_time,
    # be sure to make the appropriate change to Basic.py,
    # where a check is made to ensure that the Last-Modified
    # header value does not exceed the Date header value.
    utc = time.gmtime(self.request_time)
    day_name = self.weekdayname[utc.tm_wday]
    month_name = self.monthname[utc.tm_mon]
    return "%3s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        day_name, utc.tm_mday, month_name, utc.tm_year,
        utc.tm_hour, utc.tm_min, utc.tm_sec)
def localDateTimeString(self):
    """
    Return the request time formatted for logging, in the form
    [DD/Mon/YYYY:HH:MM:SS +hhmm], where +hhmm is the offset of local
    time from UTC.
    """
    # This time string stays local. It is OK to use the
    # localized names and time.
    local = time.localtime(self.request_time)

    # Seconds west of UTC that were in effect at the request time. The
    # DST offset applies only if DST was actually in effect then, not
    # merely because the zone has DST rules.
    if time.daylight and local.tm_isdst > 0:
        seconds_west = time.altzone
    else:
        seconds_west = time.timezone

    # Zones west of UTC are behind it, hence the negative sign
    if seconds_west > 0:
        sign = '-'
    else:
        sign = '+'
    hours, minutes = divmod(abs(seconds_west) // 60, 60)

    # %H:%M:%S rather than %T, which not every platform supports
    stamp = time.strftime('[%d/%b/%Y:%H:%M:%S', local)
    return '%s %s%02d%02d]' % (stamp, sign, hours, minutes)
def getRemoteHostname(self):
    """
    Return the client address, formatted for logging.

    The host name already recorded on the connection is used if there
    is one. Otherwise the client IP is looked up with gethostbyaddr()
    and the lower-cased result is remembered on the connection. If no
    name can be found, the IP address is returned.
    """
    conn = self.connection
    if conn.remote_host:
        return conn.remote_host

    try:
        resolved = socket.gethostbyaddr(conn.remote_ip)[0]
    except socket.error:
        # Lookup failed; fall back to the bare address
        return conn.remote_ip

    conn.remote_host = resolved.lower()
    return conn.remote_host or conn.remote_ip
# Helper interface for merging GET/POST argument dicts
def joinQueryArgs(self, args1, args2):
    """
    Join two query argument dicts.

    Both arguments are dictionaries in the form produced by
    cgi.parse_qs(), mapping a name to a list of values. The result is
    a new dictionary holding all names; where a name occurs in both,
    the values from args2 follow those from args1. Neither input, nor
    any of its value lists, is modified.
    """
    # Copy every list, we don't want to change the existing arg dicts
    merged = dict([(key, values[:]) for key, values in args1.items()])
    for key, values in args2.items():
        try:
            combined = merged[key]
        except KeyError:
            merged[key] = values[:]
        else:
            combined.extend(values)
    return merged
def getRepository(self, sendError=True):
    """
    Return a repository object for the user of this request.

    Without a user name, or with the anonymous user name, an anonymous
    login is performed. Otherwise the login uses self.user and
    self.password.

    If that login is rejected as invalid and sendError is true, None
    is returned instead: after preparing an "unauthorized" error
    response when the server has an authName or mandatoryAuth
    attribute, or else after just setting the status to "forbidden".
    Any other login failure is re-raised.
    """
    server = self.server
    if not self.user or self.user == ANONYMOUS_USER_NAME:
        # Anonymous login or no authentication required
        server.errorLog.debug("Repository access obtained for anonymous user")
        return GetRepository(None, None, server.errorLog, server.properties)

    try:
        repo = GetRepository(self.user, self.password,
                             server.errorLog, server.properties)
        server.errorLog.debug("Repository access obtained for user %s" % self.user)
    except FtServerServerException:
        failure = sys.exc_info()[1]
        if not (sendError and failure.errorCode == ServerError.INVALID_LOGIN):
            raise
        if hasattr(server, 'authName') or hasattr(server, 'mandatoryAuth'):
            # Ask for authentication
            server.errorLog.error("Repository access denied for user %s; will request HTTP auth" % self.user)
            self.send_error(Status.HTTP_UNAUTHORIZED)
        else:
            # Generate an HTTP_FORBIDDEN response
            server.errorLog.error("Repository access denied for user %s; will send Forbidden response" % self.user)
            self.status = Status.HTTP_FORBIDDEN
        return None
    return repo
# Automatically determines what is available for any class
def do_OPTIONS(self):
    """
    List the capabilities of this handler.

    The Allow response header is set to the sorted, comma-separated
    names of all methods for which this handler has a callable
    do_<METHOD> attribute, whether defined on its own class or
    inherited. HEAD is included whenever GET is, because
    processRequest() serves HEAD requests through do_GET.
    """
    # dir() also reports inherited attributes. Looking only at the
    # class's own __dict__ missed every handler defined on a base class.
    allowed = []
    for name in dir(self):
        if name[:3] == 'do_' and callable(getattr(self, name, None)):
            allowed.append(name[3:])
    if 'GET' in allowed and 'HEAD' not in allowed:
        allowed.append('HEAD')
    allowed.sort()

    self.status = Status.HTTP_OK
    self.headers_out['Allow'] = ','.join(allowed)
    self.headers_out['Content-Type'] = 'text/plain'
|