Handle redirects

Handle redirect HTTP responses, in particular if a Bugzilla server is redirecting from http to https. We try to detect "Bugzilla URL base is over here" when we ask for show_bug.cgi and remember that for future requests to the same BugServer to avoid too many redirections. Switch from caching connection on the BugServer to a global connection cache, and rewrite the BugServer cache so to deal with the possibility of redirections.
author: Owen W. Taylor <otaylor@fishsoup.net> 2009-09-09 17:14:55 -0400
committer: Owen W. Taylor <otaylor@fishsoup.net> 2009-09-09 17:18:34 -0400
commit: 2680c3c68dca2b6614b1a75ef5e946aa71a6c3dd (patch)
tree: ab449655f4e63949af5a7ddb6ee1b9a82daba3c2
parent: 06187ce308dce7b56016e09aa5396e6f1bf60a90 (diff)
2 files changed, 89 insertions, 25 deletions
diff --git a/TODO b/TODO
index 5a2f609..e81f280 100644
--- a/TODO
+++ b/TODO
@@ -31,10 +31,6 @@ Automatically guess obvious obsoletes
  matches the Subject of the attachment, start the Obsoletes line
  uncommented?
 
-Handle redirects:
-
-  Should follow redirects, both to different URLs and http => https
-
 Better display of errors
 
   The switch to XML-RPC greatly improves errors when filing a new bug,
diff --git a/git-bz b/git-bz
index a3a42a4..6e385b2 100755
--- a/git-bz
+++ b/git-bz
@@ -76,7 +76,7 @@ import tempfile
 import time
 import traceback
 import xmlrpclib
-import urllib
+import urlparse
 from xml.etree.cElementTree import ElementTree
 
 # Globals
@@ -697,6 +697,20 @@ class BugPatch(object):
 class NoXmlRpcError(Exception):
     pass
 
+connections = {}
+
+def get_connection(host, https):
+    identifier = (host, https)
+    if not identifier in connections:
+        if https:
+            connection = httplib.HTTPSConnection(host, 443)
+        else:
+            connection = httplib.HTTPConnection(host, 80)
+
+        connections[identifier] = connection
+
+    return connections[identifier]
+
 class BugServer(object):
     def __init__(self, host, https):
         self.host = host
@@ -704,18 +718,8 @@ class BugServer(object):
 
         self.cookies = get_bugzilla_cookies(host)
 
-        self._connection = None
         self._xmlrpc_proxy = None
 
-    def get_connection(self):
-        if self._connection is None:
-            if self.https:
-                self._connection = httplib.HTTPSConnection(self.host, 443)
-            else:
-                self._connection = httplib.HTTPConnection(self.host, 80)
-
-        return self._connection
-
     def get_cookie_string(self):
         return ("Bugzilla_login=%s; Bugzilla_logincookie=%s" %
                 (self.cookies['Bugzilla_login'], self.cookies['Bugzilla_logincookie']))
@@ -725,8 +729,71 @@ class BugServer(object):
         headers['Cookie'] = self.get_cookie_string()
         headers['User-Agent'] = "git-bz"
 
-        self.get_connection().request(method, url, data, headers)
-        return self.get_connection().getresponse()
+        seen_urls = []
+        connection = get_connection(self.host, self.https)
+        while True:
+            connection.request(method, url, data, headers)
+            response = connection.getresponse()
+            seen_urls.append(url)
+
+            # Redirect status codes:
+            #
+            # 301 (Moved Permanently): Redo with the new URL,
+            #   save the new location.
+            # 303 (See Other): Redo with the method changed to GET/HEAD.
+            # 307 (Temporary Redirect): Redo with the new URL, don't
+            #   save the new location.
+            #
+            # [ For 301/307, you are supposed to ask the user if the
+            #   method isn't GET/HEAD, but we're automating anyways... ]
+            #
+            # 302 (Found): The confusing one, and the one that
+            # Bugzilla uses, both to redirect to http to https and to
+            # redirect attachment.cgi&action=view to a different base URL
+            # for security. Specified like 307, traditionally treated as 301.
+            #
+            # See http://en.wikipedia.org/wiki/HTTP_302
+
+            if response.status in (301, 302, 303, 307):
+                new_url = response.getheader("location")
+                if new_url is None:
+                    die("Redirect received without a location to redirect to")
+                if new_url in seen_urls or len(seen_urls) >= 10:
+                    die("Circular redirect or too many redirects")
+
+                old_split = urlparse.urlsplit(url)
+                new_split = urlparse.urlsplit(new_url)
+
+                new_https = new_split.scheme == 'https'
+
+                if new_split.hostname != self.host or new_https != self.https:
+                    connection = get_connection(new_split.hostname, new_https != self.https)
+
+                    # This is a bit of a hack to avoid keeping on redirecting for every
+                    # request. If the server redirected show_bug.cgi we assume it's
+                    # really saying "hey, the bugzilla instance is really over here".
+                    #
+                    # We can't do this for old.split.path == new_split.path because of
+                    # attachment.cgi, though we alternatively could just exclude
+                    # attachment.cgi here.
+                    if (response.status in (301, 302) and
+                        method == 'GET' and
+                        old_split.path == '/show_bug.cgi' and new_split.path == '/show_bug.cgi'):
+
+                        self.host = new_split.hostname
+                        self.https = new_https
+
+                # We can't treat 302 like 303 because of the use of 302 for http
+                # to https, though the hack above will hopefully get us on https
+                # before we try to POST.
+                if response.status == 303:
+                    if method not in ('GET', 'HEAD'):
+                        method = 'GET'
+
+                # Get the relative component of the new URL
+                url = urlparse.urlunsplit((None, None, new_split.path, new_split.query, new_split.fragment))
+            else:
+                return response
 
     def send_post(self, url, fields, files=None):
         content_type, body = encode_multipart_formdata(fields, files)
@@ -784,17 +851,18 @@ class BugTransport(xmlrpclib.Transport):
         xmlrpclib.Transport.send_request(self, connection, *args)
         connection.putheader("Cookie", self.server.get_cookie_string())
 
-servers = []
+servers = {}
 
+# Note that if we detect that we are redirecting, we may rewrite the
+# host/https of the server to avoid doing too many redirections, and
+# so the host,https we connect to may be different than what we use
+# to look up the server.
 def get_bug_server(host, https):
-    for server in servers:
-        if server.host == host and server.https == https:
-            return server
-
-    server = BugServer(host, https)
-    servers.append(server)
+    identifier = (host, https)
+    if not identifier in servers:
+        servers[identifier] = BugServer(host, https)
 
-    return server
+    return servers[identifier]
 
 # Unfortunately, Bugzilla doesn't set a useful status code for
 # form posts.  Because it's very confusing to claim we succeeded
author	Owen W. Taylor <otaylor@fishsoup.net>	2009-09-09 17:14:55 -0400
committer	Owen W. Taylor <otaylor@fishsoup.net>	2009-09-09 17:18:34 -0400
commit	2680c3c68dca2b6614b1a75ef5e946aa71a6c3dd (patch)
tree	ab449655f4e63949af5a7ddb6ee1b9a82daba3c2
parent	06187ce308dce7b56016e09aa5396e6f1bf60a90 (diff)