tImproved parsing of HTML headers and handling of redirects - vaccinewars - be … | |
git clone git://src.adamsgaard.dk/vaccinewars | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 6b906e129d9e1c2182b34824d49bae6466d2f3c7 | |
parent e47416a9a52ecccf7a41c4dd0a96a7dfff5b10a6 | |
Author: Ben Webb <[email protected]> | |
Date: Tue, 2 Oct 2001 21:44:55 +0000 | |
Improved parsing of HTML headers and handling of redirects | |
Diffstat: | |
M src/network.c | 103 ++++++++++++++++++++++++-----… | |
M src/network.h | 4 +++- | |
2 files changed, 82 insertions(+), 25 deletions(-) | |
--- | |
diff --git a/src/network.c b/src/network.c | |
t@@ -442,13 +442,8 @@ static void SendHttpRequest(HttpConnection *conn) { | |
text=g_string_new(""); | |
- if (conn->Redirect) { | |
- g_string_sprintf(text,"%s %s HTTP/1.0",conn->Method,conn->Redirect); | |
- g_free(conn->Redirect); conn->Redirect=NULL; | |
- } else { | |
- g_string_sprintf(text,"%s http://%s:%u%s HTTP/1.0", | |
- conn->Method,conn->HostName,conn->Port,conn->Query); | |
- } | |
+ g_string_sprintf(text,"%s http://%s:%u%s HTTP/1.0", | |
+ conn->Method,conn->HostName,conn->Port,conn->Query); | |
QueueMessageForSend(&conn->NetBuf,text->str); | |
if (conn->Headers) QueueMessageForSend(&conn->NetBuf,conn->Headers); | |
t@@ -482,8 +477,8 @@ static gboolean StartHttpConnect(HttpConnection *conn) { | |
gboolean OpenHttpConnection(HttpConnection **connpt,gchar *HostName, | |
unsigned Port,gchar *Proxy,unsigned ProxyPort, | |
- gchar *Method,gchar *Query,gchar *Headers, | |
- gchar *Body) { | |
+ gchar *Method,gchar *Query, | |
+ gchar *Headers,gchar *Body) { | |
HttpConnection *conn; | |
g_assert(HostName && Method && Query && connpt); | |
t@@ -515,7 +510,8 @@ void CloseHttpConnection(HttpConnection *conn) { | |
g_free(conn->Query); | |
g_free(conn->Headers); | |
g_free(conn->Body); | |
- g_free(conn->Redirect); | |
+ g_free(conn->RedirHost); | |
+ g_free(conn->RedirQuery); | |
g_free(conn); | |
} | |
t@@ -523,6 +519,65 @@ gboolean IsHttpError(HttpConnection *conn) { | |
return IsError(&conn->NetBuf.error); | |
} | |
+static gboolean ParseHtmlLocation(gchar *uri,gchar **host,unsigned *port, | |
+ gchar **query) { | |
+ gchar *uris,*colon,*slash; | |
+ | |
+ uris = g_strstrip(uri); | |
+ if (!uris || strlen(uris)<7 || | |
+ g_strncasecmp(uris,"http://",7)!=0) return FALSE; | |
+ | |
+ uris+=7; /* skip to hostname */ | |
+ | |
+/* ':' denotes the port to connect to */ | |
+ colon = strchr(uris,':'); | |
+ if (colon && colon==uris) return FALSE; /* No hostname */ | |
+ | |
+/* '/' denotes the start of the path of the HTML file */ | |
+ slash = strchr(uris,'/'); | |
+ if (slash && slash==uris) return FALSE; /* No hostname */ | |
+ | |
+ if (colon && (!slash || slash>colon)) { | |
+ if (slash) *slash='\0'; | |
+ *port = atoi(colon+1); | |
+ if (slash) *slash='\\'; | |
+ if (*port==0) return FALSE; /* Invalid port */ | |
+ *host = g_strndup(uris,colon-uris); | |
+ } else { | |
+ *port=80; | |
+ if (slash) *host=g_strndup(uris,slash-uris); | |
+ else *host=g_strdup(uris); | |
+ } | |
+ | |
+ if (slash) { | |
+ *query = g_strdup(slash); | |
+ } else { | |
+ *query = g_strdup("/"); | |
+ } | |
+ return TRUE; | |
+} | |
+ | |
+static void ParseHtmlHeader(gchar *line,HttpConnection *conn) { | |
+ gchar **split,*host,*query; | |
+ unsigned port; | |
+ | |
+ split=g_strsplit(line," ",1); | |
+ if (split[0] && split[1]) { | |
+ if (g_strcasecmp(split[0],"Location:")==0 && | |
+ (conn->StatusCode==302 || conn->StatusCode==301)) { | |
+ if (ParseHtmlLocation(split[1],&host,&port,&query)) { | |
+ g_print("Redirect to %s:%u%s\n",host,port,query); | |
+ g_free(conn->RedirHost); g_free(conn->RedirQuery); | |
+ conn->RedirHost=host; conn->RedirQuery=query; | |
+ conn->RedirPort=port; | |
+ } else { | |
+ g_print("FIXME: Bad redirect\n"); | |
+ } | |
+ } | |
+ } | |
+ g_strfreev(split); | |
+} | |
+ | |
gchar *ReadHttpResponse(HttpConnection *conn) { | |
gchar *msg,**split; | |
t@@ -539,18 +594,7 @@ gchar *ReadHttpResponse(HttpConnection *conn) { | |
break; | |
case HS_READHEADERS: | |
if (msg[0]==0) conn->Status=HS_READSEPARATOR; | |
- else { | |
- split=g_strsplit(msg," ",1); | |
- if (split[0] && split[1]) { | |
- if (conn->StatusCode==302 && strcmp(split[0],"Location:")==0) { | |
- g_print("Redirect to %s\n",split[1]); | |
- g_free(conn->Redirect); | |
- conn->Redirect = g_strdup(split[1]); | |
- } | |
-/* g_print("Header %s (value %s) read\n",split[0],split[1]);*/ | |
- } | |
- g_strfreev(split); | |
- } | |
+ else ParseHtmlHeader(msg,conn); | |
break; | |
case HS_READSEPARATOR: | |
conn->Status=HS_READBODY; | |
t@@ -564,11 +608,22 @@ gchar *ReadHttpResponse(HttpConnection *conn) { | |
gboolean HandleHttpCompletion(HttpConnection *conn) { | |
NBCallBack CallBack; | |
gpointer CallBackData; | |
- if (conn->Redirect) { | |
- g_print("Following redirect\n"); | |
+ if (conn->RedirHost) { | |
+ g_print("Following redirect to %s\n",conn->RedirHost); | |
CallBack=conn->NetBuf.CallBack; | |
CallBackData=conn->NetBuf.CallBackData; | |
ShutdownNetworkBuffer(&conn->NetBuf); | |
+ g_free(conn->HostName); g_free(conn->Query); | |
+ conn->HostName = conn->RedirHost; | |
+ conn->Query = conn->RedirQuery; | |
+ conn->Port = conn->RedirPort; | |
+ conn->RedirHost = conn->RedirQuery = NULL; | |
+ | |
+ if (conn->Tries>=5) { | |
+ g_print("FIXME: Number of tries exceeded\n"); | |
+ return TRUE; | |
+ } | |
+ | |
if (StartHttpConnect(conn)) { | |
SendHttpRequest(conn); | |
SetNetworkBufferCallBack(&conn->NetBuf,CallBack,CallBackData); | |
diff --git a/src/network.h b/src/network.h | |
t@@ -95,7 +95,9 @@ typedef struct _HttpConnection { | |
gchar *Query; /* e.g. the path of the desired webpage */ | |
gchar *Headers; /* if non-NULL, e.g. Content-Type */ | |
gchar *Body; /* if non-NULL, data to send */ | |
- gchar *Redirect; /* if non-NULL, a URL to redirect to */ | |
+ gchar *RedirHost; /* if non-NULL, a hostname to redirect to */ | |
+ gchar *RedirQuery; /* if non-NULL, the path to redirect to */ | |
+ unsigned RedirPort; /* The port on the host to redirect to */ | |
NetworkBuffer NetBuf; /* The actual network connection itself */ | |
gint Tries; /* Number of requests actually sent so far */ | |
gint StatusCode; /* 0=no status yet, otherwise an HTTP status code */ |