diff options
author | Robert James Kaes <rjkaes@users.sourceforge.net> | 2004-01-26 19:11:52 +0000 |
---|---|---|
committer | Robert James Kaes <rjkaes@users.sourceforge.net> | 2004-01-26 19:11:52 +0000 |
commit | 0a8e4e4d8d72e02ba398ec5e340f181cad5af10e (patch) | |
tree | dff88561b34898156e2d57aa9d0e108b133b97f5 | |
parent | f2d846d0571af4bed05d35abc4152da9adad4ab8 (diff) | |
download | tinyproxy-0a8e4e4d8d72e02ba398ec5e340f181cad5af10e.tar.gz tinyproxy-0a8e4e4d8d72e02ba398ec5e340f181cad5af10e.zip |
Added reverse proxy support from Kim Holviala. His comments regarding
this addition follow:
The patch implements a simple reverse proxy (with one funky extra
feature). It has all the regular features: mapping remote servers to local
namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP
redirect rewriting (ReverseBaseURL).
The funky feature is this: You map Google to /google/ and the Google front
page opens up fine. Type in stuff and click "Google Search" and you'll get
an error from tinyproxy. The reason for this is that Google's form submits
to "/search", which unfortunately bypasses our /google/ mapping (if they had
submitted to "search" without the slash, it would have worked OK). Turn on
ReverseMagic and it starts working.
ReverseMagic "hijacks" one cookie which it sends to the client browser.
This cookie contains the current reverse proxy path mapping (in the above
case /google/) so that even if the site uses absolute links the reverse
proxy still knows where to map the request.
And yes, it works. No, I've never seen this done before - I couldn't find
_any_ working OSS reverse proxies, and the commercial ones I've seen try
to parse the page and fix all links (in the above case changing "/search"
to "/google/search"). The problem with modifying the HTML is that it might
not be parsable (very common) or it might be encoded so that the proxy
can't read it (mod_gzip or the like).
Hope you like the patch. One caveat - I haven't coded in C in about
three years, so my code might be a bit messy. There shouldn't be any
security problems though, but you never know. I did all of this from
memory without reading any RFCs, but I tested everything with Moz, Konq,
IE6, Links and Lynx and they all worked fine.
Diffstat (limited to '')
-rw-r--r-- | configure.ac | 13 | ||||
-rw-r--r-- | doc/tinyproxy.conf | 39 | ||||
-rw-r--r-- | src/conns.c | 11 | ||||
-rw-r--r-- | src/conns.h | 9 | ||||
-rw-r--r-- | src/grammar.y | 43 | ||||
-rw-r--r-- | src/reqs.c | 178 | ||||
-rw-r--r-- | src/reqs.h | 3 | ||||
-rw-r--r-- | src/scanner.l | 36 | ||||
-rw-r--r-- | src/tinyproxy.c | 11 | ||||
-rw-r--r-- | src/tinyproxy.h | 18 |
10 files changed, 330 insertions, 31 deletions
diff --git a/configure.ac b/configure.ac index 425b477..6482af0 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -dnl $Id: configure.ac,v 2.64 2003-10-17 16:10:59 rjkaes Exp $ +dnl $Id: configure.ac,v 2.65 2004-01-26 19:11:52 rjkaes Exp $ dnl Devlopers, please strive to achieve this order: dnl @@ -127,6 +127,17 @@ if test x"$upstream_enabled" = x"yes"; then AC_DEFINE(UPSTREAM_SUPPORT) fi +dnl Include support for reverse proxy? +AH_TEMPLATE([REVERSE_SUPPORT], + [Include support for reverse proxy.]) +AC_ARG_ENABLE(reverse, + [AC_HELP_STRING([--enable-reverse], + [Enable support for reverse proxy (default is NO)])], + reverse_enabled=$enableval, reverse_enabled=no) +if test x"$reverse_enabled" = x"yes"; then + AC_DEFINE(REVERSE_SUPPORT) +fi + dnl Include the transparent proxy support AH_TEMPLATE([TRANSPARENT_PROXY], [Include support for using tinyproxy as a transparent proxy.]) diff --git a/doc/tinyproxy.conf b/doc/tinyproxy.conf index 981cd26..d8af086 100644 --- a/doc/tinyproxy.conf +++ b/doc/tinyproxy.conf @@ -255,3 +255,42 @@ ViaProxyName "tinyproxy" # ConnectPort 443 ConnectPort 563 + +# +# Configure one or more ReversePath directives to enable reverse proxy +# support. With reverse proxying it's possible to make a number of +# sites appear as if they were part of a single site. +# +# If you uncomment the following two directives and run tinyproxy +# on your own computer at port 8888, you can access Google using +# http://localhost:8888/google/ and Wired News using +# http://localhost:8888/wired/news/. Neither will actually work +# until you uncomment ReverseMagic as they use absolute linking. +# +#ReversePath "/google/" "http://www.google.com/" +#ReversePath "/wired/" "http://www.wired.com/" + +# +# When using tinyproxy as a reverse proxy, it is STRONGLY recommended +# that the normal proxy is turned off by uncommenting the next directive. +# +#ReverseOnly Yes + +# +# Use a cookie to track reverse proxy mappings. 
If you need to reverse +# proxy sites which have absolute links you must uncomment this. +# +#ReverseMagic Yes + +# +# The URL that's used to access this reverse proxy. The URL is used to +# rewrite HTTP redirects so that they won't escape the proxy. If you +# have a chain of reverse proxies, you'll need to put the outermost +# URL here (the address which the end user types into his/her browser). +# +# If not set then no rewriting occurs. +# +#ReverseBaseURL "http://localhost:8888/" + + + diff --git a/src/conns.c b/src/conns.c index 6a320ac..4420721 100644 --- a/src/conns.c +++ b/src/conns.c @@ -1,4 +1,4 @@ -/* $Id: conns.c,v 1.19 2003-08-01 00:14:34 rjkaes Exp $ +/* $Id: conns.c,v 1.20 2004-01-26 19:11:51 rjkaes Exp $ * * Create and free the connection structure. One day there could be * other connection related tasks put here, but for now the header @@ -76,6 +76,10 @@ initialize_conn(int client_fd, const char* ipaddr, const char* string_addr) update_stats(STAT_OPEN); +#ifdef REVERSE_SUPPORT + connptr->reversepath = NULL; +#endif + return connptr; error_exit: @@ -123,6 +127,11 @@ destroy_conn(struct conn_s *connptr) if (connptr->client_string_addr) safefree(connptr->client_string_addr); +#ifdef REVERSE_SUPPORT + if (connptr->reversepath) + safefree(connptr->reversepath); +#endif + safefree(connptr); update_stats(STAT_CLOSE); diff --git a/src/conns.h b/src/conns.h index c5ad9a7..5d0422a 100644 --- a/src/conns.h +++ b/src/conns.h @@ -1,4 +1,4 @@ -/* $Id: conns.h,v 1.15 2003-08-01 00:14:34 rjkaes Exp $ +/* $Id: conns.h,v 1.16 2004-01-26 19:11:51 rjkaes Exp $ * * See 'conns.c' for a detailed description. 
* @@ -66,6 +66,13 @@ struct conn_s { unsigned int major; unsigned int minor; } protocol; + +#ifdef REVERSE_SUPPORT + /* + * Place to store the current per-connection reverse proxy path + */ + char* reversepath; +#endif }; /* diff --git a/src/grammar.y b/src/grammar.y index 00596dd..9f3a74c 100644 --- a/src/grammar.y +++ b/src/grammar.y @@ -1,4 +1,4 @@ -/* $Id: grammar.y,v 1.23 2003-06-26 18:17:09 rjkaes Exp $ +/* $Id: grammar.y,v 1.24 2004-01-26 19:11:51 rjkaes Exp $ * * This is the grammar for tinyproxy's configuration file. It needs to be * in sync with scanner.l. If you know more about yacc and lex than I do @@ -50,6 +50,7 @@ int yylex(void); %token KW_FILTER KW_FILTERURLS KW_FILTEREXTENDED KW_FILTER_DEFAULT_DENY %token KW_FILTER_CASESENSITIVE %token KW_UPSTREAM +%token KW_REVERSEPATH KW_REVERSEONLY KW_REVERSEMAGIC KW_REVERSEBASEURL %token KW_CONNECTPORT KW_BIND %token KW_STATHOST %token KW_ALLOW KW_DENY @@ -167,6 +168,46 @@ statement log_message(LOG_WARNING, "X-Tinyproxy header support was not compiled in."); #endif } + | KW_REVERSEPATH string + { +#ifdef REVERSE_SUPPORT + reversepath_add(NULL, $2); +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } + | KW_REVERSEPATH string string + { +#ifdef REVERSE_SUPPORT + reversepath_add($2, $3); +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } + | KW_REVERSEONLY yesno + { +#ifdef REVERSE_SUPPORT + config.reverseonly = $2; +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } + | KW_REVERSEMAGIC yesno + { +#ifdef REVERSE_SUPPORT + config.reversemagic = $2; +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } + | KW_REVERSEBASEURL string + { +#ifdef REVERSE_SUPPORT + config.reversebaseurl = $2; +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } | KW_UPSTREAM unique_address ':' NUMBER { #ifdef UPSTREAM_SUPPORT @@ -1,4 
+1,4 @@ -/* $Id: reqs.c,v 1.108 2003-08-07 16:32:12 rjkaes Exp $ +/* $Id: reqs.c,v 1.109 2004-01-26 19:11:51 rjkaes Exp $ * * This is where all the work in tinyproxy is actually done. Incoming * connections have a new child created for them. The child then @@ -330,7 +330,7 @@ upstream_add(const char *host, int port, const char *domain) if (domain == NULL) { if (!host || host[0] == '\0' || port < 1) { - log_message(LOG_WARNING, "Nonsence upstream rule: invalid host or port"); + log_message(LOG_WARNING, "Nonsense upstream rule: invalid host or port"); goto upstream_cleanup; } @@ -404,7 +404,7 @@ upstream_add(const char *host, int port, const char *domain) return; - upstream_cleanup: +upstream_cleanup: safefree(up->host); safefree(up->domain); safefree(up); @@ -465,6 +465,68 @@ upstream_get(char *host) } #endif +#ifdef REVERSE_SUPPORT +/* + * Add entry to the reversepath list + */ +void +reversepath_add(const char *path, const char *url) +{ + struct reversepath *reverse; + + if (url == NULL) { + log_message(LOG_WARNING, "Illegal reverse proxy rule: missing url"); + return; + } + + if (!strstr(url, "://")) { + log_message(LOG_WARNING, + "Skipping reverse proxy rule: '%s' is not a valid url", url); + return; + } + + if (path && *path != '/') { + log_message(LOG_WARNING, + "Skipping reverse proxy rule: path '%s' doesn't start with a /", path); + return; + } + + if (!(reverse = safemalloc(sizeof (struct reversepath)))) { + log_message(LOG_ERR, "Unable to allocate memory in reversepath_add()"); + return; + } + + if (!path) reverse->path = safestrdup("/"); + else reverse->path = safestrdup(path); + + reverse->url = safestrdup(url); + + reverse->next = config.reversepath_list; + config.reversepath_list = reverse; + + log_message(LOG_INFO, + "Added reverse proxy rule: %s -> %s", reverse->path, reverse->url); +} + +/* + * Check if a request url is in the reversepath list + */ +static struct reversepath * +reversepath_get(char *url) +{ + struct reversepath *reverse = 
config.reversepath_list; + + while (reverse) { + if (strstr(url, reverse->path) == url) + return reverse; + + reverse = reverse->next; + } + + return NULL; +} +#endif + /* * Create a connection for HTTP connections. */ @@ -488,7 +550,7 @@ establish_http_connection(struct conn_s *connptr, struct request_s *request) } /* - * These two defines are for the SSL tunneling. + * These two defines are for the SSL tunnelling. */ #define SSL_CONNECTION_RESPONSE "HTTP/1.0 200 Connection established" #define PROXY_AGENT "Proxy-agent: " PACKAGE "/" VERSION @@ -517,6 +579,13 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders) char *url; struct request_s *request; +#ifdef REVERSE_SUPPORT + char *rewrite_url = NULL; + char *cookie = NULL; + char *cookieval; + struct reversepath *reverse; +#endif + int ret; size_t request_len; @@ -577,6 +646,66 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders) return NULL; } +#ifdef REVERSE_SUPPORT + /* + * Reverse proxy URL rewriting. + */ + if (config.reversepath_list != NULL) { + /* Reverse requests always start with a slash */ + if (*url == '/') { + /* First try locating the reverse mapping by request url */ + reverse = reversepath_get(url); + if (reverse) { + rewrite_url = safemalloc(strlen(url) + + strlen(reverse->url) + 1); + strcpy(rewrite_url, reverse->url); + strcat(rewrite_url, url + strlen(reverse->path)); + } else if (config.reversemagic && + hashmap_entry_by_key(hashofheaders, "cookie", + (void **)&cookie) > 0) { + + /* No match - try the magical tracking cookie next */ + if ((cookieval = strstr(cookie, REVERSE_COOKIE "=")) && + (reverse = reversepath_get(cookieval + + strlen(REVERSE_COOKIE) + 1))) { + + rewrite_url = safemalloc(strlen(url) + + strlen(reverse->url) + 1); + strcpy(rewrite_url, reverse->url); + strcat(rewrite_url, url + 1); + + log_message(LOG_INFO, + "Magical tracking cookie says: %s", + reverse->path); + } + } + } + + /* Forward proxy support off and no reverse path match found */ + if 
(config.reverseonly && !rewrite_url) { + log_message(LOG_ERR, "Bad request"); + indicate_http_error(connptr, 400, "Bad Request", + "detail", "Request has an invalid URL", + "url", url, + NULL); + + safefree(url); + free_request_struct(request); + + return NULL; + } + + log_message(LOG_CONN, "Rewriting URL: %s -> %s", + url, rewrite_url); + + safefree(url); + url = rewrite_url; + + /* Store reverse path so that the magical tracking cookie can be set */ + if (config.reversemagic) connptr->reversepath = safestrdup(reverse->path); + } +#endif + if (strncasecmp(url, "http://", 7) == 0 || (UPSTREAM_CONFIGURED() && strncasecmp(url, "ftp://", 6) == 0)) { char *skipped_type = strstr(url, "//") + 2; @@ -726,7 +855,7 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders) request->host); indicate_http_error(connptr, 403, "Filtered", - "detail", "The request you made has been filted", + "detail", "The request you made has been filtered", "url", url, NULL); @@ -1178,6 +1307,10 @@ process_server_headers(struct conn_s *connptr) int i; int ret; +#ifdef REVERSE_SUPPORT + struct reversepath *reverse = config.reversepath_list; +#endif + /* FIXME: Remember to handle a "simple_req" type */ /* Get the response line from the remote server. 
*/ @@ -1251,6 +1384,41 @@ process_server_headers(struct conn_s *connptr) if (ret < 0) goto ERROR_EXIT; +#ifdef REVERSE_SUPPORT + /* Write tracking cookie for the magical reverse proxy path hack */ + if (config.reversemagic && connptr->reversepath) { + ret = write_message(connptr->client_fd, + "Set-Cookie: " REVERSE_COOKIE "=%s; path=/\r\n", + connptr->reversepath); + if (ret < 0) goto ERROR_EXIT; + } + + /* Rewrite the HTTP redirect if needed */ + if (config.reversebaseurl && + hashmap_entry_by_key(hashofheaders, "location", (void **)&header) > 0) { + + /* Look for a matching entry in the reversepath list */ + while (reverse) { + if (strncasecmp(header, + reverse->url, + (len = strlen(reverse->url))) == 0) break; + reverse = reverse->next; + } + + if (reverse) { + ret = write_message(connptr->client_fd, "Location: %s%s%s\r\n", + config.reversebaseurl, (reverse->path + 1), + (header + len)); + if (ret < 0) goto ERROR_EXIT; + + log_message(LOG_INFO, + "Rewriting HTTP redirect: %s -> %s%s%s", header, + config.reversebaseurl, (reverse->path + 1), (header + len)); + hashmap_remove(hashofheaders, "location"); + } + } +#endif + /* * All right, output all the remaining headers to the client. */ @@ -1,4 +1,4 @@ -/* $Id: reqs.h,v 1.4 2003-05-29 19:43:57 rjkaes Exp $ +/* $Id: reqs.h,v 1.5 2004-01-26 19:11:51 rjkaes Exp $ * * See 'reqs.c' for a detailed description. * @@ -22,5 +22,6 @@ extern void handle_connection(int fd); extern void add_connect_port_allowed(int port); extern void upstream_add(const char *host, int port, const char *domain); +extern void reversepath_add(const char *path, const char *url); #endif diff --git a/src/scanner.l b/src/scanner.l index 54e01a4..8418ca0 100644 --- a/src/scanner.l +++ b/src/scanner.l @@ -1,4 +1,4 @@ -/* $Id: scanner.l,v 1.22 2003-06-26 18:26:10 rjkaes Exp $ +/* $Id: scanner.l,v 1.23 2004-01-26 19:11:51 rjkaes Exp $ * * This builds the scanner for the tinyproxy configuration file. This * file needs to stay in sync with grammar.y. 
If someone knows lex and yacc @@ -24,12 +24,12 @@ struct keyword { char *kw_name; - int kw_token; + int kw_token; }; static struct keyword keywords[] = { - /* statements */ - { "port", KW_PORT }, + /* statements */ + { "port", KW_PORT }, { "logfile", KW_LOGFILE }, { "syslog", KW_SYSLOG }, { "maxclients", KW_MAXCLIENTS }, @@ -44,24 +44,28 @@ static struct keyword keywords[] = { { "group", KW_GROUP }, { "anonymous", KW_ANONYMOUS }, { "filter", KW_FILTER }, - { "filterurls", KW_FILTERURLS }, - { "filterextended", KW_FILTEREXTENDED }, - { "filterdefaultdeny", KW_FILTER_DEFAULT_DENY }, - { "filtercasesensitive", KW_FILTER_CASESENSITIVE }, + { "filterurls", KW_FILTERURLS }, + { "filterextended", KW_FILTEREXTENDED }, + { "filterdefaultdeny", KW_FILTER_DEFAULT_DENY }, + { "filtercasesensitive", KW_FILTER_CASESENSITIVE }, { "xtinyproxy", KW_XTINYPROXY }, - { "upstream", KW_UPSTREAM }, + { "reversepath", KW_REVERSEPATH }, + { "reverseonly", KW_REVERSEONLY }, + { "reversemagic", KW_REVERSEMAGIC }, + { "reversebaseurl", KW_REVERSEBASEURL }, + { "upstream", KW_UPSTREAM }, { "allow", KW_ALLOW }, - { "deny", KW_DENY }, - { "connectport", KW_CONNECTPORT }, - { "bind", KW_BIND }, - { "viaproxyname", KW_VIA_PROXY_NAME }, - { "stathost", KW_STATHOST }, + { "deny", KW_DENY }, + { "connectport", KW_CONNECTPORT }, + { "bind", KW_BIND }, + { "viaproxyname", KW_VIA_PROXY_NAME }, + { "stathost", KW_STATHOST }, { "errorfile", KW_ERRORPAGE }, { "defaulterrorfile", KW_DEFAULT_ERRORPAGE }, { "statfile", KW_STATPAGE }, - /* loglevel and the settings */ - { "loglevel", KW_LOGLEVEL }, + /* loglevel and the settings */ + { "loglevel", KW_LOGLEVEL }, { "critical", KW_LOG_CRITICAL }, { "error", KW_LOG_ERROR }, { "warning", KW_LOG_WARNING }, diff --git a/src/tinyproxy.c b/src/tinyproxy.c index 76ff993..578a10a 100644 --- a/src/tinyproxy.c +++ b/src/tinyproxy.c @@ -1,4 +1,4 @@ -/* $Id: tinyproxy.c,v 1.46 2003-03-17 04:24:19 rjkaes Exp $ +/* $Id: tinyproxy.c,v 1.47 2004-01-26 19:11:51 rjkaes Exp $ * * 
The initialize routine. Basically sets up all the initial stuff (logfile, * listening socket, config options, etc.) and then sits there and loops @@ -130,9 +130,9 @@ Options:\n\ -v Display the version number.\n"); /* Display the modes compiled into tinyproxy */ - printf("\nFeatures Compiled In:\n"); + printf("\nFeatures compiled in:\n"); #ifdef XTINYPROXY_ENABLE - printf(" XTinyproxy Header\n"); + printf(" XTinyproxy header\n"); #endif /* XTINYPROXY */ #ifdef FILTER_ENABLE printf(" Filtering\n"); @@ -141,8 +141,11 @@ Options:\n\ printf(" Debugging code\n"); #endif /* NDEBUG */ #ifdef TRANSPARENT_PROXY - printf(" Transparent Proxy Support\n"); + printf(" Transparent proxy support\n"); #endif /* TRANSPARENT_PROXY */ +#ifdef REVERSE_SUPPORT + printf(" Reverse proxy support\n"); +#endif /* REVERSE_SUPPORT */ } int diff --git a/src/tinyproxy.h b/src/tinyproxy.h index 0c4e461..ad02120 100644 --- a/src/tinyproxy.h +++ b/src/tinyproxy.h @@ -1,4 +1,4 @@ -/* $Id: tinyproxy.h,v 1.42 2003-08-01 00:14:34 rjkaes Exp $ +/* $Id: tinyproxy.h,v 1.43 2004-01-26 19:11:51 rjkaes Exp $ * * See 'tinyproxy.c' for a detailed description. * @@ -36,6 +36,16 @@ struct upstream { }; #endif +#ifdef REVERSE_SUPPORT +struct reversepath { + struct reversepath *next; + char *path; + char *url; +}; + +#define REVERSE_COOKIE "yummy_magical_cookie" +#endif + /* * Hold all the configuration time information. */ @@ -58,6 +68,12 @@ struct config_s { #ifdef XTINYPROXY_ENABLE char *my_domain; #endif +#ifdef REVERSE_SUPPORT + struct reversepath *reversepath_list; + unsigned int reverseonly; /* boolean */ + unsigned int reversemagic; /* boolean */ + char *reversebaseurl; +#endif #ifdef UPSTREAM_SUPPORT struct upstream *upstream_list; #endif /* UPSTREAM_SUPPORT */ |