--- polipo-1.0.4.1.old/http_parse.c +++ polipo-1.0.4.1/http_parse.c @@ -39,7 +39,7 @@ atomVia, atomVary, atomExpect, atomAuthorization, atomSetCookie, atomCookie, atomCookie2, atomXPolipoDate, atomXPolipoAccess, atomXPolipoLocation, - atomXPolipoBodyOffset; + atomXPolipoBodyOffset, atomUserAgent; AtomPtr atomContentType, atomContentEncoding; @@ -109,6 +109,7 @@ A(atomXPolipoAccess, "x-polipo-access"); A(atomXPolipoLocation, "x-polipo-location"); A(atomXPolipoBodyOffset, "x-polipo-body-offset"); + A(atomUserAgent, "user-agent"); #undef A return; @@ -520,6 +521,8 @@ int i; int name_start, name_end, value_start, value_end; + *value_start_return = 0; + if(buf[start] == '\n') { *name_start_return = -1; return start + 1; @@ -767,6 +770,7 @@ AtomListPtr hopToHop = NULL; HTTPRangeRec range = {-1, -1, -1}, content_range = {-1, -1, -1}; int haveCacheControl = 0; + int KeepOrigUserAgent = 0; #define RESIZE_HBUF() \ do { \ @@ -799,7 +803,89 @@ name = internAtomLowerN(buf + name_start, name_end - name_start); - if(name == atomConnection) { +//-------------------------------------------------------------------- + +// The following code is an attempt to bypass a web-server problem. + +// Details: If you try to use a Mozilla-based browser (such as Ice- +// Cat) with "polipo", some web sites will reject connections unless +// you change the reported user agent from "Mozilla/5.0 ..." (etc.) +// to a non-Mozilla value. They don't like to see a non-Mozilla client +// [polipo] connecting with a Mozilla user-agent string. It's viewed +// as a sign that the client may be "bad" (for example, a spammer ad- +// dress harvester). + +// To complicate matters, other web sites allow "polipo" to connect +// with a Mozilla user-agent string, and (in fact) the sites may mal- +// function if a Mozilla user-agent string (or a user-agent string for +// another major browser) *isn't* used. "yahoo.com" is one example. + +// This code checks the "host:" and "user-agent:" fields specified by +// each HTTP request. If the "user-agent" field is present, and it +// starts with "Mozilla", and the "host" field isn't present, or it's +// not one of the hosts that *requires* the "Mozilla" setting, this +// code replaces the string "Mozilla" with "XPolipo". This solves the +// problem for some sites. However, additional changes may be required +// for other sites. + +// Note: The replacement string used must be exactly the same length +// as the original string (i.e., seven characters). + +// This isn't a perfect solution: a. There may be side-effects. b. +// Some web sites may reject connections from Mozilla-based browsers +// operating through "polipo" whether or not the user-agent string is +// changed. + + if (name == atomHost) + { + char *xp = &buf [value_start]; + + if (value_start > 0) + { + char *bp; + +#ifdef NOTDEF + if (!strncmp (xp, "google.com" , 10) || + !strncmp (xp, "yahoo.com" , 9) || + !strncmp (xp, "groups.google.com" , 17) || + !strncmp (xp, "mail.yahoo.com" , 14) || + !strncmp (xp, "www.google.com" , 14) || + !strncmp (xp, "www.yahoo.com" , 13)) + { + KeepOrigUserAgent = 1; + } +#else + for (bp = xp; *bp != '\0'; bp++) + { // Not perfect, but good enough + if ((*bp == 'g') && (bp [1] == 'o') && + !strncmp (bp, "google.com", 10)) + { + KeepOrigUserAgent = 1; + } + + if ((*bp == 'y') && (bp [1] == 'a') && + !strncmp (bp, "yahoo.com", 9)) + { + KeepOrigUserAgent = 1; + } + } +#endif + } + } + + if ((name == atomUserAgent) && + (value_start > 0) && !KeepOrigUserAgent) + { + char *xp = &buf [value_start]; + + if (!strncmp (xp, "Mozilla", 7)) + { + strncpy (xp, "XPolipo", 7); + } + } + + if (name == atomConnection) + { j = getNextTokenInList(buf, value_start, &token_start, &token_end, NULL, NULL, &end);