#!/usr/bin/perl -w
# $Id: blossom.pl,v 1.19 2006-03-20 23:19:33 goodell Exp $
$license = <<EOF
Copyright (c) 2005 Geoffrey Goodell.

This program is free software; you can redistribute it and/or modify it under
the terms of version 2 of the GNU General Public License as published by the
Free Software Foundation.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA  02111-1307, USA.

EOF
;

use strict;
use Socket;

# global configuration parameters
#
# Everything below is file-scoped state shared by the subroutines and the
# main script body.

# Directory in which the generated page is saved (as "$CACHE/$cachefile").
my $CACHE           = "/var/cache/www-data";
# Text file mapping a country code (first token of a line) to a country
# name (rest of the line); lines starting with "#" are skipped when parsed.
my $F_CCODES        = "/afs/eecs.harvard.edu/user/goodell/misc/country-codes.txt";
# Base URLs for the per-country flag images and the bandwidth icons.
my $URL_FLAGS       = "http://afs.eecs.harvard.edu/~goodell/flags";
my $URL_ICONS       = "http://afs.eecs.harvard.edu/~goodell/icons";
# Node status script: fetched through $WGET and also linked from the page.
my $URL_EXIT        = "http://serifos.eecs.harvard.edu/cgi-bin/exit.pl";
# Prefix prepended to rewritten URLs when the "proxy" URI parameter is set.
my $URL_PROXY       = "http://serifos.eecs.harvard.edu/proxy/";
# Links to this script, to its source code and to the project home page.
my $URL_SELF        = "http://serifos.eecs.harvard.edu/cgi-bin/blossom.pl";
my $URL_SOURCE      = "http://afs.eecs.harvard.edu/~goodell/blossom/src/blossom.pl";
my $URL_HOME        = "http://afs.eecs.harvard.edu/~goodell/blossom/";
# Target of the network-name links; called as "$WHOIS_SCRIPT?q=<address>".
my $WHOIS_SCRIPT    = "/cgi-bin/whois.pl";
# Query strings appended to $URL_EXIT when fetching node status: $STATUS for
# the first (Tor) list, $BLOSSOM for the Blossom list.
my $STATUS          = "?ports=80&addr=1&textonly=1";
my $BLOSSOM         = "$STATUS&blossom=lefkada:9031";
# Name of the "Select a Blossom Node" submit button; it doubles as the
# pseudo country code that selects the Blossom network, shown to the user
# as $BLOSSOM_TEXT.
my $BLOSSOM_TAG     = "U0";
my $BLOSSOM_TEXT    = "Blossom";
# Page title.
my $TITLE           = "Blossom User Interface";
# Command used to fetch a URL and write the body to standard output.
my $WGET            = "/usr/bin/wget -O -";
# Bandwidth icons (file names relative to $URL_ICONS) and the attributes
# given to each icon's <img> tag.
my $ICON_V1         = "v1.gif";
my $ICON_V2         = "v2.gif";
my $ICON_V3         = "v3.gif";
my $F_SIZE          = "width=18 height=12";
# Bandwidth thresholds.  Each is multiplied by 1000 before being compared
# with a node's bandwidth, which is displayed as B/s.  V2/V3 select the
# icon; V1 is the minimum for a non-Blossom node to be listed at all.
my $V1_MINBW        = 10;
my $V2_MINBW        = 60;
my $V3_MINBW        = 400;

# country code -> country name, loaded from $F_CCODES
my %ccode           = ();
# country code -> number of acceptable nodes seen in the status listings
my %nodes           = ();
# key -> value pairs taken from the query string of the request URI
my %uri_fields      = ();

# File name (inside $CACHE) for the saved copy of the page; a false value
# disables saving.
my $cachefile       = "blossom.html";
# The complete CGI response (header and HTML), built up incrementally.
my $response        = "";
# Query-string portion of REQUEST_URI.
my $uri             = "";
# HTTP request method, filled in from the environment later.
my $method          = undef;

use vars qw($license);

# Render a set of routers as HTML table rows.
#
# Takes a reference to a hash that maps a router name to the markup for
# that router's cells.  Returns a string containing one "<tr>" row per
# router, ordered by router name (default string sort); the result is the
# empty string when the hash is empty.
sub addrouters($) {
    my %cells_for = %{ $_[0] };

    return join "",
        map { "<tr>\n    " . $cells_for{$_} . "</tr>\n" }
        sort keys %cells_for;
}

# Pull one field out of a list of WHOIS output lines.
#
#   $tag      - matched case-insensitively at the start of each line; it is
#               interpolated into the pattern as-is, so regex metacharacters
#               in it are live
#   $default  - string used when no line matches (or the match is false)
#   $arrayref - reference to the list of lines; the list is not modified
#
# The last matching line wins.  Whichever string is chosen -- the matching
# line or the default -- has its first run of non-blank characters and the
# whitespace after it removed before being returned, which drops the
# "tag:" label from a matched line.
sub parsewhois($$$) {
    my ($tag, $default, $arrayref) = @_;
    my @lines = @$arrayref;
    my $last_hit;

    foreach my $line (@lines) {
        $last_hit = $line if $line =~ /^$tag/i;
    }
    chomp $last_hit if $last_hit;

    my $value = $last_hit || $default;
    $value =~ s/\S+\s+//;
    return $value;
}

# Rewrite a user-supplied URL so that its host name carries a routing
# suffix.
#
#   $modurl - the URL as received from the form (may still contain the
#             percent-escapes %3A and %2F); may be undefined or empty
#   $suffix - text appended, after a ".", to the host name
#
# Returns the rewritten URL, e.g. "example.com/a" with suffix "x.exit"
# becomes "http://example.com.x.exit/a".  When the "proxy" URI parameter is
# set the result is additionally prefixed with $URL_PROXY.  An undefined or
# empty URL yields the empty string.
#
# Only "http://" URLs are understood: anything not starting with that
# scheme simply gets "http://" put in front of it.
sub modify_url($$) {
    my ($modurl, $suffix) = @_;

    # Nothing to rewrite.  Testing definedness first avoids "uninitialized
    # value" warnings when the form was submitted without a URL.
    return "" unless defined $modurl and $modurl ne "";

    # Undo the form-encoding of ":" and "/".  Hex digits in a
    # percent-escape may be written in either case.
    $modurl =~ s/%3A/:/gi;
    $modurl =~ s/%2F/\//gi;

    # Make sure there is a scheme and at least one "/" after the host.
    $modurl = "http://$modurl" if $modurl !~ /^http:\/\//;
    $modurl = "$modurl/" if $modurl !~ /^http:\/\/.*\//;

    # Insert the suffix between the host name and the port/path.
    if($modurl =~ /^(http:\/\/[A-Za-z0-9-.]+)([\/:].*)$/) {
        $modurl = "$1.$suffix$2";
    }

    $modurl = "$URL_PROXY$modurl" if $uri_fields{"proxy"};
    return $modurl;
}

# Escape the characters that are special in HTML text and in double-quoted
# attribute values.  An undefined argument yields the empty string.
sub _html_escape {
    my ($text) = @_;

    return "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Build the HTML table row describing one router.
#
#   $router     - router name, used as the link text
#   $address    - IP address; passed to the WHOIS script only if it looks
#                 like a dotted quad, otherwise replaced by ""
#   $bandwidth  - bandwidth in B/s; selects the icon and is shown in kB/s
#   $netname    - network name, used as the text of the WHOIS link
#   $modurl     - URL the router name links to
#   $unverified - when true the row is rendered with the "unverified" CSS
#                 class and the router name is prefixed with "*"
#
# Returns the "<tr>...</tr>" markup as a string.  The router name, network
# name and URL originate outside this script (node status listing, form
# input), so they are HTML-escaped before being placed in the page.
sub report_router($$$$$$) {
    my ($router, $address, $bandwidth, $netname, $modurl, $unverified) = @_;

    my $link_attr   = "";
    my $cell_attr   = "";
    my $marker      = "";
    my $icon        = "";
    my $r           = "";

    if($unverified) {
        $link_attr = " class=\"unverified\"";
        $cell_attr = " class=\"unverified\"";
        $marker = "*";
    } else {
        $link_attr = " class=\"standard\"";
        $cell_attr = "";
        $marker = "";
    }

    $router  = _html_escape($router);
    $netname = _html_escape($netname);
    $modurl  = _html_escape($modurl);

    my $modlink     = "<a$link_attr href=\"$modurl\">$marker$router</a>";

    # security feature: only a plain dotted quad may reach the WHOIS link
    $address = ""
        unless defined $address
           and $address =~ /\A[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\z/;

    $bandwidth = 0 unless defined $bandwidth;
    my $bw_title = _html_escape($bandwidth);

    if($bandwidth >= $V3_MINBW*1000) {
        $icon = "<img $F_SIZE src=\"$URL_ICONS/$ICON_V3\" alt=\"v3\">";
    } elsif($bandwidth >= $V2_MINBW*1000) {
        $icon = "<img $F_SIZE src=\"$URL_ICONS/$ICON_V2\" alt=\"v2\">";
    } else {
        $icon = "<img $F_SIZE src=\"$URL_ICONS/$ICON_V1\" alt=\"v1\">";
    }
    # the exact figure is available as a tooltip on the icon
    $icon = "<acronym title=\"$bw_title B/s\">$icon</acronym>";

    # right-align the kB/s figure; the padding must survive HTML rendering
    $bandwidth = sprintf "%4s kB/s", int($bandwidth/1000);
    $bandwidth =~ s/ /&nbsp;/g;

    $r = <<EOF

<tr>
    <td$cell_attr><tt>$icon&nbsp;$modlink</tt></td>
    <td$cell_attr><tt>$bandwidth</tt></td>
    <td$cell_attr><tt>[<a$link_attr href=\"$WHOIS_SCRIPT?q=$address\">$netname</a>]</tt></td>
</tr>

EOF
;
    return $r;
}

# parse the URI parameters
#
# Everything after the last "?" of REQUEST_URI is taken as a list of
# "&"-separated key=value pairs and stored, without any decoding, in
# %uri_fields.  A key given without "=" is stored with an undefined value.

if($ENV{"REQUEST_URI"} && $ENV{"REQUEST_URI"} =~ /\?/) {
    ($uri = $ENV{"REQUEST_URI"}) =~ s/.*\?//g;
}

my @prompts = split /&/, $uri;

foreach my $pair (@prompts) {
    # Split on the first "=" only, so that a value may itself contain "=".
    my ($k, $v) = split /=/, $pair, 2;

    # Empty segments ("a=1&&b=2") and segments starting with "=" carry no
    # usable key.
    next unless defined $k and $k ne "";

    $uri_fields{$k} = $v;
}

# parse file containing country codes
#
# Each line that does not start with "#" and has the form "<code> <name>"
# adds one entry to %ccode.  A missing or unreadable file is not fatal: the
# page is still produced, just without country names.
#
# Note: the failure check must use "or".  With "||" the test applies to
# the (always true) file name string rather than to the result of open().

if(open(my $ccfh, "<", $F_CCODES)) {
    while(my $line = <$ccfh>) {
        next if $line =~ /^#/;
        $ccode{$1} = $2 if $line =~ /^(\S+)\s+(.+)$/;
    }
    close $ccfh;
} else {
    warn "country code mapping not available: $!\n";
}

# Start the response: the CGI header line, the blank line that ends the
# headers, and the opening of the HTML document up to the page heading.

$response = <<"EOF";
Content-type: text/html

<!doctype html public "-//W3C//DTD HTML 4.01//EN"
    "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>$TITLE</title>
<meta name="Author" content="Geoffrey Goodell">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="Content-Style-Type" content="text/css">
<link rel="stylesheet" type="text/css" href="http://serifos.eecs.harvard.edu/style.css">
</head>

<body>

<h2>Blossom User Interface</h2>

EOF

# parse HTTP POST data, if available
#
# A POST request is a submission of the form produced by the other branch:
# it carries the requested URL plus the name of the button that was
# pressed (a country flag or the Blossom button).  The reply lists the
# matching exit nodes, each linking to the requested URL rewritten for that
# node.  Any other request is answered with the form itself.

# REQUEST_METHOD is not set when the script runs outside a CGI environment.
$method = defined $ENV{"REQUEST_METHOD"} ? $ENV{"REQUEST_METHOD"} : "";

if($method eq "POST") {
    my $postdata    = "";
    my $length      = 0;

    if(defined $ENV{'CONTENT_LENGTH'} and $ENV{'CONTENT_LENGTH'} =~ /^(\d+)$/) {
        $length = $1;
    }
    read(STDIN, $postdata, $length) if $length > 0;
    $postdata = "" unless defined $postdata;

    my %addr        = ();   # router name -> IP address
    my %bn          = ();   # router name -> 1 if to be shown as a Blossom node
    my %bw          = ();   # router name -> bandwidth
    my %fields      = ();   # form field name -> (still encoded) value
    my %net         = ();   # router name -> network name

    my @urls        = ();   # status listings to fetch

    my $b_inst      = "";
    my $cname       = "";
    my $country     = "";
    my $randomly    = "";

    foreach my $pair (split /&/, $postdata) {
        # Split on the first "=" only; skip segments without a usable key.
        my ($k, $v) = split /=/, $pair, 2;
        next unless defined $k and $k ne "";
        $fields{$k} = $v;

        # The Blossom tag is itself two characters long and would match the
        # country pattern below, so it has to be tested first.
        if($k eq $BLOSSOM_TAG) {
            $country = $BLOSSOM_TAG;
            $cname = $BLOSSOM_TEXT;
        } elsif($k =~ /^([A-Z0-9][A-Z0-9])(?:\.[xy])?$/) {
            # Flags are image buttons, which are submitted as the click
            # coordinates "name.x" and "name.y"; some browsers also send
            # the plain name.  Accept all three spellings.
            $country = $1;
            $cname = defined $ccode{$country} ? $ccode{$country} : $country;
        }
    }

    $response .= "<p>You have requested: <b>$cname</b></p>\n";

    if($uri_fields{"proxy"}) {
        $response .= "<p>You have requested: <b>Implicit Proxy</b></p>\n";
    }

    my $requrl = defined $fields{"url"} ? $fields{"url"} : "";
    my $modurl = modify_url($requrl, "q.c-$country.blossom");

    # The URL comes straight from the request; escape it before it is
    # placed inside an attribute value.
    my $modurl_html = defined $modurl ? $modurl : "";
    $modurl_html =~ s/&/&amp;/g;
    $modurl_html =~ s/</&lt;/g;
    $modurl_html =~ s/>/&gt;/g;
    $modurl_html =~ s/"/&quot;/g;

    if($country eq $BLOSSOM_TAG) {
        push @urls, "$URL_EXIT$BLOSSOM";
    } else {
        push @urls, "$URL_EXIT$STATUS";
        push @urls, "$URL_EXIT$BLOSSOM";
        $b_inst      = "  Blossom nodes are shown in red and marked with an asterisk (*).";
        $randomly    = " or <a href=\"$modurl_html\">instruct Blossom to randomly select a node instead</a>";
    }

    $response .= <<EOF

<p>Please select the Tor node from which you wish to connect to the remote
website$randomly.  Click the network name to view the corresponding WHOIS
record.$b_inst</p>

EOF
;

    foreach my $url (@urls) {
        # Nodes from the Blossom listing are flagged, unless the user asked
        # for Blossom nodes only (then every node is one and no marking is
        # needed).
        my $blossom_node = ($url eq "$URL_EXIT$BLOSSOM"
                and $country ne $BLOSSOM_TAG) ? 1 : 0;

        my $status;
        unless(open($status, "-|", "$WGET \"$url\"")) {
            warn "node status not available: $!\n";
            next;
        }
        while(my $line = <$status>) {
            if($line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)$/) {
                my ($cc, $router, $bandwidth, $address, $netname, $port)
                        = ($1, $2, $3, $4, $5, $6);
                # Keep nodes whose last column is not "-".  In Blossom mode
                # that is all that is required; otherwise the node must be
                # in the requested country and, unless it comes from the
                # Blossom listing, must not be marked with a leading "*"
                # and must meet the minimum bandwidth.
                if($port ne "-"
                        and ($country eq $BLOSSOM_TAG
                        or ($country eq $cc
                        and ($blossom_node
                        or ($router !~ /^\*/
                        and $bandwidth >= $V1_MINBW*1000))))) {
                    $router =~ s/^\*//;
                    $bw{$router}    = $bandwidth;
                    $addr{$router}  = $address;
                    $net{$router}   = $netname;
                    $bn{$router}    = $blossom_node;
                }
            }
        }
        # For a pipe, close() reports whether the command succeeded.
        close($status) or warn "node status not available from $url\n";
    }

    if($requrl eq "") {
        $response .= "<p><span class=\"heading\">ERROR:</span> URL not specified.</p>\n";
    }

    $response .= "<table>\n\n";

    foreach my $router (sort keys %bw) {
        my $exiturl = modify_url($requrl, "$router.exit");
        $response .= report_router($router, $addr{$router}, $bw{$router}, $net{$router}, $exiturl, $bn{$router});
    }
    $response .= <<EOF

</table>

<p><a href="">return to main page</a></p>

EOF
;
} else {
    # POST data is unavailable

    $response .= <<EOF

<p>Blossom allows users to access a wide range of Internet resources from the
perspective of participating <a href="http://tor.eff.org/">Tor</a> exit nodes,
including nodes on the Tor overlay network as well as nodes on the
independently-constructed Blossom overlay network, which supports arbitrary
underlying network topologies.  For detailed information about the current
state of the Tor network, consult the <a href="$URL_EXIT">Tor Exit Node
Status</a> page.</p>
EOF
;

    if($uri_fields{"proxy"}) {
        $response .= <<EOF

<p><span class="heading">Step-1</span> Consider <a href="$URL_SELF">manually
configuring your own proxy settings</a> rather than using our proxy
implicitly.</p>
EOF
;
    } else {
        $response .= <<EOF

<p><span class="heading">Step-1</span> Configure your browser to use the HTTP
proxy running on <b>cassandra.eecs.harvard.edu:8119</b>.  If you do not know
how to do this, then please either <a
href="http://www.idmask.com/en/help_changing_proxy_fox.html">determine how to
change your browser proxy settings</a> or <a
href="$URL_SELF?proxy=1">implicitly use our proxy instead</a>.</p>
EOF
;
    }

    $response .= <<EOF

<p><span class="heading">Step-2</span> Provide a URL to access via Blossom.</p>

<form action="" method="post">

<p><b>URL:</b>&nbsp;<input type="text" name="url" size="64" maxlength="256"></p>

<p><span class="heading">Step-3a</span> To view the web resource using a
Blossom proxy, please choose the following option:</p>

<p><input type=\"submit\" name=\"$BLOSSOM_TAG\" value=\"Select a Blossom Node\"></p>

<p><b>- OR -</b></p>

<p><span class="heading">Step-3b</span> To select a node by country from either
the Tor network or the Blossom network, click the corresponding flag:</p>

<table>

EOF
;

    # determine countries with acceptable exit nodes

    foreach my $url ("$URL_EXIT$STATUS", "$URL_EXIT$BLOSSOM") {
        # Assign unconditionally: the value of a "my" declared under a
        # statement modifier is undefined when the condition is false.
        my $blossom_list = ($url eq "$URL_EXIT$BLOSSOM") ? 1 : 0;

        my $status;
        unless(open($status, "-|", "$WGET \"$url\"")) {
            warn "node status not available: $!\n";
            next;
        }
        while(my $line = <$status>) {
            if($line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+\S+\s+\S+\s+(\S+)$/) {
                my ($country, $router, $bandwidth, $port) = ($1, $2, $3, $4);
                # Nodes from the first listing must not be marked with a
                # leading "*" and must meet the minimum bandwidth; nodes
                # from the Blossom listing are exempt from both tests.
                unless(($port eq "-")
                        or ((not $blossom_list) and $router =~ /^\*/)
                        or ((not $blossom_list) and $bandwidth < $V1_MINBW*1000)) {
                    $nodes{$country}++;
                }
            }
        }
        # For a pipe, close() reports whether the command succeeded.
        close($status) or warn "node status not available from $url\n";
    }

    # one row per country: flag button, country name, number of nodes

    foreach my $country (sort keys %nodes) {
        my $nn = $nodes{$country};
        if($nn > 0) {
            # fall back to the bare code for countries missing from %ccode
            my $cc = defined $ccode{$country} ? $ccode{$country} : $country;
            $cc =~ s/ /&nbsp;/;
            (my $cy = $country) =~ y/A-Z/a-z/;

            $response .= "<tr>\n";
            $response .= "    <td><input type=\"image\" name=\"$country\" value=\"1\" src=\"$URL_FLAGS/$cy\" alt=\"$country\">&nbsp;$cc&nbsp;&nbsp;</td>\n";
            $response .= "    <td class=\"number\">$nn</td>\n";
            $response .= "</tr>\n";
        }
    }

    $response .= <<EOF

</table>
</form>

<p>
    [<a href="$URL_SOURCE">source&nbsp;code</a>]
    [<a href="$URL_HOME">Blossom&nbsp;home&nbsp;page</a>]
</p>

EOF
;
}

# Close the page: a rule, the two validator badges, and the end of the
# document.
$response .= <<"EOF";

<hr>

<p><a href="http://validator.w3.org/check?uri=http%3A%2F%2Fserifos.eecs.harvard.edu%2Fcgi-bin%2Fblossom.pl"><img src="http://validator.w3.org/images/vh401.gif" alt="valid HTML 4.01"/></a></p>

<p><a href="http://jigsaw.w3.org/css-validator/validator?uri=http%3A%2F%2Fserifos.eecs.harvard.edu%2Fcgi-bin%2Fexit.pl"><img src="http://jigsaw.w3.org/css-validator/images/vcss" alt="valid CSS"/></a></p>

</body></html>

EOF

# cache the result
#
# A copy of the complete response is saved as "$CACHE/$cachefile".  Saving
# is best-effort: if the file cannot be written a warning is logged and
# the page is still delivered.
#
# Note: the failure check must not be written as open(...) || ..., because
# "||" would then test the (always true) file name string rather than the
# result of open().

if($cachefile) {
    if(open(my $cache, ">", "$CACHE/$cachefile")) {
        print $cache $response;
        # buffered write errors only surface when the handle is closed
        close($cache) or warn "could not finish writing $CACHE/$cachefile: $!\n";
    } else {
        warn "could not cache result in $CACHE/$cachefile: $!\n";
    }
}

# output the result

print $response;
exit 0;

