#! /usr/bin/perl use CGI; use File::Basename; $query = new CGI; #---------------------------------------------------------------- # Set up assorted configuration variables $swish = "/usr/local/bin/swish-e"; # Swish search engine $didHeader = 0; $prefix = "http://goodkitty.gies.com/spanking/"; # Root URL for site #---------------------------------------------------------------- # Process the CGI input $cmd = "$swish -f index.swish -w " . $query->param('text'); if (open(INPUT, "$cmd |")) { &process(INPUT); close INPUT; }; #---------------------------------------------------------------- # process - Munch the output of swish into HTMLized search results # sub process { local(*IN) = @_; while ( ) { chop; last if /^\.$/; # Remember what words we searched for /^search words:\s+(.*)/ && do { $searchWords = $1; }; # If there was an error message, print it and exit /^err:\s+(.*)/ && do { print "ERROR triggered by: $_\n"; print "Search Error\n"; print "

Error:

"; print "$1\n"; print "
\n"; &searchAgain($input{"text"}); last; }; /(\d+)\s+($prefix)?(.*?)\s+\"(.*)\"\s+(\d+)/ && do { if (!$didHeader) { printHeader($searchWords); $didHeader = 1; } $score = $1; $url = "../$3"; $title = $4; $size = $5; @info = &getFileInfo($url); $author = $info[1]; if ($url =~ /$title$/) { $title = $info[0]; } print "$score", "", &htmlize($title), "", "", $author, "", "$size\n"; }; } print "" if ($didHeader); print $query->end_html(); } sub searchAgain($) { local($text) = @_; print "
\n"; print "Search again: "; print ""; print "\n"; print "
\n"; } sub printHeader($) { local($searchWords) = @_; print $query->header(); print $query->start_html(-title=>'Search Results'); print $query->h2('Search Results'); &searchAgain($searchWords); print "
\n"; print "Searched for: ", $query->param('text'), "\n"; print "\n"; print "
ScoreTitle"; print "AuthorSize\n"; print "

\n"; } # # getFileInfo($file) # # Find the title of a usenet post and the author's name # sub getFileInfo($) { local($file) = @_; local ($title, $author, $first, $text) = ("?", "?", 1, 1); $text = 0 if ($file =~ /\.html$/); open FILE, "$file"; while () { chop; last if /<\/pre>/ && !$text; last if /^\s*$/ && $text && !$first; $first = 0; /^(
)?Subject: (.*)/i && do {
	    $title = &prettyTitle($2);
	};

	/^(
)?From: (.*)/i && do {
	    $author = &prettyAuthor($2);
	};
    }
    close(FILE);

    $author = getAuthorName(dirname($file)) || $author;
    
    return ($title, $author);
}

#-----------------------------------------------------------------------------
# getAuthorName
# Return the name of the author whose stories live in the given directory
#
sub getAuthorName($) {
    my ($dir) = @_;

    if (defined $authorName{$dir}) {
	return $authorName{$dir};
    }

    my $name;

    open INDEX, "$dir/index.html";
    while () {
	/Spanking Stories(\s*-\s*)?(.*)<\/title>/i && do {
	    $name = $2;
	    last;
	};
	/<h([1-3])>(.*)'s Stories<\/h\1>/i && do {
            $name = $2;
            last;
        };
	/<h([1-3])>(.*)<\/h\1>/i && do {
            $name = $2;
            last;
        };
    }
    close(INDEX);

    $authorName{$dir} = $name if $name;
    return $name;
}

sub htmlize($) {
    local($_) = @_;

    s/&/&/g;    # Must come first (duh)
    s/</</g;
    s/>/>/g;

    s/_(\w+)_(\w+)/<em>$1<\/em> _$2/g;
    s/_(\w+)_/<em>$1<\/em>/g;

        # Turn email addresses into mailto links
    s/([\w\.\-]+@[\w\.\-]+)/<a href="mailto:$1">$1<\/a>/g;

    return $_;
}

sub prettyTitle($) {
    local($_) = @_;

    if ( /[zZ]+[ \-\:]*(.*)/ ) {
	$_ = $1;
    }
    if ( /(new )??story( by ?\w+)?[ \:\-]*(.*)/i ) {
	$_ = $3;
    }
    if ( /(.*?),?\s*(part|pt(\.)??) \d+/i ) {
	$_ = $1;
    }
    if ( /(.*?)\s*<.*>$/ ) {
	$_ = $1;
    }
    while ( /(.*?)\s*\(.*\)$/ ) {
	$_ = $1;
    }
    if ( /(.*?) by \w+$/ ) {
	$_ = $1;
    }
    if ( /(.*?)\s*\(.*\)$/ ) {
	$_ = $1;
    }
    if ( /(.*?)((,?\s*)|(\s*\())([mf]+\/[mf]+.*)$/i ) {
	$_ = $1;
    }
    if ( /\"(.*)\"/ ) {
	$_ = $1;
    }
    if ( /(.*?)\s*[\-\,]*\s+[Bb]y [A-Z]\w+/ ) {
	$_ = $1;
    }
    if (  /^[^a-z]*$/ ) {
	$_ = capitalize($_);
    }
    return $_;
}

sub prettyAuthor {
    local($_) = @_;

    # Remove links
    $_ = "$1$2$3" if /(.*)<a href=\".*\">(.*)<\/a>(.*)/i;
  
    # foo@bar.com (Joe Schmoe)
    $_ = $1 if /[\w\.\-]+@[\w\.\-]+\s*\((.*)\)/;

    # Joe Schmoe <foo@bar.com>
    $_ = $1 if /(.*?)\s*<[\w\.\-]+@[\w\.\-]+>/;
    $_ = $1 if /(.*?)\s*<[\w\.\-]+@[\w\.\-]+>/;

    return $_;
}

sub authorIndex {
    local($story) = @_;

    local @path = split('/', $story);

    $path[$#path] = "index.html";

    return join('/', @path);
}

sub capitalize {
    local($subject, $i) = @_;
    local @words = split(' ', $subject);

    for ($i = 0; $i <= $#words; $i++) {
	if ($words[$i] =~ /([A-Z])(.*)/) {
	    $first = $1;
	    ($rest = $2) =~ tr/A-Z/a-z/;
	    $words[$i] = $first . $rest;
	}
    }
    return join(' ', @words);
}