#!/usr/bin/perl
#########################################################
# AltaVista Keyword Harvester #
#########################################################
# #
# #
# This script was created by: #
# #
# Adult Designz Web Specialties PTY. #
# http://www.adultdesignz.com #
# #
# and is being distributed to users of #
# http://theadultwebmaster.com as both a promotion and #
# learning tool. #
# #
# This and many other fine scripts are available at #
# the above website or by emailing the authors at #
# staff@perlcoders.com or lisaryan@logicworld.com.au #
# #
# To install this script, just upload it to your #
# server in ASCII mode and chmod it to 755. #
# It can then be run either from a telnet prompt #
# or as a standard CGI from the web. #
#########################################################
# CGI-mode flag: 0 means command-line use. It is switched to 1 later in the
# script when a web server environment is detected, and mydie() reads it to
# decide whether an HTTP header has to precede the error text.
$amcgi = 0;
# Abort the script with an error message.
#
# Argument: the message handed to die(); only the first argument is used.
# When the global $amcgi flag is 1 a plain-text Content-Type header is
# printed first. Never returns.
sub mydie {
    my ($message) = @_;
    if ($amcgi == 1) {
        print "Content-Type: text/plain\n\n";
    }
    die $message;
}
# Percent-encode a string for use inside a URL query.
#
# Argument: the text to encode.
# Returns:  a copy in which every non-word character (anything other than a
#           letter, digit or underscore) is replaced by %XX, XX being the
#           two-digit upper-case hex value of the byte.
sub encode {
    my $string = shift;
    # A character above 0xFF cannot be expressed as a single %XX escape, so
    # such strings are converted to their UTF-8 byte sequence first.
    utf8::encode($string) if $string =~ /[^\x00-\xFF]/;
    # %02X, not %X: values below 0x10 (tab, newline, ...) must still produce
    # two hex digits, otherwise "\t" becomes the malformed escape "%9".
    $string =~ s/(\W)/sprintf("%%%02X", ord($1))/eg;
    return $string;
}
# Ask AltaVista for the "Related Searches" of a query.
#
# Arguments: one or more search terms; they are joined with spaces and
#            percent-encoded with encode() to form the q= parameter.
# Returns:   the list of link texts (the text just before each </a>) found on
#            response lines from the one containing
#            "<b>Related Searches:</b>" up to, but not including, the next
#            line containing "</table>". Empty list if the marker never
#            appears.
# Dies (via mydie) when the TCP connection cannot be established.
sub getrelated {
    my $query = encode("@_");
    my $host  = "www.altavista.com";

    # A lexical handle keeps the socket private to this call; the original
    # package global $sock stayed open until the next call or program exit.
    my $sock = IO::Socket::INET->new(
        Proto    => "tcp",
        PeerAddr => $host,
        PeerPort => "http(80)",
    ) || mydie("Error connecting to altavista, try again later");

    # HTTP lines end in CRLF; a Host header is sent because name-based
    # virtual hosts cannot route the request without one.
    print {$sock} "GET /cgi-bin/query?q=$query HTTP/1.0\r\nHost: $host\r\n\r\n";

    my @keys;
    my $related = 0;    # true while inside the "Related Searches" table
    # A named loop variable avoids overwriting the caller's $_.
    while (my $line = <$sock>) {
        $related = 1 if $line =~ /<b>Related Searches:<\/b>/;
        $related = 0 if $line =~ /<\/table>/;
        next unless $related;
        push(@keys, $line =~ m!>([^<]*?)</a>!g);
    }
    close($sock);
    return @keys;
}
# Detect whether a web server started us: the CGI interface passes the
# server's name in SERVER_SOFTWARE. In that case create the CGI object and
# raise the $amcgi flag.
#
# CGI.pm is loaded with require, inside the branch, on purpose: "use CGI"
# is executed at compile time no matter where it is written, which made
# command-line runs fail on systems without CGI.pm (it is no longer bundled
# with Perl since 5.22).
if (defined $ENV{"SERVER_SOFTWARE"} && $ENV{"SERVER_SOFTWARE"} ne "") {
    require CGI;
    $cgi   = CGI->new;
    $amcgi = 1;
}
# IO::Socket is pulled in through a string eval instead of a plain "use" so
# that a missing module can be reported with the readable message below
# rather than as a compile-time failure. In CGI mode the message is preceded
# by a plain-text header.
eval "use IO::Socket";
if ($@) {
    if ($amcgi == 1) {
        print "Content-Type: text/plain\n\n";
    }
    print <<"EOT";
You do not have the IO::Socket module installed which is
needed for this script to function properly. Please contact
your administrator.
The error when trying to load the module: $@
EOT
    exit(0);
}
# Decide what this run has to do and build the initial keyword queue.
#
# CGI mode ($amcgi == 1), driven by the "action" parameter:
#   (missing/empty)  print the search form and exit
#   search           queue the whitespace-separated "words" parameter
#   show             send a previously generated keyword file and exit
# Command-line mode: every argument in @ARGV is queued as a keyword.
#
# Sets the globals $action, $words, @pending (and $url / $file in the
# branches that need them).
if ($amcgi == 1) {
    $action = $cgi->param('action');
    $words  = $cgi->param('words');
    if (!defined $action || $action eq "") {
        $url = $cgi->url;
        # The empty line after the Content-Type header is mandatory: it ends
        # the CGI header block.
        print <<"EOT";
Content-Type: text/html

<html>
<head>
<title>Keyword harvester</title>
</head>
<body>
<form action="$url" method="post">
Please enter one or more keywords you want to find in the box below and press submit.
<br><br>
Words: <input type="text" name="words"><br>
<input type="submit" value="Search">
<input type="hidden" name="action" value="search">
</form>
</body>
</html>
EOT
        exit(0);
    }
    elsif ($action eq "search") {
        # split ' ' collapses runs of whitespace and drops leading blanks, so
        # no empty keyword is ever queued (split /\s/ produced them).
        @pending = split(' ', defined $words ? $words : "");
    }
    elsif ($action eq "show") {
        $file = $cgi->param('file');
        # The name arrives from the browser. Accept only the names this
        # script generates itself (words.<pid>.asc); anything else could be
        # used to read arbitrary files or, with two-argument open, to run
        # commands.
        unless (defined $file && $file =~ /\Awords\.\d+\.asc\z/) {
            mydie("Invalid keyword file name!");
        }
        open(my $in, "<", $file) || mydie("Error opening $file!");
        print <<"EOT";
Content-Type: text/plain

Save these keywords by clicking the File menu, then Save As.
EOT
        while (my $line = <$in>) {
            # chomp + "\n" guarantees a newline after the final line too.
            chomp($line);
            print($line . "\n");
        }
        close($in);
        exit(0);
    }
}
else {
    push(@pending, @ARGV);
}
# Harvest loop: take keywords off @pending one at a time, ask getrelated()
# for related searches, log them to words.<pid>.asc and queue every related
# term that has not been seen before.
#
# %keys records all terms ever queued. Without that check related terms that
# point back at each other were re-queued forever and the loop never ended.
$file = "words.$$.asc";
open(my $out, ">", $file) || mydie("Error opening $file for writing");

# Seed the seen-set with the starting keywords, dropping duplicates.
%keys    = ();
@pending = grep { !$keys{$_}++ } @pending;

while (@pending) {
    $query = shift(@pending);
    @new   = getrelated($query);
    # The file lists everything returned for this query, repeats included.
    print {$out} "\n-- $query --\n", join("\n", @new), " \n";
    foreach my $word (@new) {
        next if $keys{$word}++;    # already queued or processed
        push(@pending, $word);
    }
}

# Buffered write errors only surface at close, and the CGI page printed next
# links to this file, so it has to be complete on disk by now.
close($out) || mydie("Error closing $file");
# Tell the user where the results are, then exit with status 0.
# CGI mode: an HTML page linking back to this script with action=show and the
# name of the generated file. Command-line mode: a plain message naming
# words.<pid>.asc.
if ($amcgi == 1) {
    $url   = $cgi->url;
    $dfile = "words.$$.asc";
    # The empty line ends the CGI header block; "&" is written as &amp;
    # because it sits inside an HTML attribute.
    print <<"EOT";
Content-Type: text/html

<html>
<head><title>Search complete</title></head>
<body>
<p>
the search is complete. click the link below to download a
file containing all keywords.
</p>
<a href="$url?action=show&amp;file=$dfile">keyword file</a>
</body>
</html>
EOT
}
else {
    print <<"EOT";
the search is complete. a file containing all keywords found
has been created as words.$$.asc.
EOT
}
exit(0);