CGI AltaVista Keyword Harvester

AltaVista Keyword Harvester

#!/usr/bin/perl
#########################################################
#              AltaVista Keyword Harvester              #
#########################################################
#                                                       #
#                                                       #
# This script was created by:                           #
#                                                       #
# Adult Designz Web Specialties PTY.                    #
# http://www.adultdesignz.com                           #
#                                                       #
# and is being distributed to users of                  #
# http://theadultwebmaster.com as both a promotion and  #
# learning tool.                                        #
#                                                       #
# This and many other fine scripts are available at     #
# the above website or by emailing the authors at       #
# staff@perlcoders.com or lisaryan@logicworld.com.au    #
#                                                       #
# To install this script just upload it to your         #
# server in ASCII mode and chmod 755.                   #
# It can then be run either from a telnet prompt        #
# or as a standard CGI from the web.                    #
#########################################################
# CGI-mode flag.  It starts at 0 and is switched to 1 further down when
# SERVER_SOFTWARE is present in the environment.  mydie() and the output
# code test it to decide whether an HTTP Content-Type header has to be
# printed before any other output.
$amcgi=0;

# mydie($message)
#
# Fatal-error helper.  When the script runs as a CGI ($amcgi == 1) a
# text/plain Content-Type header is printed first; the script then dies
# with $message.  Never returns.
sub	mydie {
	my $message = shift;

	if ($amcgi == 1) {
		print "Content-Type: text/plain\n\n";
	}
	die $message;
}

# encode($string)
#
# Returns a copy of $string in which every non-word character (anything
# matching \W) is replaced by a URL percent-escape; word characters are
# left untouched.
#
# Each escape is "%" followed by exactly two upper-case hex digits.  The
# zero padding matters for characters below 0x10 (tab, newline, ...), which
# would otherwise be written as an invalid one-digit escape such as "%9".
sub 	encode {
	my $string = shift;

	$string =~ s/(\W)/sprintf("%%%02X", ord($1))/eg;
	return $string;
}

# getrelated(@words)
#
# Sends the given words (joined with spaces and URL-encoded) as a query to
# www.altavista.com and scans the response line by line.
#
# Collection starts on a line containing "<b>Related Searches:</b>" and
# stops at a line containing "</table>"; while collecting, the text of
# every "...>text</a>" link on the line is gathered.
#
# Returns the list of collected link texts (empty if none were found).
# Calls mydie() if the connection cannot be established.
sub 	getrelated {
	my $query = encode("@_");
	my(@keys,$related);

	# Lexical handle instead of a package global: it cannot collide with
	# other code and is released when the sub returns.
	my $sock = IO::Socket::INET->new(	Proto=>"tcp",
						PeerAddr=>"www.altavista.com",
						PeerPort=>"http(80)") || mydie("Error connecting to altavista, try again later");

	# HTTP request lines are terminated by CRLF, not a bare LF.  The Host
	# header lets name-based virtual hosts route the request.
	print($sock "GET /cgi-bin/query?q=$query HTTP/1.0\r\nHost: www.altavista.com\r\n\r\n");

	while(my $line = <$sock>) {
		# Order matters: a line holding both markers ends up "not related".
		$related = 1 if $line =~ /<b>Related Searches:<\/b>/;
		$related = 0 if $line =~ /<\/table>/;
		next unless $related;
		push(@keys, $line =~ m!>([^<]*?)\<\/a\>!g);
	}
	close($sock);
	return @keys;
}

# Detect CGI mode: a web server passes SERVER_SOFTWARE to the scripts it
# runs, so a non-empty value means we were started as a CGI.
# (Comparing against undef with "ne" only worked by accident, because undef
# stringifies to the empty string.)
if(defined($ENV{"SERVER_SOFTWARE"}) && $ENV{"SERVER_SOFTWARE"} ne "") {
	# "use" is executed at compile time no matter which branch it sits in;
	# "require" loads CGI.pm only when it is really needed, so command-line
	# runs do not depend on it being installed.
	require CGI;
	$cgi=CGI->new;
	$amcgi=1;
}

# IO::Socket is loaded at run time through a string eval rather than a
# plain "use", so that a missing module can be reported with a readable
# message (preceded by a Content-Type header in CGI mode).
eval "use IO::Socket";
if ($@) {
	if ($amcgi == 1) {
		print "Content-Type: text/plain\n\n";
	}
	print <<"EOT";
You do not have the IO::Socket module installed which is
needed for this script to function properly. Please contact
your administrator.

The error when trying to load the module: $@
EOT
	exit(0);
}

# Decide what to do.  As a CGI the "action" parameter selects the mode:
#   (missing/empty) - print the search form and exit
#   search          - seed @pending from the "words" parameter
#   show            - send back a previously generated keyword file and exit
# From the command line the keywords are taken from @ARGV.
if($amcgi==1) {
	$action=$cgi->param('action');
	$words=$cgi->param('words');
	if(!defined($action) || $action eq "") {
		$url=$cgi->url;
		print<<"EOT";
Content-Type: text/html

<html>
<head>
<title>Keyword harvester</title>
</head>
<body>
<form action="$url" method="post">
Please enter one or more keywords you want to find in the box below and press submit.
<br><br>
Words: <input type="text" name="words"><br>
<input type="submit" value="Search">
<input type="hidden" name="action" value="search">
</form>
</body>
</html>
EOT
		exit(0);
	} elsif($action eq "search") {
		# split(' ', ...) splits on runs of whitespace and ignores leading
		# blanks, so repeated spaces do not produce empty keywords.
		@pending=defined($words) ? split(' ', $words) : ();
	} elsif($action eq "show") {
		$file=$cgi->param('file');
		# The name comes from the browser.  Accept only names of the form
		# this script generates itself (words.<pid>.asc); anything else
		# could be used to read arbitrary files.
		unless(defined($file) && $file =~ /\Awords\.\d+\.asc\z/) {
			mydie("Invalid keyword file name");
		}
		# Three-argument open: the name is never interpreted as a mode
		# or a pipe command.
		open(my $in, '<', $file) || mydie("Error opening $file!");
		chomp(@temp=<$in>);
		close($in);
		print<<"EOT";
Content-Type: text/plain

Save these keywords by clicking the File menu, then Save As.

EOT
		foreach $line (@temp) {
			print($line."\n");
		}
		exit(0);
	}
} else {
	push(@pending, @ARGV);
}

# Output file for this run, named after the process id ($$).
$file="words.$$.asc";

open(my $out, '>', $file) || mydie("Error opening $file for writing");

# Work through the pending queue: every query's related terms are written
# to the file, recorded in %keys and queued as new queries.
#
# %queued remembers every term that has ever been put on the queue, so a
# term is looked up only once.  Without it, terms that refer to each other
# are re-queued forever and the loop never ends.
my %queued = map { $_ => 1 } @pending;

while(@pending) {
	$query=shift(@pending);
	@new=getrelated($query);
	print {$out} "\n-- $query --\n", join("\n",@new), " \n";
	foreach my $word (@new) {
		$keys{$word}=1;
		push(@pending,$word) unless $queued{$word}++;
	}
}

# Buffered write errors only surface at close, so check it.
close($out) || mydie("Error writing $file");

# Report completion and exit.  As a CGI an HTML page is printed with a link
# that calls this script again with action=show for the file written by
# this process; on the command line only the file name is printed.
if($amcgi==1) {
	$url=$cgi->url;
	$dfile="words.$$.asc";
	# "&" inside an HTML attribute value has to be written as "&amp;".
	print<<"EOT";
Content-Type: text/html

<html>
<head><title>Search complete</title></head>
<body>
<p>
the search is complete. click the link below to download a
file containing all keywords.
</p>
<a href="$url?action=show&amp;file=$dfile">keyword file</a>
</body>
</html>
EOT
} else {
	print <<"EOT";
the search is complete. a file containing all keywords found
has been created as words.$$.asc.
EOT
}
exit(0);