CGI Crypt Analysis

Crypt Analysis

#!perl

# Online Cryptanalysis Tool
# By Mike Bobbitt
# Mike@Army.ca

# Revision History
#
# 11 Dec 2000: 1.0 - Initial Release
# 12 Deb 2000: 1.1 - Handles quotes properly, shows plaintext guesses in table
# 29 Jul 2002: 1.2 - Added digraph table and ability to handle homophones

# To Do
#
# - Sort character list
# - Count number of guesses
# - Print message when all letters have guesses
# - Handle chars like < and ? properly

# Set debug level:
#	0 = None
#	1 = Rudimentary
#	2 = Detailed
#	3 = Overwhelming
$debug=0;

# Version
$ver="1.2";

# Write output immediately
$|=1;

# Print HTML Page Header
print "Content-Type: text/html\n\n";
print "<html>";

# Get script name
$script_name=$0;
# Drop the rest of the path (for UNIX)
$script_name=~s/.*\/(.*)/$1/;

# No change? Then this must be a Win32 machine...
if ($script_name eq $0)
{
	$script_name=~s/.*\\(.*)/$1/;
}

debug("Starting execution, debugging is on...");

# Seed random number generator
srand(time^$$);

# English Frequency Table
$eng_freq{a}=8.2;
$eng_freq{b}=1.5;
$eng_freq{c}=2.8;
$eng_freq{d}=4.3;
$eng_freq{e}=12.7;
$eng_freq{f}=2.2;
$eng_freq{g}=2.0;
$eng_freq{h}=6.1;
$eng_freq{i}=7.0;
$eng_freq{j}=0.2;
$eng_freq{k}=0.8;
$eng_freq{l}=4.0;
$eng_freq{m}=2.4;
$eng_freq{n}=6.7;
$eng_freq{o}=7.5;
$eng_freq{p}=1.9;
$eng_freq{q}=0.1;
$eng_freq{r}=6.0;
$eng_freq{s}=6.3;
$eng_freq{t}=9.1;
$eng_freq{u}=2.8;
$eng_freq{v}=1.0;
$eng_freq{w}=2.4;
$eng_freq{x}=0.2;
$eng_freq{y}=2.0;
$eng_freq{z}=0.1;

# Create the alphabet, for reference
@alphabet=("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z");

# Read web environment variables

debug("REMOTE_HOST: $ENV{REMOTE_HOST}",2);
debug("REMOTE_USER: $ENV{REMOTE_USER}",2);
debug("REMOTE_IDENT: $ENV{REMOTE_IDENT}",2);
debug("HTTP_USER_AGENT: $ENV{HTTP_USER_AGENT}",2);
debug("REQUEST_METHOD: $ENV{REQUEST_METHOD}",2);
debug("QUERY_STRING: $ENV{QUERY_STRING}",2);
#debug("SSL_CLIENT_CN: $ENV{SSL_CLIENT_CN}",2);
#debug("SSL_CLIENT_IO: $ENV{SSL_CLIENT_IO}",2);

# Get the input
read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});

# Set multi-valued data delimiter
$delim="#";

# Read data passed in from the environment, if any
if ($ENV{QUERY_STRING})
{
	if ($buffer)
	{
		$buffer="$buffer&";
	}
	$buffer.=$ENV{QUERY_STRING};
}

# Split the name-value pairs
@pairs = split(/&/,$buffer);

debug("--- START WEB DATA ---",2);

foreach $pair (@pairs)
{
	($name,$value)=split(/=/,$pair);

	# Un-Webify plus signs and %-encoding
	$value=~tr/+/ /;
	$value=~s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
	$value=~s/<!--(.|\n)*-->//g;

	# Remove \r's from submitted data
	$value=~s/\r//g;

	if ($allow_html!=1)
	{
		$value=~s/<([^>]|\n)*>//g;
	}

	if (!$FORM{$name})
	{
		$FORM{$name}=$value;
	}
	else
	{
		$FORM{$name}.=$delim.$value;
	}
	debug("{$name}=($value)",2);
}
debug("--- END WEB DATA ---<p>",2);

# Was a debug value passed in from the web?
if (!$debug)
{
	$debug=$FORM{debug};
}

$ciphertext=$FORM{ciphertext};
$plaintext_guess=$FORM{plaintext_guess};

# Explicitly decode "'s
$ciphertext=~s/%22/"/g;
$plaintext_guess=~s/%22/"/g;

# drop case, if we're told to do so...
if ($FORM{case_sense})
{
	$ciphertext=~tr/A-Z/a-z/;
	$plaintext_guess=~tr/A-Z/a-z/;
	debug("Dropping case...");
}

# Build Header
$header="<head><link rel=\"stylesheet\" type=\"text/css\" href=\"/CipherLogic.css\">";
$header.="<title>Online Cryptanalysis Tool</title></head>\n";
$header.="<p align=\"center\"><big><big>Cryptanalysis Tool</big></big><br></p>\n";

$header.="<form method=\"POST\" action=\"$script_name\">\n";

print $header;

if ($FORM{showengfreq})
{
	@freq_keys=keys %eng_freq;

	print <<HTML;
<center>
<table bgcolor="#C0C0C0" border="0">
<tr>
<td bgcolor="#808080" align="center">Character</td>
<td bgcolor="#808080" align="center">Frequency (%)</td>
</tr>
HTML

	foreach $freq_key (@freq_keys)
	{
		print "<tr><td bgcolor=\"#808080\" align=\"center\">$freq_key</td><td bgcolor=\"#808080\" align=\"center\">$eng_freq{$freq_key}</td></tr>\n";
	}

	print <<HTML;
</table>
<p>
<a href="" OnClick="window.close()">click to close</a>
</center>
HTML

	exit;
}

if ($ciphertext)
{
	if (!$plaintext_guess)
	{
		$plaintext_guess=$ciphertext;
	}

	$ciphertemp=$ciphertext;
	$ciphertemp=~s/\n//g;

	$realcipherlength=length($ciphertext);
	$cipherlength=length($ciphertemp);
	
	# Were we asked to generate random ciphertext?
	if ($FORM{encipher})
	{
		debug("We're going to encrypt this.");
		for $letter (@alphabet)
		{
			$gotone=0;
			while (!$gotone)
			{
				# Get random number
				$num=rand(@alphabet);
				$randlett=$alphabet[$num];
				if (!$taken{$randlett})
				{
					# comment out this if statement to allow a letter to stand for itself
					if ($randlett ne $letter)
					{
						$taken{$randlett}=1;
						$gotone=1;
						debug("Using $randlett as replacement for $letter.");
						$plaintext_guess=replaceChar($plaintext_guess,$ciphertext,$letter,$randlett);
					}
				}
			}
		}
	}

	# Do we have a character to replace?
	if ($FORM{replacement_char})
	{
		$plaintext_guess=replaceChar($plaintext_guess,$ciphertext,$FORM{original_char},$FORM{replacement_char});
		debug("Plaintext guess is now: $plaintext_guess");
	}

	printCompare($plaintext_guess,$ciphertext);
	print "<p>";

	if ($showwarning)
	{
		print "<font color=red>WARNING</font>: You have already used <font color=yellow>$FORM{replacement_char}</font> as a plaintext guess for $replaced{$FORM{replacement_char}}. (This is OK if you are solving a cipher with homophones.)";
	}
	$char_html=chargraph();
	$di_html=digraph();

	# Turn quotes to %22 for web submission
	$ciphertext=~s/"/%22/g;
	$plaintext_guess=~s/"/%22/g;
	print <<HTML;
<input type="hidden" value="$ciphertext" name="ciphertext"></p>
<input type="hidden" value="$plaintext_guess" name="plaintext_guess"></p>
<input type="hidden" value="$FORM{spaces}" name="spaces"></p>
<input type="hidden" value="$FORM{case_sense}" name="case_sense"></p>
<p>
Select Character to Replace: <select size="1" name="original_char" tabindex="1">
HTML

	foreach $freq_key (@freq_keys)
	{
		print "<option>$freq_key</option>";
	}

	print <<HTML;	
</select>
Replace Occurances With:
<input type="text" name="replacement_char" size="1" maxlength="1" tabindex="2">
<p>Or enter crib text here: <input type="text" name="crib" size="15" tabindex="3">
<p><input type="submit" value="Update" name="Update" tabindex="4"></p>
<a onclick="window.open('$script_name?showengfreq=1','Frequency','width=220,height=640,resizable'); return false" 
href="$script_name?showengfreq=1">
>> Show English Character Frequency Table</a><p>
<table><tr><td valign="top">$char_html</td><td>$di_html</td></tr></table>
HTML

	# Are we trying cribs?
	if ($FORM{crib})
	{
		showCribs();
	}
}
else
{
	print <<HTML;
This program can help crack simple monoalphabetic substitutions, such as the Cryptoquotes
in many common newspapers.
<p>
Enter the ciphertext below:
<p><textarea rows="10" name="ciphertext" cols="60" tabindex="1"></textarea></p>
<p><input type="checkbox" name="spaces" tabindex="2"> Include spaces and punctuation in analysis.<br>
<input type="checkbox" name="encipher" tabindex="3"> This is plaintext - generate the ciphertext randomly please.<br>
<input type="checkbox" name="case_sense" tabindex="4" checked> Convert all text to lower case.
<p><input type="submit" value="Analyze" name="Analyze" tabindex="5"></p>
</form>
HTML
}

# Get file "last modified" time
@allinfo=stat($0);
$revdate=localtime(@allinfo[9]);

# Print Footer
print <<HTML;
</p></center></div></form>
<hr><small><small><a href="http://Perl.Bobbitt.ca" title="$revdate">$ver</a>
© 1999-2003 Cipher Logic Canada Inc.</small></small>
<a href="mailto:Mike\@Army.ca">by Mike Bobbitt</a>
</body></html>
HTML

######################## END OF MAIN ########################

######################## START OF SUBROUTINES ########################


=head3 debug()

 debug($debug_message,$debug_level);

 $debug_message - String to print if debugging is on
 $debug_level - Only pring string if current debug level is $debug_level or higher (current debug level is set by $debug)

Prints a message, if $debug has a value.

=cut

sub debug
{
my $debug_message=shift;
my $debug_level=shift;

	if (($debug_level le $debug) && $debug)
	{
		print "<pre>$debug_message</pre>\n";
	}
}


##########################################################################

=head3 replaceChar()

 $newstring=replaceChar($string,$rostring,$original,$newchar);

 $string - String to perform replacements on
 $rostring - String to use for comparison (usually ciphertext)
 $original - Character in $string to replace
 $newchar - Character to replace $original with

Replaces all occurances of $original with $newchar in $string, and returns it.

=cut

sub replaceChar
{
my $string=shift;
my $rostring=shift;
my $original=shift;
my $newchar=shift;

my $stringlen=length($string);
my $marker=0;
my $newstring=$string;

	$replacements=0;

	while ($marker<=$stringlen-1)
	{
		debug("Marker: $marker",3);
		if (substr($rostring,$marker,1) eq $original)
		{
			$replacements++;
			debug("$original found at location $marker. ($newstring[$marker])");
			$newstring=substr($newstring,0,$marker).$newchar.substr($newstring,$marker+1,$stringlen);
			debug("\$newstring is now: [$newstring]");
		}
		$marker++;
	}
	return($newstring);
}


##########################################################################

=head3 digraph()

 $di_html=digraph();

 $di_html - HTML for digraph frequency table

Returns the HTML for a digraph frequency table.

=cut
sub digraph
{

	# Print digraph table
	$di_html=<<HTML;
Digraph Frequency Distribution:
<p>
<table border="0" bgcolor="#C0C0C0">
<tr>
<td bgcolor="#808080" align="center">Ciphertext Digraph</td>
<td bgcolor="#808080" align="center">Occurrances</td>
<td bgcolor="#808080" align="center">Frequency (%)</td>
</tr>
HTML
	for ($counter=0;$counter<=$cipherlength-1;$counter++)
	{
		$char=substr $ciphertemp,$counter,2;

		# Do we count punctuation?
		if ($FORM{spaces})
		{
			$difrequency{$char}++;
			$divirtual_length++;
		}
		else
		{
			if ($char=~/[a-zA-Z]/)
			{
				$difrequency{$char}++;
				$divirtual_length++;
			}
		}
		debug("[$char]");
	}

	@difreq_keys=keys %difrequency;

	foreach $difreq_key (@difreq_keys)
	{
		$freq_percent=(int($difrequency{$difreq_key}/$divirtual_length*10000))/100;
		$di_html.=<<HTML;
<tr>
<td bgcolor="#808080" align="center">$difreq_key</td>
<td bgcolor="#808080" align="center">$difrequency{$difreq_key}</td>
<td bgcolor="#808080" align="center">$freq_percent</td>
</tr>
HTML
	}
	$di_html.="</table>";

	return($di_html);
}


##########################################################################

=head3 chargraph()

 $char_html=chargraph();

 $char_html - HTML for character frequency table

Returns the HTML for a character frequency table.

=cut
sub chargraph
{
	$char_html=<<HTML;
Character Frequency Distribution:
<p>
<table border="0" bgcolor="#C0C0C0">
<tr>
<td bgcolor="#808080" align="center">Ciphertext Character</td>
<td bgcolor="#808080" align="center">Occurrances</td>
<td bgcolor="#808080" align="center">Frequency (%)</td>
<td bgcolor="#808080" align="center">Plaintext Guess</td>
<td bgcolor="#808080" align="center">Plaintext Guess Frequency (%)</td>
<td bgcolor="#808080" align="center">Frequency Difference (%)</td>
</tr>
HTML
	for ($counter=0;$counter<=$cipherlength-1;$counter++)
	{
		$char=substr $ciphertemp,$counter,1;

		# Do we count punctuation?
		if ($FORM{spaces})
		{
			$frequency{$char}++;
			$virtual_length++;
		}
		else
		{
			if ($char=~/[a-zA-Z]/)
			{
				$frequency{$char}++;
				$virtual_length++;
			}
		}
		debug("[$char]");
	}

	@freq_keys=keys %frequency;

	foreach $freq_key (@freq_keys)
	{
		$freq_percent=(int($frequency{$freq_key}/$virtual_length*10000))/100;
		if ($back_replaced{$freq_key})
		{
			$difference=(int(abs($freq_percent-$eng_freq{$back_replaced{$freq_key}})*100))/100;
		}
		else
		{
			$difference="";
		}
		$char_html.=<<HTML;
<tr>
<td bgcolor="#808080" align="center">$freq_key</td>
<td bgcolor="#808080" align="center">$frequency{$freq_key}</td>
<td bgcolor="#808080" align="center">$freq_percent</td>
<td bgcolor="#808080" align="center">$back_replaced{$freq_key}</td>
<td bgcolor="#808080" align="center">$eng_freq{$back_replaced{$freq_key}}</td>
<td bgcolor="#808080" align="center"><font color=
HTML
		if ($difference < 1)
		{
			$char_html.="green";
		}
		elsif ($difference > 3)
		{
			$char_html.="red";
		}
		else
		{
			$char_html.="yellow";
		}
		$char_html.=<<HTML;
>$difference</font>
</td>
</tr>
HTML
	}
	$char_html.="</table>";

	return($char_html);
}


##########################################################################

=head3 showCribs()

 showCribs();

Shows all possible positions of the provided crib within the working ciphertext

=cut
sub showCribs
{
my $counter,$char,$try,$orig,$original,$newchar,$counter2,$crib_repl;
my $crib=$FORM{crib};
my $criblen=length($crib);

	for ($counter=0;$counter<=$cipherlength-$criblen;$counter++)
	{
		$char=substr $ciphertemp,$counter,$criblen;
		$try=$ciphertext;
		$orig=$plaintext_guess;
		$crib_repl=0;

		# Swap out each character of the crib
		for ($counter2=0;$counter2<=$criblen-1;$counter2++)
		{
			# Pull off each character of the crib
			$newchar=substr $crib,$counter2,1;

			# Pull off corresponding char of the ciphertext
			$original=substr $ciphertemp,$counter+$counter2,1;

			# Replace 'em
 			$orig=replaceChar($orig,$try,$original,$newchar);
			$crib_repl+=$replacements;
		}
		print "There were $crib_repl substiturions in crib location #$counter:<br>";

		$orig=~s/\n//g;
		$try=~s/\n//g;

		# If the crib wasn't in the resulting plaintext guess, don't show it.
		if ($orig=~/$crib/)
		{
			printCompare($orig,$try,1);
		}
		else
		{
			print "<br>This crib position is invalid.<p>";
		}
	}
}


##########################################################################

=head3 printCompare()

 printCompare($plaintext_guess,$ciphertext,$mode);

 $plaintext_guess - Plaintext to display
 $ciphertext - The original ciphertext, used as a comparison
 $mode - If set to 1, don't show the ciphertext block (optional)

Displays the original ciphertext and the new plaintext guess.

=cut
sub printCompare
{
my $plaintext_guess=shift;
my $ciphertext=shift;
my $mode=shift;

	if (!$mode)
	{
		print "Original ";
		if ($FORM{encipher})
		{
			print "Plain";
		}
		else
		{
			print "Cipher";
		}
		print <<HTML;
text:<p>
<pre>
<ul>
$ciphertext
</ul>
</pre>
<p>
HTML

		if ($FORM{encipher})
		{
			print "Generated Ciphertext";
		}
		else
		{
			print "Plaintext Guess";
		}
		print <<HTML;
:
<p>
<pre>
<ul>
HTML
	}

	$showwarning=0;
	for ($counter=0;$counter<=$realcipherlength-1;$counter++)
	{
		$char=substr $ciphertext,$counter,1;
		$char2=substr $plaintext_guess,$counter,1;
		if ($char ne $char2)
		{
			# Show the newly replaced char as red, any replaced char as yellow
			if ($char2 eq $FORM{replacement_char})
			{
				print "<font color=red>";
			}
			else
			{
				print "<font color=yellow>";
			}
			if ($replaced{$char2} && ($replaced{$char2} ne $char))
			{
				$showwarning=1;
			}
			if (!$mode)
			{
				$replaced{$char2}=$char;
				$back_replaced{$char}=$char2;
			}
		}
		print "$char2";
		if ($char ne $char2)
		{
			print "</font>";
		}

	}
	print <<HTML;
</ul>
</pre>
HTML
}

######################## END OF SUBROUTINES ########################

######################## END OF FILE ########################

Include spaces and punctuation in analysis.
This is plaintext - generate the ciphertext randomly please.
Convert all text to lower case.

HTML } # Get file "last modified" time @allinfo=stat($0); $revdate=localtime(@allinfo[9]); # Print Footer print <

$ver © 1999-2003 Cipher Logic Canada Inc. by Mike Bobbitt HTML ######################## END OF MAIN ######################## ######################## START OF SUBROUTINES ######################## =head3 debug() debug($debug_message,$debug_level); $debug_message - String to print if debugging is on $debug_level - Only pring string if current debug level is $debug_level or higher (current debug level is set by $debug) Prints a message, if $debug has a value. =cut sub debug { my $debug_message=shift; my $debug_level=shift; if (($debug_level le $debug) && $debug) { print "
$debug_message
\n"; } } ########################################################################## =head3 replaceChar() $newstring=replaceChar($string,$rostring,$original,$newchar); $string - String to perform replacements on $rostring - String to use for comparison (usually ciphertext) $original - Character in $string to replace $newchar - Character to replace $original with Replaces all occurances of $original with $newchar in $string, and returns it. =cut sub replaceChar { my $string=shift; my $rostring=shift; my $original=shift; my $newchar=shift; my $stringlen=length($string); my $marker=0; my $newstring=$string; $replacements=0; while ($marker<=$stringlen-1) { debug("Marker: $marker",3); if (substr($rostring,$marker,1) eq $original) { $replacements++; debug("$original found at location $marker. ($newstring[$marker])"); $newstring=substr($newstring,0,$marker).$newchar.substr($newstring,$marker+1,$stringlen); debug("\$newstring is now: [$newstring]"); } $marker++; } return($newstring); } ########################################################################## =head3 digraph() $di_html=digraph(); $di_html - HTML for digraph frequency table Returns the HTML for a digraph frequency table. =cut sub digraph { # Print digraph table $di_html=< HTML for ($counter=0;$counter<=$cipherlength-1;$counter++) { $char=substr $ciphertemp,$counter,2; # Do we count punctuation? if ($FORM{spaces}) { $difrequency{$char}++; $divirtual_length++; } else { if ($char=~/[a-zA-Z]/) { $difrequency{$char}++; $divirtual_length++; } } debug("[$char]"); } @difreq_keys=keys %difrequency; foreach $difreq_key (@difreq_keys) { $freq_percent=(int($difrequency{$difreq_key}/$divirtual_length*10000))/100; $di_html.=< HTML } $di_html.="
Ciphertext Digraph Occurrances Frequency (%)
$difreq_key $difrequency{$difreq_key} $freq_percent
"; return($di_html); } ########################################################################## =head3 chargraph() $char_html=chargraph(); $char_html - HTML for character frequency table Returns the HTML for a character frequency table. =cut sub chargraph { $char_html=< HTML for ($counter=0;$counter<=$cipherlength-1;$counter++) { $char=substr $ciphertemp,$counter,1; # Do we count punctuation? if ($FORM{spaces}) { $frequency{$char}++; $virtual_length++; } else { if ($char=~/[a-zA-Z]/) { $frequency{$char}++; $virtual_length++; } } debug("[$char]"); } @freq_keys=keys %frequency; foreach $freq_key (@freq_keys) { $freq_percent=(int($frequency{$freq_key}/$virtual_length*10000))/100; if ($back_replaced{$freq_key}) { $difference=(int(abs($freq_percent-$eng_freq{$back_replaced{$freq_key}})*100))/100; } else { $difference=""; } $char_html.=< HTML } $char_html.="
Ciphertext Character Occurrances Frequency (%) Plaintext Guess Plaintext Guess Frequency (%) Frequency Difference (%)
$freq_key $frequency{$freq_key} $freq_percent $back_replaced{$freq_key} $eng_freq{$back_replaced{$freq_key}} 3) { $char_html.="red"; } else { $char_html.="yellow"; } $char_html.=<$difference
"; return($char_html); } ########################################################################## =head3 showCribs() showCribs(); Shows all possible positions of the provided crib within the working ciphertext =cut sub showCribs { my $counter,$char,$try,$orig,$original,$newchar,$counter2,$crib_repl; my $crib=$FORM{crib}; my $criblen=length($crib); for ($counter=0;$counter<=$cipherlength-$criblen;$counter++) { $char=substr $ciphertemp,$counter,$criblen; $try=$ciphertext; $orig=$plaintext_guess; $crib_repl=0; # Swap out each character of the crib for ($counter2=0;$counter2<=$criblen-1;$counter2++) { # Pull off each character of the crib $newchar=substr $crib,$counter2,1; # Pull off corresponding char of the ciphertext $original=substr $ciphertemp,$counter+$counter2,1; # Replace 'em $orig=replaceChar($orig,$try,$original,$newchar); $crib_repl+=$replacements; } print "There were $crib_repl substiturions in crib location #$counter:
"; $orig=~s/\n//g; $try=~s/\n//g; # If the crib wasn't in the resulting plaintext guess, don't show it. if ($orig=~/$crib/) { printCompare($orig,$try,1); } else { print "
This crib position is invalid.

"; } } } ########################################################################## =head3 printCompare() printCompare($plaintext_guess,$ciphertext,$mode); $plaintext_guess - Plaintext to display $ciphertext - The original ciphertext, used as a comparison $mode - If set to 1, don't show the ciphertext block (optional) Displays the original ciphertext and the new plaintext guess. =cut sub printCompare { my $plaintext_guess=shift; my $ciphertext=shift; my $mode=shift; if (!$mode) { print "Original "; if ($FORM{encipher}) { print "Plain"; } else { print "Cipher"; } print <

    $ciphertext

HTML if ($FORM{encipher}) { print "Generated Ciphertext"; } else { print "Plaintext Guess"; } print <

    HTML } $showwarning=0; for ($counter=0;$counter<=$realcipherlength-1;$counter++) { $char=substr $ciphertext,$counter,1; $char2=substr $plaintext_guess,$counter,1; if ($char ne $char2) { # Show the newly replaced char as red, any replaced char as yellow if ($char2 eq $FORM{replacement_char}) { print ""; } else { print ""; } if ($replaced{$char2} && ($replaced{$char2} ne $char)) { $showwarning=1; } if (!$mode) { $replaced{$char2}=$char; $back_replaced{$char}=$char2; } } print "$char2"; if ($char ne $char2) { print ""; } } print <
HTML } ######################## END OF SUBROUTINES ######################## ######################## END OF FILE ########################