#!/usr/local/bin/perl -wT
use strict;
# where do we connect to the Similarity server? Here:
my $remote_host = '127.0.0.1';
my $remote_port = '31134';
my $doc_base = '../docs';
use CGI;
use Socket;
BEGIN {
# Our University's webserver uses an ancient version of CGI::Carp
# so we can't do fatalsToBrowser.
# The carpout() function lets us modify the format of messages sent to
# a filehandle (in this case STDERR) to include timestamps
use CGI::Carp 'carpout';
carpout(*STDOUT);
}
# subroutine prototypes
sub showForm ($$$$);
sub round ($);
my $cgi = CGI->new;
# These are the colors of the text when we alternate text colors (when
# showing errors, for example).
my $text_color1 = 'black';
my $text_color2 = '#d03000';
# print the HTTP header
print $cgi->header;
# if the showform parameter is no, then don't show the form--this is how
# we avoid showing the form in popups
my $showform = $cgi->param ('showform') || 'yes';
# show the start of the page (all the usual HTML that goes at the top
# of a page, etc.)
showPageStart ();
# check if we want to show the version information (version of WordNet, etc.)
my $showversion = $cgi->param ('version');
if ($showversion) {
socket (Server, PF_INET, SOCK_STREAM, getprotobyname ('tcp'));
my $internet_addr = inet_aton ($remote_host)
or die "Could not convert $remote_host to an Internet addr: $!\n";
my $paddr = sockaddr_in ($remote_port, $internet_addr);
unless (connect (Server, $paddr)) {
print "<p>Cannot connect to server $remote_host:$remote_port</p>\n";
goto SHOW_END;
}
select ((select (Server), $|=1)[0]);
print Server "v\015\012\015\012";
print "<h2>Version information</h2>\n";
while (my $line = <Server>) {
last if $line eq "\015\012";
if ($line =~ /^v (\S+) (\S+)/) {
print "<p>$1 version $2</p>\n";
}
elsif ($line =~ m/^! (.*)/) {
print "<p>$1</p>\n";
}
else {
print "<p>Strange message from server: $line\n";
}
}
local $ENV{PATH} = "/usr/local/bin:/usr/bin:/bin:/sbin";
my $t_osinfo = `uname -a` || "Couldn't get system information: $!";
# $t_osinfo is tainted. Use it in a pattern match and $1 will
# be untainted.
$t_osinfo =~ /(.*)/;
print "<p>HTTP server: $ENV{HTTP_HOST} ($1)</p>\n";
print "<p>Similarity server: $remote_host</p>\n";
goto SHOW_END;
}
# check if we're generating this page as the result of a query; if so, then
# we need to show the results.
my $word1 = $cgi->param ('word1');
my $word2 = $cgi->param ('word2');
if ($word1 and !$word2) {
print "<p>Word 2 was not specified.</p>";
}
elsif (!$word1 and $word2) {
print "<p>Word 1 was not specified.</p>";
}
elsif ($word1 and $word2) {
print "<hr />\n";
socket (Server, PF_INET, SOCK_STREAM, getprotobyname ('tcp'));
my $internet_addr = inet_aton ($remote_host)
or die "Could not convert $remote_host to an Internet addr: $!\n";
my $paddr = sockaddr_in ($remote_port, $internet_addr);
unless (connect (Server, $paddr)) {
print "<p>Cannot connect to server $remote_host:$remote_port</p>\n";
goto SHOW_END;
}
select ((select (Server), $|=1)[0]);
# value of the parameters can be 'all', 'gloss', or 'synset'
my $w1option = $cgi->param ('senses1');
my $w2option = $cgi->param ('senses2');
my $query_type = 2;
if ($w1option eq 'gloss' or $w1option eq 'synset') {
$query_type = 1 unless $word1 =~ m/[^#]+\#[nvars]\#\d+/;
}
if ($w2option eq 'gloss' or $w2option eq 'synset') {
$query_type = 1 unless $word2 =~ m/[^#]+\#[nvars]\#\d+/;
}
if ($query_type == 1) {
my @senses1;
my @senses2;
if ($w1option eq 'gloss') {
print Server "g $word1\015\012";
}
elsif ($w1option eq 'synset') {
print Server "s $word1\015\012";
}
elsif ($w1option eq 'all') {
@senses1 = ([$word1, ""]);
}
else {
print "<pre>Internal error: invalid option `$w1option'</pre>\n";
}
if ($w2option eq 'gloss') {
print Server "g $word2\015\012";
}
elsif ($w2option eq 'synset') {
print Server "s $word2\015\012";
}
elsif ($w2option eq 'all') {
@senses2 = ([$word2, ""]);
}
else {
print "<pre>Internal error: invalid option `$w2option'</pre>\n";
}
print Server "\015\012";
while (my $response = <Server>) {
last if $response eq "\015\012";
my $prefix = substr $response, 0, 1;
my $end = substr $response, 2;
if ($prefix eq 'g') {
my ($wps, $gloss) = m/([^#]+\#[nvar]\#\d+) (.*)/;
print "<p>$wps: $gloss</p>";
}
elsif ($prefix eq '1') {
my ($wps, $gloss) = $end =~ m/([^#]+\#[nvar]\#\d+) (.*)/;
push @senses1, [$wps, $gloss];
}
elsif ($prefix eq '2') {
my ($wps, $gloss) = $end =~ m/([^#]+\#[nvar]\#\d+) (.*)/;
push @senses2, [$wps, $gloss];
}
else {
print "Strange message from server `$response'";
}
}
my $measure = $cgi->param ('measure') || 'path';
showForm (1, \@senses1, \@senses2, $measure);
showPageEnd ();
exit;
}
else {
my $measure = $cgi->param ('measure');
my $trace = $cgi->param ('trace') ? 'yes' : 'no';
my $gloss = $cgi->param ('gloss') ? 'yes' : 'no';
my $root = $cgi->param ('rootnode') ? 'yes' : 'no';
my $syns = $cgi->param ('synset') ? 'yes' : 'no';
my $all_senses = $cgi->param ('sense') ? 1 : 0;
$word1 =~ tr/A-Z /a-z_/;
$word2 =~ tr/A-Z /a-z_/;
# terminate all messages with CRLF (best to avoid \r\n because the
# meaning of \r and \n varies from platform to platform
if ($measure eq 'all') {
foreach my $m (qw/hso lch lesk lin jcn path res vector vector_pairs wup/) {
print Server +("r $word1 $word2 $m $trace $gloss $syns $root",
"\015\012");
}
print Server "\015\012";
}
else {
print Server ("r $word1 $word2 $measure $trace $gloss $syns $root",
"\015\012\015\012");
}
my @glosses;
my %scores;
my @errors;
my @synsets;
my @version_info;
my $lines = 0;
my $last_measure = '';
while (my $response = <Server>) {
last if $response eq "\015\012";
$lines++;
my $beginning = substr $response, 0, 1;
my $end = substr $response, 2;
if ($beginning eq '!') {
$end =~ s/\s+$//;
push @errors, $end;
}
elsif ($beginning eq 'r') {
my ($measure, $wps1, $wps2, $score) = split /\s+/, $end;
$score = round ($score);
$last_measure = $measure;
push @{$scores{$measure}}, [$score, $wps1, $wps2];
}
elsif ($beginning eq 't') {
$end =~ s|<CRLF>|<br />|g;
### FIXME -- we lost the traces
push @{$scores{$last_measure}->[-1]}, "$end\n";
}
elsif ($beginning eq 'g') {
my ($wps, @gloss_words) = split /\s+/, $end;
push @glosses, [$wps, substr ($end, length ($wps))];
}
elsif ($beginning eq 's') {
my (@synset_words) = split /\s+/, $end;
push @synsets, [@synset_words];
}
elsif ($beginning eq 'v') {
my ($package, $version) = split /\s+/, $end;
push @version_info, [$package, $version];
}
else {
push @errors,
"Error: received strange message from server `$response'";
}
}
my $query_string = $ENV{QUERY_STRING} || "";
# replace literal ampersands with their XML entity equivalents
$query_string =~ s/&/&/g;
if (scalar @version_info) {
foreach my $item (@version_info) {
print "<p>$item->[0] version $item->[1]</p>\n";
}
goto SHOW_END;
}
# show errors, if any
if (scalar @errors) {
unless ($cgi->param ('errors') eq 'show') {
my $query = $query_string . '&errors=show';
my $url = "similarity.cgi?${query}";
# Having onclick return false should keep the browser from
# loading the page specified by href, but IE loads it
# anyways. That's why we set href to # instead of the
# URL (setting it to the URL would let non-JavaScript
# browsers see the page in the main window, but such
# browsers are rare)
print +("<p>",
"<a href=\"#\" ",
"onclick=\"showWindow ('$url', 'Errors'); return false;\">View errors</a>",
'</p>',
"\n");
}
else {
print '<h2>Warnings/Errors:</h2>';
print '<p class="errors">';
my $parity = 0;
foreach (0..$#errors) {
my $color = $parity ? $text_color1 : $text_color2;
print "<div style=\"color: $color\">$errors[$_]</div>";
$parity = !$parity;
}
print "</p>\n";
goto SHOW_END;
}
}
# show glosses, if any
if ($gloss eq 'yes') {
my $parity = 0;
if (scalar @glosses) {
print '<h2>Glosses:</h2>';
print '<p class="gloss">';
print "<dl>";
foreach my $ref (@glosses) {
print "<dt>$ref->[0]</dt><dd>$ref->[1]</dd>";
}
print "</dl>\n";
}
else {
print "<p>Sorry, no glosses were found.</p>\n";
}
goto SHOW_END;
}
else {
my $query = $query_string . '&gloss=yes';
my $url = "similarity.cgi?${query}";
print +('<p>',
"<a href=\"#\" ",
"onclick=\"showWindow ('$url', 'Glosses'); return false;\">",
"View glosses (definitions)</a>",
'</p>',
"\n");
}
if ($syns eq 'yes') {
# show complete synsets, if any were requested
if (scalar @synsets) {
print '<h2>Synsets:</h2>';
print '<p class="synset">';
my $parity = 1;
foreach (0..$#synsets) {
my $color = $parity ? $text_color1 : $text_color2;
print "<div style=\"color: $color\" class=\"synset\">{";
print join (', ', @{$synsets[$_]});
print "}</div>";
$parity = !$parity;
}
print "</p>\n";
}
else {
print "<p>Sorry, no synsets were found.</p>\n";
}
goto SHOW_END;
}
else {
my $query = $query_string . '&synset=yes';
my $url = "similarity.cgi?${query}";
print +('<p>',
"<a href=\"#\" ",
"onclick=\"showWindow ('$url', 'Synsets'); return false;\">View synsets</a>",
'</p>',
"\n");
}
if ($all_senses) {
print '<h2>Results:</h2>' if scalar keys %scores;
print '<table class="results" border="1">';
print '<tr><th>Measure</th><th>Word 1</th><th>Word 2</th><th>Score</th>';
print '<th>Trace</th>' if $trace eq 'yes';
print "</tr>\n";
foreach my $m (keys %scores) {
my @scrs = sort {$b->[0] <=> $a->[0]} @{$scores{$m}};
foreach (@scrs) {
my $wps1 = $_->[1];
$wps1 =~ s/\#/%23/g;
my $wps2 = $_->[2];
$wps2 =~ s/\#/%23/g;
print "<tr><td>$m</td>";
print "<td><a href=\"#\" onclick=\"showWindow ('wps.cgi?wps=$wps1', ''); return false;\">$_->[1]</a></td>";
print "<td><a href=\"#\" onclick=\"showWindow ('wps.cgi?wps=$wps2', ''); return false;\">$_->[2]</a></td>";
print "<td>$_->[0]</td>";
if ($trace eq 'yes') {
print "<td>$_->[3]</td>";
}
print "</tr>\n";
}
}
print "</table>\n";
}
else {
my $query = $query_string;
# remove from the query string options that we don't want
$query =~ s/(?:&)sense=yes//;
$query =~ s/(?:&)?trace=yes//;
# now add the option we do want
$query .= '&sense=yes';
# prepare two query strings--one without traces and one with
my $url_nt = "similarity.cgi?${query}"; # 'nt' means 'no trace'
my $url_trace = $url_nt . '&trace=yes';
goto SHOW_END unless scalar keys %scores;
print '<h2>Results:</h2>';
foreach my $m (keys %scores) {
my $good = $scores{$m}->[0];
foreach my $i (1..$#{$scores{$m}}) {
if ($scores{$m}->[$i]->[0] > $good->[0]) {
$good = $scores{$m}->[$i];
}
}
my $wps1 = $good->[1];
$wps1 =~ s/\#/%23/g;
my $wps2 = $good->[2];
$wps2 =~ s/\#/%23/g;
print +("\n<p class=\"results\">",
"The relatedness of ",
"<a href=\"#\" onclick=\"showWindow ('wps.cgi?wps=$wps1', ''); return false;\">$good->[1]</a> ",
"and <a href=\"#\" onclick=\"showWindow ('wps.cgi?wps=$wps2', ''); return false;\">$good->[2]</a> ",
"using $m is $good->[0].",
"</p>\n");
if ($trace eq 'yes') {
print "<p class=\"trace\">$good->[3]</p>";
}
}
print +("<p><a href=\"#\" ",
"onclick=\"showWindow ('$url_nt', 'All senses'); return false\">",
"View relatedness of all senses (without traces)</a></p>\n");
print +("<p><a href=\"#\" ",
"onclick=\"showWindow ('$url_trace', 'All senses'); return false\">",
"View relatedness of all senses (with traces)</a></p>\n");
}
unless ($trace eq 'yes') {
my $urltrace = "similarity.cgi?${query_string}&trace=yes";
print +("<p><a href=\"#\" ",
"onclick=\"showWindow ('$urltrace', 'Traces'); return false;\">",
"View traces</a></p>\n");
}
SHOW_END:
print "<hr />";
close Server;
}
}
$word1 = defined $word1 ? $word1 : "";
$word2 = defined $word2 ? $word2 : "";
my $measure = 'path';#defined $measure ? $measure : 'path';
showForm (2, $word1, $word2, $measure) unless $showform eq 'no';
showPageEnd ();
exit;
# ========= subroutines =========
sub round ($)
{
my $num = shift;
my $str = sprintf ("%.4f", $num);
$str =~ s/\.?0+$//;
return $str;
}
sub showPageStart
{
print <<"EOINTRO";
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>Similarity</title>
<link rel="stylesheet" href="$doc_base/sim-style.css" type="text/css" />
<script type="text/javascript">
<!-- hide script from old browsers
function measureChanged ()
{
/* get the form that we want */
var myform = document.getElementById ("queryform");
/* get the currently selected measure, put it in mm */
var mm = myform.measure.options[myform.measure.selectedIndex];
if (mm.value == "path" || mm.value == "wup" || mm.value == "lch"
|| mm.value == "res" || mm.value == "lin" || mm.value == "jcn"
|| mm.value == "all") {
myform.rootnode.disabled = "";
}
else {
myform.rootnode.disabled = "disabled";
}
}
function formReset ()
{
window.location = "similarity.cgi";
}
function showWindow (url, title)
{
url += '&showform=no';
var nw = window.open (url, "", "width=625, height=625, scrollbars=yes, resizeable=yes, location=no, toolbar=no");
nw.document.title = title;
}
// -->
</script>
</head>
<body>
<div id="umdlogo" style="float: left">
<a href="http://www.d.umn.edu/"><img style="border: 0px"
src="$doc_base/logo_black.gif"
alt="University of Minnesota Duluth" /></a>
</div>
<h1>WordNet::Similarity web interface</h1>
<p>Read an overview of
<a href="http://search.cpan.org/dist/WordNet-Similarity/doc/intro.pod">WordNet::Similarity</a>.
</p>
EOINTRO
}
sub showForm ($$$$)
{
my ($type, $arg1, $arg2, $arg3) = @_;
# the 'action' attribute for the HTML form below--should be the script
# name
my $action = 'similarity.cgi';
print <<"EOFORM1";
<p>You may enter any two words in one of three formats:</p>
<ol>
<li><tt>word</tt></li>
<li><tt>word#part_of_speech</tt> (where part_of_speech is one of n, v, a,
or r)</li>
<li><tt>word#part_of_speech#sense</tt> (where sense is a positive integer)</li>
</ol>
<p>If words are entered in format 1 or 2, then the relatedness of all
valid forms of the words will be computed (e.g., if 'dogs' is entered,
then 'dog' will be used to compute relatedness).
<a href="$doc_base/instructions.html">More instructions</a>.</p>
<form action="$action" method="get" id="queryform" onreset="formReset()">
<p>
EOFORM1
# check if we are trying to get the user to type in a pair of words or
# if the user needs to select senses from a option menu.
if ($type == 2) {
# the user needs to type in two words
print <<"EOT";
<label for="word1in" class="leftlabel">Word 1:</label>
<input type="text" name="word1" id="word1in" value=\"$arg1\" />
<input type="radio" name="senses1" id="senses1Ain" checked="checked" value="all" />
<label for="senses1Ain">Use all senses</label>
<input type="radio" name="senses1" id="senses1Bin" value="gloss" />
<label for="senses1Bin">Pick a sense by <a href="#" onclick="showWindow ('$doc_base/explanations.html#glossdef'); return false;">gloss</a></label>
<input type="radio" name="senses1" id="senses1Cin" value="synset" />
<label for="senses1Cin">Pick a sense by <a href="#" onclick="showWindow ('$doc_base/explanations.html#synsetdef'); return false;">synset</a></label>
<br />
<label for="word2in" class="leftlabel">Word 2:</label>
<input type="text" name="word2" id="word2in" value=\"$arg2\" />
<input type="radio" name="senses2" id="senses2Ain" checked="checked" value="all" />
<label for="senses2Ain">Use all senses</label>
<input type="radio" name="senses2" id="senses2Bin" value="gloss" />
<label for="senses2Bin">Pick a sense by <a href="#" onclick="showWindow ('$doc_base/explanations.html#glossdef'); return false;">gloss</a></label>
<input type="radio" name="senses2" id="senses2Cin" value="synset" />
<label for="senses2Cin">Pick a sense by <a href="#" onclick="showWindow ('$doc_base/explanations.html#synsetdef'); return false;">synset</a></label>
<br />
EOT
}
else {
# the user needs to select word senses from a menu
print "<label for=\"word1in\" class=\"leftlabel\">Word 1:</label>\n";
print "<select name=\"word1\" id=\"word1in\" style=\"width: 4in\">\n";
foreach my $ref (@$arg1) {
my ($sense, $gloss) = @$ref;
print "<option value=\"$sense\">$sense: $gloss</option>\n";
}
print "</select><br />\n";
print "<label for=\"word2in\" class=\"leftlabel\">Word 2:</label>\n";
print "<select name=\"word2\" id=\"word2in\" style=\"width: 4in\">\n";
foreach my $ref (@$arg2) {
my ($sense, $gloss) = @$ref;
print "<option value=\"$sense\">$sense: $gloss</option>\n";
}
print "</select><br />\n";
}
print '<label for="measurepull" class="leftlabel">Measure:</label>', "\n";
print '<select name="measure" id="measurepull" ',
'onchange="measureChanged();">', "\n";
my @measures = (['all', 'Use all measures'],
['hso', 'Hirst & St-Onge'],
['lch', 'Leacock & Chodorow'],
['lesk', 'Extended Gloss Overlaps'],
['lin', 'Lin'],
['jcn', 'Jiang & Conrath'],
['path', 'Path length'],
['random', 'Random numbers'],
['res', 'Resnik'],
['vector', 'Gloss Vector'],
['vector_pairs', 'Gloss Vector (pairwise)'],
['wup', 'Wu & Palmer']);
foreach (@measures) {
my $selected = $_->[0] eq $arg3 ? 'selected="selected"' : '';
print "<option value=\"$_->[0]\" $selected>$_->[1]</option>\n";
}
print "</select>\n";
print <<"EOFORM";
<a href="$doc_base/measures.html">About the measures</a><br />
<input type="checkbox" name="rootnode" id="rootin" value="yes"
checked="checked" />
<label for="rootin">Use <a href="#" onclick="showWindow ('$doc_base/explanations.html#rootnodedef', 'Definitions'); return false;">root node</a>?</label>
<br />
<!--
<input type="checkbox" name="trace" id="tracein" value="yes" />
<label for="tracein">Show trace?</label><br />
-->
<!--
<input type="checkbox" name="gloss" id="glossin" value="yes" />
<label for="glossin">Show glosses (definitions)?</label><br />
-->
<!--
<input type="checkbox" name="synset" id="synsetin" value="yes" />
<label for="synsetin">Show complete synsets?</label><br />
-->
<!--
<input type="checkbox" name="sense" id="sensein" value="yes" />
<label for="sensein">Show all senses?</label><br />
-->
<input type="submit" value="Compute" />
<input type="reset" value="Clear" />
</p>
</form>
<p><a href="similarity.cgi?version=yes">Show version info</a></p>
<hr />
EOFORM
}
sub showPageEnd
{
print <<'ENDOFPAGE';
<div class="footer">
Created by Ted Pedersen and Jason Michelizzi
<br />E-mail: tpederse (at) d (dot) umn (dot) edu
or mich0212 (at) d (dot) umn (dot) edu
</div>
</body>
</html>
ENDOFPAGE
}
__END__
=head1 NAME
similarity.cgi - a CGI script implementing a portion of a web interface for
WordNet::Similarity
=head1 DESCRIPTION
This script works in conjunction with similarity_server.pl and wps.cgi to
provide a web interface for WordNet::Similarity. The documentation
for similarity_server.pl describes how messages are passed between this
script and that one.
=head1 AUTHORS
Ted Pedersen, University of Minnesota Duluth
tpederse at d.umn.edu
Jason Michelizzi, University of Minnesota Duluth
mich0212 at d.umn.edu
=head1 BUGS
None known.
=head1 COPYRIGHT
Copyright (c) 2005, Ted Pedersen and Jason Michelizzi
This program is free software; you may redistribute and/or modify it under
the terms of the GNU General Public License version 2 or, at your option, any
later version.
=cut