WWW::DataWiki::HowNotToDoIt - version 0
#!/usr/bin/perl use 5.010; use common::sense; use CGI; use CGI::Carp 'fatalsToBrowser'; use DateTime; use DateTime::Format::HTTP; use DateTime::Format::ISO8601; use DateTime::Format::Strptime; use Digest::MD5 qw[]; use HTTP::Negotiate qw[choose]; use PerlIO::gzip; # Defer loading RDF::Trine until we need it, as it's quite big # and we don't always need it. sub use_Trine { local $@; eval 'use RDF::TriN3;' unless RDF::Trine::Model->can('new'); die $@ if $@; } # Defer loading RDF::Query until we need it, as it's quite big # and we don't always need it. sub use_Query { local $@; eval 'use RDF::Query;' unless RDF::Query->can('new'); die $@ if $@; } # Parse requested URI our $CGI = CGI->new; our $PATH = '/home/tai/vhosts/wiki.ontologi.es/Versions/'; our $URI = 'http://wiki.ontologi.es/'; our $V_URI = 'http://wiki.ontologi.es/Versions/'; our $E_URI = 'http://buzzword.org.uk/2010/n3edit/?wiki='; our $COMPACTDATE = DateTime::Format::Strptime->new(pattern=>'%Y%m%dT%H%M%SZ'); our ($SHORT,$DATE,$EXT) = split /\./, substr($CGI->path_info, 1); our $NS = undef; our $namespaces = { }; if ($SHORT =~ /^([a-z0-9-]+:)?([a-z][a-z0-9-]*[a-z0-9])$/) { ($NS,$SHORT) = ($1,$2); if ($NS) { $NS =~ s/:$//; die "Namespace $NS does not exist." unless $namespaces->{ $NS }; $PATH = $namespaces->{ $NS }{'PATH'} ? $namespaces->{ $NS }{'PATH'} : ($PATH . $NS . '__'); $URI = $namespaces->{ $NS }{'URI'} ? $namespaces->{ $NS }{'URI'} : ($URI . $NS . ':'); $V_URI= $namespaces->{ $NS }{'V_URI'} ? $namespaces->{ $NS }{'V_URI'} : ($V_URI . $NS . '__'); $E_URI= $namespaces->{ $NS }{'E_URI'} ? $namespaces->{ $NS }{'E_URI'} : ($E_URI . $NS . ':'); $NS .= ':'; } } else { die "Unsupported page name. Must use only lower alphanumeric, plus hyphen."; } if ($DATE =~ /[a-su-y]/i and $DATE !~ /^latest$/i) { $EXT = $DATE; $DATE = undef; } our @all_versions = sort map { $_ =~ s!^.*/([^/]+)\.n3.gz$!$1!; $_; } glob("${PATH}${SHORT}/*.n3.gz"); # This allows a greater degree of flexibility than CGI::header. sub decent_headers { my (%headers) = @_; $headers{'Status'} ||= '200 OK'; $headers{'Content-Type'} ||= 'text/plain; charset=utf-8'; my @keys = sort { { 'Status' => 0 , 'Content-Type' => 10 , }->{$_} || 999 } keys %headers; foreach my $h (@keys) { my @lines = (ref $headers{$h} eq 'ARRAY') ? @{ $headers{$h} } : ($headers{$h}); foreach my $line (@lines) { printf("%s: %s\r\n", $h, ( (ref $line eq 'ARRAY') ? (join ', ', @$line) : $line )); } } print "\r\n"; } # HTTP 2xx/304 responses. sub SendData { my ($data, $datetime, $variant, $headers, $skip304check) = @_; $headers ||= {}; my %headers = %$headers; my $digest = Digest::MD5->new; $digest->add($data); my $etag = $NS.$SHORT."\@${datetime}.${variant}".'='.$digest->clone->hexdigest; my $format = { 'n3' => 'text/n3; charset=utf-8', 'nt' => 'text/plain; charset=utf-8', 'canonical' => 'text/plain; charset=utf-8', 'html' => 'text/html; charset=utf-8', 'xhtml' => 'application/xhtml+xml; charset=utf-8', 'turtle' => 'text/turtle; charset=utf-8', 'rdf' => 'application/rdf+xml; charset=utf-8', 'json' => 'application/json', }->{$variant} || 'application/octet-stream'; $headers{'Content-Type'} ||= $format; $headers{'MS-Author-Via'} ||= ['DAV','SPARQL']; $headers{'Content-Base'} ||= $URI.$SHORT; $headers{'Content-Location'} ||= $URI.$SHORT.'.'.$datetime.'.'.$variant; $headers{'Link'} ||= [["<${URI}${SHORT}.latest>;rel=\"latest-version\""], ["<${V_URI}${SHORT}/>;rel=\"version-history\""], ["<${E_URI}${SHORT}>;rel=\"edit\";anchor=\"${URI}${SHORT}.latest\""]]; $headers{'Last-Modified'} ||= DateTime::Format::HTTP->format_datetime( $COMPACTDATE->parse_datetime($datetime)); $headers{'ETag'} ||= "\"$etag\""; $headers{'Content-MD5'} ||= $digest->b64digest.'=='; $headers{'Vary'} ||= [[qw(Accept Accept-Datetime Accept-Encoding)]]; unless ($skip304check) { my ($condition) = checkConditions($etag, $datetime); if ($condition) { $headers{'Status'} = $condition; $data = ''; } } decent_headers(%headers); print $data; exit; } # HTTP 3xx/4xx/5xx responses. sub SendError { my ($status, $body, $headers) = @_; # open my $elog, ">>${PATH}/e.log"; # print $elog "$status >>>> $body\n"; # close $elog; $headers ||= {}; my %headers = %$headers; $headers{'Status'} ||= ($status || '599 Unspecified Error'); $headers{'Content-Type'} ||= 'text/plain'; $headers{'MS]Author-Via'} ||= ['SPARQL', 'DAV']; decent_headers(%headers); print $body . "\n"; exit; } sub checkConditions { my ($etag, $datetime) = @_; if ($CGI->http('if_modified_since')) { my $ims = $COMPACTDATE->format_datetime( DateTime::Format::HTTP->parse_datetime( $CGI->http('if_modified_since'))); if ($ims ge $datetime) { return ('304 Not Modified', "Has not been modified since request If-Modified-Since header."); } } if ($CGI->http('if_unmodified_since')) { my $ims = $COMPACTDATE->format_datetime( DateTime::Format::HTTP->parse_datetime( $CGI->http('if_unmodified_since'))); if ($ims lt $datetime) { return ('412 Precondition Failed', "Has been modified since request If-Unmodified-Since header."); } } if ($CGI->http('if_none_match') =~ /^\s*\*\s*$/) { return '304 Not Modified'; } elsif (length $CGI->http('if_none_match')) { my $header = $CGI->http('if_none_match'); $header =~ s!W/\"!\"!g; # not issuing any weak etags $header =~ s/(^\s*\"|\"\s*$)//g; my @matchers = split /\"\s+\"/, $header; foreach my $m (@matchers) { if ($m eq $etag) { return ('304 Not Modified', "Matched tag $m in request If-None-Match header."); } } } if ($CGI->http('if_match') =~ /^\s*\*\s*$/) { # continue } elsif (length $CGI->http('if_match')) { my $header = $CGI->http('if_match'); $header =~ s!W/\"!\"!g; # not issuing any weak etags $header =~ s/(^\s*\"|\"\s*$)//g; my @matchers = split /\"\s+\"/, $header; foreach my $m (@matchers) { if ($m eq $etag) { return; } } return ('412 Precondition Failed', "Tag $etag not matched in request If-Match header."); } return; } # Handle PUT/POST if ($CGI->request_method =~ /^(put|post)$/i) { SendError('405 Method Not Allowed', "Allowed: HEAD, GET. To PUT data, don't append datetime ($DATE) or format ($EXT) suffixes to the URL.") if defined $DATE || (defined $EXT and $EXT ne 'n3'); # We've been posted/put some content with a content-type. my ($IN, $CT); if ($CGI->request_method =~ /put/i) { $IN = $CGI->param('PUTDATA'); $CT = $CGI->content_type; } elsif ($CGI->request_method =~ /post/i and $CGI->content_type =~ /sparql.query/i) { $IN = $CGI->param('POSTDATA'); $CT = $CGI->content_type; } elsif($CGI->param('data')) { $IN = $CGI->param('data'); $CT = $CGI->param('format') || 'text/n3'; } my $olddata; if (@all_versions) { my $oldversion = $all_versions[-1]; local $/ = undef; open my $fh, "<:gzip", $PATH.$SHORT."/${oldversion}.n3.gz"; $olddata = <$fh>; close $fh; my $digest = Digest::MD5->new; $digest->add($olddata); my $oldetag = $NS.$SHORT."\@${oldversion}.n3".'='.$digest->clone->hexdigest; my ($bail, $reason) = &checkConditions($oldetag, $oldversion); if ($bail) { SendError('412 Precondition Failed', $reason); } } else { if ($CGI->http('if_match') =~ /^\s*\*\s*$/) { SendError('412 Precondition Failed', "'If-Match: *' in request header did not match, as no such resource exists."); } } &use_Trine; # Check it's a supported type. my ($parser, $sparql); given ($CT) { when (/turtle/i) { $parser = RDF::Trine::Parser::Turtle->new; } when (/n3/i) { $parser = RDF::Trine::Parser::Notation3->new; } when (/html/i) { $parser = RDF::Trine::Parser::RDFa->new; } when (/xml/i) { $parser = RDF::Trine::Parser::RDFXML->new; } when (/json/i) { $parser = RDF::Trine::Parser::RDFJSON->new; } when (/text.plain/i) { $parser = RDF::Trine::Parser::NTriples->new; } when (/sparql.query/){ $sparql++; } default { SendError("415 Unsupported Media Type ($CT)", 'Acceptable: text/turtle, text/n3, text/plain (i.e. N-Triples), application/xhtml+xml (i.e. XHTML+RDFa 1.0), application/rdf+xml and application/json (i.e. RDF/JSON).'); } } # Check it's syntactically sound my $model; if ($sparql) { # MS-Author-Via: SPARQL &use_Query; # These are horrible hacks. if (1 || $CGI->http('user-agent') =~ /firefox/i) { if ($IN =~ /^ \s* WHERE \s* { (.*) } \s* (INSERT|DELETE) \s* { (.*) } \s* $/six) { $IN = "$2 { $3 } WHERE { $1 }"; } $IN =~ s/INSERT/INSERT DATA/i unless $IN =~ /WHERE/i || $IN =~ /INSERT\s+DATA/is; $IN =~ s/DELETE/DELETE DATA/i unless $IN =~ /WHERE/i || $IN =~ /DELETE\s+DATA/is; } $@ = undef; eval { $model = RDF::Trine::Model->temporary_model; RDF::Trine::Parser::Notation3->new ->parse_into_model($URI.$SHORT, $olddata, $model); my $query = RDF::Query->new($IN, { update=>1, load_data=>0, base=>$URI.$SHORT, lang=>'sparql11' }); die RDF::Query->error if defined RDF::Query->error; $query->execute($model); }; if ($@) { SendError('422 Unprocessable Entity', "$IN => ".$@); } } else { $@ = undef; eval { $model = RDF::Trine::Model->temporary_model; $parser->parse_into_model($URI.$SHORT, $IN, $model); }; if ($@) { SendError('422 Unprocessable Entity', $@); } } # We want to save as Notation 3, not whatever the hell # format it was posted to us in. my $best; if ( defined $parser and ( $parser->isa('RDF::Trine::Parser::Turtle') or $parser->isa('RDF::Trine::Parser::Notation3') or $parser->isa('RDF::Trine::Parser::NTriples') ) ) { # It's already some flavour of N3. $best = $IN; } else { # Serialise to N3 (Turtle to be exact). my $ser = RDF::Trine::Serializer::Turtle->new; $best = $ser->serialize_model_to_string($model); } # Save it! my $now = DateTime->now(formatter=>$COMPACTDATE); $now->set_time_zone('UTC'); mkdir $PATH.$SHORT unless -d $PATH.$SHORT; open my $fh, ">:gzip", $PATH.$SHORT."/${now}.n3.gz"; print $fh $best; close $fh; # Respond. SendData($best, "$now", "n3", {Status => ($olddata ? '200 OK' : '201 Created')}, 1); } # Handle GET/HEAD and command-line usage elsif ($CGI->request_method =~ /^(get|head|)$/i) { my ($chosen_version, $chosen_format); # BEGIN: Handle choosing version, including Accept-Datetime header. { unless (@all_versions) { SendError('404 Not Found', 'This page has not yet been created.'); } # If no date given in URI, but requested in Accept-Datetime # header, then reformat that to the expected datetime format. if ($CGI->http('accept_datetime') && !$DATE) { $DATE = $COMPACTDATE->format_datetime( DateTime::Format::HTTP->parse_datetime( $CGI->http('accept_datetime'))); } # if no date given, or latest version requested, find latest version. if ($DATE =~ /^latest$/ || !$DATE) { $chosen_version = $all_versions[-1]; $chosen_version =~ s!^.*/([^/]+)\.n3.gz$!$1!; } # otherwise, find latest version that is no later than given date. elsif ($DATE) { my $req_date = DateTime::Format::ISO8601->parse_datetime($DATE); my $test = $COMPACTDATE->format_datetime($req_date); my @candidates = sort grep { $_ le $test } @all_versions; $chosen_version = (@candidates) ? $candidates[-1] : undef; } # No appropriate version found. if (! $chosen_version) { if ($CGI->http('accept_datetime')) { my $suggested_version = (@all_versions) ? $all_versions[0] : undef; $suggested_version =~ s!^.*/([^/]+)\.n3.gz$!$1!; SendError('406 Not Acceptable', "You requested Accept-Datetime: ".$CGI->http('accept_datetime')."\n". "Earliest available version: ".DateTime::Format::HTTP->format_datetime($COMPACTDATE->parse_datetime($suggested_version))."\n" ); } else { SendError('303 See Other', "See ${URI}${SHORT}", {-Location => $URI.$SHORT.($EXT?".${EXT}":'')}); } } } # END: Handle choosing version, including Accept-Datetime header. # Choose format my $chosen_format = $EXT || choose([ ['n3', 1.0, 'text/n3', undef, 'utf-8'], ['turtle', 0.6, 'text/turtle', undef, 'utf-8'], ['nt', 0.6, 'text/plain', undef, 'utf-8'], ['json', 0.6, 'application/json', undef, 'utf-8'], ['rdf', 0.6, 'application/rdf+xml', undef, 'utf-8'], ['xhtml', 0.02, 'application/xhtml+xml', undef, 'utf-8'], ['html', 0.01, 'text/html', undef, 'utf-8'], ]) || 'n3'; # Handle requests for Gzipped Notation 3. if ($chosen_format eq 'n3' and $CGI->http('Accept-Encoding')=~/gzip/i) { my $data; { local $/ = undef; open my $fh, "<", $PATH.$SHORT."/${chosen_version}.n3.gz"; $data = <$fh>; close $fh; } SendData($data, $chosen_version, $chosen_format, {'Content-Encoding' => 'gzip'}); } # Otherwise we're going to need to unzip the data... my $data; { local $/ = undef; open my $fh, "<:gzip", $PATH.$SHORT."/${chosen_version}.n3.gz"; $data = <$fh>; close $fh; } # Handle requests for uncompressed Notation 3. if ($chosen_format eq 'n3') { SendData($data, $chosen_version, $chosen_format); } &use_Trine; # Sometimes, if we've been requested for Turtle/NTriples, we might # be able to get away with serving the Notation 3 as-is. if ($chosen_format =~ m/^(nt|turtle)$/) { # check that by attempting to parse it with a non-N3 parser. my $parser = $chosen_format eq 'nt' ? RDF::Trine::Parser::NTriples->new : RDF::Trine::Parser::Turtle->new; eval { my $tmpmodel = RDF::Trine::Model->temporary_model; $parser->parse_into_model($URI.$SHORT, $data, $tmpmodel); }; # No errors, so... if (! $@) { SendData($data, $chosen_version, $chosen_format); } } # OK, we're going to need to reserialize the data... my $sclass = 'RDF::Trine::Serializer::' . { 'rdf' => 'RDFXML', 'json' => 'RDFJSON', 'canonical' => 'NTriples::Canonical', 'turtle' => 'Turtle', 'nt' => 'NTriples', }->{$chosen_format}; if ($sclass eq 'RDF::Trine::Serializer::' or !$sclass->can('new')) { SendError('404 Not Found', "No variant of '/${NS}${SHORT}' found with suffix '.${EXT}'. Try '.n3'??") unless ($chosen_format eq 'html' or $chosen_format eq 'xhtml'); } my $parser = RDF::Trine::Parser::Notation3->new; my $tmpmodel_n3 = RDF::Trine::Model->temporary_model; $parser->parse_into_model($URI.$SHORT, $data, $tmpmodel_n3); my $tmpmodel = RDF::Trine::Model->temporary_model; my $iter = $tmpmodel_n3->as_stream; while (my $st = $iter->next) { $tmpmodel->add_statement($st) if $st->rdf_compatible; } if ($chosen_format eq 'html' or $chosen_format eq 'xhtml') { { local $@ = undef; eval 'use RDF::RDFa::Generator;'; die $@ if $@; eval 'use HTML::HTML5::Writer;'; die $@ if $@; } my $gen = RDF::RDFa::Generator->new( base => $URI.$SHORT, safe_xml_literals => 1, style => 'HTML::Pretty', ); my $dom = $gen->inject_document(<<TEMPLATE, $tmpmodel); <html xmlns="http://www.w3.org/1999/xhtml"> <head profile="http://www.w3.org/1999/xhtml/vocab"> <title>Data Wiki: ${NS}${SHORT}</title> <style type="text/css"> dt { font-weight: bold ; } div[about] { border: 2px solid green; background: #dfd; padding: 1em; margin: 1em 0; } img { float: right; } </style> </head> <body> <h1>${NS}${SHORT}</h1> <p>This is a page of data on the wiki.ontologi.es Data Wiki.</p> <h2>Data</h2> </body> </html> TEMPLATE if ($chosen_format eq 'html') { SendData(HTML::HTML5::Writer->new->document($dom), $chosen_version, $chosen_format); } else { SendData(HTML::HTML5::Writer->new( markup => 'xhtml', doctype => HTML::HTML5::Writer->DOCTYPE_XHTML_RDFA )->document($dom), $chosen_version, $chosen_format); } } $data = $sclass->new( namespaces => $parser->{bindings}||{}, # little hack to retain prefixes. style => 'HTML::Pretty', )->serialize_model_to_string($tmpmodel); SendData($data, $chosen_version, $chosen_format); } else { SendError('405 Method Not Allowed', "Allowed: HEAD, GET, PUT, POST."); }
WWW::DataWiki.
Toby Inkster <tobyink@cpan.org>.
This software is copyright (c) 2010-2011 by Toby Inkster.
This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself.
THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
To install WWW::DataWiki, copy and paste the appropriate command in to your terminal.
cpanm
cpanm WWW::DataWiki
CPAN shell
perl -MCPAN -e shell install WWW::DataWiki
For more information on module installation, please visit the detailed CPAN module installation guide.