# Contacts a mediawiki server and de-wikifies it making it appear to be a static page
#
# Author: Austin Che
# See http://openwetware.org/wiki/User:Austin/Extensions/Dewikify
#
# 2005/09/15: Initial version
# 2005/11/25: Dewikifying for multiple virtual hosts
# 2005/12/15: Page templates for categories
# 2006/01/13: add mediawiki extension to allow text to only show up either on wiki/non-wiki
# 2006/04/29: Change script to always use dewikify=yes instead of action=dewikify for the extension
# 2006/07/25: Change method of determining category from virtual server name
#
# To use this, put this library file in a location readable by the webserver
# It does not need to be under the document root for the webserver
# Create an index.cgi file to be executed by the webserver
# Load this library in the file and overwrite whatever variables that
# you wish to customize, e.g.:
# ---------
# #!/usr/bin/perl -Tw
# use strict;
# use lib "../lib";
# use Dewikify;
# $Dewikify::WIKI_CACHE_DIR = "/home/web/tmp/cache";
# $Dewikify::CACHE_LIFE = 1;
# %Dewikify::MAPPING = (
#     "foo" => "Foo_Bar",
#     "bar" => "Hello",
# );
# Dewikify::main();
# ---------
#
# If you use the associated dewikify.php mediawiki extension (not necessary),
# on your wiki, new tags will be defined to allow text to only show up on the wiki
# or only on the dewikified version.
#
# You'll also need to add some mod_rewrite rules.
# Here is what I use:
#   RewriteEngine On
#   # first point the empty uri to the script
#   RewriteRule ^/$ /index.cgi [L]
#   # rewrite everything else ending in html to the script also
#   RewriteRule ^/(.*)\.html$ /index.cgi?page=$1 [L]
#   # this is needed for our edit trick where we add x to end of url to edit it
#   RewriteRule ^/(.*x)$ /index.cgi?page=$1 [L]

package Dewikify;

use strict;
use warnings;
use CGI qw/:standard/;
use CGI::Carp qw(fatalsToBrowser);

# global defaults (callers may overwrite these before using the module)
our $WIKI_CACHE_DIR = "/tmp";                         # should be writable for web server
our $CACHE_LIFE = 0;                                  # cache lifetime in days
our $WIKI_SERVER = "http://openwetware.org";          # base location of wiki
our $DEWIKIFY_NAMESPACE = "Dewikify";                 # stuff to control this program on the wiki
our $DEWIKIFY_DEFAULT_TEMPLATE = "Dewikify:Default";  # name of default template
our $TEMPLATE_BODY = "!BODY!";                        # what is replaced by the page's body in templates
our $DEFAULT_TEMPLATE = "$TEMPLATE_BODY";             # if even DEWIKIFY_DEFAULT_TEMPLATE page doesn't exist
our $WIKI_PATH = "/wiki/";                            # deal with aliased "clean" path
our %MAPPING = ( );                                   # mapping from server names to wiki categories

# full url for retrieving a page
# this script designed for default monobook
# dewikify=yes is for if the dewikify extension is installed
# should have no effect otherwise
our $WIKI_REAL_PATH = "/index.php?useskin=monobook&dewikify=yes&title=";

# set this to something if it should be fixed
# otherwise it will be set depending on $SERVER
our $WIKI_CATEGORY;

# these depend on the virtual host we're called by
our $SERVER = server_name();
our $MY_URL = url(-base=>1);
our $FULL_URL = url();

our $CURL_PROG = "/usr/bin/curl";                     # path to curl

# untaint some environment variables
$ENV{"CDPATH"} = "";
$ENV{"PATH"} = "";

# do_redirect($url)
# Prints the output of CGI's redirect() for $url and then terminates the
# script; this sub never returns to its caller.
sub do_redirect($) {
    my ($url) = @_;
    print redirect($url);
    exit;
}

# clear_cache()
# Removes cached files from $WIKI_CACHE_DIR. A file is removed when
#   - its name starts with $WIKI_CATEGORY (the current category), or
#   - its name starts with $DEWIKIFY_NAMESPACE (the general namespace), or
#   - it is older than $CACHE_LIFE days (per the -M file test).
# Dies if the cache directory cannot be opened. Failures to remove an
# individual file are ignored (best effort).
sub clear_cache() {
    opendir(my $cache_dh, $WIKI_CACHE_DIR)
        or die "cannot open cache directory: $!";
    my @entries = readdir($cache_dh);
    closedir($cache_dh);

    # An unset category behaves like an empty prefix, i.e. it matches every
    # name, exactly as interpolating an empty string into the pattern would.
    my $category = defined $WIKI_CATEGORY ? $WIKI_CATEGORY : '';

    foreach my $entry (@entries) {
        # Accept (and untaint) only names made up entirely of word characters
        # and '%'. The match is anchored so that a name such as "foo.html" is
        # skipped instead of being silently truncated to "foo", which would
        # make us test and unlink a different path.
        next unless $entry =~ m/\A([\w%]+)\z/;
        my $file = $1;
        my $path = "$WIKI_CACHE_DIR/$file";

        # only plain files are cache entries; never touch directories
        next unless -f $path;
        my $age_in_days = -M _;    # reuse the stat done by -f above

        # \Q...\E: the prefixes are literal text, not regular expressions
        my $remove = ($file =~ m/\A\Q$category\E/)
                  || ($file =~ m/\A\Q$DEWIKIFY_NAMESPACE\E/)
                  || ($age_in_days > $CACHE_LIFE);
        unlink($path) if $remove;
    }
}

# encode_safe_url($url)
# Percent-encodes every byte of $url that is not an ASCII letter, digit or
# underscore, which removes any potentially dangerous characters, and returns
# the untainted result. Character strings are converted to UTF-8 bytes first
# so that non-ASCII characters are encoded byte by byte rather than passed
# through or turned into malformed escapes.
# Returns the empty string for an undefined argument.
sub encode_safe_url($) {
    my ($url) = @_;
    return '' unless defined $url;

    # work on bytes so that ord() below is always in the range 0..255
    utf8::encode($url) if utf8::is_utf8($url);

    # an explicit ASCII class is used because \W follows Unicode rules on
    # character strings and would let non-ASCII letters through unencoded
    $url =~ s/([^A-Za-z0-9_])/sprintf("%%%02X", ord($1))/eg;

    # the following reverses the above encoding
    # $url =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;

    # capture to untaint; always matches since only safe characters remain
    my ($safe) = $url =~ m/\A([A-Za-z0-9_%]*)\z/;
    return $safe;
}

sub get_current_page($) { my ($page) = @_; my $data; my $content; my $header; my $template; # get current version of page from wiki, need to untaint $page open(PAGE, "$CURL_PROG -q -s -f \"${WIKI_SERVER}${WIKI_REAL_PATH}" . encode_safe_url($page) . "\" |") || return; $data = join("", ); close(PAGE); # check if page doesn't exist return undef if ($data =~ m@.*There is currently no text in this page@s); $data =~ m@^(.*).*(.*)@s || die "Unable to parse wiki page: $data"; $header = $1; $content = $2; # make all related pages point back to us # make sure to detect internal links (# in the regexp) $content =~ s@href="${WIKI_PATH}$WIKI_CATEGORY:([^"#]*)@href="$MY_URL/$1.html@g; $content =~ s@href="${WIKI_PATH}$WIKI_CATEGORY(["#]?)@href="$MY_URL/$1@g; $content =~ s@href=\"\#@href=\"$FULL_URL\#@g; # remove image links $content =~ s@()@$1@gs; # remove editing things #$content =~ s@
@@g; # section edits $content =~ s@@@g; # section edits $content =~ s@]*?action=edit.*?>(.*?)@$1@gs; # links to non-existent pages # footer not needed $content =~ s@(