#!/usr/bin/perl -w
#
# Forum-to-RSS scraper (CGI).
#
# Query parameters:
#   title        channel title, echoed into the feed
#   url          http(s) address of the forum topic-list page to scrape
#   description  channel description, echoed into the feed
#
# The page is fetched into ./cache/ and re-used for 15 minutes.  Topic rows
# (anchors whose href contains "showMessage?topicID") are extracted with
# HTML::TokeParser and written out newest-first as RSS items.
#
# NOTE(review): the copy of this script that was reviewed had all of its
# markup-like text stripped (RSS tags inside print strings, the <FILE>
# readline, and the tail of the "Sticky:" substitution).  The RSS element
# names below were rebuilt from the surviving comments and print sequence
# (channel title/link/description, lastBuildDate, item title/description/
# pubDate/link) -- confirm against a known-good copy if one exists.

use strict;
use warnings;

use CGI qw(:standard);
use LWP::Simple qw(getstore);
use HTML::TokeParser;
use Date::Manip qw(ParseDate UnixDate);

use constant CACHE_DIR         => './cache';
use constant CACHE_TTL_SECONDS => 900;    # 15 minutes

# RSS requires RFC 822 dates ("Mon, 05 Jan 2004 13:45:00 -0500").
use constant RSS_DATE_FORMAT => '%a, %d %b %Y %H:%M:%S %z';

# NOTE(review): the original pattern was truncated after "Sticky:.*?" in the
# reviewed copy; only its purpose ("break URLs that are near Sticky topics")
# survived.  This reconstruction drops the text from "Sticky:" through the
# opening tag of the next anchor -- TODO confirm against the real pattern.
my $STICKY_HACK_RE = qr{Sticky:.*?<a\b[^>]*>}is;

print header('text/xml');

my $title       = param('title');
my $url         = param('url');
my $description = param('description');
$url = '' unless defined $url;

# begin RSS output
print qq{<?xml version="1.0"?>\n};
print qq{<rss version="2.0">\n<channel>\n};
print ' <title>',       _xml_escape($title),       "</title>\n";
print ' <link>',        _xml_escape($url),         "</link>\n";
print ' <description>', _xml_escape($description), "</description>\n";

# Only plain web URLs are fetched: LWP would otherwise happily read file://
# and other schemes on behalf of whoever supplies the parameter.
my @topics;
if ($url =~ m{\Ahttps?://[^/\s]}i) {
    my $cache_file = _cache_path_for($url);
    _refresh_cache($url, $cache_file) unless _cache_is_fresh($cache_file);
    @topics = _parse_topics($cache_file);
}
else {
    warn "refusing to fetch missing or non-http(s) url parameter\n";
}

# The newest parsed post time becomes lastBuildDate; with no dated topics the
# original fallback of 1/1/1970 is kept.
my $newest_date  = '1/1/1970';
my $newest_epoch = UnixDate($newest_date, '%s');
$newest_epoch = 0 unless defined $newest_epoch;
for my $topic (@topics) {
    next unless $topic->{parsed} ne '' && $topic->{epoch} > $newest_epoch;
    $newest_epoch = $topic->{epoch};
    $newest_date  = $topic->{parsed};
}
print ' <lastBuildDate>',
    _xml_escape(UnixDate($newest_date, RSS_DATE_FORMAT)),
    "</lastBuildDate>\n";

# Write out well-formed items, newest first.  The comparison is numeric:
# epochs are numbers, and a string sort only happens to work while they all
# have the same number of digits.
for my $topic (sort { $b->{epoch} <=> $a->{epoch} } @topics) {
    print "<item>\n";
    print ' <title>', _xml_escape($topic->{text}), "</title>\n";
    print ' <description>',
        _xml_escape('New update as of ' . $topic->{when}),
        "</description>\n";
    print ' <pubDate>', _xml_escape($topic->{pub_date}), "</pubDate>\n"
        if $topic->{pub_date} ne '';
    print ' <link>', _xml_escape($topic->{link}), "</link>\n";
    print "</item>\n";
}

# End of the RSS feed
print "</channel>\n</rss>\n";

exit(0);

# Escape the characters that are significant in XML text.  Undef becomes the
# empty string so a missing CGI parameter does not trigger warnings.
sub _xml_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Map a URL to its cache file.  Same scheme as before (drop "http://", turn
# "/" into "-"), then percent-encode everything else that is not a plain
# filename character so the name can never leave CACHE_DIR.
sub _cache_path_for {
    my ($source_url) = @_;
    my $name = $source_url;
    $name =~ s/http:\/\///ig;
    $name =~ s/\//-/g;
    $name =~ s/([^A-Za-z0-9._-])/sprintf('%%%02X', ord $1)/ge;
    $name =~ s/\A\./%2E/;          # never "." / ".." / a hidden file
    $name = '_' if $name eq '';
    return join '/', CACHE_DIR, $name;
}

# True when the cache file exists and was modified within the TTL.  A missing
# file has no mtime and is therefore never fresh.
sub _cache_is_fresh {
    my ($cache_file) = @_;
    my $mtime = (stat $cache_file)[9];
    return defined $mtime && $mtime >= time() - CACHE_TTL_SECONDS;
}

# Fetch $source_url into $cache_file and apply the "Sticky:" rewrite to the
# stored copy.  Returns 1 on success, 0 on any failure; on failure whatever
# cache file already exists is left for the parser to use.
sub _refresh_cache {
    my ($source_url, $cache_file) = @_;

    my $status = getstore($source_url, $cache_file);
    unless (defined $status && $status >= 200 && $status < 300) {
        warn 'fetch of ' . $source_url . ' failed with status '
            . (defined $status ? $status : 'undef') . "\n";
        return 0;
    }

    my $in;
    unless (open $in, '<', $cache_file) {
        warn "cannot read $cache_file: $!\n";
        return 0;
    }
    my $html = do { local $/; <$in> };    # slurp; $/ restored on scope exit
    close $in;
    return 0 unless defined $html;

    # hack to break URLs that are near Sticky topics
    $html =~ s/$STICKY_HACK_RE//g;

    my $out;
    unless (open $out, '>', $cache_file) {
        warn "cannot rewrite $cache_file: $!\n";
        return 0;
    }
    print {$out} $html;
    unless (close $out) {
        warn "error closing $cache_file: $!\n";
        return 0;
    }
    return 1;
}

# Parse the cached page and return a list of topic records (hash refs):
#   link      href of the topic anchor
#   text      cleaned-up anchor text
#   epoch     last-post time as epoch seconds (0 when the date is unparseable)
#   parsed    Date::Manip date string ('' when unparseable)
#   pub_date  last-post time in RSS_DATE_FORMAT ('' when unparseable)
#   when      pub_date, or the raw cell text when the date is unparseable
# Returns an empty list when the cache file cannot be opened.
sub _parse_topics {
    my ($cache_file) = @_;

    my $parser = HTML::TokeParser->new($cache_file);
    unless ($parser) {
        warn "cannot open $cache_file for parsing: $!\n";
        return;
    }

    # Records go into an array, not a hash keyed by epoch, so two topics
    # whose last post falls in the same second are both kept.
    my @found;
    while ($parser->get_tag('tr')) {
        my $row_text = $parser->get_trimmed_text('/tr');
        next unless $row_text =~ /\s+?Topic\s/;

        # Found the header row; step to the first data row.
        $parser->get_tag('tr');

        while (my $anchor = $parser->get_tag('a')) {
            my $link      = $anchor->[1]{href} || '-';
            my $link_text = $parser->get_trimmed_text('/a');

            if ($link =~ /showMessage\?topicID/) {
                # Skip the posts and views cells, then enter the cell that
                # holds the last post time.
                $parser->get_tag('td') for 1 .. 3;
                my $time_text = $parser->get_trimmed_text('p', 'td', 'br');

                my $parsed = ParseDate($time_text);
                $parsed = '' unless defined $parsed;
                my $epoch    = $parsed ne '' ? UnixDate($parsed, '%s') : undef;
                my $pub_date = $parsed ne ''
                    ? UnixDate($parsed, RSS_DATE_FORMAT)
                    : undef;
                $epoch    = 0  unless defined $epoch;
                $pub_date = '' unless defined $pub_date;

                $link_text =~ s/[^A-Za-z0-9 ,\.:'\/\\-]//ig;    # Parse out weird characters
                $link_text =~ s/Sticky\sTopic//ig;              # Remove Sticky Topic text

                push @found, {
                    link     => $link,
                    text     => $link_text,
                    epoch    => $epoch,
                    parsed   => $parsed,
                    pub_date => $pub_date,
                    when     => ($pub_date ne '' ? $pub_date : $time_text),
                };
            }

            # Advance to the next row before looking for the next anchor.
            $parser->get_tag('tr');
        }
    }
    return @found;
}