#!/usr/bin/perl -w
use HTML::Parse;
use HTML::FormatText;
use HTML::TableContentParser;
use POSIX qw(strftime);
use Time::ParseDate;
use strict;
# Ineffieicnt, but hey, it's a hack anyway.
my @html = <>;
my $html = "@html";
my $ft = HTML::FormatText->new();
my $tables = HTML::TableContentParser->new()->parse($html);
# Clear out the tables used for layout (bastards).
shift @$tables;
shift @$tables;
pop @$tables;
# "31 (WEEKDAY) : HALETHORPE / UMBC / PENN STATION"
my $title = shift @$tables;
my (%times);
my (@days, @directions, @timepoints);
my (%seendays, %seendirs, %seentps);
# Parse.
my ($t);
for $t (@$tables) {
my ($day, $dir, $timepoint, $tprow, $cell);
my ($r, $c);
my (@tmppoints, @tpdir);
# "Weekday"
$day = shift @{$t->{rows}};
# "TO PENN STATION"
$dir = shift @{$t->{rows}};
# time point names
$tprow = shift @{$t->{rows}};
# Indication of whether time is leaving or arriving. Useless for this app
shift @{$t->{rows}};
# Clean up day and direction for use.
$day = $ft->format(parse_html(${$day->{cells}}[0]->{data}));
$dir = $ft->format(parse_html(${$dir->{cells}}[0]->{data}));
chomp $day;
chomp $dir;
$day =~ s/^\s+|\s+$//g;
$dir =~ s/^\s+|\s+$//g;
if (not $seendays{$day}) {
push @days, $day;
$seendays{$day}++;
}
if (not $seendirs{$dir}) {
push @directions, $dir;
$seendirs{$dir}++;
}
# Last entry is "attributes".
pop @{$tprow->{cells}};
for $c (@{$tprow->{cells}}) {
$cell = HTML::FormatText->new->format(parse_html($c->{data}));
chomp($cell);
$cell =~ s/\band\b/&/gi;
$cell =~ s/(\w+)/\u\L$1/g;
$cell =~ s/^\s+|\s+$//g;
$cell =~ s/\.+$//g;
if (not $seentps{$cell}) {
push @timepoints, $cell;
$seentps{$cell}++;
}
push @tpdir, $cell;
}
# Each row is one line from the schedule.
for $r (@{$t->{rows}}) {
# Last cell of the row is attributes, so not a valid stop.
pop @{$r->{cells}};
@tmppoints = @tpdir;
for $c (@{$r->{cells}}) {
$timepoint = shift @tmppoints;
$cell = HTML::FormatText->new->format(parse_html($c->{data}));
chomp($cell);
$cell =~ s/^\s+|\s+$//g;
if ($cell eq "") {
$cell = "-----";
} else {
if (parsedate($cell) == 0) {
print STDERR "Don't like '$cell'.\ndir: $dir\nday: $day\ntimepoint: $timepoint\n";
}
$cell = POSIX::strftime('%H:%M', localtime(parsedate($cell)));
}
push @{$times{$dir}{$day}{$timepoint}}, $cell;
}
}
}
# Sort the timepoints. The order is dictated by the first direction; order of
# the second direction is swapped so both arrays point the same way.
@timepoints = sort {
my $result = cmptimes($a, $b, $directions[0]);
if ($result == 0) {
$result = cmptimes($b, $a, $directions[1]);
}
$result;
} @timepoints;
# Start output.
# Time points are easy now.
print join("\t", @timepoints), "\n";
# As are days.
print join("\t", @days), "\n";
my $trips = "";
my @startlines;
my $line = 4;
my $dir = $directions[0];
for my $day (@days) {
# Remember where this block of data started.
push @startlines, $line;
while (more_data($dir, $day)) {
for my $tp (@timepoints) {
if (exists $times{$dir}{$day}{$tp}) {
$trips .= shift(@{$times{$dir}{$day}{$tp}}) . "\t";
} else {
$trips .= "-----\t";
}
}
chop($trips);
$trips .= "\n";
$line++;
}
}
# Kinda bad, but it'd be more work to actually write a subroutine for it.
$dir = $directions[1];
@timepoints = reverse @timepoints;
for my $day (@days) {
# Remember where this block of data started.
push @startlines, $line;
while (more_data($dir, $day)) {
for my $tp (@timepoints) {
if (exists $times{$dir}{$day}{$tp}) {
$trips .= shift(@{$times{$dir}{$day}{$tp}}) . "\t";
} else {
$trips .= "-----\t";
}
}
chop($trips);
$trips .= "\n";
$line++;
}
}
print join("\t", @startlines), "\n";
print $trips;
sub cmptimes {
my ($a, $b, $dir) = @_;
my $i;
my $day = $days[0];
if (exists $times{$dir}{$day}{$a} and exists $times{$dir}{$day}{$b}) {
for ($i = 0; $i < $#{$times{$dir}{$day}{$a}}; $i++) {
if (${$times{$dir}{$day}{$a}}[$i] ne "-----" and
${$times{$dir}{$day}{$b}}[$i] ne "-----") {
last;
}
}
if ($i < $#{$times{$dir}{$day}{$a}}) {
# Because of the format of the data, string compare will work.
return ${$times{$dir}{$day}{$a}}[$i] cmp
${$times{$dir}{$day}{$b}}[$i];
}
}
return 0;
}
sub more_data {
my ($dir, $day) = @_;
my @keys = keys %{$times{$dir}{$day}};
my $count = @{$times{$dir}{$day}{$keys[0]}};
return $count > 0;
}