#!C:\perl\bin\perl.exe -w

# Purpose: Script for parsing html, especially information in tables
# Created by: Tom Bombadil, June 6, 2007
# Version: 0.2

# Emit the CGI response header before anything else; the blank line produced
# by "\n\n" ends the header section, so all later output is the response body.
my $http_header = "Content-type: text/html\n\n";
print $http_header;

use CGI::Carp qw(fatalsToBrowser);
use strict;
use HTML::TableExtract;
use HTTP::Tiny;

# File-scoped working variables used by the statements below.
my $html_file = "http://www.securityfocus.com/bid";   # url of web site

my (
    $table,    # table of interest
    $te,       # table extract (HTML::TableExtract object)
    $ts,       # table search (one table returned by the extractor)
    $row,      # row of table of interest
);

my @securityfocus = ();                               # array of field labels


# Write the list of field labels, one per line, to bid.txt in the current
# working directory (an existing file is truncated).
@securityfocus = (
    "Bugtraq ID: \n",
    "Class: \n",
    "CVE: \n",
    "Remote: \n",
    "Local: \n",
    "Published: \n",
    "Updated: \n",
    "Credit: \n",
    "Vulnerable: \n",
    "Not Vulnerable: \n",
);

# Three-argument open with a lexical handle: the mode can never be confused
# with the file name, and the handle is not a package-global bareword.
open(my $template_out, '>', 'bid.txt')
    or die "Cannot open bid.txt for writing: $!";
print {$template_out} @securityfocus
    or die "Cannot write to bid.txt: $!";
# Buffered write errors only surface at close, so check it as well.
close($template_out)
    or die "Cannot close bid.txt: $!";

# Read bid.txt back and echo every line to the response, wrapped in a single
# leading and trailing space.
# The file is opened read-only through a lexical handle, and a failed open is
# reported instead of silently producing no output.
open(my $template_in, '<', 'bid.txt')
    or die "Cannot open bid.txt for reading: $!";
while (my $label = <$template_in>) {
    chomp $label;
    print " $label \n";
}
close($template_in);

# Depth represents how deeply a table resides in other tables. The depth of a top-level
# table in the document is 0. A table within a top-level table has a depth of 1, and so
# on. Each depth can be thought of as a layer; tables sharing the same depth are on the
# same layer. Within each of these layers, Count represents the order in which a table
# was seen at that depth, starting with 0. Providing both a depth and a count will
# uniquely specify a table within a document -> the table of interest is on the second
# level (depth = 1), the first one (count = 0).

# Fetch every page $html_file/1 .. $html_file/30000 and print the cells of
# the table at depth 1, count 0 of each page.
#
# parse_file() (inherited from HTML::Parser) only reads local files or
# filehandles, so it cannot be given a URL: each page is downloaded with
# HTTP::Tiny and the markup is handed to parse() instead.
#
# The report is printed inside the loop because a fresh extractor is created
# for every page; reporting after the loop would only cover the last page.
my $http = HTTP::Tiny->new;    # one client object reused for all requests

for my $bid (1..30000) {
    my $url      = $html_file."/".$bid;
    my $response = $http->get($url);

    # Skip pages that cannot be retrieved (unknown id, network error, ...)
    # rather than aborting the whole run.
    unless ($response->{success}) {
        warn "Skipping $url: $response->{status} $response->{reason}\n";
        next;
    }

    $te = HTML::TableExtract->new( depth => 1, count => 0 );
    $te->parse($response->{content});
    $te->eof;                  # signal end of document to the parser

    foreach $ts ($te->tables) {
        print "Table found at ", join(',', $ts->coords), ":\n";
        foreach $row ($ts->rows) {
            # Cells may be undef; print those as empty strings so join()
            # does not warn about uninitialized values under -w.
            print "   ", join(',', map { defined $_ ? $_ : '' } @$row), "\n";
        }
    }
}