#!C:\perl\bin\perl.exe -w
# Purpose: Script for parsing html, especially information in tables
# Created by: Tom Bombadil, June 6, 2007
# Version: 0.2
#
# Flow:
#   1. Emit the CGI header.
#   2. Write the list of SecurityFocus field labels to bid.txt, then read the
#      file back and echo every line to the browser.
#   3. For each Bugtraq id in the configured range, download the page, extract
#      the table of interest and print its rows.

use strict;
use warnings;
use CGI::Carp qw(fatalsToBrowser);
use HTML::TableExtract;
use HTTP::Tiny;

print "Content-type: text/html\n\n";

my $html_file   = "http://www.securityfocus.com/bid";   # base url of web site
my $output_file = "bid.txt";                            # label file
my $first_bid   = 1;                                    # first id to fetch
my $last_bid    = 30000;                                # last id to fetch

# Field labels expected on a SecurityFocus vulnerability page.
my @securityfocus = (
    "Bugtraq ID: \n", "Class: \n",      "CVE: \n",
    "Remote: \n",     "Local: \n",      "Published: \n",
    "Updated: \n",    "Credit: \n",     "Vulnerable: \n",
    "Not Vulnerable: \n",
);

# Write the labels. Three-argument open with a lexical handle; failures are
# fatal and reported to the browser through CGI::Carp.
open(my $out_fh, '>', $output_file)
    or die "Cannot open $output_file for writing: $!";
print {$out_fh} @securityfocus;
close($out_fh)
    or die "Cannot close $output_file: $!";

# Read the labels back and echo them line by line.
open(my $in_fh, '<', $output_file)
    or die "Cannot open $output_file for reading: $!";
while (my $line = <$in_fh>) {
    chomp $line;
    print " $line \n";
}
close($in_fh);

# Depth represents how deeply a table resides in other tables. The depth of a
# top-level table in the document is 0. A table within a top-level table has a
# depth of 1, and so on. Each depth can be thought of as a layer; tables
# sharing the same depth are on the same layer. Within each of these layers,
# Count represents the order in which a table was seen at that depth, starting
# with 0. Providing both a depth and a count uniquely specifies a table within
# a document -> the table of interest is on the second level (depth = 1), the
# first one (count = 0).

# One HTTP client is reused for every request.
# NOTE(review): if the site redirects to HTTPS, HTTP::Tiny additionally needs
# IO::Socket::SSL and Net::SSLeay installed -- confirm on the target host.
my $http = HTTP::Tiny->new;

for my $bid ($first_bid .. $last_bid) {
    my $url      = $html_file . "/" . $bid;
    my $response = $http->get($url);

    # A missing or unreachable page must not abort the whole run.
    unless ($response->{success}) {
        warn "Skipping $url: $response->{status} $response->{reason}\n";
        next;
    }

    # parse_file() only reads local files or filehandles, so the downloaded
    # body is handed to parse() instead; eof() flushes the parser state.
    my $te = HTML::TableExtract->new( depth => 1, count => 0 );
    $te->parse($response->{content});
    $te->eof;

    # Report the tables of this page before moving on to the next id, so the
    # results of every page are printed rather than only those of the last.
    foreach my $ts ($te->tables) {
        print "Table found at ", join(',', $ts->coords), ":\n";
        foreach my $row ($ts->rows) {
            # Empty cells come back undefined; print them as empty strings.
            print "   ", join(',', map { defined $_ ? $_ : '' } @$row), "\n";
        }
    }
}