<input name=datei type="file" ....>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
#!/usr/bin/perl
# Command-line crawler.
#
# Asks for an Excel (*.xls) workbook and for the title of one of its columns,
# collects the values of that column, writes them to 'e_components.xls' and
# fetches a search page from components.arrow.com for each value.
# Parts of the original listing are elided ("... ..."); they are kept as-is.
use strict;
use warnings;
use LWP::Simple;
use utf8;
use Spreadsheet::WriteExcel;
use Spreadsheet::ParseExcel;

binmode(STDOUT, ":utf8");

print "Geben Sie die Inputdatei ein : ";
my $input = <STDIN>;
die "No input file given.\n" unless defined $input;    # STDIN closed / EOF
chomp $input;

my $parser = Spreadsheet::ParseExcel->new();
my $book   = $parser->parse($input);
my %tt;    # header title => column index

if ( !defined $book ) {
    die $parser->error(), ".\n";
}

# Extract the header row (row 0) of every worksheet.
for my $sheet ( $book->worksheets() ) {
    my ( $col_min, $col_max ) = $sheet->col_range();

    for my $col ( $col_min .. $col_max ) {
        my $title = $sheet->get_cell( 0, $col );

        # get_cell() returns undef for an empty cell; skip it instead of
        # dying on an undefined method call.
        next unless $title;

        # Store the real column index. A running counter would point at the
        # wrong column whenever the used range does not start at column 0.
        $tt{ $title->value() } = $col;
        print $title->value(), ", ";
    }
}

print "\n";
print "Wählen Sie einen Spalte aus : ";
my $feld = <STDIN>;
die "No column given.\n" unless defined $feld;
chomp $feld;

# Cell values are character strings while STDIN delivers raw bytes; decode the
# input so that titles containing non-ASCII characters can match.
utf8::decode($feld);

# Without this check an unknown title would pass undef as the column index.
die "The requested column was not found in the header row.\n"
    unless exists $tt{$feld};

my @query;

# Collect the contents of the chosen column from every worksheet.
for my $sheet ( $book->worksheets() ) {
    my ( $row_min, $row_max ) = $sheet->row_range();

    for my $row ( $row_min .. $row_max ) {
        my $cell = $sheet->get_cell( $row, $tt{$feld} );
        next unless $cell;
        print $cell->value(), "\n";
        push @query, $cell->value();
    }
}

# Output file (*.xls format) and its title row.
my $workbook = Spreadsheet::WriteExcel->new('e_components.xls')
    or die "Cannot create e_components.xls: $!\n";
my $worksheet = $workbook->add_worksheet();
$worksheet->write( 0, 0, 'Component' );
$worksheet->write( 0, 1, 'Arroweurop' );
... ...

our $row = 1;

# Drop the first collected value (presumably the header cell of the column).
shift @query;

foreach (@query) {
    chomp;
    $worksheet->write( $row, 0, $_ );
    my $min = 10000;

    # Arroweurop
    my $lief = 'Arroweurop';
    my $url  = 'http://components.arrow.com/part/search/' . $_ . '?region=arrowce';
    my $html = get($url);
    ... ...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
#!/usr/bin/perl
# CGI version of the crawler.
#
# Without an upload it shows the upload form. With an uploaded Excel (*.xls)
# workbook and a column title it collects the values of that column, writes
# them to 'e_components.xls' and fetches a search page from
# components.arrow.com for each value.
# Parts of the original listing are elided ("... ..."); they are kept as-is.
use strict;
use warnings;
use LWP::Simple;
use CGI qw(:standard :html4);
use CGI::Carp qw(carpout fatalsToBrowser);
use Spreadsheet::ParseExcel;
use Spreadsheet::WriteExcel;
use utf8;

my $query = CGI->new;

# Show_HTML()
# Returns the HTML page containing the upload form (file field 'Datei',
# text field 'query') as a single string. Takes no arguments.
sub Show_HTML {
    my $html = <<EOT;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<HEAD>
<title>Crawler</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</HEAD>
<BODY onload="document.customForm.query.focus()">
<table border="0" cellpadding="0" cellspacing="0" height="2" width="890">
<tr>
<td width="300" height="5" align="right" valign="right" bgcolor=""></td>
<td width="50" height="5" align="right" valign="right" bgcolor="#ff6600"><font size="6" color="FFFFFF" face="Arial,Times New Roman">e_Components Crawler</font></td>
<td width="15" height="5" align="right" valign="right" bgcolor="#ff6600"></td>
</tr>
<tr>
<td width="300" height="2" align="right" valign="right" bgcolor="#ff6600"></td>
<td width="50" height="2" align="right" valign="right" bgcolor="#ff6600"></td>
<td width="15" height="2" align="right" valign="right" bgcolor="#ff6600"></td>
</tr>
</table>
<form name="customForm" method="post" enctype="multipart/form-data">
<p>Wählen Sie eine Exceldatei (*.xls) von Ihrem Rechner aus:<br>
<input name="Datei" type="file" size="50" maxlength="100000" accept="application/vnd.ms-excel"><br>
<p>Geben Sie eine Spalte von der Exceldatei ein:<br>
<input name="query" type="text" size="50" accept="text/*">
<td><input type='submit' value='Senden'>
</p>
</form>
</body>
</html>
EOT
    return $html;
}

my $filename = $query->param("Datei");

# No file submitted yet (e.g. the first GET request): show the form and stop,
# instead of dying while trying to parse a non-existent upload.
if ( !defined $filename || $filename eq '' ) {
    binmode( STDOUT, ':utf8' );    # the form contains non-ASCII characters
    print $query->header( -type => 'text/html', -charset => 'UTF-8' ),
        Show_HTML();
    exit;
}

# upload() expects the name of the form field, not the client-side file name.
my $upload_filehandle = $query->upload("Datei");
die "The file upload failed.\n" unless defined $upload_filehandle;

# The client-side file name does not exist on the server, so read the uploaded
# data and hand it to the parser as a scalar reference.
binmode $upload_filehandle;
my $xls_data = do { local $/; <$upload_filehandle> };

my $parser = Spreadsheet::ParseExcel->new();
my $book   = $parser->parse( \$xls_data );
my %tt;    # header title => column index

if ( !defined $book ) {
    die $parser->error(), ".\n";
}

# Extract the header row (row 0) of every worksheet.
for my $sheet ( $book->worksheets() ) {
    my ( $col_min, $col_max ) = $sheet->col_range();

    for my $col ( $col_min .. $col_max ) {
        my $title = $sheet->get_cell( 0, $col );

        # get_cell() returns undef for an empty cell; skip it instead of
        # dying on an undefined method call.
        next unless $title;

        # Store the real column index. A running counter would point at the
        # wrong column whenever the used range does not start at column 0.
        $tt{ $title->value() } = $col;
    }
}

my $feld = $query->param("query");
die "No column was given.\n" unless defined $feld && $feld ne '';

# Cell values are character strings while the form value arrives as raw
# bytes; decode it so that titles with non-ASCII characters can match.
utf8::decode($feld);

# Without this check an unknown title would pass undef as the column index.
die "The requested column was not found in the header row.\n"
    unless exists $tt{$feld};

my @query;

# Collect the contents of the chosen column from every worksheet.
for my $sheet ( $book->worksheets() ) {
    my ( $row_min, $row_max ) = $sheet->row_range();

    for my $row ( $row_min .. $row_max ) {
        my $cell = $sheet->get_cell( $row, $tt{$feld} );
        next unless $cell;
        push @query, $cell->value();
    }
}

# Output file (*.xls format) and its title row. The path is relative to the
# working directory of the CGI process, so creation can fail; check it.
my $workbook = Spreadsheet::WriteExcel->new('e_components.xls')
    or die "Cannot create e_components.xls: $!\n";
my $worksheet = $workbook->add_worksheet();
$worksheet->write( 0, 0, 'Component' );
$worksheet->write( 0, 1, 'Arroweurop' );
... ...

our $row = 1;

# Drop the first collected value (presumably the header cell of the column).
shift @query;

foreach (@query) {
    chomp;
    $worksheet->write( $row, 0, $_ );
    my $min = 10000;

    # Arroweurop
    my $lief = 'Arroweurop';
    my $url  = 'http://components.arrow.com/part/search/' . $_ . '?region=arrowce';
    my $html = get($url);
    ... ...
2012-10-09T09:56:54 kimmy: Ich habe mit LWP::Simple ein relativ einfaches Skript (Crawler) geschrieben und es anschließend in eine CGI-Version umgeschrieben. Der einzige Unterschied ist, dass die Eingabedatei statt über ein Filehandle über ein Input-Tag übergeben wird. Code: <input name=datei type="file" ....>
2012-10-09T09:56:54 kimmy: Es gibt im Prinzip keinen. Wenn ich das Skript in der Shell aufrufe, dauert es ca. 5 Min., aber wenn ich es im Webbrowser aufrufe, dauert es viel länger, ca. 20 Min.
Woran liegt eigentlich der Unterschied?