Leser: 10
8 Einträge, 1 Seite |
1
2
3
4
5
6
<div class="p">diesen Text brauche ich!</div>
<div class="p">diesen Text brauche ich!</div>
<div class="em">uninteressant</div>
<div class="p">diesen Text brauche ich!</div>
<div class="p">diesen Text brauche ich!</div>
<div class="em">uninteressant</div>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
#!/usr/bin/perl
# Collects every text segment HTML::Parser reports for bw.html, prints each
# segment on its own line and finally prints how many segments were found.
use strict;
use warnings;
use HTML::Parser;

# Text segments in document order; filled by the text handler below.
my @found = ();

my $p = HTML::Parser->new(
    start_h => [\&start, 'self,tagname,attr'],
    end_h   => [\&end,   'self,tagname'],
    text_h  => [\&text,  'self,dtext'],
);

# Fail loudly instead of silently parsing nothing when the directory or the
# input file is missing.
my $dir = "c:/users/frosch/documents/schulen/";
chdir($dir) or die "Cannot chdir to $dir: $!\n";
$p->parse_file("bw.html") or die "Cannot read bw.html: $!\n";

# Start-tag handler: records on the parser object that a <div> was seen.
# The flag is not read anywhere in this script.
sub start {
    my ($self, $tagname, $attr) = @_;
    $self->{'div'} = 1 if $tagname eq 'div';
    return;
}

# End-tag handler: intentionally does nothing.
sub end {
    my ($self, $tagname) = @_;
    return;
}

# Text handler: strips line breaks from the decoded text and stores it.
sub text {
    my ($self, $dtext) = @_;
    # Remove every CR and LF; the previous pattern only matched the first
    # literal LF+CR pair and therefore left normal line endings in place.
    $dtext =~ tr/\r\n//d;
    push @found, $dtext;
    return;
}

for my $segment (@found) {
    print $segment, "\n";
}

# scalar(@found) is the element count; $#found would be the last index,
# i.e. one less than the count.
print scalar(@found), " Ergebnisse.\n";
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
#!/usr/bin/perl
# Prints the text found inside every <div class="p"> of bw.html, one
# segment per line, followed by the number of segments collected.
use strict;
use warnings;
use HTML::Parser;

# Collected text segments in document order; filled by the text handler.
my @found = ();

my $p = HTML::Parser->new(
    start_h => [\&start, 'self,tagname,attr'],
    end_h   => [\&end,   'self,tagname'],
    text_h  => [\&text,  'self,dtext'],
);

# Fail loudly instead of silently parsing nothing when the directory or the
# input file is missing.
my $dir = "c:/users/frosch/documents/schulen/";
chdir($dir) or die "Cannot chdir to $dir: $!\n";
$p->parse_file("bw.html") or die "Cannot read bw.html: $!\n";

# Start-tag handler: switches collection on when a <div class="p"> opens.
sub start {
    my ($self, $tagname, $attr) = @_;
    # A tag without a class attribute has no 'class' entry; fall back to ''
    # so the string comparison does not emit an "uninitialized" warning.
    my $class = defined $attr->{class} ? $attr->{class} : '';
    $self->{parse_div} = 1 if $tagname eq 'div' and $class eq 'p';
    return;
}

# End-tag handler: switches collection off at any closing </div>.
# NOTE: a nested <div> inside a class="p" div therefore ends collection at
# the inner </div>.
sub end {
    my ($self, $tagname) = @_;
    $self->{parse_div} = 0 if $tagname eq 'div';
    return;
}

# Text handler: stores the decoded text while collection is switched on.
sub text {
    my ($self, $dtext) = @_;
    # Remove every CR and LF; the previous pattern only matched the first
    # literal LF+CR pair and therefore left normal line endings in place.
    $dtext =~ tr/\r\n//d;
    push @found, $dtext if $self->{parse_div};
    return;
}

for my $segment (@found) {
    print $segment, "\n";
}

# scalar(@found) is the element count; $#found would be the last index,
# i.e. one less than the count.
print scalar(@found), " Ergebnisse.\n";
if ( $token->is_start_tag( 'font' ) ) { ... }
$token->get_attr([$attribute])
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
#!/usr/bin/perl
# Runs a Web::Scraper rule for div[class="p"] over the contents of bw.html
# and dumps the scrape result with Data::Dumper.
use strict;
use warnings;
use Web::Scraper;
use Data::Dumper;

# Scraper rule: store the TEXT of each matching div under 'divs' (a list)
# and hand back just that entry as the result.
my $div_scraper = scraper {
    process 'div[class="p"]', 'divs[]' => 'TEXT';
    result 'divs';
};

# Slurp the whole file through the <> operator: @ARGV names the file and an
# undefined $/ makes the read return everything at once. Both globals are
# localized to this block.
my $html = do {
    local @ARGV = ('bw.html');
    local $/;
    <>;
};

my $texts = $div_scraper->scrape($html);

print Dumper($texts);
# Quoted from renee, 2008-07-22 09:21:59 -- Code (perl):
#!/usr/bin/perl
# Prints the text found inside every <div class="p"> of bw.html, one
# segment per line, followed by the number of segments collected.
use strict;
use warnings;
use HTML::Parser;

# Collected text segments in document order; filled by the text handler.
my @found = ();

my $p = HTML::Parser->new(
    start_h => [\&start, 'self,tagname,attr'],
    end_h   => [\&end,   'self,tagname'],
    text_h  => [\&text,  'self,dtext'],
);

# Fail loudly instead of silently parsing nothing when the directory or the
# input file is missing.
my $dir = "c:/users/frosch/documents/schulen/";
chdir($dir) or die "Cannot chdir to $dir: $!\n";
$p->parse_file("bw.html") or die "Cannot read bw.html: $!\n";

# Start-tag handler: switches collection on when a <div class="p"> opens.
sub start {
    my ($self, $tagname, $attr) = @_;
    # A tag without a class attribute has no 'class' entry; fall back to ''
    # so the string comparison does not emit an "uninitialized" warning.
    my $class = defined $attr->{class} ? $attr->{class} : '';
    $self->{parse_div} = 1 if $tagname eq 'div' and $class eq 'p';
    return;
}

# End-tag handler: switches collection off at any closing </div>.
sub end {
    my ($self, $tagname) = @_;
    $self->{parse_div} = 0 if $tagname eq 'div';
    return;
}

# Text handler: stores the decoded text while collection is switched on.
sub text {
    my ($self, $dtext) = @_;
    # Remove every CR and LF; the previous pattern only matched the first
    # literal LF+CR pair and therefore left normal line endings in place.
    $dtext =~ tr/\r\n//d;
    push @found, $dtext if $self->{parse_div};
    return;
}

for my $segment (@found) {
    print $segment, "\n";
}

# scalar(@found) is the element count; $#found would be the last index,
# i.e. one less than the count.
print scalar(@found), " Ergebnisse.\n";
Im Wiki stehen aber auch Beispiele, wie man an Attribute kommt...
Wenn es nicht so sehr auf Geschwindigkeit ankommt, würde ich eher Web::Scraper verwenden.
8 Einträge, 1 Seite |