Leser: 22
1
2
3
4
5
6
7
8
9
10
1 <html>
2 <head>
3 <title>Dummy...</title>
4 </head>
5 <body>
6 <p>Nr.1 to parse</p>
7 <a href=/dev/null>Bla</a>
8 <p>blablub</p>
9 </body>
10 </html>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/usr/bin/perl

# Print the text contained in the <p> elements of index.html.
#
# The parser only reports start/end events for <p> (see report_tags below).
# Text events are not filtered by report_tags, so the text handler is
# switched on when a <p> opens and switched off again when it closes;
# otherwise every piece of text after the first <p> would be printed.

use strict;
use warnings;
use Data::Dumper;
use HTML::Parser;

my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&start, "tagname,self" ],
    end_h       => [ \&end,   "tagname,self" ],
);
my @results;    # not used in the visible part of the script; kept as declared

# Start-tag callback. Receives the tag name and the parser object
# (argspec "tagname,self") and installs a text handler that prints each
# decoded text chunk on its own line.
sub start {
    my ( $tagname, $self ) = @_;

    $self->handler(
        text => sub {
            my ($text) = @_;
            print "$text\n";
        },
        "dtext"
    );
    return;
}

# End-tag callback. Receives the tag name and the parser object and
# disables the text handler again, so text outside <p>...</p> is ignored.
sub end {
    my ( $tagname, $self ) = @_;

    $self->handler( text => "" );
    return;
}

# Restrict start/end events to <p> tags only.
$p->report_tags("p");

# parse_file returns undef when the file cannot be opened; fail loudly
# instead of silently producing no output.
$p->parse_file("index.html")
    or die "Cannot parse index.html: $!\n";
1 2 3 4
# Alternative approach: build a tree from index.html and query it with XPath.
use HTML::TreeBuilder::XPath;

my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse_file("index.html");

# Collect the values of every <p> element found anywhere below /html/body.
my @nb = $tree->findvalues('/html/body//p');