Leser: 1
8 Einträge, 1 Seite |
# Strip everything that looks like an HTML tag from $body.
# The /s modifier lets "." match newlines, so a tag that is broken across
# several lines is removed as well (without /s such a tag is left in place).
# NOTE: this remains a heuristic. The non-greedy match ends at the first ">",
# so a literal ">" inside an attribute value or a comment cuts the match short.
$body =~ s/<.+?>//gs;
1
2
3
<! Das >-Zeichen sollte man besser escapen -->
# oder:
<input type="button" value="--->" name="Knopf">
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
package Sources::HtmlParser;
use strict;
use warnings;
use base 'HTML::Parser';

# Small HTML::Parser subclass that collects the text content of a document.
#
# Usage: create an instance, hand the HTML to parseText(), then read the
# accumulated text with get_text(). The text is stored in $self->{mytext}.

# parseText($text)
#   Resets the text buffer and parses $text as one complete document.
#   Returns the parser object (the return value of eof()).
sub parseText
{
    my ($self, $text) = @_;
    $self->{mytext} = "";
    $self->parse($text);
    # parse() treats its argument as one chunk of a possibly longer document
    # and may keep trailing text buffered. eof() marks the end of the
    # document, flushes that text to text() and resets the parser so the
    # next parseText() call starts from a clean state.
    return $self->eof;
}

# text($origtext, $is_cdata)
#   Text callback: appends each piece of text to the buffer. The second
#   callback argument ($is_cdata) is accepted but not needed here.
#   NOTE(review): this method is presumably invoked by HTML::Parser for text
#   events when the parser is built without explicit handlers - confirm
#   against the HTML::Parser version in use.
sub text
{
    my ($self, $origtext) = @_;
    $self->{mytext} .= $origtext;
    return 1;
}

# get_text()
#   Returns the text collected by the most recent parseText() call
#   (undef if parseText() has not been called yet).
sub get_text
{
    my ($self) = @_;
    return $self->{mytext};
}

1;
# Reduce $httpbody to its text content using Sources::HtmlParser.
# $httpbody is not declared in this snippet; presumably it holds the HTML
# body fetched elsewhere - verify against the surrounding code.
my $hp = Sources::HtmlParser->new();
$hp->parseText($httpbody);
# Replace $httpbody with the collected text, with HTML entities decoded.
$httpbody = HTML::Entities::decode_entities($hp->get_text);
# Remove anything that still looks like an HTML comment; /s lets the match
# span several lines.
# NOTE(review): this runs after entity decoding, so text that was written as
# "&lt;!-- ... --&gt;" in the source is removed here as well.
$httpbody =~ s/<!--.+?-->//gms;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
use HTML::Parser;
# Build an event-driven parser. Each handler is given as
# [ callback, argspec ]; the argspec string names the values HTML::Parser
# passes to the callback, in order.
my $parser = HTML::Parser->new(
    start_h => [ \&_starttag, 'self, tagname, attr' ],
    end_h   => [ \&_endtag,   'self, tagname' ],
    text_h  => [ \&_text,     'self, dtext' ],
);
$parser->parse($email_text);
# parse() handles its argument as one chunk of a document and may hold back
# trailing text; eof() signals the end of the document and flushes it to the
# text handler.
$parser->eof;
# Start-tag handler: raises the '_body' flag on the parser object as soon as
# an opening <body> tag is seen. All other tags leave the flag untouched.
sub _starttag {
    my $parser  = shift;
    my $tagname = shift;
    if ($tagname eq 'body') {
        $parser->{'_body'} = 1;
    }
}
# End-tag handler: clears the '_body' flag on the parser object when the
# closing </body> tag is seen. All other tags leave the flag untouched.
sub _endtag {
    my $parser  = shift;
    my $tagname = shift;
    if ($tagname eq 'body') {
        $parser->{'_body'} = 0;
    }
}
# Text handler: prints the text of the document body, trimmed of leading and
# trailing whitespace, to the currently selected output handle.
#
#   $self  - parser object; its '_body' flag is true while inside <body>
#   $dtext - text chunk (requested as 'dtext', i.e. with entities decoded)
#
# Chunks that are empty after trimming, and any text outside <body>, are
# skipped.
sub _text {
    my ($self, $dtext) = @_;
    $dtext =~ s/\A\s+//;
    $dtext =~ s/\s+\z//;
    # After trimming both ends, a non-empty string always contains a
    # non-whitespace character, so a length check is sufficient.
    return unless length $dtext;
    # Plain truth test: the flag is still undef for text that appears before
    # any <body> tag, and a numeric comparison on undef would warn.
    if ($self->{'_body'}) {
        print $dtext;
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#!/usr/bin/perl -w
use strict;
use warnings;
use HTML::FormatText;
use HTML::Parse;
# Render the HTML file "beispiel.html" as plain text on STDOUT.
my $file = "beispiel.html";
my $html = HTML::Parse::parse_htmlfile($file);
# parse_htmlfile() yields undef when the file cannot be read; stop here with
# a clear message instead of failing later inside the formatter.
# ($! presumably still holds the reason from the failed open.)
die "Cannot parse '$file': $!\n" unless defined $html;
# Alternatively, $html = HTML::Parse::parse_html($text); would take the HTML
# from a variable instead of a file.
my $formatter = HTML::FormatText->new();
print $formatter->format($html);
8 Einträge, 1 Seite |