#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use HTML::TreeBuilder;
# use Data::Dumper;
# The source is declared UTF-8 (use utf8) and contains non-ASCII literals,
# so the text printed further down consists of decoded characters. Give
# STDOUT an explicit UTF-8 layer so those characters leave the program as
# UTF-8 instead of Latin-1 bytes / "Wide character" warnings.
binmode(STDOUT, ':encoding(UTF-8)');

# Parser object; it is turned into a plain element tree further down.
my $Tree = HTML::TreeBuilder->new();

# Maps tag name => element for the direct children of the document root.
my $data = {};

# Sample document handed to the parser.
# NOTE(review): no markup tags are visible in this literal - possibly
# stripped before it reached this file; confirm against the original.
my $html = qq~
Scanner
Überschrift H1
Überschrift H2
Überschrift H3
Überschrift H4
FETT
SCHIEF
UNTERSTRICHEN
Roter Text
Blauer Text
Obenlinks | Obenrechts |
Untenlinks | Untenrechts |
Dolor dignissimos voluptas debitis neque quas. Debitis corporis libero consectetur odio molestias eum sunt.
- Punkt 1
- Punkt 2
- Punkt 3
- Unterpunkt 1
- Unterpunkt 2
- Unterpunkt 3
~;

$Tree->parse_content($html);

# elementify() hands back the root of the parsed tree; its direct children
# are indexed by tag name so the body can be looked up below.
my @nodes   = $Tree->elementify();
my @content = $nodes[0]->content_list();
foreach my $elem (@content) {
    # content_list() may also yield plain strings (text segments); those
    # have no tag() method, so only real element objects are indexed.
    next unless ref $elem;
    $data->{ $elem->tag() } = $elem;
}

# do_content_parsing() prints as a side effect; the value kept here carries
# no data. Skip the walk entirely when the parser produced no body element.
my $content;
if (defined $data->{'body'}) {
    $content = do_content_parsing($data->{'body'});
}

# Release the tree explicitly once the walk is finished.
$Tree->delete;
#################################################
# Recursively walk the children of an element and print their text, one
# line per printed item, to the currently selected output handle.
#
# Parameter: an element object providing content_list(); anything that is
#            not a reference (undef, plain string) is ignored.
# Returns:   nothing useful (bare return).
#
# For every child:
#   * a plain-text child (non-reference) is printed as is, unless it holds
#     only whitespace;
#   * an element without content is skipped;
#   * an element whose first content item is text is printed as a whole
#     via as_text();
#   * any other element is descended into.
sub do_content_parsing {
    my $content = shift;

    # Nothing to walk without an element object.
    return unless ref $content;

    foreach my $elem ($content->content_list()) {
        # Text segments come back from content_list() as plain strings;
        # calling element methods on them would die, so handle them here.
        if (!ref $elem) {
            print $elem, "\n" if $elem =~ /\S/;
            next;
        }

        next if $elem->is_empty();

        # content_refs_list() returns references to the content items; a
        # reference to a text item is of type SCALAR, whereas a reference
        # to a child element (itself a reference) is not.
        my @list = $elem->content_refs_list();
        if (ref($list[0]) eq 'SCALAR') {
            print $elem->as_text(), "\n";
        }
        else {
            do_content_parsing($elem);
        }
    }

    return;
}