Thread HTML::TreeBuilder - durch HTML-Tags iterieren (5 answers)
Opened by iamlooking at 2015-02-17 09:14

MagisterTechnicus
 2015-02-17 10:46
#179704 #179704
User since
2015-02-17
2 Artikel
BenutzerIn
[default_avatar]
Ja stimmt, du hast recht. Hier der aufbereitete Code.

Code (perl): (dl )
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/perl 

use strict;
use warnings;
use utf8;

use HTML::TreeBuilder;
use Data::Dumper;
use utf8;

# Emit UTF-8 on STDOUT. Under "use utf8" the markup below is a character
# string; without an encoding layer the umlauts would be written as
# Latin-1 bytes instead of UTF-8.
binmode(STDOUT, ':encoding(UTF-8)');

my $Tree = HTML::TreeBuilder->new();

# Direct children of the root element, keyed by tag name (filled below).
my $data;

# Sample document used to exercise the traversal.
my $html = qq~
<!DOCTYPE html>
<html lang="en">
<head>
  <meta http-equiv="content-type" content="text/html;">
  <title>Scanner</title>
</head>
<body>
<p>
<h1>Überschrift H1</h1>
<h2>Überschrift H2</h2>
<h3>Überschrift H3</h3>
<h4>Überschrift H4</h4>
</p>
<p><strong>FETT</strong></p>
<p><em>SCHIEF</em></p>
<p><u>UNTERSTRICHEN</u></p>
<p style="color: #FF0000">Roter Text</p>
<p style="color: #0000FF">Blauer Text</p>
<p><img width="100px" src="bus.jpg"></p>
<p><table style="border: solid 1px">
 <tr><td>Obenlinks</td><td>Obenrechts</td></tr>
 <tr><td>Untenlinks</td><td>Untenrechts</td></tr>
</table></p>
<p>Dolor dignissimos voluptas debitis neque quas. Debitis corporis libero consectetur odio molestias eum sunt. </p>
<p>
<ul>
 <li>Punkt 1</li>
 <li>Punkt 2</li>
 <li>Punkt 3</li>
   <ul>
     <li>Unterpunkt 1</li>
     <li>Unterpunkt 2</li>
     <li>Unterpunkt 3</li>
   </ul>
</ul>
</body>
</html>
~;

$Tree->parse_content($html);

# The first value returned by elementify() is used as the root element.
my @nodes = $Tree->elementify();

# Index the root's element children (head, body, ...) by tag name.
foreach my $elem ($nodes[0]->content_list()) {
  next if !ref $elem;    # plain text segments are strings and have no tag()
  $data->{$elem->tag()} = $elem;
}

if (defined $data->{'body'}) {
  foreach my $elem ($data->{'body'}->content_list()) {
    print_text_blocks($elem);
  }
} else {
  warn "No <body> element found in parsed document\n";
}

$Tree->delete;

# print_text_blocks($node)
#
# Walks the subtree below $node and prints one line per text block.
#
#   $node - an entry of a content list: either an element object or a
#           plain string (a text segment).
#
# Rules:
#   * a plain string is printed as-is, unless it is whitespace only;
#   * an element without content prints nothing;
#   * an element whose first content item is text is printed once via
#     as_text(), i.e. including the text of any nested elements;
#   * any other element is descended into, child by child.
#
# Returns nothing.
sub print_text_blocks {
  my ($node) = @_;

  if (!ref $node) {
    print $node, "\n" if $node =~ /\S/;
    return;
  }

  return if $node->is_empty();

  my @children = $node->content_list();

  if (!ref $children[0]) {
    print $node->as_text(), "\n";
    return;
  }

  # First child is an element: descend instead of dropping the subtree.
  print_text_blocks($_) foreach @children;

  return;
}

Last edited: 2015-02-17 10:50:57 +0100 (CET)

View full thread HTML::TreeBuilder - durch HTML-Tags iterieren