Wie fragt man den canonical Link eines Dokuments am besten ab?
Hier mal was ich habe, das tut aber eigentlich nicht:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/usr/bin/perl
use strict;
use warnings;
use LWP;
use Data::Dumper;
my $browser = LWP::UserAgent->new();
my $site = $browser->get('http://de.wikibooks.org');
my $code = $site->decoded_content();
use XML::LibXML;
my $dom = XML::LibXML->load_xml(string => $code);
foreach my $node ($dom->findnodes('//link[@rel="canonical"]/@href')) {
print $node->to_literal(), " # canonical LibXML\n";
}
use XML::XPath;
my $xp = XML::XPath->new(xml => $code);
foreach my $node ($xp->findnodes('//link[@rel="canonical"]')) {
print $node->getAttribute('href'), " # canonical XPath\n";
}
Last edited: 2020-02-09 12:21:24 +0100 (CET)