1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
#!/usr/bin/perl use strict; use warnings; use Data::Dumper; my %hash = read_txt(); my $start = '00000'; my $xml = get_xml( \%hash, $start ); print $xml; sub get_xml { my ($hashref, $start) = @_; my $real_start = $hashref->{$start}->[0]; return get_node( $hashref, $real_start, 0 ); } sub get_node { my ($hashref,$name,$level) = @_; my $tab = "\t" x $level; my @children = map{ get_node( $hashref, $_, $level + 1 ) }@{ $hashref->{$name} || [] }; if ( @children ) { my $children_string = join "\n", @children; return sprintf "%s<%s>\n%s\n%s</%s>", $tab, $name, $children_string, $tab, $name; } return sprintf "%s<%s/>", $tab, $name; } sub read_txt { my %parent_child_map; while ( my $line = <DATA> ) { chomp $line; next unless $line; my ($parent,$child) = split /\s+/, $line; push @{ $parent_child_map{ $parent } }, $child; } return %parent_child_map } __DATA__ 00000 A0361 A0361 A0323 A0323 A0351 A0351 A0362 A0351 A0363 A0323 A0324 A0324 A0325 A0324 A0326
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
<?xml version="1.1" encoding="UTF-8"?>
<LANGUALSCHEME>
<HEADER>
<EXPORTDATE>2008-10-12</EXPORTDATE>
<UPDATEDATE>2010-05-25</UPDATEDATE>
<TITLE>LanguaL 2008 Thesaurus Full Structure, Facet A extracted</TITLE>
<NOTES>
<NOTE>This is the official XML export file for LanguaL</NOTE>
<NOTE>This file was updated 22 NOV 2008 to include USDA ( A1269, A1271-A1294) , BLS ( A1295-A1537)</NOTE>
<NOTE>added FTC A1295 ... A1537</NOTE>
<NOTE>added FTC B3600, B3601, B3602, B3603, B3604</NOTE>
<NOTE>added FTC H0800, H0801, H0802, H0803, H0804, H0805, H0806, H0807, H0808, H0809</NOTE>
<NOTE>added FTC H0810, H0811, H0812, H0813, H0814, H0815, H0816, H0817, H0818, H0819</NOTE>
<NOTE>added FTC H0820, H0821</NOTE>
</NOTES>
</HEADER>
<DESCRIPTORS/>
</LANGUALSCHEME>