#!/usr/bin/perl use strict; use warnings; $|++; use Spreadsheet::XLSX; use Excel::Writer::XLSX; use Encode qw(decode encode); use Time::HiRes qw(gettimeofday tv_interval); my $filetype = $ARGV[0]; if (! $filetype) { $filetype = "xlsx"; } elsif ($filetype ne "xlsx" and $filetype ne "csv") { die "unknown filetype '$filetype'"; } # config my $adrcount = 1_500_000; # divisible by 3 my $transactcount = 35_000; my $adrfile = "black_perl_adr." . $filetype; my $transactfile = "black_perl_btr." . $filetype; my $outputfile = "black_perl_out." . $filetype; # /config createAdrFile($filetype, $adrfile, $adrcount); createTransactFile($filetype, $transactfile, $adrcount, $transactcount); my $alldataHR = readAllData($filetype, $adrfile, $transactfile); writeResultFile($alldataHR, $filetype, $outputfile); sub createAdrFile { my $t0 = [ gettimeofday ]; my $filetype = shift; my $adrfile = shift; my $adrcount = shift; print " ... Creating address file $adrfile ...\n"; my @header = (qw(Kundennummer Name Vorname Straße Hausnummer PLZ Ort Telefon)); my @adrdata = ( [qw(Müller Maier Schröder Lehmann)] , [qw(Hans Grete Ulrich Susanne)] , [qw(Goethestraße Ulmenweg Bahnhofstraße Hauptstraße)] , [qw(14 23a 16/1 9)] , [qw(12345 23456 34567 45678)] , [qw(Foobach Musterstadt Irgendwingen Nirgendhausen)] , ['+49 123 45678912', '+49 234 56789123', '+49 345 67891234', '+49 456 78912345'] ); if ($filetype eq 'xlsx') { my $workbook = Excel::Writer::XLSX->new($adrfile) or die $!; my $adrcountperws = $adrcount / 3; for my $ws (0 .. 2) { my $worksheet = $workbook->add_worksheet(); for my $row (0 .. $#header) { $worksheet->write(0, $row, $header[$row]); } for my $row (1 .. $adrcountperws) { my $kdnr = $ws * $adrcountperws + $row; $worksheet->write($row, 0, $kdnr); for my $col (1 .. @adrdata) { my $i = int(rand()*4); my $val = $adrdata[$col-1]->[$i]; $worksheet->write($row, $col, $val); } } } } elsif ($filetype eq 'csv') { open my $csvFH, ">", $adrfile; print $csvFH (join ";", @header); for my $kdnr (1 .. $adrcount) { print $csvFH "\n"; print $csvFH $kdnr; for my $col (1 .. @adrdata) { my $i = int(rand()*4); my $val = $adrdata[$col-1]->[$i]; print $csvFH ";" . $val; } } } print "Created address file $adrfile in " . (tv_interval( $t0 )) . " seconds\n"; } sub createTransactFile { my $t0 = [ gettimeofday ]; my $filetype = shift; my $transactfile = shift; my $adrcount = shift; my $transactcount = shift; print " ... Creating transation file $transactfile ...\n"; my @header = (qw(Kundennummer Geldbetrag)); my ($workbook, $worksheet, $csvFH); if ($filetype eq 'xlsx') { $workbook = Excel::Writer::XLSX->new($transactfile) or die $!; $worksheet = $workbook->add_worksheet(); for my $col (0 .. $#header) { $worksheet->write(0, $col, $header[$col]); } for my $row (1 .. $transactcount) { my $kdnr = int(rand()*$adrcount+1); my $betr = int(rand()*100_000+1)/100; $worksheet->write($row, 0, $kdnr); $worksheet->write($row, 1, $betr); } } elsif ($filetype eq 'csv') { open $csvFH, ">", $transactfile; print $csvFH (join ";", @header); } for my $row (1 .. $transactcount) { my $kdnr = int(rand()*$adrcount+1); my $betr = int(rand()*100_000+1)/100; if ($filetype eq 'xlsx') { $worksheet->write($row, 0, $kdnr); $worksheet->write($row, 1, $betr); } else { print $csvFH "\n$kdnr;$betr"; } } print "Created transaction file $transactfile in " . (tv_interval( $t0 )) . " seconds\n"; } sub readAllData { my $t0 = [ gettimeofday ]; my $filetype = shift; my $adrfile = shift; my $transactfile = shift; my %alldata; print " ... Reading data from transaction file $transactfile ...\n"; if ($filetype eq 'xlsx') { my $xlsx = Spreadsheet::XLSX->new($transactfile) or die $!; my $worksheet = $xlsx->{Worksheet}->[0]; for my $row (1 .. $worksheet->{MaxRow}) { my $kdnr = $worksheet->{Cells}->[$row]->[0]->{Val}; my $betr = $worksheet->{Cells}->[$row]->[1]->{Val}; $alldata{$kdnr}->{$row} = [$betr]; } } elsif ($filetype eq 'csv') { open my $csvFH, "<", $transactfile; my $header = <$csvFH>; while (my $row = <$csvFH>) { chomp $row; my ($kdnr, $betr) = split /;/, $row; next unless $kdnr == $kdnr*1; $alldata{$kdnr}->{$.} = [$betr]; } } print " ... Reading data from address file $adrfile ...\n"; if ($filetype eq 'xlsx') { my $xlsx = Spreadsheet::XLSX->new($adrfile) or die $!; for my $worksheet (@{$xlsx->{Worksheet}}) { my $colcount = $worksheet->{MaxCol}; for my $row (1 .. $worksheet->{MaxRow}) { my $kdnr = $worksheet->{Cells}->[$row]->[0]->{Val}; if ($alldata{$kdnr}) { for my $entry (keys %{$alldata{$kdnr}}) { for my $col (1 .. $colcount) { push @{$alldata{$kdnr}->{$entry}}, $worksheet->{Cells}->[$row]->[$col]->{Val}; } } } } } } else { open my $csvFH, "<", $adrfile; my $header = <$csvFH>; while (my $row = <$csvFH>) { chomp $row; my ($kdnr, @adr) = split /;/, $row; if ($alldata{$kdnr}) { for my $entry (keys %{$alldata{$kdnr}}) { push @{$alldata{$kdnr}->{$entry}}, @adr; } } } } print "All data read from $adrfile, $transactfile in " . (tv_interval( $t0 )) . " seconds\n"; return \%alldata; } sub writeResultFile { my $t0 = [ gettimeofday ]; my $alldataHR = shift; my $filetype = shift; my $outputfile = shift; print " ... Creating output file $outputfile ...\n"; my @header = (qw(Kundennummer Geldbetrag Name Vorname Straße Hausnummer PLZ Ort Telefon)); if ($filetype eq 'xlsx') { my $workbook = Excel::Writer::XLSX->new($outputfile) or die $!; my $worksheet = $workbook->add_worksheet(); for my $row (0 .. $#header) { $worksheet->write(0, $row, $header[$row]); } my $row = 0; for my $kdnr (sort {$a <=> $b} keys %{$alldataHR}) { for my $entry (keys %{$alldataHR->{$kdnr}}) { $row++; $worksheet->write($row, 0, $kdnr); for my $col (1 .. @{$alldataHR->{$kdnr}->{$entry}}) { my $val = $alldataHR->{$kdnr}->{$entry}->[$col-1]; $val = decode('utf-8',$val); $worksheet->write($row, $col, $val); } } } } elsif ($filetype eq 'csv') { open my $csvFH, ">", $outputfile; print $csvFH (join ";", @header); for my $kdnr (sort {$a <=> $b} keys %{$alldataHR}) { for my $entry (keys %{$alldataHR->{$kdnr}}) { print $csvFH "\n" . (join ";", ($kdnr, @{$alldataHR->{$kdnr}->{$entry}})); } } } print "All data written to output file $outputfile in " . (tv_interval( $t0 )) . " seconds\n"; }