Reguläre Ausdrücke suchen (gelöst) - #157602 (Allgemeines zu Perl)

topeg

2012-04-18 15:51

User since
2006-07-10
2611 Artikel
BenutzerIn

user image

Verstehe ich es richtig das immer die gesamte Datei betrachtet werden soll und nicht nur immer eine Zeile pro Datei?

Wenn dem so ist hier mal eine Analyse soweit du sie Beschrieben hast:

more (24.5kb):

Code (perl): (dl )

#!/usr/bin/perl
use strict;
use warnings;

my $dir='.';
my @motives = ("PO", "OHO", 'G[A-Z]{2}G');

# Ordner öffnen und durchgehen
opendir(my $dh, $dir) or die("Error open $dir,($!)\n");
while(my $file=readdir($dh))
{
  # Pfad machen.
  my $path="$dir/$file";

  # testen ob es eine Datei ist
  next unless(-f $path);

  print "Analyse File $file\n";

  my @found=analyse_file($path,\@motives);

  printf ( qq("%s" and "%s" found in Line %u and Line %u\n),@$_ ) for(@found);
}
closedir($dh);

########################################################################

sub analyse_file
{
  my $file=shift;
  my $motives=shift; # Referenz auf Array

  #die gesamte Datei einlesen
  my $content=read_file($file);
  die("File empty!") unless($content);


  # eine Liste mit den Positionen der Zeileenden erstellen:
  my @line_ends=find_lineends($content);

  # alle Suchbegriffe durchgehen und sich alle Positionen merken.
  my %match;
  for my $motive (@$motives)
  {
    pos($content)=0;
    while($content=~/$motive/g)
    {
      # zur aktuellen Postion im String wird die Zeile Bestimmt
      my $line=calculate_line(\@line_ends,pos($content));

      # es wird ein Hash of Arrays erstellt
      push(@{$match{$motive}},$line);
    }
  }

  # Auswertung

  my @found;
  my @keys=keys(%match);
  # jedes Fund mit jedem anderen Kombinieren
  # doppelte vermeiden
  for my $p1 (0..$#keys)
  {
    my $motive1=$keys[$p1];

    for my $p2 ($p1..$#keys)
    {
      my $motive2=$keys[$p2];

      next if($motive1 eq $motive2 and @{$match{$motive1}}<2);

      # doppelte ausschließen
      my %double;

      for my $line1 (@{$match{$motive1}})
      {
        for my $line2 (@{$match{$motive2}})
        {
          next if($double{"$line1-$line2"}++);
          next if($double{"$line2-$line1"}++);
          push(@found,[$motive1,$motive2,$line1,$line2]);
        }
      }
    }
  }

  return @found;
}

sub read_file
{
  my $file=shift;
  die("ERROR open $file ($!)\n") unless( open(my $handle, '<', $file) );
  #zeilenende auf undef setzen
  local $/=undef;
  # alles einlesen
  return <$handle>;
}

sub calculate_line
{
  my $endings=shift;
  my $pos=shift;
  return 0 if($pos<0);
  return $endings->[-1] if($pos > $endings->[-1]);
  for my $l (1..$#$endings)
  { return $l if($pos >= $endings->[$l-1] && $endings->[$l] >= $pos); }
  return -1;
}

sub find_lineends
{
  my $content=shift;
  my @list=(0);
  my $pos=0;
  while(( my $p=index($content,"\n",$pos) )>-1)
  {
    push(@list,$p);
    $pos=$p+1;
    last if($pos>=length($content));
  }
  return @list;
}

EDIT: Zeile 59: Etwas Code fehlte.
Last edited: 2012-04-19 03:14:58 +0200 (CEST)