Reguläre Ausdrücke suchen (gelöst) - #157627 (Allgemeines zu Perl)

Bionerd

2012-04-19 07:53

User since
2012-04-18
35 Artikel
BenutzerIn
[default_avatar]

Hier mein aktuelles Programm, welches Paare findet, die sich in unterschiedlichen Zeilen befinden, diese schön in Files schreibt zur späteren Verarbeitung.

more (29.3kb):

Code (perl): (dl )

#!/usr/bin/perl
use strict;
use warnings;

my $dir='Sequenzen/';
my @motives = ("PO", "OHO", 'G[A-Z]{2}G');
my %anzahl;
my $out_dir='Paarungen';
# Ordner öffnen und durchgehen
opendir(my $dh, $dir) or die("Error open $dir,($!)\n");
while(my $file=readdir($dh))
{
  # Pfad machen.
  my $path="$dir/$file";

  # testen ob es eine Datei ist
  next unless(-f $path);

  print "Analyse File $file\n";
my %all_match;
  my @found=analyse_file($path,\@motives);
$anzahl{join('-!-', sort @$_[0,1])}++for(@found);
printf ( qq("%s" and "%s" found in Line %u and Line %u\n),@$_ ) for(@found);
 
  for my $e (@found)
  {
    my ($motive1,$motive2)= sort @$e[0,1];
    my $outpath="$out_dir/$motive1$motive2.txt";

    if(open(my $fh, '>>', $outpath))
    { printf $fh ("%s + %s found in Line %u and Line %u + %s\n", @$e,$file); }
    else
    { warn("Error open $outpath ($!)"); }
  }
 printf ("%s+%s %u\n",split(/-!-/,$_),$all_match{$_}) for (sort keys(%anzahl));  
}
closedir($dh);

########################################################################

sub analyse_file
{
  my $file=shift;
  my $motives=shift; # Referenz auf Array

  #die gesamte Datei einlesen
  my $content=read_file($file);
  die("File empty!") unless($content);


  # eine Liste mit den Positionen der Zeileenden erstellen:
  my @line_ends=find_lineends($content);

  # alle Suchbegriffe durchgehen und sich alle Positionen merken.
  my %match;
  for my $motive (@$motives)
  {
    pos($content)=0;
    while($content=~/$motive/g)
    {
      # zur aktuellen Postion im String wird die Zeile Bestimmt
      my $line=calculate_line(\@line_ends,pos($content));

      # es wird ein Hash of Arrays erstellt
      push(@{$match{$motive}},$line);
    }
  }

  # Auswertung

  my @found;
my @keys=keys(%match);
  # jedes Fund mit jedem anderen Kombinieren
  # doppelte vermeiden
  for my $p1 (0..$#keys)
  {
    my $motive1=$keys[$p1];

    for my $p2 ($p1..$#keys)
    {
      my $motive2=$keys[$p2];

      next if($motive1 eq $motive2 and @{$match{$motive1}}<2);

      # doppelte ausschließen
      my %double;

      for my $line1 (@{$match{$motive1}})
      {
        for my $line2 (@{$match{$motive2}})
        {
          next if($double{"$line1-$line2"}++);
          next if($double{"$line2-$line1"}++);
          push(@found,[$motive1,$motive2,$line1,$line2]);
        }
      }
    }
  }

  return @found;
}

sub read_file
{
  my $file=shift;
  die("ERROR open $file ($!)\n") unless( open(my $handle, '<', $file) );
  #zeilenende auf undef setzen
  local $/=undef;
  # alles einlesen
  return <$handle>;
}

sub calculate_line
{
  my $endings=shift;
  my $pos=shift;
  return 0 if($pos<0);
  return $endings->[-1] if($pos > $endings->[-1]);
  for my $l (1..$#$endings)
  { return $l if($pos >= $endings->[$l-1] && $endings->[$l] >= $pos); }
  return -1;
}

sub find_lineends
{
  my $content=shift;
  my @list=(0);
  my $pos=0;
  while(( my $p=index($content,"\n",$pos) )>-1)
  {
    push(@list,$p);
    $pos=$p+1;
    last if($pos>=length($content));
  }
  return @list;
}

modedit Editiert von pq: more-tags hinzugefügt
Last edited: 2012-04-19 09:50:37 +0200 (CEST)