#!C:/STRAWB~1/perl/bin/perl.exe -w
#
# Translate a DNA or RNA sequence into a protein sequence.
#
# The script asks on STDIN for the name of a file containing a nucleotide
# sequence, reports whether the content looks like DNA or RNA, translates it
# codon by codon into one-letter amino-acid codes and writes the result,
# followed by the percentage of every amino acid, to the file 'Protein' in
# the current directory.

use strict;
use warnings;

# Name of the file the translation report is written to.
my $OUTPUT_FILE = 'Protein';

# Token written in place of a stop codon.
my $STOP_MARKER = '!STOP!';

# Amino acids in report order: one-letter code, three-letter label and the
# RNA codons of the standard genetic code that encode them.
my @AMINO_ACIDS = (
    [ 'A', 'Ala', [qw(GCU GCC GCA GCG)] ],
    [ 'C', 'Cys', [qw(UGU UGC)] ],
    [ 'D', 'Asp', [qw(GAU GAC)] ],
    [ 'E', 'Glu', [qw(GAA GAG)] ],
    [ 'F', 'Phe', [qw(UUU UUC)] ],
    [ 'G', 'Gly', [qw(GGU GGC GGA GGG)] ],
    [ 'H', 'His', [qw(CAU CAC)] ],
    [ 'I', 'Ile', [qw(AUU AUC AUA)] ],
    [ 'K', 'Lys', [qw(AAA AAG)] ],
    [ 'L', 'Leu', [qw(CUU CUC CUA CUG UUA UUG)] ],
    [ 'M', 'Met', [qw(AUG)] ],
    [ 'N', 'Asn', [qw(AAU AAC)] ],
    [ 'P', 'Pro', [qw(CCU CCC CCA CCG)] ],
    [ 'Q', 'Gln', [qw(CAA CAG)] ],
    [ 'R', 'Arg', [qw(CGU CGC CGA CGG AGA AGG)] ],
    [ 'S', 'Ser', [qw(AGU AGC UCU UCC UCA UCG)] ],
    [ 'T', 'Thr', [qw(ACU ACC ACA ACG)] ],
    [ 'V', 'Val', [qw(GUU GUC GUA GUG)] ],
    [ 'W', 'Trp', [qw(UGG)] ],
    [ 'Y', 'Tyr', [qw(UAU UAC)] ],
);

# Codon => output token.  Only UAA, UAG and UGA are stop codons; UGG encodes
# Trp and must not appear in the stop list.
my %TOKEN_FOR_CODON = map { $_ => $STOP_MARKER } qw(UAA UAG UGA);
for my $amino_acid (@AMINO_ACIDS) {
    my ($letter, undef, $codons) = @{$amino_acid};
    $TOKEN_FOR_CODON{$_} = $letter for @{$codons};
}

# Strip all whitespace from the raw file content and upper-case it, so that
# lower-case sequences are classified and translated like upper-case ones.
sub normalize_sequence {
    my ($raw) = @_;
    (my $sequence = $raw) =~ s/\s+//g;
    return uc $sequence;
}

# Classify a normalized sequence.
# Returns 'DNA' when it consists only of A, C, G and T, 'RNA' when it
# additionally contains at least one U, and '' otherwise (including for an
# empty sequence).
sub classify_sequence {
    my ($sequence) = @_;
    return 'DNA' if $sequence =~ /\A[ACGT]+\z/;
    return 'RNA' if $sequence =~ /\A[ACGTU]+\z/;
    return '';
}

# Translate a nucleotide sequence into a list of tokens, one per codon.
# T is rewritten to U first, so DNA and RNA input are handled alike.  Each
# complete triplet becomes a one-letter amino-acid code or the stop marker;
# triplets that are not valid codons are passed through unchanged and a
# trailing incomplete triplet is dropped.
sub translate {
    my ($sequence) = @_;
    my $rna = uc $sequence;
    $rna =~ tr/T/U/;
    my @codons = $rna =~ /(.{3})/gs;
    return map { exists $TOKEN_FOR_CODON{$_} ? $TOKEN_FOR_CODON{$_} : $_ }
        @codons;
}

# Build the text written to the output file: the space-separated tokens
# followed by one "Xxx = NN.NN%" line per amino acid.  Percentages are
# relative to the total number of tokens (stop markers and unrecognised
# triplets included); with no tokens at all every percentage is 0.
sub format_report {
    my (@tokens) = @_;
    my $total = @tokens;

    my %count_of;
    $count_of{$_}++ for @tokens;

    my $report = join(' ', @tokens) . "\n\n";
    for my $amino_acid (@AMINO_ACIDS) {
        my ($letter, $label) = @{$amino_acid};
        my $count   = $count_of{$letter} || 0;
        my $percent = $total ? $count / $total * 100 : 0;

        # '%%' is the literal percent sign; a bare '%' before the newline is
        # an invalid conversion and warns under -w.
        $report .= sprintf "%s = %1.2f%%\n\n", $label, $percent;
    }
    return $report;
}

# Interactive entry point: prompt for the input file, classify and translate
# its content and write the report to $OUTPUT_FILE.
sub main {
    print "Enter a file containing DNA or RNA:\n\n";
    my $file_name = <STDIN>;
    die "No file name given\n" unless defined $file_name;
    chomp $file_name;

    # Three-argument open: the user-supplied name can never be interpreted
    # as an open mode or a pipe.
    open my $in, '<', $file_name
        or die "File not found: $file_name ($!)\n";
    my $sequence = normalize_sequence(join '', <$in>);
    close $in;

    my $kind = classify_sequence($sequence);
    if ($kind) {
        print "Sequence successfully identified as $kind\n\n";
    }
    else {
        # Translation is still attempted; unrecognised triplets are written
        # to the output unchanged.
        print "Sequence is neither DNA nor RNA\n\n";
    }

    my @tokens = translate($sequence);

    open my $out, '>', $OUTPUT_FILE
        or die "Cannot write '$OUTPUT_FILE': $!\n";
    print {$out} format_report(@tokens)
        or die "Cannot write '$OUTPUT_FILE': $!\n";
    close $out
        or die "Cannot close '$OUTPUT_FILE': $!\n";

    print "Proteinsequence written on '$OUTPUT_FILE'\n\n";
    return;
}

# Run only when executed as a script, so the helpers can be loaded and
# exercised without triggering the interactive prompt.
main() unless caller;

1;