Genau dafür habe ich ja mal Bio::FASTASequence geschrieben...
ffn-Datei:
>gb|CP000351.1|:139-891 ParA-like protein [Leptospira borgpetersenii serovar Hardjo-bovis JB197]
ATGATAGTTGTATCCATAGCAAACCAGAAAGGTGGAGAAGGTAAAACTACAACCTCTCTTAATTTATCGA
TGGGGCTTGCGAGAAGAGGAAAAAAAACTCTGCTCGTTGATATAGATCCTCAAGCAAATTCAACCGGTAT
TTTTACAAATCCAGAAGGTATTGAGAAATCAATGCATGGAGTTTTTAACTCAAAAATGACTATCCAAGAA
ATCATGATTGAAACGAGGTTACCTGATCTTTTTTTGGCCCCTTCTAAAATGAATCTTGCGGAAGTAGAAA
CACTTTCCGGAAATTCCGTAGATGCGCCTTATATTCTGAGAGACTCTCTTCAAAGTGTGAGTGGGATCGA
CTTTTGTATCATCGATTGTCCACCTAGCTTATCTATTTTTACGATTAATGCACTCGTCGGATCAAATTAC
GTAATTATTCCACTTCAGGCTGAAAAATTTTCCGTAGATGGAATTGTAGGACTTCAACAAACAATCACAA
GCATCAAAAAAAGAATCAATCCTAACCTTGAAATTTTAGGAGCCCTAGTTACTCAACTCAAGCCTCAAAC
ACTTTTGACGAAAACTATCGTACCCGTTTTAACAAAATACTTTCGAATTTTCGAAACAAGCATCTCTGAT
GGAGTTGCAGTAGGAGAATCCCATCTTGCTAAAAAGTCGGTATTTGAATACAACAAGACGAGTAAACAAG
CCCAGGAATATGAAGGGTTTATAGAGGAGTTTTTAAATGAGCTCAAAAAGTAA
>gb|CP000351.1|:875-1720 ParB-like protein [Leptospira borgpetersenii serovar Hardjo-bovis JB197]
ATGAGCTCAAAAAGTAAACGACTCGGCTCTCTCGCAGATGTATTCCAAGCCGAAAAGTTGGAGGGGACTA
TTCGTAAAATTCGGCTCGATAAAATTCTTCCATCCGAAAACCAGCCCAGACAAGATCGAAAAAAAGGAAT
CGAAGACCTCGCGAGAAGTTTAGACAAAGACGGACTACTCCAACCAATCATTGTCACAAAACAAAATCCG
GAAGATGAGAACTATAAAATTGTAGCCGGAGAAAGAAGATACCACGCAGCGAAACAATTAGGCTGGGCAG
AAATAGAATGTAAAATTTTAGACCGGGACGAAAAAGAAACCTTTCGACTTGCAATTATAGAAAATCTTCA
AAGAGAAAATTTATCCCCTTATGAAGAAGTGGAAGCCATGTCACACTTAAAGAATAGCTTCAAATATACA
GATCAAGAATTAGGAACTCTCTTTGGAAAAAGTAGAAGTTACATGACGGAGCTTCTTGGAATTTCAAATC
TAAGCAAAGAAGAACTTAGATCCTGCAAAGAAGCAGGAATTGAAAGTAAAAATTTATTGATCCAAGCAGT
TGCAGCTTCTCGAAAAGGAACCTTCTCTGAGTTTTTAAATTTATTTCAAACGGGTGCACTTAAAACCGTT
AAAGATGCAAAATCTTTTAACCGGGAAGAGGAAAACTTGTCCACACCTAAAATTACAAGTGCGACAAACC
CAAAAGTTTCAAATTTAAATTCAACGGAATATAAGATCACAAAAAAACAAGGTCTAATTCAAATTAGTTC
TGATAATGAAGAACTGTTAGGTAATATTTTTAAACTAATCAAAAAAGAAATCCGTAAAAAATTCAATTCT
ATATAA
.pl:
use strict;
use warnings;
use Bio::FASTASequence::File;
use File::Find::Rule;
# Collect a short subsequence from every record of every *.ffn file found
# below the current directory, group the pieces by the organism name given
# in square brackets in the FASTA description line, and write one
# concatenated entry per organism to $output.

my @ffns = File::Find::Rule->file->name( '*.ffn' )->in( '.' );

# Organism name => list of subsequences gathered for that organism.
my %result;
my $output = 'org.fasta';
my $bff    = Bio::FASTASequence::File->new;

for my $ffn ( @ffns ) {
    my $parsed = $bff->file( $ffn );

    # Hash order is arbitrary in Perl; sorting the keys makes the order in
    # which subsequences are concatenated reproducible between runs.
    # NOTE(review): this is string order of the keys, not necessarily the
    # order of the records in the file -- confirm that is acceptable.
    for my $orga ( sort keys %{$parsed} ) {
        my $entry = $parsed->{$orga};

        # The organism name is the first [...] group of the description.
        my $description = $entry->getDescription();
        my ($name)
            = ( defined $description ? $description : '' ) =~ m{ \[ (.*?) \] }x;
        if ( !defined $name ) {
            warn "$ffn: no [organism] in description of '$orga', skipped\n";
            next;
        }

        # substr() with an offset past the end of the string returns undef
        # (plus a warning), so skip sequences shorter than the offset.
        my $sequence = $entry->getSequence();
        if ( !defined $sequence or length $sequence < 3 ) {
            warn "$ffn: sequence of '$orga' is too short, skipped\n";
            next;
        }

        # 24 characters starting at offset 3, i.e. just after the first
        # three characters of the sequence.
        push @{ $result{$name} }, substr $sequence, 3, 24;
    }
}

open my $fh, '>', $output
    or die "Cannot open '$output' for writing: $!";

for my $key ( sort keys %result ) {
    print {$fh} "> $key\n", join( '', @{ $result{$key} } ), "\n";
}

# Buffered write errors only surface at close time, so check it as well.
close $fh
    or die "Cannot close '$output': $!";
Ausgabe:
> Leptospira borgpetersenii serovar Hardjo-bovis JB197
ATAGTTGTATCCATAGCAAACCAGAGCTCAAAAAGTAAACGACTCGGC