1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/usr/bin/perl
use strict;
use warnings;
use File::Find::Rule;
use Bio::FASTASequence::File;
# Root directory that is searched for '*.fasta' files.
my $search_root = '/path/to/dir/';

# One parser object is reused for every file.
my $fasta_reader = Bio::FASTASequence::File->new;

# Collect every plain file below $search_root whose name matches '*.fasta'.
my @fasta_paths = File::Find::Rule->file->name( '*.fasta' )->in( $search_root );

foreach my $fasta_path ( @fasta_paths ) {
    # file() is used here as returning a hash reference with one entry per
    # record (presumably keyed by the record identifier -- confirm against
    # the Bio::FASTASequence::File documentation).
    my $records = $fasta_reader->file( $fasta_path );

    foreach my $record_id ( keys %{ $records } ) {
        print "$record_id\n";
        my $sequence = $records->{$record_id}->getSequence;
        # search for anything in the sequence and get part of it...
    }
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
#!/usr/bin/perl
use strict;
use warnings;

# Copies every marked block -- from a line matching 'Blockanfang' through the
# next line matching 'Blockende', both lines included -- out of all '.txt'
# files of one directory into a single output file.

# Directory that holds the files to read:
my $sourceDir = 'C:\test';

# File that receives the extracted blocks:
my $outFile = "out.txt";

# Read the directory, keep only the '.txt' entries, in sorted order.
opendir(my $sourceDH, $sourceDir)
    or die "Cannot open directory '$sourceDir': $!";
my @allfiles = grep { /\.txt$/ } sort readdir($sourceDH);
closedir $sourceDH;

# Open the output file for writing (an existing file is truncated).
open my $outFH, ">", $outFile or die "Cannot write '$outFile': $!";

# Loop over all files found (NB: 'for' and 'foreach' are synonyms in Perl).
for my $sourceFile (@allfiles) {
    my $sourcePath = $sourceDir . '/' . $sourceFile;
    open my $sourceFH, "<", $sourcePath
        or die "Cannot read '$sourcePath': $!";

    # The block state is tracked per file.  A flip-flop operator
    # (/Blockanfang/ .. /Blockende/) keeps its state across loop runs, so a
    # file with an unterminated block would make the next file be copied
    # from its very first line.
    my $inBlock = 0;
    while (my $row = <$sourceFH>) {
        $inBlock = 1 if !$inBlock && $row =~ /Blockanfang/;
        next if !$inBlock;

        print {$outFH} $row;

        # The end marker is checked on the start line as well, exactly as
        # the two-dot range operator does.
        $inBlock = 0 if $row =~ /Blockende/;
    }
    close $sourceFH;
}

# Buffered write errors only surface when the handle is closed.
close $outFH or die "Cannot close '$outFile': $!";
print "fertig!";
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
#!/usr/bin/perl
use strict;
use warnings;

# For every '.ffn' file of one directory this script appends one record to
# the output file: a header line '>Organism name' (taken from the [...] part
# of the file's first line) followed by, for each FASTA header line of the
# file, the 24 word characters that come after the first three characters of
# the line directly below that header.

my $sourceDir = '../Perl/Test';
my $outFile   = "out.ffn";
my $i         = 0;    # number of files that were processed

opendir(my $sourceDH, $sourceDir)
    or die "Cannot open directory '$sourceDir': $!";
my @allfiles = grep { /\.ffn$/ } sort readdir($sourceDH);
closedir $sourceDH;

open my $outFH, ">>", $outFile or die "Cannot append to '$outFile': $!";

# Whether the output already holds data decides if a record needs a leading
# newline.  The size is taken from the file NAME: applying a file test to the
# stringified handle ("GLOB(0x...)") would look for a file of that name.
my $outputHasContent = ( -s $outFile ) ? 1 : 0;

for my $sourceFile (@allfiles) {
    my $sourcePath = $sourceDir . '/' . $sourceFile;

    # The file is read once; header and sequences are both taken from it.
    open my $sourceFH, "<", $sourcePath
        or die "Cannot read '$sourcePath': $!";
    my @DNAReadIn = <$sourceFH>;
    close $sourceFH;

    # Get organism name: the text between '[' and ']' of the first line.
    my $NameReadIn = @DNAReadIn ? $DNAReadIn[0] : '';
    my ($organism) = $NameReadIn =~ /\[(.*)\]/;
    if ( !defined $organism ) {
        warn "Skipping '$sourcePath': no [organism] found in first line\n";
        next;
    }
    my $Name = ">$organism";

    if ($outputHasContent) {
        print {$outFH} "\n$Name\n";
    }
    else {
        print {$outFH} "$Name\n";
    }
    $outputHasContent = 1;

    # Get sequences: a header line ends in "]\n"; skip three characters of
    # the following line and keep the next 24.  Matching "]\n" directly
    # (instead of first rewriting it to ',') avoids false hits on commas
    # that are part of a description.
    my $DNAString = join( "", @DNAReadIn );
    while ( $DNAString =~ m{\]\n\w{3}(\w{24})}g ) {
        print {$outFH} "$1";
    }

    $i++;
}

close $outFH or die "Cannot close '$outFile': $!";
print "Done! $i files have been processed!\n";
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
#!/usr/bin/perl
# TODO: enable both pragmas once the rest of the script (e.g. file(), which
# is defined outside this excerpt) is known to be strict-clean.
#use strict;
#use warnings;

# Walks the directory tree below the start directories and calls
# file( $directory, $name ) for every plain file whose extension equals
# $ending.  Sub-directories are appended to @dir as they are discovered, so
# @dir afterwards lists every directory that was visited.

my @dir    = '../Perl/Test/';
my $ending = '.ffn';

# Destination file:
my $outFile = "out.ffn";
my $dir     = '';

# An index loop is used on purpose: @dir grows while it is processed, and
# appending to an array that foreach is iterating over is not reliable.
for ( my $idx = 0; $idx < @dir; $idx++ ) {
    my $current = $dir[$idx];

    # Unreadable directories are skipped silently.
    opendir( my $dh, $current ) or next;
    my @entries = readdir($dh);
    closedir $dh;

    for my $entry (@entries) {
        next if $entry =~ /^\./;    # skip '.', '..' and hidden entries

        my $path = "$current$entry";
        if ( -d $path ) {
            push @dir, "$path/";    # queue the sub-directory for a later pass
            next;
        }
        next if !-f $path;

        # Extension = everything from the last '.' on; undefined when the
        # name contains no '.' at all.
        my ($suffix) = $entry =~ /.*(\..*)/;
        file( $current, $entry ) if defined $suffix && $suffix eq $ending;
    }
}
1 2 3
# Read $sourceDir and keep the sorted names of all entries ending in '.ffn'.
opendir(my $sourceDH, $sourceDir) or die $!;
my @allfiles = grep { m/\.ffn$/ } sort readdir($sourceDH);
closedir $sourceDH;
1 2 3 4
use File::Find::Rule;

# Directory tree that is searched.
my $directory = '/path/to/dir/';

# All plain files below $directory whose name matches '*.fasta'.
my @fasta_files = File::Find::Rule
    ->file
    ->name( '*.fasta' )
    ->in( $directory );
2013-04-24T13:21:20 String_Test: Danke schon mal, leider habe ich subs bisher noch nicht kennengelernt und weiß nicht, wie man mit diesen arbeitet :(
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
#!/usr/bin/perl
use strict;
use warnings;
use File::Util;

# Copies every marked block -- from a line matching 'Blockanfang' through the
# next line matching 'Blockende', both lines included -- out of all '.txt'
# files below one directory (sub-directories included) into a single output
# file.

# Directory that holds the files to read:
my $sourceDir = 'C:\test';

# File that receives the extracted blocks:
my $outFile = "out.txt";

# Open the output file for writing (an existing file is truncated).
open my $outFH, ">", $outFile or die "Cannot write '$outFile': $!";

# Create the File::Util object.
my $fu = File::Util->new();

# List the directory recursively (files only) and keep the '.txt' entries.
my @allfiles = grep { /\.txt$/ }
    $fu->list_dir( $sourceDir, qw(--files-only --recurse) );

# Loop over all files found (NB: 'for' and 'foreach' are synonyms in Perl).
for my $sourceFile (@allfiles) {
    # The list entries are used as paths as they are (presumably list_dir
    # returns complete paths when recursing -- confirm against File::Util).
    open my $sourceFH, "<", $sourceFile
        or die "Cannot read '$sourceFile': $!";

    # The block state is tracked per file.  A flip-flop operator
    # (/Blockanfang/ .. /Blockende/) keeps its state across loop runs, so a
    # file with an unterminated block would make the next file be copied
    # from its very first line.
    my $inBlock = 0;
    while (my $row = <$sourceFH>) {
        $inBlock = 1 if !$inBlock && $row =~ /Blockanfang/;
        next if !$inBlock;

        print {$outFH} $row;

        # The end marker is checked on the start line as well, exactly as
        # the two-dot range operator does.
        $inBlock = 0 if $row =~ /Blockende/;
    }
    close $sourceFH;
}

# Buffered write errors only surface when the handle is closed.
close $outFH or die "Cannot close '$outFile': $!";
print "fertig!";
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
#!/usr/bin/perl use strict; use warnings; use File::Util; # Destination file: my $outFile = "out_fileutil.ffn"; # For-loop counter: my $i = 0; # File-Util-Objekt erzeugen: my $fu = File::Util->new(); # Directory which includes files: my $sourceDir = '../Perl/Test'; # Directory gets read out (only ".ffn" files) and contents will be sorted and written to an array: opendir(my $sourceDH, $sourceDir) or die $!; # Das Verzeichnis wird ausgelesen (hier: nur .txt-Dateien) und der Inhalt wird in ein Array geschrieben: my @allfiles = grep /\.ffn$/, $fu->list_dir($sourceDir, qw(--files-only --recurse)); #my @allfiles = grep /\.ffn$/, sort readdir($sourceDH); closedir $sourceDH; ########################################################################### # Opens destination file to read out: open my $outFH, ">>", $outFile or die $!; # Loop over all files in the array: for my $sourceFile (@allfiles) { # One file gets opened to get read out: open my $sourceFH, "<", $sourceDir . '/' . $sourceFile or die $!; my $NameReadIn = <$sourceFH>; close $sourceFH;
# Open the current input file read-only.  $sourceFile is used as the path
# exactly as given -- no directory prefix is prepended here.  Dies with the
# system error text if the file cannot be opened.
open my $sourceFH, "<", $sourceFile or die $!;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
#! /usr/bin/perl
use strict;
use warnings;

# https://www.perl-community.de/bat/poard/thread/18288

use File::Find;                             # for searching for files/directories
use File::Spec::Functions qw( catfile );    # for creating portable file-paths
use Cwd;                                    # for determining current work directory

my @directories = (
    cwd,    # search in current work directory
);

my @matches;    # one array reference of full file paths per matching directory

# File::Find callback.  For every directory that is visited, the full paths
# of its '.ffn' entries are collected and, if there are any, stored as one
# array reference in @matches.  Dies if a directory cannot be opened.
sub find_ffn_files {
    my $dir = $File::Find::name;

    return if !-d $dir;    # skip if not a directory

    opendir my $dh, $dir or die "Cannot open '$dir': $!\n";

    # read '.ffn' files from directory and create full file path
    my @files = map { catfile( $dir, $_ ) } grep { m/\.ffn$/ } readdir $dh;
    closedir $dh;

    # create array of arrays for matches
    push @matches, \@files if @files;
    return;
}

# Search for files and fill @matches.  With no_chdir the process stays in its
# start directory, so $File::Find::name remains a usable path even when a
# start directory is given as a relative path.
find( { wanted => \&find_ffn_files, no_chdir => 1 }, @directories );

# check @matches
for my $matching_dir (@matches) {

    # each $matching_dir is an array reference which contains the full paths
    # of the files found; print them sorted, one directory per line
    print join( " ", sort @{$matching_dir} ), "\n";
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
#! /usr/bin/perl
use strict;
use warnings;

# https://www.perl-community.de/bat/poard/thread/18288

use File::Find;                             # for searching for files/directories
use File::Spec::Functions qw( catfile );    # for creating portable file-paths
use Cwd;                                    # for determining current work directory

my @directories = (
    cwd,    # search in current work directory
);

# Name of the file that is written into every matching directory.
my $outfile_name = 'combined.ffn';

my %match;    # directory => reference to array of the file names found there

# Concatenates the given files of one directory into a new file in that
# same directory.
#   $newfile   - name of the output file (created/truncated in $directory)
#   $directory - directory that holds the input files
#   $filesRef  - reference to an array of input file names
# Dies if a file cannot be opened or the output file cannot be closed.
sub combine_to {
    my ( $newfile, $directory, $filesRef ) = @_;

    my $outfile = catfile( $directory, $newfile );
    open my $wh, '>', $outfile or die "open(w,$outfile) failed: $!\n";

    for my $file ( @{$filesRef} ) {
        my $infile = catfile( $directory, $file );
        open my $rh, '<', $infile or die "open(ro,$infile) failed: $!\n";

        # copy line by line instead of slurping the whole file into a list
        while ( my $line = <$rh> ) {
            print {$wh} $line;
        }
        close $rh;
    }

    close $wh or die "close($outfile) failed: $!\n";
    return;
}

# File::Find callback.  Remembers every visited directory that contains
# exactly two '.ffn' files (the output file itself is never counted).
sub find_ffn_files {
    my $dir = $File::Find::name;

    return if !-d $dir;    # skip if not a directory

    opendir my $dh, $dir or die "Cannot open '$dir': $!\n";

    # The output of an earlier run must not be taken as input again;
    # sorting makes the order of concatenation reproducible.
    my @files = sort grep { m/\.ffn$/ && $_ ne $outfile_name } readdir $dh;
    closedir $dh;

    # we must have found exactly 2 files
    $match{$dir} = \@files if 2 == @files;
    return;
}

# Search for files and fill %match.  With no_chdir the process stays in its
# start directory, so $File::Find::name remains a usable path even when a
# start directory is given as a relative path.
find( { wanted => \&find_ffn_files, no_chdir => 1 }, @directories );

# check %match
for my $dir ( keys %match ) {

    # combine found files into $outfile_name in the corresponding directory
    combine_to( $outfile_name, $dir => $match{$dir} );
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
#! /usr/bin/perl
use strict;
use warnings;

# https://www.perl-community.de/bat/poard/thread/18288

use File::Find;                             # for searching for files/directories
use File::Spec::Functions qw( catfile );    # for creating portable file-paths
use Cwd;                                    # for determining current work directory

my @directories = (         # where do you want to search?
    '/path/to/data/dir',    # herein are the .ffn files; ADJUST THIS PATH
);

my %match;    # directory => reference to array of the file names found there

# Test stand-in: only reports, via warn, what would be combined.
# This routine is to be changed to do the job of data extraction and
# combination.  ADJUST THIS CODE if the test run was successful.
#   $newfile   - name of the output file
#   $directory - directory that holds the data files
#   $filesRef  - reference to an array of data file names
sub combine_to {
    my $newfile   = shift;
    my $directory = shift;
    my $filesRef  = shift;

    # DEBUG output
    warn <<DEBUGTEXT;
DATADIR: '$directory'
OUTPUTFILE: '$newfile'
DATAFILES: @{$filesRef}
DEBUGTEXT
}

# File::Find callback.  Remembers every visited directory that contains one
# or two '.ffn' files.  Dies if a directory cannot be opened.
sub find_ffn_files {
    my $dir = $File::Find::name;

    return if !-d $dir;    # skip if not a directory

    opendir my $dh, $dir or die "Cannot open '$dir': $!\n";

    # read the names of the '.ffn' files of this directory
    my @files = grep { m/\.ffn$/ } readdir $dh;
    closedir $dh;

    # we must have found 1 or exactly 2 files
    $match{$dir} = \@files if @files && 2 >= @files;
    return;
}

# Search for files and fill %match.  With no_chdir the process stays in its
# start directory; otherwise File::Find changes into the directories it
# visits and a RELATIVE start path (e.g. '../TEST/Bakterien') in
# $File::Find::name no longer points to an existing directory.
find( { wanted => \&find_ffn_files, no_chdir => 1 }, @directories );

# check %match
for my $dir ( keys %match ) {

    # combine found files into 'combined.ffn' in the corresponding directory
    combine_to( 'combined.ffn', $dir => $match{$dir} );
}
1 2 3
# Start directories for the search; the .ffn files are expected below them.
# ADJUST THIS PATH.  A relative path is resolved against the current working
# directory of the process.
my @directories = ( '../TEST/Bakterien' );
Quote: Datei oder Verzeichnis nicht gefunden
at Linuxercode.pl line 54
Quote: /home/alexander/TEST/Bakterien
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Placeholder for the real extraction/combination routine: it only reports
# its three arguments on STDERR via warn.  Replace the body once a test run
# has shown that the right directories and files are found.
#   1st argument - name of the output file
#   2nd argument - directory that holds the data files
#   3rd argument - reference to an array of data file names
sub combine_to {
    my ( $newfile, $directory, $filesRef ) = @_;

    # DEBUG output
    warn <<"DEBUGTEXT";
DATADIR: '$directory'
OUTPUTFILE: '$newfile'
DATAFILES: @{$filesRef}
DEBUGTEXT
}
1
2
3
4
5
6
7
8
9
10
11
12
/home/linuxer/testing/bioperl
├── Bakterien
│ ├── emil
│ │ └── emil.ffn
│ ├── fritz_anna
│ │ ├── anna.ffn
│ │ └── fritz.ffn
│ └── hans
│ └── hans.ffn
└── recombine.pl
4 directories, 5 files
1
2
3
4
5
6
7
8
9
10
11
DATADIR: '/home/linuxer/testing/bioperl/Bakterien/emil'
OUTPUTFILE: 'combined.ffn'
DATAFILES: emil.ffn
DATADIR: '/home/linuxer/testing/bioperl/Bakterien/hans'
OUTPUTFILE: 'combined.ffn'
DATAFILES: hans.ffn
DATADIR: '/home/linuxer/testing/bioperl/Bakterien/fritz_anna'
OUTPUTFILE: 'combined.ffn'
DATAFILES: anna.ffn fritz.ffn
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
#! /usr/bin/perl
use strict;
use warnings;

# https://www.perl-community.de/bat/poard/thread/18288

use File::Find;                             # for searching for files/directories
use File::Spec::Functions qw( catfile );    # for creating portable file-paths
use Cwd;                                    # for determining current work directory

my @directories = (     # where do you want to search?
    '/dev/shm/data',    # herein are the .ffn files; ADJUST THIS PATH
);

my $outfile_name = 'combined.ffn';

my %match;    # directory => reference to array of the file names found there

# Extracts and combines the data of the given files of one directory.
# The output file gets one line "> <organism>" (organism = text in [...] of
# the first header line that has one) followed by all extracted fragments.
# A fragment is taken from the first non-empty line after each '>' header
# line: its first three characters are skipped and the next 24 are kept,
# provided they consist of A, T, G and C only.
#   $newfile   - output filename (created/truncated in $directory)
#   $directory - files' directory
#   $filesRef  - reference to array of filenames (left unmodified)
# Dies if a file cannot be opened or the output file cannot be closed.
sub combine_to {
    my ( $newfile, $directory, $filesRef ) = @_;

    # create full path of output file
    my $outfile = catfile( $directory, $newfile );

    my $organism = '';    # name of organism
    my @sequences;        # list of extracted gene sequence fragments

    # open output file for writing
    open my $outfh, '>', $outfile or die "open(w, $outfile) failed: $!\n";

    # dereference $filesRef and iterate through the input filenames
    for my $file ( @{$filesRef} ) {

        # A separate variable is used for the full path: assigning to the
        # loop variable would change the caller's array through the alias.
        my $infile = catfile( $directory, $file );

        # open input file for reading
        open my $infh, '<', $infile or die "open(ro,$infile) failed: $!\n";

        # true while the first sequence line of a record is still to come
        my $expect_sequence = 0;

        # read input file linewise
        while ( my $line = <$infh> ) {

            # skip empty lines
            next if $line =~ m{^\s*$};

            # header line: remember the organism once, then wait for the
            # first sequence line of this record
            if ( $line =~ m/^>/ ) {
                if ( $organism eq '' && $line =~ m/^>.+\[([^]]+)\]/ ) {
                    $organism = $1;
                }
                $expect_sequence = 1;
                next;
            }

            # only the first sequence line of a record is of interest
            next if !$expect_sequence;
            $expect_sequence = 0;

            # skip 3 characters, keep the following 24
            if ( $line =~ m/\A...([ATGC]{24})/ ) {
                push @sequences, $1;
            }
        }
        close $infh;
    }

    # print collected data to output file
    print {$outfh} "> $organism\n", @sequences;

    close $outfh or die "close($outfile) failed: $!\n";
    return;
}

# File::Find callback.  Remembers every visited directory that contains one
# or two '.ffn' files, not counting the output file of an earlier run.
sub find_ffn_files {
    my $dir = $File::Find::name;

    return if !-d $dir;    # skip if not a directory

    opendir my $dh, $dir or die "Cannot open '$dir': $!\n";

    # read the names of the '.ffn' files of this directory
    my @files = grep { m/\.ffn$/ && $_ ne $outfile_name } readdir $dh;
    closedir $dh;

    # we must have found 1 or exactly 2 files
    $match{$dir} = \@files if @files && 2 >= @files;
    return;
}

# Search for files and fill %match.  With no_chdir the process stays in its
# start directory, so $File::Find::name remains a usable path even when a
# start directory is given as a relative path.
find( { wanted => \&find_ffn_files, no_chdir => 1 }, @directories );

# check %match
for my $dir ( keys %match ) {

    # combine found files into $outfile_name in the corresponding directory
    combine_to( $outfile_name, $dir => $match{$dir} );
}
Quote>gb|CP000828.1|:c2319-1627 NUDIX hydrolase [Acaryochloris marina MBIC11017]
ATGCCCTATACCTATGATTATCCGCGC
>gb|CP000828.1|:2404-3255 hypothetical protein AM1_0004 [Acaryochloris marina MBIC11017]
ATGCAACAGCATTTTGAAGTCGCGGCC
outfile:
> Acaryochloris marina MBIC11017
CCCTATACCTATGATTATCCGCGCCTCGATCTCAAAGTCCTACTGATT
_________________________________________________________________________
>gb|CP000350.1|:c1908-1 Glucose inhibited division protein A [Leptospira borgpetersenii serovar Hardjo-bovis JB197]
ATGATCGAATCCAAAAACCAATCTTTT
outfile:
> Leptospira borgpetersenii serovar Hardjo-bovis JB197
ATAGTTGTATCCATAGCAAACCAG
elsif ( $line =~ m/\A...([ATGC]{25})/ ) {
1 2 3 4
my $DNAString=join("",@DNAReadIn); $DNAString =~s/(\]\n)/,/g; while ($DNAString =~ m{,\w{3}(\w{24})}g)
Quote>gb|CP000351.1|:139-891 ParA-like protein [Leptospira borgpetersenii serovar Hardjo-bovis JB197]
ATGATAGTTGTATCCATAGCAAACCAGAAAGGTGGAGAAGGTAAAACTACAACCTCTCTTAATTTATCGA
TGGGGCTTGCGAGAAGAGGAAAAAAAACTCTGCTCGTTGATATAGATCCTCAAGCAAATTCAACCGGTAT
TTTTACAAATCCAGAAGGTATTGAGAAATCAATGCATGGAGTTTTTAACTCAAAAATGACTATCCAAGAA
ATCATGATTGAAACGAGGTTACCTGATCTTTTTTTGGCCCCTTCTAAAATGAATCTTGCGGAAGTAGAAA
CACTTTCCGGAAATTCCGTAGATGCGCCTTATATTCTGAGAGACTCTCTTCAAAGTGTGAGTGGGATCGA
CTTTTGTATCATCGATTGTCCACCTAGCTTATCTATTTTTACGATTAATGCACTCGTCGGATCAAATTAC
GTAATTATTCCACTTCAGGCTGAAAAATTTTCCGTAGATGGAATTGTAGGACTTCAACAAACAATCACAA
GCATCAAAAAAAGAATCAATCCTAACCTTGAAATTTTAGGAGCCCTAGTTACTCAACTCAAGCCTCAAAC
ACTTTTGACGAAAACTATCGTACCCGTTTTAACAAAATACTTTCGAATTTTCGAAACAAGCATCTCTGAT
GGAGTTGCAGTAGGAGAATCCCATCTTGCTAAAAAGTCGGTATTTGAATACAACAAGACGAGTAAACAAG
CCCAGGAATATGAAGGGTTTATAGAGGAGTTTTTAAATGAGCTCAAAAAGTAA
>gb|CP000351.1|:875-1720 ParB-like protein [Leptospira borgpetersenii serovar Hardjo-bovis JB197]
ATGAGCTCAAAAAGTAAACGACTCGGCTCTCTCGCAGATGTATTCCAAGCCGAAAAGTTGGAGGGGACTA
TTCGTAAAATTCGGCTCGATAAAATTCTTCCATCCGAAAACCAGCCCAGACAAGATCGAAAAAAAGGAAT
CGAAGACCTCGCGAGAAGTTTAGACAAAGACGGACTACTCCAACCAATCATTGTCACAAAACAAAATCCG
GAAGATGAGAACTATAAAATTGTAGCCGGAGAAAGAAGATACCACGCAGCGAAACAATTAGGCTGGGCAG
AAATAGAATGTAAAATTTTAGACCGGGACGAAAAAGAAACCTTTCGACTTGCAATTATAGAAAATCTTCA
AAGAGAAAATTTATCCCCTTATGAAGAAGTGGAAGCCATGTCACACTTAAAGAATAGCTTCAAATATACA
GATCAAGAATTAGGAACTCTCTTTGGAAAAAGTAGAAGTTACATGACGGAGCTTCTTGGAATTTCAAATC
TAAGCAAAGAAGAACTTAGATCCTGCAAAGAAGCAGGAATTGAAAGTAAAAATTTATTGATCCAAGCAGT
TGCAGCTTCTCGAAAAGGAACCTTCTCTGAGTTTTTAAATTTATTTCAAACGGGTGCACTTAAAACCGTT
AAAGATGCAAAATCTTTTAACCGGGAAGAGGAAAACTTGTCCACACCTAAAATTACAAGTGCGACAAACC
CAAAAGTTTCAAATTTAAATTCAACGGAATATAAGATCACAAAAAAACAAGGTCTAATTCAAATTAGTTC
TGATAATGAAGAACTGTTAGGTAATATTTTTAAACTAATCAAAAAAGAAATCCGTAAAAAATTCAATTCT
ATATAA
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
#! /usr/bin/perl
use strict;
use warnings;

# https://www.perl-community.de/bat/poard/thread/18288

use File::Find;                             # for searching for files/directories
use File::Spec::Functions qw( catfile );    # for creating portable file-paths
use Cwd;                                    # for determining current work directory

my @directories = (     # where do you want to search?
    '/dev/shm/data',    # herein are the .ffn files; ADJUST THIS PATH
);

my $outfile_name = 'combined.ffn';

my %match;    # directory => reference to array of the file names found there

# Extracts and combines the data of the given files of one directory.
# The output file gets one line "> <organism>" (organism = text in [...] of
# the first header line that has one) followed by all extracted fragments.
# A fragment is taken from the line that follows each header line: its
# first three characters are skipped and the next 24 are kept, provided
# they consist of A, T, G and C only.
#   $newfile   - output filename (created/truncated in $directory)
#   $directory - files' directory
#   $filesRef  - reference to array of filenames (left unmodified)
# Dies if a file cannot be opened or the output file cannot be closed.
sub combine_to {
    my ( $newfile, $directory, $filesRef ) = @_;

    # create full path of output file
    my $outfile = catfile( $directory, $newfile );

    my $organism = '';    # name of organism
    my @sequences;        # list of extracted gene sequence fragments

    # open output file for writing
    open my $outfh, '>', $outfile or die "open(w, $outfile) failed: $!\n";

    # dereference $filesRef and iterate through the input filenames
    for my $file ( @{$filesRef} ) {

        # A separate variable is used for the full path: assigning to the
        # loop variable would change the caller's array through the alias.
        my $infile = catfile( $directory, $file );

        # open input file for reading
        open my $infh, '<', $infile or die "open(ro,$infile) failed: $!\n";

        # read input file linewise
        LINE:
        while ( my $line = <$infh> ) {

            # skip empty lines
            next LINE if $line =~ m{^\s*$};

            # identify line with protein info and name of organism
            next LINE if $line !~ m/^>.+\[([^]]+)\]/;

            # store name of organism only if not known yet
            $organism = $1 if $organism eq '';

            # read next line to extract beginning of sequence; a header on
            # the very last line of the file has no sequence line
            my $sequence_line = <$infh>;
            last LINE if !defined $sequence_line;

            # skip 3 characters, keep the following 24
            if ( $sequence_line =~ m/\A...([ATGC]{24})/ ) {
                push @sequences, $1;
            }
        }
        close $infh;
    }

    # print collected data to output file
    print {$outfh} "> $organism\n", @sequences;

    close $outfh or die "close($outfile) failed: $!\n";
    return;
}

# File::Find callback.  Remembers every visited directory that contains one
# or two '.ffn' files, not counting the output file of an earlier run.
sub find_ffn_files {
    my $dir = $File::Find::name;

    return if !-d $dir;    # skip if not a directory

    opendir my $dh, $dir or die "Cannot open '$dir': $!\n";

    # read the names of the '.ffn' files of this directory
    my @files = grep { m/\.ffn$/ && $_ ne $outfile_name } readdir $dh;
    closedir $dh;

    # we must have found 1 or exactly 2 files
    $match{$dir} = \@files if @files && 2 >= @files;
    return;
}

# Search for files and fill %match.  With no_chdir the process stays in its
# start directory, so $File::Find::name remains a usable path even when a
# start directory is given as a relative path.
find( { wanted => \&find_ffn_files, no_chdir => 1 }, @directories );

# check %match
for my $dir ( keys %match ) {

    # combine found files into $outfile_name in the corresponding directory
    combine_to( $outfile_name, $dir => $match{$dir} );
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
>gb|CP000351.1|:139-891 ParA-like protein [Leptospira borgpetersenii serovar Hardjo-bovis JB197]
ATGATAGTTGTATCCATAGCAAACCAGAAAGGTGGAGAAGGTAAAACTACAACCTCTCTTAATTTATCGA
TGGGGCTTGCGAGAAGAGGAAAAAAAACTCTGCTCGTTGATATAGATCCTCAAGCAAATTCAACCGGTAT
TTTTACAAATCCAGAAGGTATTGAGAAATCAATGCATGGAGTTTTTAACTCAAAAATGACTATCCAAGAA
ATCATGATTGAAACGAGGTTACCTGATCTTTTTTTGGCCCCTTCTAAAATGAATCTTGCGGAAGTAGAAA
CACTTTCCGGAAATTCCGTAGATGCGCCTTATATTCTGAGAGACTCTCTTCAAAGTGTGAGTGGGATCGA
CTTTTGTATCATCGATTGTCCACCTAGCTTATCTATTTTTACGATTAATGCACTCGTCGGATCAAATTAC
GTAATTATTCCACTTCAGGCTGAAAAATTTTCCGTAGATGGAATTGTAGGACTTCAACAAACAATCACAA
GCATCAAAAAAAGAATCAATCCTAACCTTGAAATTTTAGGAGCCCTAGTTACTCAACTCAAGCCTCAAAC
ACTTTTGACGAAAACTATCGTACCCGTTTTAACAAAATACTTTCGAATTTTCGAAACAAGCATCTCTGAT
GGAGTTGCAGTAGGAGAATCCCATCTTGCTAAAAAGTCGGTATTTGAATACAACAAGACGAGTAAACAAG
CCCAGGAATATGAAGGGTTTATAGAGGAGTTTTTAAATGAGCTCAAAAAGTAA
>gb|CP000351.1|:875-1720 ParB-like protein [Leptospira borgpetersenii serovar Hardjo-bovis JB197]
ATGAGCTCAAAAAGTAAACGACTCGGCTCTCTCGCAGATGTATTCCAAGCCGAAAAGTTGGAGGGGACTA
TTCGTAAAATTCGGCTCGATAAAATTCTTCCATCCGAAAACCAGCCCAGACAAGATCGAAAAAAAGGAAT
CGAAGACCTCGCGAGAAGTTTAGACAAAGACGGACTACTCCAACCAATCATTGTCACAAAACAAAATCCG
GAAGATGAGAACTATAAAATTGTAGCCGGAGAAAGAAGATACCACGCAGCGAAACAATTAGGCTGGGCAG
AAATAGAATGTAAAATTTTAGACCGGGACGAAAAAGAAACCTTTCGACTTGCAATTATAGAAAATCTTCA
AAGAGAAAATTTATCCCCTTATGAAGAAGTGGAAGCCATGTCACACTTAAAGAATAGCTTCAAATATACA
GATCAAGAATTAGGAACTCTCTTTGGAAAAAGTAGAAGTTACATGACGGAGCTTCTTGGAATTTCAAATC
TAAGCAAAGAAGAACTTAGATCCTGCAAAGAAGCAGGAATTGAAAGTAAAAATTTATTGATCCAAGCAGT
TGCAGCTTCTCGAAAAGGAACCTTCTCTGAGTTTTTAAATTTATTTCAAACGGGTGCACTTAAAACCGTT
AAAGATGCAAAATCTTTTAACCGGGAAGAGGAAAACTTGTCCACACCTAAAATTACAAGTGCGACAAACC
CAAAAGTTTCAAATTTAAATTCAACGGAATATAAGATCACAAAAAAACAAGGTCTAATTCAAATTAGTTC
TGATAATGAAGAACTGTTAGGTAATATTTTTAAACTAATCAAAAAAGAAATCCGTAAAAAATTCAATTCT
ATATAA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
use strict;
use warnings;
use Bio::FASTASequence::File;
use File::Find::Rule;
# Collects, per organism, a 24-character fragment (offset 3) of every
# sequence found in the '*.ffn' files below the current directory and writes
# one record per organism to $output:
#   > <organism>
#   <all fragments of that organism, concatenated>

my @ffns = File::Find::Rule->file->name( '*.ffn' )->in( '.' );
my %result;    # organism name => reference to array of fragments
my $output = 'org.fasta';
my $bff = Bio::FASTASequence::File->new;

for my $ffn ( @ffns ) {
    my $parsed = $bff->file( $ffn );

    # Sorted keys make the order of the fragments reproducible between runs
    # (hash order is random).  NOTE(review): whether the sorted order equals
    # the order of the records in the file depends on the keys the parser
    # uses -- confirm.
    for my $orga ( sort keys %{$parsed} ) {
        my $record = $parsed->{$orga};

        # organism = text between the first '[' and the next ']'
        my ($name) = $record->getDescription() =~ m{ \[ (.*?) \] }x;
        if ( !defined $name ) {
            warn "$ffn: no [organism] in description of '$orga', skipped\n";
            next;
        }

        # substr() beyond the end of a string yields undef plus a warning,
        # so sequences without anything after the first 3 characters are
        # skipped explicitly.
        my $sequence = $record->getSequence();
        if ( !defined $sequence || length($sequence) <= 3 ) {
            warn "$ffn: sequence of '$orga' is too short, skipped\n";
            next;
        }

        push @{ $result{$name} }, substr( $sequence, 3, 24 );
    }
}

open my $fh, '>', $output or die "Cannot write '$output': $!";
for my $key ( sort keys %result ) {
    print {$fh} "> $key\n", join( '', @{ $result{$key} } ), "\n";
}

# Buffered write errors only surface when the handle is closed.
close $fh or die "Cannot close '$output': $!";
1
2
> Leptospira borgpetersenii serovar Hardjo-bovis JB197
ATAGTTGTATCCATAGCAAACCAGAGCTCAAAAAGTAAACGACTCGGC