Leser: 21
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
sub get_next_record { my($fh) = @_; my($offset); my($record) = ''; my($save_input_separator) = $/; $/ = "//\n"; $record = <$fh>; $/ = $save_input_separator; return $record; } sub parse_annotation { my($annotation) = @_; my(%results) = ( ); while( $annotation =~ /^[A-Z].*\n(^\s.*\n)*/gm ) { my $value = $&; (my $key = $value) =~ s/^([A-Z]+).*/$1/s; $results{$key} = $value; } return %results; } sub get_annotation_and_dna { my($record) = @_; my($annotation) = ''; my($dna) = ''; # Now separate the annotation from the sequence data ($annotation, $dna) = ($record =~ /^(LOCUS.*ORIGIN\s*\n)(.*)\/\/\n/s); # clean the sequence of any whitespace or / characters # (the / has to be written \/ in the character class, because # / is a metacharacter, so it must be "escaped" with \) $dna =~ s/[\s\/\d]//g; return($annotation, $dna) } sub parse_features { my($features) = @_; # entire FEATURES field in a scalar variable # Declare and initialize variables my(@features) = (); # used to store the individual features # Extract the features while( $features =~ /^ {5}\S.*\n(^ {21}\S.*\n)*/gm ) { my $feature = $&; push(@features, $feature); } return @features; } # Open library $fh = open_file($library); open(TT,">TT.txt"); while ($record = get_next_record($fh)){ #Get the fields from the first GenBank record in a library %fields = parse_annotation($annotation); #Annotation and DNA ($annotation, $dna) = get_annotation_and_dna($record); # Extract the features from the FEATURES table @features = parse_features($fields{'FEATURES'}); foreach my $id(@ids){ #Hier liegt das Problem!!! *1 if($record =~ /$id/){ print $id."\n"; print_sequence($dna, 60); # Print out the features foreach my $feature (@features) { # extract the name of the feature (or "feature key") my($featurename) = ($feature =~ /^ {5}(\S+)/); print TT "******** $featurename *********\n"; print TT $feature; } } } } close TT;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
LOCUS AF503441 1598 bp DNA linear PLN 20-JAN-2004
DEFINITION Nicotiana langsdorffii x Nicotiana sanderae nectarin 5 (Nec5) gene,
partial cds.
ACCESSION AF503441 # Nach diesem Teil wird gesucht (Nur Nummer)
VERSION AF503441.1 GI:30315242
KEYWORDS .
SOURCE Nicotiana langsdorffii x Nicotiana sanderae
ORGANISM Nicotiana langsdorffii x Nicotiana sanderae
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons;
asterids; lamiids; Solanales; Solanaceae; Nicotianoideae;
Nicotianeae; Nicotiana.
...
FEATURES Location/Qualifiers
source 1..1598
/organism="Nicotiana langsdorffii x Nicotiana sanderae"
/mol_type="genomic DNA"
/db_xref="taxon:164110"
gene 663..>1598
/gene="Nec5"
mRNA 663..>1598
/gene="Nec5"
CDS 788..>1598
1 2 3 4 5 6 7
sub get_next_record { my($fh) = @_; local $/ = "//\n"; my $record = <$fh>; return $record; }