#!/usr/bin/perl use strict; use warnings; my @seq=( 'BB[CDA]BFBB', 'AA[^CDA]ZZBH', 'BB[CDA]{1}BFBB', 'AA[HIX]{0,5}ZZBH', 'A[A-Z]F', '[RH][STA][LIVMFYQ]H[RFH][LIVKM]KWX[LIVMF]', 'QX[AFG]KK', ); for my $str (@seq) { print "$str :\n"; for(expand($str)) { print " $_\n"; } } ################################################################################ ################################################################################ ################################################################################ sub expand { my $regexp=shift; my $tree= _expand_parse($regexp); return _expand_join($tree); } sub _expand_parse { my $regexp=shift; my @tree=(); while(length($regexp)) { my $char=substr($regexp,0,1,''); if($char eq '[') { push @tree,{type=>'group', elms => _expand_parse_group(\$regexp), range=>[1,1]} } elsif($char eq '{') { my $range=_expand_parse_range(\$regexp); if(@tree) { $tree[-1]->{range}=$range; } } else { push(@tree, {type=>'char', elms=>$char, range=>[1,1]}); } } return \@tree; } sub _expand_parse_group { my $ref_regexp=shift; my @elms; if($$ref_regexp=~s/^(\^?)(.*?)\]//) { my $invert=$1?1:0; my $group=$2; while(length($group)) { my $char=substr($group,0,1,''); if($char eq '-' && length($group) && @elms) { my $start=pop(@elms); my $end=substr($group,0,1,''); for($start .. $end) { push(@elms,$_) } } else { push(@elms,$char); } } if($invert) { my %not_elms=map{$_ => 1}@elms; @elms=(); for('A' .. 'Z') { push(@elms,$_) unless($not_elms{$_}); } } } return \@elms; } sub _expand_parse_range { my $ref_regexp=shift; my @elms; if($$ref_regexp=~s/^(.*?)\}//) { my $range=$1; if($range=~/^(\d+)$/) { @elms=($1,$1); } elsif($range=~/^(\d+),$/) { @elms=($1,$1+100); } elsif(($range=~/^(\d+),(\d)$/)) { @elms=($1,$2); } elsif(($range=~/^,(\d)$/)) { @elms=(0,1); } else { @elms=(1,1); } } return \@elms; } sub _expand_join { my $tree=shift; my @strings=(''); while(@$tree) { my $elm=shift(@$tree); my @list; my $type=$elm->{type};verse if($type eq 'char') { for my $cnt ($elm->{range}->[0] .. $elm->{range}->[0]) { push(@list, $elm->{elms} x $cnt) } } elsif($type eq 'group') { @list=_expand_join_group($elm); } my @str_cpy=@strings; @strings=(); for my $str_add (@list) { for my $str_old (@str_cpy) { push(@strings, $str_old.$str_add); } } } return @strings; } sub _expand_join_group { my $elm=shift; my @list=(); for my $length ($elm->{range}->[0] .. $elm->{range}->[1]) { my @strings=_expand_join_group_rec($elm->{elms},$length); push(@list,@strings); } return @list; } sub _expand_join_group_rec { my $chars=shift(); my $length=shift; return '' unless($length && $length > 0); my @ret; my @append=_expand_join_group_rec($chars,$length-1); for my $char (@$chars) { push(@ret,$char.$_) for(@append); } return @ret; }