Обновлено 16.04.2010 Автор: Administrator
Задача: объединить (склеить) все множественные выравнивания в каталоге. Все крайне просто, но вдруг кому-нибудь пригодится. Реализация на Perl + BioPerl.
#
# concatenates together all alignments from a given directory
#
use Getopt::Long;
use Bio::AlignIO;
sub process_file {
my $file=$_[0]; # assigning parameters
my $alnfile = new Bio::AlignIO(-format => $FORMAT, -file => $file);
my $aln = $alnfile->next_aln(); #NB: assume there's only one alignment in file
if(!($aln)) { print "$file possibly non- $FORMAT format\n"; die; }
if(!($aln->is_flush)) { print "$file: not all alignments are of the same length!\n"; die;}
foreach my $seq ($aln->each_seq ) { #for each seq in alignment
my $org=$seq->display_id."_".$seq->desc;
$SEQS{$org}=$SEQS{$org}.$seq->seq;
}
}
sub help{
print "Concatenates files in given subdir. Output to STDOUT\n";
print "Usage:\n";
print " -d subdirectory \n";
print " -f alignment format (default: fasta)\n";
print " -e extension of files to be concatenated\n";
}
#--------------------- main part --------------------------------
$ALIGNDIR=undef; # name of subdirectory with command line parameters
$FORMAT="fasta"; # possible values: "clustalw" or "fasta"
$EXTENSION=undef; # extension of files to process
GetOptions ('d=s' => \$ALIGNDIR,
'f=s' => \$FORMAT,
'e=s' => \$EXTENSION,
'help|?|h' => \$help );
if($help) { help(); exit;}
if (!($ALIGNDIR)) {$ALIGNDIR="alignments"; print "#ALIGNDIR (-d) ommited! Set to $ALIGNDIR\n";}
if ( (!($FORMAT=="fasta")) || (!($FORMAT=="clustalw")) )
{ die "Unknown format: $FORMAT\nOnly fasta and clustalw formats are supported"; }
if(!($EXTENSION)) {
if($FORMAT=="fasta") {$EXTENSION = "faa"; }
if($FORMAT=="clustalw") {$EXTENSION = "mscaln"; }
}
# redirecting STDERR
open STDERR, '>', "STDERR.out" or die "Can't redirect STDERR: $!";
%SEQS = (); #keys - org names, values - pasted sequences
opendir(DATADIR, "./".$ALIGNDIR."/") or die ("Error opening directory $name");
@FILES=grep(/\.*$EXTENSION$/i, readdir DATADIR);
foreach(@FILES) { process_file("./".$ALIGNDIR.$name.'/'.$_) }
closedir(DATADIR);
foreach $key(sort keys %SEQS){
print">$key\n", $SEQS{$key}, "\n";
}
# print "#Work is finished";
< Предыдущая | Следующая > |
---|