Bio::Tools::Run
RemoteBlast
Summary
Bio::Tools::Run::RemoteBlast - Object for remote execution of the NCBI Blast via HTTP
Package variables
No package variables defined.
Included modules
HTTP::Request::Common
IO::String
LWP
Inherit
Synopsis
Remote-blast "factory object" creation and blast-parameter initialization:
use Bio::Tools::Run::RemoteBlast;
use strict;
# $v is a verbosity flag: the progress dots and per-hit output below are
# only printed while it is greater than zero.
my $v = 1;
my $prog = 'blastp';
my $db = 'swissprot';
my $e_val= '1e-10';
# Named constructor arguments: program, database and E-value cutoff.
my @params = ( '-prog' => $prog,
'-data' => $db,
'-expect' => $e_val );
my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
$v = 1;
# Read the first sequence of a FASTA file to use as the query.
my $str = Bio::SeqIO->new(-file=>'amino.fa' , '-format' => 'fasta' );
my $input = $str->next_seq();
# Blast a sequence against a database:
my $r = $factory->submit_blast($input);
print STDERR "waiting..." if( $v > 0 );
# Poll until no request IDs (RIDs) are left; each RID is removed below once
# it has either failed or produced a report.
while ( my @rids = $factory->each_rid ) {
foreach my $rid ( @rids ) {
my $rc = $factory->retrieve_blast($rid);
# A non-reference return is a status code rather than a report object.
if( !ref($rc) ) {
# Negative status: retrieval failed, so stop polling this RID.
if( $rc < 0 ) {
$factory->remove_rid($rid);
}
print STDERR "." if ( $v > 0 );
# Pause before asking the server again.
sleep 5;
} else {
# A report object came back: this RID is finished.
$factory->remove_rid($rid);
my $result = $rc->next_result;
print "db is ", $result->database_name(), "\n";
my $count = 0;
while( my $hit = $result->next_hit ) {
$count++;
next unless ( $v > 0);
print "hit name is ", $hit->name, "\n";
while( my $hsp = $hit->next_hsp ) {
print "score is ", $hsp->score, "\n";
}
}
}
}
}
Various additional options and input formats are available. See the
DESCRIPTION section for details.
Description
Class for remote execution of the NCBI Blast via HTTP.
Methods
Methods description
Title : header
Usage : my %header = $self->header
Function: Get the HTTP header (form parameters) for the blast query
Returns : hash of parameter names and values
Args : none |
Title : readmethod
Usage : my $readmethod = $self->readmethod
Function: Get/Set the method to read the blast report
Returns : string
Args : string [ SearchIO, BPlite ] |
Title : program
Usage : my $prog = $self->program
Function: Get/Set the program to run
Returns : string
Args : string [ blastp, blastn, blastx, tblastn, tblastx ] |
Title : database
Usage : my $db = $self->database
Function: Get/Set the database to search
Returns : string
Args : string [ swissprot, nr, nt, etc... ] |
Title : expect
Usage : my $expect = $self->expect
Function: Get/Set the E value cutoff
Returns : string
Args : string [ '1e-4' ] |
Title : ua
Usage : my $ua = $self->ua or
$self->ua($ua)
Function: Get/Set a LWP::UserAgent for use
Returns : reference to LWP::UserAgent Object
Args : none
Comments: Will create a UserAgent if none has been requested before. |
Title : proxy
Usage : $httpproxy = $db->proxy('http') or
$db->proxy(['http','ftp'], 'http://myproxy' )
Function: Get/Set a proxy for the user agent
Returns : a string indicating the proxy
Args : $protocol : an array ref of the protocol(s) to set/get
$proxyurl : url of the proxy to use for the specified protocol |
Title : submit_blast
Usage : $self->submit_blast([$seq1,$seq2]);
Function: Submit blast jobs to ncbi blast queue on sequence(s)
Returns : number of request IDs (RIDs) queued (0 if there was no input), or -1 if a request failed; fetch the reports with retrieve_blast
Args : input can be:
* sequence object
* array ref of sequence objects
* filename of file containing fasta formatted sequences |
Title : retrieve_blast
Usage : my $blastreport = $blastfactory->retrieve_blast($rid);
Function: Attempts to retrieve a blast report from remote blast queue
Returns : -1 on error,
0 on 'job not finished',
Bio::Tools::BPlite or Bio::SearchIO object
(depending on how object was initialized) on success
Args : Remote Blast ID (RID) |
Methods code
BEGIN {
    # Package-wide settings, assigned when the module is compiled.

    # CGI endpoint that both submission and retrieval requests are POSTed to.
    $URLBASE = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';

    # Form fields of a submission ('Put') request.  The empty values are
    # placeholders that are filled in before a request is built.
    %HEADER = (
        CMD                          => 'Put',
        PROGRAM                      => '',
        DATABASE                     => '',
        FILTER                       => 'L',
        EXPECT                       => '',
        QUERY                        => '',
        CDD_SEARCH                   => 'off',
        COMPOSITION_BASED_STATISTICS => 'off',
        FORMAT_OBJECT                => 'Alignment',
        SERVICE                      => 'plain',
    );

    # Form fields of a retrieval ('Get') request; RID is a placeholder.
    %RETRIEVALHEADER = (
        CMD            => 'Get',
        RID            => '',
        ALIGNMENT_VIEW => 'Pairwise',
        DESCRIPTIONS   => 100,
        ALIGNMENTS     => 50,
        FORMAT_TYPE    => 'Text',
    );

    # Pattern (kept as a string) whose first capture is the request ID in a
    # submission response.
    $RIDLINE = 'RID\s+=\s+(\d+-\d+-\d+)';

    # Values used by the constructor for any argument the caller omits.
    %BLAST_PARAMS = (
        prog       => 'blastp',
        data       => 'nr',
        expect     => '1e-3',
        readmethod => 'SearchIO',
    );
} |
# Constructor.
#
# Args   : named arguments -PROG, -DATA, -EXPECT and -READMETHOD; the full
#          argument list is also handed to the parent constructor.
# Returns: the new object, with every omitted setting taken from
#          %BLAST_PARAMS.
sub new {
    my ($caller, @args) = @_;

    my $self = $caller->SUPER::new(@args);
    $self->_initialize_io();

    my ($prog, $data, $expect, $readmethod) =
        $self->_rearrange([qw(PROG DATA EXPECT READMETHOD)], @args);

    # Every value goes through its accessor so that any checking done there
    # (see program()) also applies to constructor arguments.
    $self->readmethod(defined $readmethod ? $readmethod : $BLAST_PARAMS{'readmethod'});
    $self->program(   defined $prog       ? $prog       : $BLAST_PARAMS{'prog'});
    $self->database(  defined $data       ? $data       : $BLAST_PARAMS{'data'});
    $self->expect(    defined $expect     ? $expect     : $BLAST_PARAMS{'expect'});

    return $self;
} |
# Build the form fields for a submission request.
#
# Args   : none
# Returns: a hash (as a flat key/value list) copied from %HEADER with
#          PROGRAM, DATABASE and EXPECT taken from this object's settings.
sub header {
    my $self = shift;

    # Later pairs override the empty placeholders copied from %HEADER.
    my %h = (
        %HEADER,
        'PROGRAM'  => scalar $self->program,
        'DATABASE' => scalar $self->database,
        'EXPECT'   => scalar $self->expect,
    );
    return %h;
} |
# Get/set the name of the parser used to read retrieved reports.
#
# Args   : optional new value; an undefined argument leaves the setting as is
# Returns: the current value
sub readmethod {
    my $self = shift;
    my $val  = shift;

    $self->{'_readmethod'} = $val if defined $val;
    return $self->{'_readmethod'};
} |
# Get/set the blast program to run.
#
# Args   : optional program name (case-insensitive); one of blastp, blastn,
#          blastx, tblastn or tblastx
# Returns: the current program name, lower-cased
# Notes  : an unrecognised name triggers a warning and falls back to blastp.
sub program {
    my ($self, $val) = @_;
    if( defined $val ) {
        $val = lc $val;
        # Anchored on both ends: the name must be exactly one of the five
        # programs, not merely contain one of them (which also used to let
        # the non-existent 'tblastp' through).
        if( $val !~ /\A(?:blast[pnx]|tblast[nx])\z/ ) {
            $self->warn("trying to set program to an invalid program name ($val) -- defaulting to blastp");
            $val = 'blastp';
        }
        $self->{'_program'} = $val;
    }
    return $self->{'_program'};
} |
# Get/set the name of the database to search.
#
# Args   : optional new value; an undefined argument leaves the setting as is
# Returns: the current value
sub database {
    my $self = shift;
    my $val  = shift;

    $self->{'_database'} = $val if defined $val;
    return $self->{'_database'};
} |
# Get/set the E-value cutoff sent with the query.
#
# Args   : optional new value; an undefined argument leaves the setting as is
# Returns: the current value
sub expect {
    my $self = shift;
    my $val  = shift;

    $self->{'_expect'} = $val if defined $val;
    return $self->{'_expect'};
} |
# Get/set the user agent used for all HTTP requests.
#
# Args   : optional user-agent object to use from now on
# Returns: the current user agent
# Notes  : when no agent has been supplied or created yet, a default
#          LWP::UserAgent is created on first use.
sub ua {
    my ($self, $value) = @_;
    if( defined $value ) {
        # Honour the documented setter form $self->ua($ua); the argument
        # used to be ignored.
        $self->{'_ua'} = $value;
    } elsif( ! defined $self->{'_ua'} ) {
        $self->{'_ua'} = LWP::UserAgent->new;
    }
    return $self->{'_ua'};
} |
# Get/set the proxy the user agent uses for one or more protocols.
#
# Args   : $protocol - protocol name, or array ref of names when setting
#          $proxy    - optional proxy URL; omit it to read the current value
# Returns: whatever the user agent's proxy() method returns, or undef when
#          no protocol was given
# Notes  : the read form relies on the user agent treating a one-argument
#          proxy() call as a query, as current LWP::UserAgent does.
sub proxy {
    my ($self, $protocol, $proxy) = @_;
    return undef unless defined $protocol;

    my $ua = $self->ua;
    # Pass the URL on only when one was given, so the documented getter
    # form proxy('http') reaches the user agent instead of yielding undef.
    return defined $proxy ? $ua->proxy($protocol, $proxy)
                          : $ua->proxy($protocol);
} |
# Record one or more request IDs as pending.
#
# Args   : list of request IDs
# Returns: number of distinct request IDs now stored
sub add_rid {
    my $self = shift;

    $self->{'_rids'}->{$_} = 1 for @_;
    return scalar keys %{ $self->{'_rids'} };
} |
# Forget one or more request IDs.
#
# Args   : list of request IDs; IDs that are not stored are ignored
# Returns: number of request IDs still stored
sub remove_rid {
    my $self = shift;

    delete $self->{'_rids'}->{$_} for @_;
    return scalar keys %{ $self->{'_rids'} };
} |
# List the request IDs that are currently stored.
#
# Args   : none
# Returns: the stored request IDs, in no particular order
sub each_rid {
    my $self = shift;
    return keys %{ $self->{'_rids'} };
} |
# Submit one blast job per input sequence and remember the request IDs.
#
# Args   : a sequence object, an array ref of sequence objects, or the name
#          of a FASTA file (see _load_input)
# Returns: 0 when no sequences could be loaded;
#          -1 when any request failed at the HTTP level;
#          otherwise the number of request IDs that were queued.
# Notes  : request IDs obtained before or after a failed request are still
#          stored and can be listed with each_rid().
sub submit_blast {
    my ($self, $input) = @_;
    my @seqs = $self->_load_input($input);
    return 0 unless ( @seqs );

    my $queued = 0;
    # Failures are tracked apart from the counter: writing -1 into the
    # counter let later successes count up from it, which lost the error.
    my $failed = 0;
    my %header = $self->header;

    foreach my $seq ( @seqs ) {
        $header{'QUERY'} = $seq->seq();
        my $request = POST $URLBASE, [%header];
        $self->warn($request->as_string) if ( $self->verbose > 0 );
        my $response = $self->ua->request($request);

        if( ! $response->is_success ) {
            $self->warn("req was ". $request->as_string() . "\n" .
                        $response->error_as_HTML);
            $failed = 1;
            next;
        }

        # In verbose mode keep a copy of the raw response in a temp file.
        if( $self->verbose > 0 ) {
            my ($tempfh) = $self->tempfile();
            print $tempfh $response->content;
        }

        # Scan line by line; only the first request ID found is stored.
        my @subdata = split(/\n/, $response->content );
        my $count = 0;
        foreach my $line ( @subdata ) {
            if( $line =~ /$RIDLINE/ ) {
                my $rid = $1;
                $count++;
                print STDERR $line if( $self->verbose > 0 );
                $self->add_rid($rid);
                last;
            }
        }
        if( $count == 0 ) {
            # The server answered but gave no request ID: show what was
            # sent and what came back.
            $self->warn("req was ". $request->as_string() . "\n");
            $self->warn(join('', @subdata));
        }
        $queued += $count;
    }
    return $failed ? -1 : $queued;
} |
# Try to fetch the report for one request ID.
#
# Args   : request ID (RID)
# Returns: -1 on error,
#          0 when the response is too small to be a finished report,
#          a Bio::Tools::BPlite object (readmethod matches /BPlite/) or a
#          Bio::SearchIO object (any other readmethod) on success
# Throws : when the downloaded temp file cannot be opened
sub retrieve_blast {
    my ($self, $rid) = @_;

    my ($fh, $tempfile) = $self->tempfile();
    # Only the file name is needed: the user agent writes the response body
    # to the file by name, so release our own handle first.
    close $fh if defined $fh;

    my %hdr = %RETRIEVALHEADER;
    $hdr{'RID'} = $rid;
    my $req = POST $URLBASE, [%hdr];
    if( $self->verbose > 0 ) {
        $self->warn("retrieve request is " . $req->as_string());
    }
    my $response = $self->ua->request($req, $tempfile);

    # In verbose mode echo the downloaded file to standard output.
    if( $self->verbose > 0 ) {
        open(my $tmp, '<', $tempfile) or $self->throw("cannot open $tempfile");
        while( my $line = <$tmp> ) { print $line; }
        close $tmp;
    }

    if( ! $response->is_success ) {
        $self->warn($response->error_as_HTML);
        return -1;
    }

    # The download size decides the outcome: over 1000 bytes is parsed as a
    # report, under 500 is reported as an error, anything in between is
    # treated as "not finished yet".
    my $size = -s $tempfile;
    $size = 0 unless defined $size;

    if( $size > 1000 ) {
        my $blastobj;
        if( $self->readmethod =~ /BPlite/ ) {
            $blastobj = Bio::Tools::BPlite->new(-file => $tempfile);
        } else {
            $blastobj = Bio::SearchIO->new(-file => $tempfile,
                                           -format => 'blast');
        }
        return $blastobj;
    } elsif( $size < 500 ) {
        open(my $err, '<', $tempfile) or $self->throw("cannot open file $tempfile");
        $self->warn(join("", <$err>));
        close $err;
        return -1;
    }
    return 0;
} |
# Turn the caller's input into a list of sequence objects.
#
# Args   : a Bio::PrimarySeqI object, an array ref of such objects, or the
#          name of an existing FASTA file
# Returns: list of sequence objects; empty when a single reference was
#          passed that is neither an array nor a sequence object
# Throws : when the input is undefined, is a string that does not name an
#          existing file, or is an array holding no sequence objects
sub _load_input {
    my ($self, $input) = @_;
    if( ! defined $input ) {
        $self->throw("Calling remote blast with no input");
    }

    my @seqs;
    if( ! ref $input ) {
        # A plain string is taken as the name of a FASTA file.
        if( -e $input ) {
            my $seqio = Bio::SeqIO->new(-format => 'fasta', -file => $input);
            while( my $seq = $seqio->next_seq ) {
                push @seqs, $seq;
            }
        } else {
            $self->throw("Input $input was not a valid filename");
        }
    } elsif( ref($input) eq 'ARRAY' ) {
        # Exact comparison: a pattern match on ref() would also catch
        # objects whose class name merely contains "array".
        foreach my $item ( @$input ) {
            # The eval keeps an unblessed reference inside the list from
            # aborting the whole call on the ->isa method lookup.
            if( ref($item) && eval { $item->isa('Bio::PrimarySeqI') } ) {
                push @seqs, $item;
            } else {
                $self->warn("Trying to add a " . ref($item) .
                            " but expected a Bio::PrimarySeqI");
            }
        }
        if( ! @seqs) {
            $self->throw("Did not pass in valid input -- no sequence objects found");
        }
    } elsif( eval { $input->isa('Bio::PrimarySeqI') } ) {
        push @seqs, $input;
    } else {
        # Still returns an empty list, but no longer without saying why.
        $self->warn("Trying to submit a " . ref($input) .
                    " but expected a Bio::PrimarySeqI");
    }
    return @seqs;
} |
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via email
or the web:
bioperl-bugs@bio.perl.org
http://bio.perl.org/bioperl-bugs/
| AUTHOR - Jason Stajich | Top |
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _