Bio::SeqIO
FTHelper
Summary
Bio::SeqIO::FTHelper - Helper class for Embl/Genbank feature tables
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
Used by Bio::SeqIO::EMBL to help process the Feature Table
Description
Represents one particular Feature with the following fields
key - the key of the feature
loc - the location string of the feature
- other fields
Methods
Methods description
Title : _generic_seqfeature
Usage : $fthelper->_generic_seqfeature($annseq, "GenBank")
Function: processes fthelper into a generic seqfeature
Returns : TRUE on success and otherwise FALSE
Args : Bio::Seq, string indicating the source (GenBank/EMBL/SwissProt) |
Title : _parse_loc
Usage : $fthelper->_parse_loc( $loc_string)
Function: Parses the given location string and returns a location object
with start() and end() and strand() set appropriately.
Note that this method is private.
Returns : location object or 0 on fail
Args : location string |
Title : from_SeqFeature
Usage : @fthelperlist = Bio::SeqIO::FTHelper::from_SeqFeature($sf,
$context_annseq);
Function: constructor of fthelpers from SeqFeatures
:
: The additional annseq argument is to allow the building of FTHelper
: lines relevant to particular sequences (i.e., when features are spread over
: entries, knowing how to build this)
Returns : an array of FThelpers
Args : seq features |
Title : key
Usage : $obj->key($newval)
Function:
Example :
Returns : value of key
Args : newvalue (optional) |
Title : loc
Usage : $obj->loc($newval)
Function:
Example :
Returns : value of loc
Args : newvalue (optional) |
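Title : field
Usage : $hashref = $obj->field()
Function: Gives access to the hash in which the tag/value pairs of the feature are stored
Example : foreach my $tag ( keys %{$obj->field} ) { print "$tag\n"; }
Returns : reference to a hash keyed by tag name; add_field stores an array reference of values under each tag
Args : none |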
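Title : add_field
Usage : $obj->add_field($key, $value)
Function: Appends a value to the list of values stored for the given tag, creating the list first if the tag is not yet present
Example : $obj->add_field('note', 'score=42')
Returns : the result of the underlying push (the number of values now stored for the tag)
Args : tag name, value to append |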
Methods code
# Constructor: builds an empty FTHelper whose tag/value store ('_field')
# starts out as an empty hash.
#
# Usage   : my $fth = Bio::SeqIO::FTHelper->new();
# Args    : none are consulted; anything after the invocant is ignored
# Returns : a new blessed object
#
# The invocant may be a class name or an existing instance; in the latter
# case the new object is blessed into the class of that instance.
sub new {
    my ($caller) = @_;

    # Blessing into a reference is a fatal error, so resolve an instance
    # invocant to its class name first.
    my $class = ref($caller) || $caller;

    my $self = bless {}, $class;
    $self->{'_field'} = {};
    return $self;
}
# Title   : _generic_seqfeature
# Usage   : $fthelper->_generic_seqfeature($annseq, "GenBank")
# Function: Converts this helper (key, location string and tag/value fields)
#           into a Bio::SeqFeature::Generic and attaches it to $annseq.
# Returns : 1 when the feature was built and added, 0 when the location
#           could not be parsed (a warning is issued and nothing is added).
# Args    : $annseq - the sequence object receiving the feature; its id() is
#                     used as the seq id of local locations and in warnings
#           $source - optional source tag; defaults to
#                     "EMBL/GenBank/SwissProt"
sub _generic_seqfeature {
    my ($fth, $annseq, $source) = @_;

    if ( !defined($source) ) {
        $source = "EMBL/GenBank/SwissProt";
    }

    my $sf = Bio::SeqFeature::Generic->direct_new();

    # Any occurrence of "complement" anywhere in the location string marks
    # the feature as a whole as minus strand.
    my $strand = ( $fth->loc =~ /complement/ ) ? -1 : 1;
    $sf->strand($strand);

    # The three tests are tried in this order, so "join" wins over "order",
    # which wins over "bond", when several of them occur in the string.
    if (   $fth->loc =~ /(join)/i
        || $fth->loc =~ /(order)/i
        || $fth->loc =~ /(bond)/i )
    {
        # $1 holds the operator exactly as spelled in the location string.
        my $combotype = $1;

        $sf->primary_tag($fth->key);
        $sf->source_tag($source);

        my $splitlocation = Bio::Location::Split->new(
            -strand    => $strand,
            -seqid     => $annseq->id,
            -splittype => $combotype,
        );

        # Drop everything up to and including "<operator>(" together with
        # the closing parenthesis matched by the pattern, leaving the
        # comma-separated operands.
        my $loc = $fth->loc;
        $loc =~ s/^.*$combotype\((\S+)\)/$1/;

        foreach my $next_loc ( split(/\s*,\s*/, $loc) ) {
            my $remote = 0;
            my $seqid  = $annseq->id;

            # A leading "accession:" (optionally "accession.version:") names
            # another entry; strip it and remember it as the seq id.
            if ( $next_loc =~ s/\(?\s*([A-Za-z\d\_]+(\.\d+)?):// ) {
                $seqid  = $1;
                $remote = 1;
            }

            if ( my $location = $fth->_parse_loc($sf, $next_loc) ) {
                # Emitted for every sub-location, remote or not.
                $fth->debug( "I got remote: ".
                             join(",", ($location->start(),
                                        $location->end(),
                                        $location->strand()))
                             . " for $next_loc\n");
                $location->seq_id($seqid);
                if ($remote) {
                    $location->is_remote(1);
                }
                $splitlocation->add_sub_Location($location);
            }
            else {
                $fth->warn("unable to parse location successfully out of " .
                           $next_loc . ", ignoring feature (seqid=" .
                           $annseq->id() . ")");
                # One bad operand invalidates the whole feature.
                $sf = undef;
                last;
            }
        }
        $sf->location($splitlocation) if ( defined $sf );
    }
    else {
        $sf->source_tag($source);
        $sf->primary_tag($fth->key);

        my $loc = $fth->loc();

        # The two most common shapes, "a..b" and "complement(a..b)", are
        # turned into Bio::Location::Simple objects by filling the hash
        # directly instead of going through the constructor.
        if ( $loc =~ /^(\d+)\.\.(\d+)$/ ) {
            my $start = $1;
            my $end   = $2;
            my $location = {};
            bless $location, 'Bio::Location::Simple';
            $location->{'_start'}  = $start;
            $location->{'_end'}    = $end;
            $location->{'_strand'} = 1;
            $sf->location($location);
        }
        elsif ( $loc =~ /^complement\((\d+)\.\.(\d+)\)$/ ) {
            my $start = $1;
            my $end   = $2;
            my $location = {};
            bless $location, 'Bio::Location::Simple';
            $location->{'_start'}  = $start;
            $location->{'_end'}    = $end;
            $location->{'_strand'} = -1;
            $sf->location($location);
        }
        else {
            # Everything else goes through the general parser, after any
            # "accession:" prefix has been removed.
            my $seqid;
            if ( $loc =~ s/\(?\s*([A-Za-z\d\_]+(\.\d+)?):// ) {
                $seqid = $1;
            }

            if ( my $location = $fth->_parse_loc($sf, $loc) ) {
                if ( defined $seqid ) {
                    # Same treatment as remote operands of a split
                    # location: record the foreign id and flag it.
                    $location->seq_id($seqid);
                    $location->is_remote(1);
                }
                $sf->location($location);
            }
            else {
                $annseq->warn("unexpected location line [" . $loc .
                              "] in reading $source, ignoring feature " .
                              $fth->key() . " (seqid=" . $annseq->id() . ")");
                $sf = undef;
            }
        }
    }

    if ( !defined($sf) ) {
        $fth->warn("unable to parse feature " . $fth->key() .
                   " in $source sequence entry (id=" .
                   $annseq->id() . "), ignoring");
        return 0;
    }

    # Copy every stored tag/value pair onto the feature.
    foreach my $key ( keys %{ $fth->field } ) {
        foreach my $value ( @{ $fth->field->{$key} } ) {
            $sf->add_tag_value($key, $value);
        }
    }
    $annseq->add_SeqFeature($sf);
    return 1;
}
# Title   : _parse_loc
# Usage   : $fthelper->_parse_loc($sf, $loc_string)
# Function: Parses one location string (no join/order/bond operator) into a
#           location object. A tag value embedded in the location string is
#           added to $sf, under the embedded feature type or "note".
# Returns : a Bio::Location::Simple or Bio::Location::Fuzzy object, or 0 if
#           the string matches neither supported pattern
# Args    : $sf     - feature object receiving an embedded tag value
#           $locstr - the location string
sub _parse_loc {
    my ($self, $sf, $locstr) = @_;

    my $feature_tag = '';
    my $tag_value   = '';
    my $separator   = '';
    my $strand      = 1;
    my ($start, $end);

    $self->debug( "Location parse, processing $locstr\n");

    # Only the opening "complement(" is removed; the patterns below accept
    # the closing parenthesis that is left behind.
    if ( $locstr =~ /complement\((.+)/ ) {
        $locstr = $1;
        $strand = -1;
    }

    # First pattern: two positions with a separator between them.
    # Second pattern: a single position, used as both start and end.
    if ( my @range = $locstr =~ /^\s*(\w+[A-Za-z])?\({0,2}([\<\>\?]?\d*[\<\>\?]?([\.\^]\d+)?)\)?([\.\^\s]{1,3})\(?([\<\>\?]?\d*[\<\>\?]?([\.\^]\d+)?)\){0,2}[,;\" ]*([A-Za-z]\w*)?\"?\)?\s*$/ ) {
        $feature_tag = $range[0] if $range[0];
        $start       = $range[1];
        $separator   = $range[3];
        $end         = $range[4];
        $tag_value   = $range[6] if $range[6];
    }
    elsif ( my @point = $locstr =~ /^\s*(\w+[A-Za-z])?\(?([\<\>\?]?\d*[\<\>\?]?([\.\^]\d+)?)\)?[,;\" ]*([A-Za-z]\w*)?\"?\)?\s*$/ ) {
        $feature_tag = $point[0] if $point[0];
        $start       = $point[1];
        $end         = $point[1];
        $tag_value   = $point[3] if $point[3];
    }
    else {
        $self->warn( "$locstr didn't match\n") if( $self->verbose > 0);
        return 0;
    }

    my $class     = 'Bio::Location::Simple';
    my @ctor_args = ('-start' => $start, '-end' => $end, '-strand' => $strand);

    # A <, > or ? on either position, or a lone "." or "^" as separator,
    # selects the fuzzy location class.
    my $is_fuzzy = $start =~ /[\>\<\?]/
                || $end =~ /[\>\<\?]/
                || $separator =~ /^[\.^]$/;
    if ($is_fuzzy) {
        $class = 'Bio::Location::Fuzzy';
        push @ctor_args, ('-loc_type' => $separator);
    }

    my $location = $class->new(@ctor_args);

    if ( defined($tag_value) && $tag_value ne '' ) {
        $sf->add_tag_value($feature_tag || "note", $tag_value);
    }

    return $location;
}
# Title   : from_SeqFeature
# Usage   : @fthelperlist = Bio::SeqIO::FTHelper::from_SeqFeature($sf,
#                                                      $context_annseq);
# Function: Builds FTHelper objects from a sequence feature. This is a plain
#           function, not a method: the feature is the first argument.
#           If the feature implements to_FTHelper, the conversion is
#           delegated to it entirely. Otherwise one helper is built from the
#           primary tag, the location's feature-table string, and the tags.
# Returns : a list of FTHelper objects
# Args    : $sf             - the feature to convert
#           $context_annseq - passed through to to_FTHelper when the
#                             feature provides that method
sub from_SeqFeature {
    my ($sf, $context_annseq) = @_;

    if ( $sf->can("to_FTHelper") ) {
        return $sf->to_FTHelper($context_annseq);
    }

    my $fth    = Bio::SeqIO::FTHelper->new();
    my $key    = $sf->primary_tag();
    my $locstr = $sf->location->to_FTstring;
    $fth->loc($locstr);
    $fth->key($key);

    # The 'note' list always exists, even when nothing is added to it.
    # Score and frame are only recorded when they hold a true value.
    $fth->field->{'note'} = [];
    if ( $sf->can('score') && $sf->score ) {
        push( @{ $fth->field->{'note'} }, "score=" . $sf->score );
    }
    if ( $sf->can('frame') && $sf->frame ) {
        push( @{ $fth->field->{'note'} }, "frame=" . $sf->frame );
    }

    foreach my $tag ( $sf->all_tags ) {
        # Tags starting with an underscore are not exported.
        next if $tag =~ /^_/;
        if ( !defined $fth->field->{$tag} ) {
            $fth->field->{$tag} = [];
        }
        foreach my $val ( $sf->each_tag_value($tag) ) {
            push( @{ $fth->field->{$tag} }, $val );
        }
    }

    my @ret = ($fth);

    # Sanity check on what is handed back; the message names the object
    # that failed the check.
    foreach my $ft (@ret) {
        if ( !$ft->isa('Bio::SeqIO::FTHelper') ) {
            $sf->throw("Problem in processing seqfeature $sf - made a $ft!");
        }
    }
    return @ret;
}
# Title   : key
# Usage   : $obj->key($newval)
# Function: Combined getter/setter for the feature key. A defined argument
#           replaces the stored key; an undefined or missing one leaves it
#           untouched.
# Returns : the stored key (after any update)
# Args    : new value (optional)
sub key {
    my $self    = shift;
    my $new_key = shift;

    $self->{'key'} = $new_key if defined $new_key;
    return $self->{'key'};
}
# Title   : loc
# Usage   : $obj->loc($newval)
# Function: Combined getter/setter for the location string. A defined
#           argument replaces the stored string; an undefined or missing
#           one leaves it untouched.
# Returns : the stored location string (after any update)
# Args    : new value (optional)
sub loc {
    my $self    = shift;
    my $new_loc = shift;

    $self->{'loc'} = $new_loc if defined $new_loc;
    return $self->{'loc'};
}
# Title   : field
# Usage   : $hashref = $obj->field()
# Function: Read accessor for the tag/value store of this helper.
# Returns : whatever is stored in the '_field' slot (the constructor puts
#           an empty hash reference there)
# Args    : none
sub field {
    my $self = shift;
    return $self->{'_field'};
}
# Title   : add_field
# Usage   : $obj->add_field($key, $value)
# Function: Appends a value to the list kept for the given tag, creating an
#           empty list first when the tag is not yet present.
# Returns : the result of the final push (the new number of values stored
#           for the tag)
# Args    : tag name, value to append
sub add_field {
    my ($self, $key, $val) = @_;

    my $store = $self->field;
    $store->{$key} = [] unless exists $store->{$key};

    push @{ $store->{$key} }, $val;
}
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://www.bioperl.org/MailList.shtml - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via email
or the web:
bioperl-bugs@bio.perl.org
http://bio.perl.org/bioperl-bugs/
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _