function tripal_feature_load_gff3_fasta
2.x tripal_feature.gff_loader.inc | tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read, &$line_num, $filesize, $job) |
1.x gff_loader.inc | tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read, &$line_num) |
Load the FASTA sequences at the bottom of a GFF3 file
Parameters
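$fh: An open file handle; every remaining line is read from it as FASTA.
$interval: The number of characters to read between job progress updates.
$num_read: Running total of characters read so far (passed by reference and updated).
$intv_read: Characters read since the last progress update (passed by reference; reset to 0 after each update).
$line_num: The current line number (passed by reference and incremented for each line read).
$filesize: The total file size, used as the denominator when computing the percent complete.
$job: The job whose progress is updated via tripal_set_job_progress(); if empty, no progress updates are made.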
Related topics
1 call to tripal_feature_load_gff3_fasta()
- tripal_feature_load_gff3 in tripal_feature/includes/tripal_feature.gff_loader.inc - Actually load a GFF3 file. This is the function called by tripal jobs.
File
- tripal_feature/includes/tripal_feature.gff_loader.inc, line 2094 - Provides gff3 loading functionality. Creates features based on their specification in a GFF3 file.
Code
/**
 * Load the FASTA sequences at the bottom of a GFF3 file.
 *
 * Reads every remaining line from the file handle. Each definition line
 * (starting with '>') begins a new sequence whose ID is the text up to the
 * first whitespace character; the following lines are concatenated as its
 * residues. When a sequence is complete its residues and length are written
 * to the matching record in the feature table, located through the
 * tripal_gff_temp table.
 *
 * @param $fh
 *   An open file handle; all remaining lines are read as FASTA.
 * @param $interval
 *   The number of characters to read between job progress updates.
 * @param $num_read
 *   Running total of characters read so far. Updated by reference.
 * @param $intv_read
 *   Characters read since the last progress update. Updated by reference and
 *   reset to 0 after each update.
 * @param $line_num
 *   The current line number. Incremented by reference for each line read.
 * @param $filesize
 *   The total file size, used to compute the percent complete.
 * @param $job
 *   The job passed to tripal_set_job_progress(). If empty, no progress
 *   updates are made.
 */
function tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read, &$line_num, $filesize, $job) {
  print "\nLoading FASTA sequences\n";
  $residues = '';
  $id = NULL;

  // Guard the progress calculation against a zero file size, which would
  // otherwise be a division by zero.
  $fraction = $filesize > 0 ? $num_read / $filesize : 0;
  $percent = sprintf("%.2f", $fraction * 100);
  print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";

  // Iterate through the remaining lines of the file. Compare against FALSE
  // explicitly so that a falsy line (e.g. a final "0" with no newline) does
  // not end the loop early.
  while (($line = fgets($fh)) !== FALSE) {
    $line_num++;
    $size = drupal_strlen($line);
    $num_read += $size;
    $intv_read += $size;
    $line = trim($line);

    // Update the job status once $interval more characters have been read.
    if ($job and $intv_read >= $interval) {
      $intv_read = 0;
      $fraction = $filesize > 0 ? $num_read / $filesize : 0;
      $percent = sprintf("%.2f", $fraction * 100);
      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
      tripal_set_job_progress($job, intval($fraction * 100));
    }

    if (preg_match('/^>/', $line)) {
      // A definition line starts a new sequence, so first save the one we
      // just finished (if any). NULL is checked explicitly so that an ID of
      // "0" is still saved.
      if ($id !== NULL) {
        _tripal_feature_load_gff3_fasta_save($id, $residues);
      }
      // The ID used to look up the feature in the tripal_gff_temp table is
      // the name up to the first space.
      $id = preg_replace('/^>([^\s]+).*$/', '\1', $line);
      $residues = '';
    }
    else {
      // The line was already trimmed above.
      $residues .= $line;
    }
  }

  // Add in the last sequence, but only if at least one definition line was
  // seen; otherwise there is nothing to save.
  if ($id !== NULL) {
    _tripal_feature_load_gff3_fasta_save($id, $residues);
  }
}

/**
 * Store the residues of one FASTA sequence on its matching feature.
 *
 * Looks up $id as a uniquename in the tripal_gff_temp table. If no record is
 * found a warning is reported and nothing is updated.
 *
 * @param $id
 *   The sequence ID taken from the FASTA definition line.
 * @param $residues
 *   The concatenated sequence residues.
 */
function _tripal_feature_load_gff3_fasta_save($id, $residues) {
  $result = chado_select_record('tripal_gff_temp', array('*'), array('uniquename' => $id));

  // empty() covers both an empty result set and a non-array result, which
  // count() would warn about or reject on newer PHP versions.
  if (empty($result)) {
    tripal_report_error('tripal_feature', TRIPAL_WARNING, 'Cannot find feature to assign FASTA sequence: %uname',
      array('%uname' => $id));
    return;
  }

  // We have a feature, so add the residues.
  $feature = $result[0];
  $values = array(
    'residues' => $residues,
    'seqlen' => strlen($residues),
  );
  $match = array('feature_id' => $feature->feature_id);
  chado_update_record('feature', $match, $values);
}