function tripal_feature_load_fasta_residues
2.x tripal_feature.fasta_loader.inc | tripal_feature_load_fasta_residues($fh, $feature_id, $seq_start, $seq_end) |
Populates the residues column of the feature record with the sequence read from the FASTA file.
This function seeks to the proper location in the file for the sequence and reads in chunks of sequence and appends them to the feature.residues column in the database.
Parameters
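$fh: An open, seekable file handle for the FASTA file; the function seeks to $seq_start itself.
$feature_id: The feature_id of the feature record whose residues column is updated.
$seq_start: The byte offset in the file at which the sequence begins.
$seq_end: The byte offset in the file at which the sequence ends; reading stops once the file position reaches it.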
1 call to tripal_feature_load_fasta_residues()
- tripal_feature_load_fasta_feature in tripal_feature/
includes/ tripal_feature.fasta_loader.inc - A helper function for tripal_feature_load_fasta() to load a single feature
File
- tripal_feature/
includes/ tripal_feature.fasta_loader.inc, line 923 - Provides fasta loading functionality. Creates features based on their specification in a fasta file.
Code
/**
 * Loads the residues of one FASTA record into the feature.residues column.
 *
 * Seeks to $seq_start in the file, reads line by line until the file position
 * reaches $seq_end, trims each line and appends the accumulated sequence to
 * feature.residues in large chunks. Afterwards seqlen and md5checksum are
 * recomputed in the database from the stored residues. Progress and memory
 * usage are printed while the sequence is read.
 *
 * @param $fh
 *   An open, seekable file handle for the FASTA file.
 * @param $feature_id
 *   The feature_id of the feature record to update.
 * @param $seq_start
 *   The byte offset in the file at which the sequence begins.
 * @param $seq_end
 *   The byte offset in the file at which the sequence ends. No line that
 *   starts at or beyond this offset is read.
 *
 * @return
 *   TRUE on success, or FALSE if the file could not be positioned or one of
 *   the database updates failed.
 */
function tripal_feature_load_fasta_residues($fh, $feature_id, $seq_start, $seq_end) {
  // First position the file at the beginning of the sequence.
  if (fseek($fh, $seq_start, SEEK_SET) !== 0) {
    return FALSE;
  }

  // Number of bytes to read from the file before the buffered sequence is
  // appended to the database.
  $chunk_size = 100000000;
  $chunk = '';
  $seqlen = ($seq_end - $seq_start);

  // Update the percent complete about every 1% of the sequence. We don't want
  // to repeat the update too often or it slows things down, so the interval
  // is never less than 100000 bytes.
  $interval = max(intval($seqlen * 0.01), 100000);

  $chunk_intv_read = 0;
  $intv_read = 0;
  $partial_seq_size = 0;
  $total_seq_size = 0;

  $append_sql = "
    UPDATE {feature}
    SET residues = residues || :chunk
    WHERE feature_id = :feature_id
  ";

  // First, make sure we don't have a null in the residues, otherwise the
  // concatenation used for appending would have nothing to append to.
  $sql = "UPDATE {feature} SET residues = '' WHERE feature_id = :feature_id";
  if (!chado_query($sql, array(':feature_id' => $feature_id))) {
    return FALSE;
  }

  // Read in the lines until we reach the end of the sequence. Once we
  // get a specific bytes read then append the sequence to the one in the
  // database.
  print "Sequence complete: 0%. Memory: " . number_format(memory_get_usage()) . " bytes. \r";

  // The position is checked before every read so that an empty sequence, or
  // an end offset that is not on a line boundary, can never cause the
  // following records to be read. fgets() is compared against FALSE so that
  // a line that evaluates to false (e.g. "0") does not end the loop early.
  while (ftell($fh) < $seq_end && ($line = fgets($fh)) !== FALSE) {
    // fgets() returns the line including its newline, so strlen() is the
    // number of bytes consumed from the file.
    $line_bytes = strlen($line);
    $chunk_intv_read += $line_bytes;
    $partial_seq_size += $line_bytes;
    $intv_read += $line_bytes;
    $chunk .= trim($line);

    // If we've read in enough of the sequence then append it to the database.
    if ($chunk_intv_read >= $chunk_size) {
      if (!chado_query($append_sql, array(':feature_id' => $feature_id, ':chunk' => $chunk))) {
        return FALSE;
      }
      $total_seq_size += $partial_seq_size;
      $partial_seq_size = 0;
      $chunk = '';
      $chunk_intv_read = 0;
    }

    if ($intv_read >= $interval) {
      $percent = $seqlen > 0 ? sprintf("%.2f", ($total_seq_size / $seqlen) * 100) : '100.00';
      print "Sequence complete: " . $percent . "%. Memory: " . number_format(memory_get_usage()) .
        " bytes. \r";
      $intv_read = 0;
    }
  }

  // Write the last bit of sequence if it remains.
  if (strlen($chunk) > 0) {
    if (!chado_query($append_sql, array(':feature_id' => $feature_id, ':chunk' => $chunk))) {
      return FALSE;
    }
    $total_seq_size += $partial_seq_size;
  }

  // Now update the seqlen and md5checksum fields.
  $sql = "UPDATE {feature} SET seqlen = char_length(residues), md5checksum = md5(residues) WHERE feature_id = :feature_id";
  if (!chado_query($sql, array(':feature_id' => $feature_id))) {
    return FALSE;
  }

  // An empty sequence has nothing left to load, so report it as complete
  // instead of dividing by zero.
  $percent = $seqlen > 0 ? sprintf("%.2f", ($total_seq_size / $seqlen) * 100) : '100.00';
  print "Sequence complete: " . $percent . "%. Memory: " . number_format(memory_get_usage()) .
    " bytes. \r";

  return TRUE;
}