function tripal_cv_obo_parse
2.x tripal_cv.obo_loader.inc | tripal_cv_obo_parse($obo_file, &$header, $jobid) |
1.x obo_loader.inc | tripal_cv_obo_parse($obo_file, &$header, $jobid) |
Parses the OBO file: header tags are collected into $header and each stanza is inserted into the tripal_obo_temp table.
Related topics
1 call to tripal_cv_obo_parse()
- tripal_cv_load_obo_v1_2 in tripal_cv/includes/obo_loader.inc
File
- tripal_cv/includes/obo_loader.inc, line 815 - Tripal Ontology Loader
Code
/**
 * Parses an OBO file, collecting header tags and staging each stanza.
 *
 * Every tag/value line that appears before the first "[...]" stanza line is
 * appended to $header. Each stanza is serialized, base64-encoded and inserted
 * into the tripal_obo_temp table together with its id and stanza type.
 *
 * @param $obo_file
 *   Path of the OBO file to read.
 * @param $header
 *   Array, passed by reference, that receives the header values keyed by tag.
 *   Each tag maps to a list of values.
 * @param $jobid
 *   Job identifier handed to tripal_job_set_progress(). When empty, the
 *   per-interval progress updates inside the read loop are skipped.
 *
 * @return
 *   The prefix (text before the first unescaped colon) of the last 'id' value
 *   seen, or '_global' when no such value was found or the file could not be
 *   opened.
 */
function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
  $in_header = TRUE;
  $stanza = array();
  $type = '';
  $default_db = '_global';
  $line_num = 0;
  $num_read = 0;
  $intv_read = 0;
  // Stand-in for an escaped colon ("\:") while a line is split on ":".
  $placeholder = '|-|-|';

  // Report progress roughly every 1% of the file.
  $filesize = filesize($obo_file);
  $interval = intval($filesize * 0.01);
  if ($interval < 1) {
    $interval = 1;
  }

  $fh = fopen($obo_file, 'r');
  if (!$fh) {
    watchdog('T_obo_loader', "ERROR: Cannot open OBO file for reading.", array(), 'error');
    return $default_db;
  }

  // Compare against FALSE explicitly so that a falsy line such as "0" does not
  // end the loop early.
  while (($line = fgets($fh)) !== FALSE) {
    $line_num++;
    // Count bytes (not characters) because $filesize is measured in bytes.
    $size = strlen($line);
    $num_read += $size;
    $intv_read += $size;

    if ($jobid and $intv_read >= $interval) {
      $fraction = $filesize > 0 ? $num_read / $filesize : 1;
      $percent = sprintf("%.2f", $fraction * 100);
      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
      tripal_job_set_progress($jobid, intval($fraction * 33.33333333));
      $intv_read = 0;
    }

    // Remove surrounding whitespace (including the newline), then any
    // characters outside the printable ASCII range.
    $line = trim($line);
    $line = (string) preg_replace('/[^(\x20-\x7F)]*/', '', $line);
    if ($line === '') {
      continue;
    }

    // Remove comments from the end of the line.
    // TODO: an escaped exclamation mark ("\!") is still treated as a comment.
    $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);
    // A line that held nothing but a comment carries no tag; skip it rather
    // than recording an empty tag.
    if (trim($line) === '') {
      continue;
    }

    // The first stanza line ends the header; every stanza line also closes the
    // stanza that was being read.
    if (preg_match('/^\s*\[/', $line)) {
      $in_header = FALSE;
      if (count($stanza) > 0) {
        _tripal_cv_obo_parse_insert_stanza($stanza, $type);
      }
      // Stanza type: the text between the square brackets.
      $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
      $stanza = array();
      continue;
    }

    // Split the line into tag and value on the first unescaped colon. Escaped
    // colons are hidden behind the placeholder for the split and restored
    // afterwards.
    $line = str_replace('\\:', $placeholder, $line);
    $pair = explode(':', $line, 2);
    $tag = $pair[0];
    $value = isset($pair[1]) ? trim($pair[1]) : '';

    // The prefix of an id value becomes the default DB.
    $matches = array();
    if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
      $default_db = str_replace($placeholder, '\\:', $matches[1]);
    }

    $tag = str_replace($placeholder, '\\:', $tag);
    $value = str_replace($placeholder, '\\:', $value);

    if ($in_header) {
      if (!array_key_exists($tag, $header)) {
        $header[$tag] = array();
      }
      $header[$tag][] = $value;
    }
    else {
      if (!array_key_exists($tag, $stanza)) {
        $stanza[$tag] = array();
      }
      $stanza[$tag][] = $value;
    }
  }

  // The last stanza in the file has no following stanza line to flush it.
  if (count($stanza) > 0) {
    _tripal_cv_obo_parse_insert_stanza($stanza, $type);
    $fraction = $filesize > 0 ? $num_read / $filesize : 1;
    $percent = sprintf("%.2f", $fraction * 100);
    print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
    tripal_job_set_progress($jobid, intval($fraction * 33.33333333));
  }

  fclose($fh);
  return $default_db;
}

/**
 * Inserts one parsed stanza into tripal_obo_temp; logs and exits on failure.
 *
 * @param $stanza
 *   Array of stanza values keyed by tag; each tag maps to a list of values.
 * @param $type
 *   The stanza type taken from the "[...]" line that opened the stanza.
 */
function _tripal_cv_obo_parse_insert_stanza($stanza, $type) {
  $values = array(
    'id' => isset($stanza['id'][0]) ? $stanza['id'][0] : NULL,
    'stanza' => base64_encode(serialize($stanza)),
    'type' => $type,
  );
  $options = array('statement_name' => 'ins_tripalobotemp_all');
  $success = tripal_core_chado_insert('tripal_obo_temp', $values, $options);
  if (!$success) {
    watchdog('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
    exit;
  }
}