function tripal_cv_obo_parse

2.x tripal_cv.obo_loader.inc tripal_cv_obo_parse($obo_file, &$header, $jobid)
1.x obo_loader.inc tripal_cv_obo_parse($obo_file, &$header, $jobid)

Parse the OBO file and populate the temporary loading table

Parameters

$obo_file: The path on the file system where the ontology can be found

$header: An array passed by reference that will be populated with the header information from the OBO file

$jobid: The job_id of the job from the Tripal jobs management system.

Related topics

1 call to tripal_cv_obo_parse()
tripal_cv_load_obo_v1_2 in tripal_cv/includes/tripal_cv.obo_loader.inc
Imports a given OBO file into Chado. This function is usually called by one of three wrapper functions: tripal_cv_load_obo_v1_2_id, tripal_cv_load_obo_v1_2_file or tripal_cv_load_obo_v1_2_url. But, it can be called directly if the full path to an…

File

tripal_cv/includes/tripal_cv.obo_loader.inc, line 1137
Functions to aid in loading ontologies into the chado cv module

Code

/**
 * Parses an OBO file and stores each stanza in the tripal_obo_temp table.
 *
 * The file is read line by line. Tag/value pairs found before the first
 * stanza marker (a line starting with "[") are appended to $header. Every
 * following stanza is collected into an array keyed by tag and, once complete,
 * inserted into tripal_obo_temp as a base64-encoded serialized string together
 * with its first 'id' value and its stanza type (the text between the square
 * brackets).
 *
 * @param $obo_file
 *   The path on the file system of the OBO file to parse.
 * @param $header
 *   An array passed by reference. Each header tag becomes a key whose value
 *   is the list of values found for that tag.
 * @param $jobid
 *   The job identifier passed to tripal_set_job_progress(). When it is empty
 *   no progress is reported while the file is being read.
 *
 * @return
 *   The text preceding the first colon of the last 'id' value that contained
 *   a colon, or '_global' if no such id was found.
 */
function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
  $in_header = 1;
  $stanza = array();
  // Stanza type of the stanza currently being collected.
  $type = '';
  $default_db = '_global';
  $line_num = 0;
  $num_read = 0;
  $intv_read = 0;

  $fh = fopen($obo_file, 'r');
  if (!$fh) {
    tripal_report_error('T_obo_loader', "ERROR: Cannot open the OBO file for reading: " . $obo_file, array(), 'error');
    return $default_db;
  }

  // Progress is reported each time roughly 1% of the file has been read.
  $filesize = filesize($obo_file);
  $interval = intval($filesize * 0.01);
  if ($interval < 1) {
    $interval = 1;
  }

  // Iterate through the lines in the OBO file and parse the stanzas. The
  // explicit comparison with FALSE keeps a final line such as "0" (without a
  // trailing newline) from ending the loop early.
  while (($line = fgets($fh)) !== FALSE) {

    $line_num++;
    // Count bytes, not characters, because $filesize is expressed in bytes.
    $size = strlen($line);
    $num_read += $size;
    $intv_read += $size;

    // Update the job status every time another interval has been read.
    if ($jobid and $intv_read >= $interval) {
      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
      tripal_set_job_progress($jobid, intval(($num_read / $filesize) * 33.33333333));
      $intv_read = 0;
    }

    // Remove surrounding whitespace, including the newline.
    $line = trim($line);

    // Remove any special characters that may be hiding.
    $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);

    // Skip empty lines.
    if (strcmp($line, '') == 0) {
      continue;
    }

    // Remove comments from the end of lines.
    // TODO: an escaped exclamation mark (\!) is still treated as a comment.
    $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);

    // At the first stanza we're out of the header.
    if (preg_match('/^\s*\[/', $line)) {
      $in_header = 0;

      // Store the stanza we just finished reading.
      if (sizeof($stanza) > 0) {
        $values = array(
          'id' => $stanza['id'][0],
          'stanza' => base64_encode(serialize($stanza)),
          'type' => $type,
        );
        $success = chado_insert_record('tripal_obo_temp', $values);
        if (!$success) {
          tripal_report_error('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
          fclose($fh);
          exit;
        }
      }

      // Get the stanza type: Term, Typedef or Instance.
      $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);

      // Start fresh with a new array.
      $stanza = array();
      continue;
    }

    // Break the line apart into the tag and value. Escaped colons (\:) are
    // temporarily swapped for a placeholder so they are not used as the
    // tag/value separator.
    $line = str_replace('\\:', '|-|-|', $line);
    $pair = explode(":", $line, 2);
    $tag = $pair[0];
    // A line without a colon has no value part.
    $value = isset($pair[1]) ? trim($pair[1]) : '';

    // If this is the ID then look for the default DB.
    $matches = array();
    if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
      $default_db = $matches[1];
    }

    // Put the escaped colons back.
    $tag = str_replace('|-|-|', '\\:', $tag);
    $value = str_replace('|-|-|', '\\:', $value);

    if ($in_header) {
      if (!array_key_exists($tag, $header)) {
        $header[$tag] = array();
      }
      $header[$tag][] = $value;
    }
    else {
      if (!array_key_exists($tag, $stanza)) {
        $stanza[$tag] = array();
      }
      $stanza[$tag][] = $value;
    }
  }
  fclose($fh);

  // Now add the last stanza in the file.
  if (sizeof($stanza) > 0) {
    $values = array(
      'id' => $stanza['id'][0],
      'stanza' => base64_encode(serialize($stanza)),
      'type' => $type,
    );
    // The result must be captured here; otherwise the check below would look
    // at the outcome of an earlier insert (or at an undefined variable).
    $success = chado_insert_record('tripal_obo_temp', $values);
    if (!$success) {
      tripal_report_error('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
      exit;
    }
    $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
    print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
    tripal_set_job_progress($jobid, intval(($num_read / $filesize) * 33.33333333));
  }
  return $default_db;
}