function tripal_feature_load_gff3_fasta

2.x tripal_feature.gff_loader.inc tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read, &$line_num, $filesize, $job)
1.x gff_loader.inc tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read, &$line_num)

Load the FASTA sequences at the bottom of a GFF3 file

Parameters

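$fh: The open file handle of the GFF3 file. All remaining lines are read from it and treated as FASTA records.

$interval: The number of characters to read between progress updates.

$num_read: The running total of characters read from the file. Passed by reference and increased for every line read.

$intv_read: The number of characters read since the last progress update. Passed by reference; reset to 0 each time progress is reported.

$line_num: The current line number in the file. Passed by reference and incremented for every line read.

$filesize: The total size of the file, used to compute the percent complete.

$job: The Tripal job whose progress is updated. Progress is only reported when this value is set.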

Related topics

1 call to tripal_feature_load_gff3_fasta()
tripal_feature_load_gff3 in tripal_feature/includes/tripal_feature.gff_loader.inc
Actually loads a GFF3 file. This is the function called by Tripal jobs.

File

tripal_feature/includes/tripal_feature.gff_loader.inc, line 2094
Provides gff3 loading functionality. Creates features based on their specification in a GFF3 file.

Code

/**
 * Load the FASTA sequences at the bottom of a GFF3 file.
 *
 * Reads every remaining line from the file handle. Each definition line
 * (a line starting with '>') begins a new sequence whose ID is the text
 * between the '>' and the first whitespace character. All following lines,
 * up to the next definition line or the end of the file, are concatenated
 * into the residues for that sequence. Each completed sequence is stored on
 * the feature that the tripal_gff_temp table lists under that ID.
 *
 * @param $fh
 *   The open file handle to read the remaining lines from.
 * @param $interval
 *   The number of characters to read between progress updates.
 * @param $num_read
 *   Running total of characters read so far. Updated by reference.
 * @param $intv_read
 *   Characters read since the last progress update. Updated by reference and
 *   reset to 0 whenever progress is reported.
 * @param $line_num
 *   The current line number. Updated by reference.
 * @param $filesize
 *   The total size of the file, used to compute the percent complete.
 * @param $job
 *   The job to report progress on. Progress is only reported when this is
 *   set.
 */
function tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read, &$line_num, $filesize, $job) {
  print "\nLoading FASTA sequences\n";
  $residues = '';
  $id = NULL;

  $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
  print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";

  // Iterate through the remaining lines of the file. The comparison against
  // FALSE is explicit so that a falsy line (e.g. a final "0" without a
  // trailing newline) does not end the loop early.
  while (($line = fgets($fh)) !== FALSE) {

    $line_num++;
    // The counters are shared with the caller, so the same length function
    // is kept here to stay consistent with how they were accumulated.
    $size = drupal_strlen($line);
    $num_read += $size;
    $intv_read += $size;

    $line = trim($line);

    // Update the job status each time another $interval characters were read.
    if ($job and $intv_read >= $interval) {
      $intv_read = 0;
      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
      tripal_set_job_progress($job, intval(($num_read / $filesize) * 100));
    }

    if (preg_match('/^>/', $line)) {
      // A definition line starts a new sequence, so save the sequence that
      // was just finished (if any). The check is against NULL so that an ID
      // of "0" is still saved.
      if ($id !== NULL) {
        _tripal_feature_load_gff3_fasta_save($id, $residues);
      }

      // The ID used to look up the feature in the tripal_gff_temp table is
      // the name up to the first space.
      $id = preg_replace('/^>([^\s]+).*$/', '\1', $line);
      $residues = '';
    }
    else {
      // The line was already trimmed above.
      $residues .= $line;
    }
  }

  // Add in the last sequence, but only if a definition line was ever seen.
  // Otherwise there is nothing to store and no feature to look up.
  if ($id !== NULL) {
    _tripal_feature_load_gff3_fasta_save($id, $residues);
  }
}

/**
 * Store the residues of one FASTA sequence on its matching feature.
 *
 * Looks up the ID in the tripal_gff_temp table and, when a record is found,
 * sets the residues and seqlen of the corresponding feature. A warning is
 * reported when no record can be found.
 *
 * @param $id
 *   The sequence ID taken from the FASTA definition line.
 * @param $residues
 *   The concatenated residues for the sequence.
 */
function _tripal_feature_load_gff3_fasta_save($id, $residues) {
  $result = chado_select_record('tripal_gff_temp', array('*'), array('uniquename' => $id));

  // empty() treats both an empty result set and a failed lookup (a
  // non-array return value) as "not found", so count() is never called on a
  // non-countable value.
  if (empty($result)) {
    tripal_report_error('tripal_feature', TRIPAL_WARNING, 'Cannot find feature to assign FASTA sequence: %uname', 
    array('%uname' => $id));
    return;
  }

  // We have a feature, so add the residues.
  $feature = $result[0];
  $values = array(
    'residues' => $residues,
    'seqlen' => strlen($residues)
  );
  $match = array('feature_id' => $feature->feature_id);
  chado_update_record('feature', $match, $values);
}