function tripal_feature_load_fasta_residues

2.x tripal_feature.fasta_loader.inc tripal_feature_load_fasta_residues($fh, $feature_id, $seq_start, $seq_end)

Adds the residues column to the feature.

This function seeks to the start of the sequence in the file, reads the sequence in chunks, and appends each chunk to the feature.residues column in the database.

Parameters

resource $fh: An open, seekable file handle for the FASTA file.

int $feature_id: The feature_id of the feature record whose residues column is populated.

int $seq_start: The byte offset in the file at which the sequence begins.

int $seq_end: The byte offset in the file at which the sequence ends.

1 call to tripal_feature_load_fasta_residues()
tripal_feature_load_fasta_feature in tripal_feature/includes/tripal_feature.fasta_loader.inc
A helper function for tripal_feature_load_fasta() to load a single feature

File

tripal_feature/includes/tripal_feature.fasta_loader.inc, line 923
Provides fasta loading functionality. Creates features based on their specification in a fasta file.

Code

/**
 * Adds the residues column to the feature.
 *
 * Seeks to the start of the sequence in the file, reads it line by line,
 * strips surrounding whitespace from each line and appends the result to
 * the feature.residues column in chunks. Once the sequence is loaded the
 * seqlen and md5checksum columns are recalculated from the stored residues.
 * Progress and memory usage are printed to standard output while loading.
 *
 * @param $fh
 *   An open, seekable file handle for the FASTA file.
 * @param $feature_id
 *   The feature_id of the feature record to update.
 * @param $seq_start
 *   The byte offset in the file at which the sequence begins.
 * @param $seq_end
 *   The byte offset in the file at which the sequence ends. Reading stops
 *   as soon as this offset has been reached or passed.
 *
 * @return
 *   TRUE if the residues were loaded, FALSE if the file could not be
 *   positioned or a database update failed.
 */
function tripal_feature_load_fasta_residues($fh, $feature_id, $seq_start, $seq_end) {

  // First position the file at the beginning of the sequence.
  if (fseek($fh, $seq_start, SEEK_SET) !== 0) {
    return FALSE;
  }

  // Number of raw bytes (newlines included) to read before the accumulated
  // residues are flushed to the database.
  $chunk_size = 100000000;
  $seqlen = ($seq_end - $seq_start);

  // Calculate the interval at which we update the percent complete. We don't
  // want to repeat the update too often or it slows things down, so the
  // interval is 1% of the sequence but never less than 100000 bytes.
  $interval = max(intval($seqlen * 0.01), 100000);

  // First, make sure we don't have a null in the residues, otherwise the
  // concatenation below would yield null.
  $sql = "UPDATE {feature} SET residues = '' WHERE feature_id = :feature_id";
  if (!chado_query($sql, array(':feature_id' => $feature_id))) {
    return FALSE;
  }

  $append_sql = "
    UPDATE {feature}
    SET residues = residues || :chunk
    WHERE feature_id = :feature_id
  ";

  print "Sequence complete: 0%. Memory: " . number_format(memory_get_usage()) . " bytes. \r";

  $chunk = '';
  // Raw bytes read since the last flush to the database.
  $chunk_bytes = 0;
  // Raw bytes read since the last progress report.
  $intv_read = 0;
  // Raw bytes read in total. Because reading starts at $seq_start the
  // sequence is finished once this reaches $seqlen.
  $bytes_read = 0;

  // Read lines until the end of the sequence is reached. The bound is checked
  // before each read so that an empty sequence reads nothing, and the strict
  // comparison with FALSE keeps a line such as "0" from ending the loop.
  while ($bytes_read < $seqlen && ($line = fgets($fh)) !== FALSE) {
    // fgets() returns the line with its newline, so strlen() is the exact
    // number of bytes consumed from the file.
    $line_len = strlen($line);
    $bytes_read += $line_len;
    $chunk_bytes += $line_len;
    $intv_read += $line_len;
    $chunk .= trim($line);

    // If we've read in enough of the sequence then append it to the database.
    if ($chunk_bytes >= $chunk_size) {
      $success = chado_query($append_sql, array(
        ':feature_id' => $feature_id,
        ':chunk' => $chunk,
      ));
      if (!$success) {
        return FALSE;
      }
      $chunk = '';
      $chunk_bytes = 0;
    }

    if ($intv_read >= $interval) {
      // $seqlen is positive inside the loop so the division is safe.
      $percent = sprintf("%.2f", min(100, ($bytes_read / $seqlen) * 100));
      print "Sequence complete: " . $percent . "%. Memory: " . number_format(memory_get_usage()) .
        " bytes. \r";
      $intv_read = 0;
    }
  }

  // Write the last bit of sequence if any remains.
  if (strlen($chunk) > 0) {
    $success = chado_query($append_sql, array(
      ':feature_id' => $feature_id,
      ':chunk' => $chunk,
    ));
    if (!$success) {
      return FALSE;
    }
  }

  // Now update the seqlen and md5checksum fields.
  $sql = "UPDATE {feature} SET seqlen = char_length(residues),  md5checksum = md5(residues) WHERE feature_id = :feature_id";
  if (!chado_query($sql, array(':feature_id' => $feature_id))) {
    return FALSE;
  }

  // An empty sequence is trivially complete; this also avoids dividing by
  // zero.
  $percent = sprintf("%.2f", $seqlen > 0 ? min(100, ($bytes_read / $seqlen) * 100) : 100);
  print "Sequence complete: " . $percent . "%. Memory: " . number_format(memory_get_usage()) .
    " bytes. \r";

  return TRUE;
}