function tripal_feature_load_gff3_target

2.x tripal_feature.gff_loader.inc tripal_feature_load_gff3_target($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup)
1.x gff_loader.inc tripal_feature_load_gff3_target($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup)

Loads the Target attribute of a GFF3 record.

Parameters

$feature:

$tags:

$target_organism_id:

$target_type:

$create_target:

$attr_locgroup:

Related topics

1 call to tripal_feature_load_gff3_target()
tripal_feature_load_gff3 in tripal_feature/includes/tripal_feature.gff_loader.inc
Actually load a GFF3 file. This is the function called by Tripal jobs.

File

tripal_feature/includes/tripal_feature.gff_loader.inc, line 2185
Provides gff3 loading functionality. Creates features based on their specification in a GFF3 file.

Code

/**
 * Load the Target attribute of a GFF3 record.
 *
 * Parses the first 'Target' value ("target_id start end [strand]"), resolves
 * the organism and type to use for the target, and then hands the alignment
 * to tripal_feature_load_gff3_featureloc().
 *
 * @param $feature
 *   Passed unchanged as the first argument of
 *   tripal_feature_load_gff3_featureloc(). Presumably the feature record that
 *   carries the Target attribute -- confirm against the caller.
 * @param $tags
 *   The GFF3 attributes, keyed by attribute name, where each entry is an
 *   array of values. Only the first value of 'Target', 'target_organism' and
 *   'target_type' is read.
 * @param $target_organism_id
 *   The organism_id used for the target unless the record supplies a
 *   'target_organism' attribute (formatted as genus:species).
 * @param $target_type
 *   The name of a term in the 'sequence' vocabulary used as the target type
 *   unless the record supplies a 'target_type' attribute.
 * @param $create_target
 *   Passed unchanged to tripal_feature_load_gff3_featureloc().
 * @param $attr_locgroup
 *   Passed unchanged to tripal_feature_load_gff3_featureloc().
 */
function tripal_feature_load_gff3_target($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup) {
  // Guard against a missing Target attribute so an absent value is reported
  // as improperly formatted rather than raising an undefined index notice.
  $raw_target = isset($tags['Target'][0]) ? $tags['Target'][0] : '';

  // Format is: "target_id start end [strand]", where strand is optional and
  // may be "+" or "-". The strand class is [+-]: the previous [\+|\-] also
  // accepted a literal "|".
  $matched = preg_match('/^(.*?)\s+(\d+)\s+(\d+)(\s+[+-])?$/', trim($raw_target), $matches);

  // The target attribute is not correctly formatted.
  if (!$matched) {
    tripal_report_error('tripal_feature', TRIPAL_ERROR, "Could not add 'Target' alignment as it is improperly formatted:  '%target'", 
    array('%target' => $raw_target));
    return;
  }

  // The organism and type of the target may also be specified as attributes.
  // If so, then get that information.
  $gff_target_organism = isset($tags['target_organism'][0]) ? $tags['target_organism'][0] : '';
  $gff_target_type = isset($tags['target_type'][0]) ? $tags['target_type'][0] : '';

  $target_feature = $matches[1];
  $start = (int) $matches[2];
  $end = (int) $matches[3];

  // Convert the optional strand to a numeric value. preg_match() leaves the
  // trailing group out of $matches entirely when it did not participate, so
  // the index must be checked before it is read.
  $strand_token = isset($matches[4]) ? trim($matches[4]) : '';
  if ($strand_token === '+') {
    $target_strand = 1;
  }
  elseif ($strand_token === '-') {
    $target_strand = -1;
  }
  else {
    $target_strand = 0;
  }

  // Convert the 1-based inclusive GFF coordinates into fmin/fmax, swapping
  // them if the record lists the end before the start.
  $target_fmin = $start - 1;
  $target_fmax = $end;
  if ($end < $start) {
    $target_fmin = $end - 1;
    $target_fmax = $start;
  }

  // Default the target organism to be the value passed into the function, but
  // if the GFF file specifies the target organism then use that instead.
  $t_organism_id = $target_organism_id;
  if ($gff_target_organism) {
    // Get the genus and species. A separate match array is used so the Target
    // matches above are not overwritten.
    $success = preg_match('/^(.*?):(.*?)$/', $gff_target_organism, $organism_matches);
    if ($success) {
      $values = array(
        'genus' => $organism_matches[1],
        'species' => $organism_matches[2],
      );
      $torganism = chado_select_record('organism', array('organism_id'), $values);
      if (count($torganism) == 1) {
        $t_organism_id = $torganism[0]->organism_id;
      }
      else {
        tripal_report_error('tripal_feature', TRIPAL_WARNING, "Cannot find organism for target %target.", 
        array('%target' => $gff_target_organism));
        $t_organism_id = '';
      }
    }
    else {
      tripal_report_error('tripal_feature', TRIPAL_WARNING, "The target_organism attribute is improperly formatted: %target.
          It should be target_organism=genus:species.", 
      array('%target' => $gff_target_organism));
      $t_organism_id = '';
    }
  }

  // Default the target type to be the value passed into the function, but if
  // the GFF file specifies the target type then use that instead.
  $t_type_id = '';
  if ($target_type) {
    $values = array(
      'name' => $target_type,
      'cv_id' => array(
        'name' => 'sequence',
      )
    );
    $type = chado_select_record('cvterm', array('cvterm_id'), $values);
    if (count($type) == 1) {
      $t_type_id = $type[0]->cvterm_id;
    }
    else {
      tripal_report_error('tripal_feature', TRIPAL_ERROR, "The target type does not exist in the sequence ontology: %type. ", 
      array('%type' => $target_type));
      // NOTE(review): this terminates the whole PHP process, not just this
      // record. Kept as-is because callers may rely on the abort.
      exit;
    }
  }
  if ($gff_target_type) {
    $values = array(
      'name' => $gff_target_type,
      'cv_id' => array(
        'name' => 'sequence',
      )
    );

    // Get the cvterm_id for the target type.
    $type = chado_select_record('cvterm', array('cvterm_id'), $values);
    if (count($type) == 1) {
      $t_type_id = $type[0]->cvterm_id;
    }
    else {
      // Check to see if this is a synonym.
      $sql = "
          SELECT CVTS.cvterm_id
          FROM {cvtermsynonym} CVTS
            INNER JOIN {cvterm} CVT ON CVT.cvterm_id = CVTS.cvterm_id
            INNER JOIN {cv} CV      ON CV.cv_id = CVT.cv_id
          WHERE CV.name = 'sequence' and CVTS.synonym = :synonym
        ";
      $synonym = chado_query($sql, array(':synonym' => $gff_target_type))->fetchObject();
      if ($synonym) {
        $t_type_id = $synonym->cvterm_id;
      }
      else {
        tripal_report_error('tripal_feature', TRIPAL_WARNING, "The target_type attribute does not exist in the sequence ontology: %type. ", 
        array('%type' => $gff_target_type));
        $t_type_id = '';
      }
    }
  }

  // The organism, phase, partial-coordinate and residue-info arguments were
  // never defined in this function, so they were always passed as NULL (with
  // an undefined variable notice each). They are now passed as NULL
  // explicitly so the call behaves the same without the notices.
  $organism = NULL;
  $phase = NULL;
  $attr_fmin_partial = NULL;
  $attr_fmax_partial = NULL;
  $attr_residue_info = NULL;

  // We want to add a featureloc record that uses the target feature as the
  // srcfeature (landmark) and the landmark as the feature.
  tripal_feature_load_gff3_featureloc($feature, $organism, $target_feature, $target_fmin, 
  $target_fmax, $target_strand, $phase, $attr_fmin_partial, $attr_fmax_partial, $attr_residue_info, 
  $attr_locgroup, $t_type_id, $t_organism_id, $create_target, TRUE);
}