function tripal_feature_load_gff3_target

2.x tripal_feature.gff_loader.inc tripal_feature_load_gff3_target($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup)
1.x gff_loader.inc tripal_feature_load_gff3_target($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup)
1 call to tripal_feature_load_gff3_target()
tripal_feature_load_gff3 in tripal_feature/includes/gff_loader.inc

File

tripal_feature/includes/gff_loader.inc, line 1894
@todo Add file header description

Code

/**
 * Loads the alignment described by a GFF3 'Target' attribute.
 *
 * Parses the first 'Target' value ("target_id start end [strand]"), resolves
 * the organism and type to use for the target feature, and hands the result
 * to tripal_feature_load_gff3_featureloc(). Problems are reported through
 * watchdog() under the 'T_gff3_loader' type.
 *
 * @param $feature
 *   The feature being aligned; passed through unchanged to
 *   tripal_feature_load_gff3_featureloc().
 * @param $tags
 *   Attributes of the GFF3 line, keyed by attribute name, each holding a list
 *   of values. Only the first value of 'Target', 'target_organism'
 *   (formatted as "genus:species") and 'target_type' is read.
 * @param $target_organism_id
 *   Organism ID used for the target unless 'target_organism' is present in
 *   $tags.
 * @param $target_type
 *   Name of a term in the 'sequence' vocabulary used as the target type
 *   unless 'target_type' is present in $tags. May be empty.
 * @param $create_target
 *   Passed through unchanged to tripal_feature_load_gff3_featureloc().
 * @param $attr_locgroup
 *   Passed through unchanged to tripal_feature_load_gff3_featureloc().
 */
function tripal_feature_load_gff3_target($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup) {
  // Read the raw Target value defensively so a missing attribute is reported
  // as a formatting error instead of raising an undefined-index notice.
  $target_raw = isset($tags['Target'][0]) ? $tags['Target'][0] : '';

  // Format is: "target_id start end [strand]", where strand is optional and
  // may be "+" or "-". The strand is captured without its leading whitespace,
  // at most once, and only those two characters are accepted.
  $target_parts = array();
  $matched = preg_match('/^(.*?)\s+(\d+)\s+(\d+)(?:\s+([+-]))?$/', trim($target_raw), $target_parts);

  // The target attribute is not correctly formatted.
  if (!$matched) {
    watchdog('T_gff3_loader', "Could not add 'Target' alignment as it is improperly formatted:  '%target'", 
    array('%target' => $target_raw), WATCHDOG_ERROR);
    return;
  }

  // The organism and type of the target may also be specified as attributes.
  $gff_target_organism = isset($tags['target_organism'][0]) ? $tags['target_organism'][0] : '';
  $gff_target_type = isset($tags['target_type'][0]) ? $tags['target_type'][0] : '';

  $target_feature = $target_parts[1];
  $start = (int) $target_parts[2];
  $end = (int) $target_parts[3];

  // preg_match() omits a trailing group that did not participate in the
  // match, so the strand must be tested with isset() before it is read.
  $target_strand = 0;
  if (isset($target_parts[4])) {
    $target_strand = ($target_parts[4] == '+') ? 1 : -1;
  }

  // Convert the 1-based inclusive GFF coordinates into an fmin/fmax pair,
  // tolerating a start that is larger than the end.
  $target_fmin = min($start, $end) - 1;
  $target_fmax = max($start, $end);

  // Default the target organism to the value passed into the function, but
  // if the GFF file specifies the target organism then use that instead.
  $t_organism_id = $target_organism_id;
  if ($gff_target_organism) {
    // Split the attribute into genus and species.
    $organism_parts = array();
    if (preg_match('/^(.*?):(.*?)$/', $gff_target_organism, $organism_parts)) {
      $values = array(
        'genus' => $organism_parts[1],
        'species' => $organism_parts[2],
      );
      $options = array('statement_name' => 'sel_organism_gesp');
      $torganism = tripal_core_chado_select('organism', array('organism_id'), $values, $options);
      // is_array() guards count() in case the select does not return a list
      // (NOTE(review): the failure return value is not visible here).
      if (is_array($torganism) && count($torganism) == 1) {
        $t_organism_id = $torganism[0]->organism_id;
      }
      else {
        watchdog('T_gff3_loader', "Cannot find organism for target %target.", 
        array('%target' => $gff_target_organism), WATCHDOG_WARNING);
        $t_organism_id = '';
      }
    }
    else {
      watchdog('T_gff3_loader', "The target_organism attribute is improperly formatted: %target. \n          It should be target_organism=genus:species.", 
      array('%target' => $gff_target_organism), WATCHDOG_WARNING);
      $t_organism_id = '';
    }
  }

  // Default the target type to the value passed into the function, but if
  // the GFF file specifies the target type then use that instead.
  $t_type_id = '';
  if ($target_type) {
    $values = array(
      'name' => $target_type,
      'cv_id' => array(
        'name' => 'sequence',
      )
    );
    $options = array('statement_name' => 'sel_cvterm_nacv');
    $type = tripal_core_chado_select('cvterm', array('cvterm_id'), $values, $options);
    if (is_array($type) && count($type) == 1) {
      $t_type_id = $type[0]->cvterm_id;
    }
    else {
      watchdog('T_gff3_loader', "The target type does not exist in the sequence ontology: %type. ", 
      array('%type' => $target_type), WATCHDOG_ERROR);
      // NOTE(review): this terminates the whole PHP process, not just this
      // function. Kept as-is because callers may rely on the hard stop.
      exit;
    }
  }
  if ($gff_target_type) {
    $values = array(
      'name' => $gff_target_type,
      'cv_id' => array(
        'name' => 'sequence',
      )
    );
    $options = array('statement_name' => 'sel_cvterm_nacv');
    $type = tripal_core_chado_select('cvterm', array('cvterm_id'), $values, $options);
    if (is_array($type) && count($type) == 1) {
      $t_type_id = $type[0]->cvterm_id;
    }
    else {
      watchdog('T_gff3_loader', "The target_type attribute does not exist in the sequence ontology: %type. ", 
      array('%type' => $gff_target_type), WATCHDOG_WARNING);
      $t_type_id = '';
    }
  }

  // These values are not available in this function. They were previously
  // passed as undefined variables (i.e. NULL plus a notice each); make the
  // NULLs explicit so the call below is unchanged but notice-free.
  // NOTE(review): confirm NULL is what the featureloc loader expects here.
  $organism = NULL;
  $phase = NULL;
  $attr_fmin_partial = NULL;
  $attr_fmax_partial = NULL;
  $attr_residue_info = NULL;

  // We want to add a featureloc record that uses the target feature as the
  // srcfeature (landmark) and the landmark as the feature.
  tripal_feature_load_gff3_featureloc($feature, $organism, $target_feature, $target_fmin, 
  $target_fmax, $target_strand, $phase, $attr_fmin_partial, $attr_fmax_partial, $attr_residue_info, 
  $attr_locgroup, $t_type_id, $t_organism_id, $create_target, TRUE);
}