protected function TripalProteinFASTADownloader::formatEntity

3.x TripalProteinFASTADownloader.inc protected TripalProteinFASTADownloader::formatEntity($entity)

Overrides TripalFieldDownloader::formatEntity

See also

TripalFieldDownloader::format()

File

tripal/includes/TripalFieldDownloaders/TripalProteinFASTADownloader.inc, line 23

Class

TripalProteinFASTADownloader

Code

/**
 * Formats a single entity as the lines of a protein FASTA record.
 *
 * Overrides TripalFieldDownloader::formatEntity.
 *
 * The first returned line is the definition line. It starts with '>' and is
 * followed by the unique identifier, name and accession (whichever are
 * populated) and then by "label:value; " pairs for the remaining single-valued
 * scalar fields. The following lines hold the protein residues wrapped at 50
 * characters. Fields with multiple values, and array-valued fields that are
 * not explicitly handled, are currently left out of the output.
 *
 * @param object $entity
 *   The entity to format. Its 'bundle' property and optional 'site_id'
 *   property select which entry of $this->fields is used.
 *
 * @return array
 *   The lines of the FASTA record: the definition line followed by zero or
 *   more sequence lines.
 */
protected function formatEntity($entity) {
  $lines = array();
  $site = !property_exists($entity, 'site_id') ? 'local' : $entity->site_id;
  $bundle_name = $entity->bundle;

  // Holds the list of sequence identifiers that will be used to build the
  // definition line.
  $identifiers = array(
    'identifier' => '',
    'name' => '',
    'accession' => '',
  );
  // Holds the list of non identifiers that will be used in the definition
  // line.
  $others = array();
  // Holds the sequence string for the FASTA item.
  $residues = '';

  // If no fields are registered for this site/bundle there is nothing to
  // iterate over; fall through and emit the "unknown identifier" defline.
  $fields = isset($this->fields[$site][$bundle_name]) ? $this->fields[$site][$bundle_name] : array();

  // Iterate through all of the fields and build the definition line and
  // the sequence string.
  foreach ($fields as $info) {
    $instance = $info['instance'];
    $field_name = $info['field']['field_name'];
    $accession = $instance['settings']['term_vocabulary'] . ':' . $instance['settings']['term_accession'];

    // If this field really is not attached to the entity then skip it.
    if (!property_exists($entity, $field_name)) {
      continue;
    }

    // Only single-valued fields are supported. A missing 'und' entry is
    // treated as having no values so that count() is never given a
    // non-countable.
    $items = isset($entity->{$field_name}['und']) ? $entity->{$field_name}['und'] : array();
    if (!is_array($items) || count($items) != 1) {
      // TODO: What to do with fields that have multiple values?
      continue;
    }

    // Without a value there is nothing to add to the record.
    if (!isset($items[0]['value'])) {
      continue;
    }
    $value = $items[0]['value'];

    switch ($accession) {
      // Add in the unique identifier for this sequence to the defline.
      case 'data:0842':
        $identifiers['identifier'] = $value;
        break;

      // Add in the non-unique name for this sequence to the defline.
      case 'schema:name':
        $identifiers['name'] = $value;
        break;

      // Add in the local site accession for this sequence to the defline.
      case 'data:2091':
        $identifiers['accession'] = $value;
        break;

      // Add in the sequence coordinates to the defline.
      case 'data:2012':
        // The coordinates are read from keyed sub-values, so a scalar value
        // cannot be formatted and is skipped.
        if (is_array($value)) {
          $others[$instance['label']] = $value["data:3002"] . ':' . $value["local:fmin"] . '-' . $value["local:fmax"] . $value["data:0853"];
        }
        break;

      // Skip the nucleotide sequence.
      case 'data:2044':
        break;

      // Get the protein sequence if it exists.
      case 'data:2976':
        $residues = $value;
        break;

      // Add in the organism.
      case 'OBI:0100026':
        if (is_array($value) && isset($value['rdfs:label'])) {
          $others[$instance['label']] = strip_tags($value['rdfs:label']);
        }
        break;

      // All other fields add them to the others list.
      default:
        if (!is_array($value)) {
          $others[$instance['label']] = $value;
        }
        // TODO: What to do with fields that are arrays?
        break;
    }
  }

  // First add the definition line. $identifiers always has its three keys,
  // so test whether any of them actually received a value rather than
  // counting the keys.
  if (count(array_filter($identifiers)) == 0) {
    $defline = ">Unknown feature identifier. The data collection must have a name or accession field";
    $lines[] = $defline;
  }
  else {
    $defline = ">";
    $defline .= $identifiers['identifier'] ? $identifiers['identifier'] . ' ' : '';
    $defline .= $identifiers['name'] ? $identifiers['name'] . ' ' : '';
    $defline .= $identifiers['accession'] ? $identifiers['accession'] . ' ' : '';

    foreach ($others as $k => $v) {
      if ($v) {
        // If the value has non alpha-numeric characters then wrap it in
        // quotes.
        if (preg_match('/[^\w]/', $v)) {
          $defline .= $k . ':"' . $v . '"; ';
        }
        else {
          $defline .= $k . ':' . $v . '; ';
        }
      }
    }
    $lines[] = $defline;
  }

  // Now add the residues, wrapped at 50 characters per line. The '|' break
  // marker is only used to split the wrapped string back into lines.
  if ($residues) {
    $sequence = explode('|', wordwrap($residues, 50, "|", TRUE));
    foreach ($sequence as $line) {
      $lines[] = $line;
    }
  }
  return $lines;
}