protected function TripalProteinFASTADownloader::formatEntity
3.x TripalProteinFASTADownloader.inc | protected TripalProteinFASTADownloader::formatEntity($entity) |
Overrides TripalFieldDownloader::formatEntity
See also
TripalFieldDownloader::format()
File
- tripal/
includes/ TripalFieldDownloaders/ TripalProteinFASTADownloader.inc, line 23
Class
Code
/**
 * Formats a single entity as the lines of one protein FASTA record.
 *
 * The first returned line is the definition line. It is built from the
 * sequence identifier (data:0842), name (schema:name) and accession
 * (data:2091) fields, followed by 'label:value; ' pairs for the remaining
 * single-valued scalar fields. The protein sequence (data:2976), if present,
 * follows, wrapped at 50 characters per line. The nucleotide sequence
 * (data:2044) is never written.
 *
 * Fields with more than one value, and unrecognized fields whose value is an
 * array, are currently left out of the definition line.
 *
 * @param $entity
 *   The entity to format. Must expose a 'bundle' property and may expose a
 *   'site_id' property; when 'site_id' is absent the 'local' site is used.
 *
 * @return
 *   An array of strings, one per line of the FASTA record.
 *
 * @see TripalFieldDownloader::format()
 */
protected function formatEntity($entity) {
  $lines = array();
  $site = !property_exists($entity, 'site_id') ? 'local' : $entity->site_id;
  $bundle_name = $entity->bundle;

  // Holds the list of sequence identifiers that will be used to build the
  // definition line. The key order here is the order used in the defline.
  $identifiers = array(
    'identifier' => '',
    'name' => '',
    'accession' => '',
  );

  // Holds the list of non identifiers that will be used in the definition
  // line.
  $others = array();

  // Holds the sequence string for the FASTA item.
  $residues = '';

  // Guard against a site/bundle combination with no registered fields so
  // that the loop below does not iterate over an undefined index.
  $field_list = array();
  if (isset($this->fields[$site][$bundle_name]) && is_array($this->fields[$site][$bundle_name])) {
    $field_list = $this->fields[$site][$bundle_name];
  }

  // Iterate through all of the fields and build the definition line and
  // the sequence string.
  foreach ($field_list as $info) {
    $instance = $info['instance'];
    $field_name = $info['field']['field_name'];

    // If this field really is not attached to the entity then skip it.
    if (!property_exists($entity, $field_name)) {
      continue;
    }

    // Fetch the field items defensively: calling count() on a missing or
    // non-array 'und' entry raises a warning (or a TypeError on PHP 8).
    $items = array();
    if (isset($entity->{$field_name}['und']) && is_array($entity->{$field_name}['und'])) {
      $items = $entity->{$field_name}['und'];
    }

    // Only single-valued fields are supported.
    // TODO: What to do with fields that have multiple values?
    if (count($items) != 1 || !isset($items[0]['value'])) {
      continue;
    }
    $value = $items[0]['value'];
    $accession = $instance['settings']['term_vocabulary'] . ':' . $instance['settings']['term_accession'];

    // Add in the unique identifier for this sequence to the defline.
    if ($accession == 'data:0842') {
      $identifiers['identifier'] = $value;
    }
    // Add in the non-unique name for this sequence to the defline.
    elseif ($accession == 'schema:name') {
      $identifiers['name'] = $value;
    }
    // Add in the local site accession for this sequence to the defline.
    elseif ($accession == 'data:2091') {
      $identifiers['accession'] = $value;
    }
    // Add in the sequence coordinates to the defline.
    elseif ($accession == 'data:2012') {
      if (is_array($value)) {
        // Fill in any missing coordinate parts with empty strings so that a
        // partially populated value does not trigger undefined index notices.
        $coords = $value + array(
          'data:3002' => '',
          'local:fmin' => '',
          'local:fmax' => '',
          'data:0853' => '',
        );
        $others[$instance['label']] = $coords['data:3002'] . ':' . $coords['local:fmin'] . '-' . $coords['local:fmax'] . $coords['data:0853'];
      }
    }
    // Skip the nucleotide sequence.
    elseif ($accession == 'data:2044') {
      // Do nothing.
    }
    // Get the protein sequence if it exists.
    elseif ($accession == 'data:2976') {
      $residues = $value;
    }
    // Add in the organism.
    elseif ($accession == 'OBI:0100026') {
      if (is_array($value) && isset($value['rdfs:label'])) {
        $others[$instance['label']] = strip_tags($value['rdfs:label']);
      }
    }
    // All other fields add them to the others list.
    elseif (!is_array($value)) {
      $others[$instance['label']] = $value;
    }
    // TODO: What to do with fields that are arrays?
  }

  // First add the definition line. The identifier array always has its three
  // keys, so test for non-empty values rather than for the presence of keys.
  $known_ids = array_filter($identifiers);
  if (count($known_ids) == 0) {
    $lines[] = ">Unknown feature identifier. The data collection must have a name or accession field";
  }
  else {
    $defline = ">";
    foreach ($known_ids as $id) {
      $defline .= $id . ' ';
    }
    foreach ($others as $k => $v) {
      if ($v) {
        // If the value has non alpha-numeric characters then wrap it in
        // quotes.
        if (preg_match('/[^\w]/', $v)) {
          $defline .= $k . ':"' . $v . '"; ';
        }
        else {
          $defline .= $k . ':' . $v . '; ';
        }
      }
    }
    $lines[] = $defline;
  }

  // Now add the residues, wrapped at 50 characters per line. The pipe is
  // only a temporary separator used to split the wrapped string into lines.
  if ($residues) {
    foreach (explode('|', wordwrap($residues, 50, "|", TRUE)) as $line) {
      $lines[] = $line;
    }
  }

  return $lines;
}