TripalProteinFASTADownloader.inc

File

tripal/includes/TripalFieldDownloaders/TripalProteinFASTADownloader.inc
View source
  1. <?php
  /**
   * Field downloader that writes entities as protein FASTA records.
   *
   * Each entity becomes one record: a definition line built from the
   * entity's identifier, name and accession fields (followed by any other
   * single-valued fields as key:value pairs) and then the protein sequence
   * split into lines of 50 residues.
   */
  class TripalProteinFASTADownloader extends TripalFieldDownloader {

    /**
     * Sets the label shown to the user describing this formatter. It
     * should be a short identifier. Use the $full_label for a more
     * descriptive label.
     */
    static public $label = 'FASTA';

    /**
     * A more verbose label that better describes the formatter.
     */
    static public $full_label = 'Protein FASTA';

    /**
     * Indicates the default extension for the output file.
     */
    static public $default_extension = 'faa';

    /**
     * Builds the lines of a single FASTA record for an entity.
     *
     * @param object $entity
     *   The entity to format. It must have a 'bundle' property, may have a
     *   'site_id' property (when absent the site is treated as 'local'), and
     *   carries its field values as properties keyed by field name in the
     *   form $entity->{field_name}['und'][delta]['value'].
     *
     * @return array
     *   The lines of the record: the definition line first, followed by the
     *   sequence lines (if a protein sequence field was found). Lines do not
     *   include a trailing newline.
     *
     * @see TripalFieldDownloader::format()
     */
    protected function formatEntity($entity) {
      $lines = array();
      $site = property_exists($entity, 'site_id') ? $entity->site_id : 'local';
      $bundle_name = $entity->bundle;

      // Holds the list of sequence identifiers that will be used to build the
      // definition line.
      $identifiers = array(
        'identifier' => '',
        'name' => '',
        'accession' => '',
      );

      // Holds the list of non identifiers that will be used in the definition
      // line.
      $others = array();

      // Holds the sequence string for the FASTA item.
      $residues = '';

      // If no fields are registered for this site/bundle there is nothing to
      // iterate over; fall back to an empty list rather than raising a
      // warning.
      $fields = array();
      if (isset($this->fields[$site][$bundle_name]) && is_array($this->fields[$site][$bundle_name])) {
        $fields = $this->fields[$site][$bundle_name];
      }

      // Iterate through all of the fields and build the definition line and
      // the sequence string.
      foreach ($fields as $info) {
        $instance = $info['instance'];
        $field_name = $info['field']['field_name'];
        $accession = $instance['settings']['term_vocabulary'] . ':' . $instance['settings']['term_accession'];

        // If this field really is not attached to the entity then skip it.
        if (!property_exists($entity, $field_name)) {
          continue;
        }

        // Skip fields that carry no items at all.
        $items = $entity->{$field_name};
        if (!is_array($items) || !isset($items['und']) || !is_array($items['und'])) {
          continue;
        }

        // Only single-valued fields are supported.
        // TODO: What to do with fields that have multiple values?
        if (count($items['und']) != 1) {
          continue;
        }

        // A missing or NULL value contributes nothing to the record.
        if (!isset($items['und'][0]['value'])) {
          continue;
        }
        $value = $items['und'][0]['value'];

        switch ($accession) {
          // Add in the unique identifier for this sequence to the defline.
          case 'data:0842':
            $identifiers['identifier'] = $value;
            break;

          // Add in the non-unique name for this sequence to the defline.
          case 'schema:name':
            $identifiers['name'] = $value;
            break;

          // Add in the local site accession for this sequence to the defline.
          case 'data:2091':
            $identifiers['accession'] = $value;
            break;

          // Add in the sequence coordinates to the defline, formatted as
          // <data:3002>:<local:fmin>-<local:fmax><data:0853>. Any missing
          // component is rendered as an empty string.
          case 'data:2012':
            if (is_array($value)) {
              $coords = array();
              foreach (array('data:3002', 'local:fmin', 'local:fmax', 'data:0853') as $key) {
                $coords[$key] = isset($value[$key]) ? $value[$key] : '';
              }
              $others[$instance['label']] = $coords['data:3002'] . ':' . $coords['local:fmin'] . '-' . $coords['local:fmax'] . $coords['data:0853'];
            }
            break;

          // Skip the nucleotide sequence.
          case 'data:2044':
            break;

          // Get the protein sequence if it exists.
          case 'data:2976':
            $residues = $value;
            break;

          // Add in the organism. Its label may contain markup, so strip tags.
          case 'OBI:0100026':
            if (is_array($value) && isset($value['rdfs:label'])) {
              $others[$instance['label']] = strip_tags($value['rdfs:label']);
            }
            break;

          // All other fields: add them to the others list.
          default:
            if (!is_array($value)) {
              $others[$instance['label']] = $value;
            }
            // TODO: What to do with fields that are arrays?
            break;
        }
      }

      // First add the definition line. The identifiers array always has its
      // three keys, so test whether any of them actually received a value.
      if (count(array_filter($identifiers)) == 0) {
        $defline = ">Unknown feature identifier. The data collection must have a name or accession field";
      }
      else {
        $defline = ">";
        foreach (array('identifier', 'name', 'accession') as $key) {
          if ($identifiers[$key]) {
            $defline .= $identifiers[$key] . ' ';
          }
        }
        foreach ($others as $k => $v) {
          if (!$v) {
            continue;
          }
          // If the value has non alpha-numeric characters then wrap it in
          // quotes.
          if (preg_match('/[^\w]/', $v)) {
            $defline .= $k . ':"' . $v . '"; ';
          }
          else {
            $defline .= $k . ':' . $v . '; ';
          }
        }
      }
      $lines[] = $defline;

      // Now add the residues, 50 per line. Whitespace is removed first so
      // that stray spaces or line breaks in the stored sequence cannot
      // produce ragged or broken sequence lines.
      if ($residues) {
        $sequence = (string) preg_replace('/\s+/', '', $residues);
        if ($sequence !== '') {
          foreach (str_split($sequence, 50) as $line) {
            $lines[] = $line;
          }
        }
      }

      return $lines;
    }

    /**
     * Returns the header for the output file.
     *
     * This implementation is intentionally empty and therefore returns NULL;
     * no header lines are produced for FASTA output.
     *
     * @see TripalFieldDownloader::getHeader()
     */
    protected function getHeader() {
    }

  }