tripal_phylogeny.taxonomy.inc

  1. 2.x tripal_phylogeny/includes/tripal_phylogeny.taxonomy.inc
  2. 3.x legacy/tripal_phylogeny/includes/tripal_phylogeny.taxonomy.inc

File

tripal_phylogeny/includes/tripal_phylogeny.taxonomy.inc
View source
  1. <?php
  /**
   * Generates a page that contains the taxonomy view.
   *
   * Looks up the phylotree record whose type is the 'taxonomy' term. When one
   * is found it is wrapped in a stub node object and rendered with the
   * 'tripal_phylogeny_taxonomic_tree' theme hook. Otherwise a placeholder
   * message is shown. In both cases the administrative help message built by
   * tripal_set_message() is appended to the output.
   *
   * @return
   *   An HTML string when a taxonomy tree exists, or a render array containing
   *   a placeholder message when it does not.
   */
  function tripal_phylogeny_taxonomy_view() {
    // Match the phylotree by its type term name.
    $values = array(
      'type_id' => array(
        'name' => 'taxonomy',
      ),
    );

    // The message is kept as a single literal so that the string handed to t()
    // does not depend on source-code line breaks or indentation. The previous
    // text ended with a closing </p> that had no matching opening tag.
    $message = t('Site administrators: This page is meant to provide a hierarchical taxonomic tree for all of the organisms present in this site. This may not be useful if you only have a few species. If so, you can turn off this page by disabling this page on the site\'s <a href="@menu">Navigation Menu</a>. Otherwise, to generate the taxonomy go to this site\'s <a href="@taxloader">NCBI taxonomy loader</a> to import the taxonomy information from NCBI. <br><br>Note: If you add new species to this site, you should rerun the NCBI taxonomy loader to update the view.',
      array(
        '@menu' => url('admin/structure/menu/manage/navigation'),
        '@taxloader' => url('admin/tripal/loaders/ncbi_taxonomy_loader'),
      )
    );
    $admin_message = tripal_set_message($message, TRIPAL_INFO, array('return_html' => TRUE));

    $phylotree = chado_generate_var('phylotree', $values);
    if ($phylotree) {
      // The theme hook expects a node-like object carrying the phylotree.
      $node = new stdClass();
      $node->phylotree = $phylotree;
      return theme('tripal_phylogeny_taxonomic_tree', array('node' => $node)) . $admin_message;
    }

    return array(
      '#type' => 'markup',
      '#markup' => t('This site has not yet prepared the taxonomy for viewing.') . $admin_message,
    );
  }
  /**
   * Form builder for the NCBI taxonomy import form.
   *
   * The form holds a single confirmation checkbox ('import_existing') and a
   * submit button named 'import'. The 'instructions' item is present but its
   * markup is empty.
   *
   * @param $form
   *   The form array passed in by the form API.
   * @param $form_state
   *   The form state array, passed by reference.
   *
   * @return
   *   The completed form array.
   */
  function tripal_phylogeny_taxonomy_load_form($form, &$form_state) {
    $form['instructions'] = array(
      '#type' => 'item',
      '#markup' => '',
    );

    // All user-facing labels go through t() so that they can be translated,
    // matching how the description was already handled.
    $form['import_existing'] = array(
      '#type' => 'checkbox',
      '#title' => t('Import taxonomy for existing species.'),
      '#description' => t('The NCBI Taxonomic Importer examines the organisms currently present in the database and queries NCBI for the taxonomic details. If the importer is able to match the genus and species with NCBI the species details will be imported, and a page containing the taxonomic tree will be created.'),
    );

    $form['submit'] = array(
      '#type' => 'submit',
      '#name' => 'import',
      '#value' => t('Submit'),
    );

    return $form;
  }
  /**
   * Validation handler for the NCBI taxonomy import form.
   *
   * Requires the 'import_existing' checkbox to be ticked before the form may
   * be submitted.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state array, passed by reference. Only
   *   $form_state['values']['import_existing'] is read.
   */
  function tripal_phylogeny_taxonomy_load_form_validate($form, &$form_state) {
    if (!$form_state['values']['import_existing']) {
      // The error must be keyed by the element's real name ('import_existing')
      // so that it is attached to the checkbox.
      form_set_error('import_existing', t('Please confirm the import by clicking the checkbox.'));
    }
  }
  /**
   * Submit handler for the NCBI taxonomy import form.
   *
   * When the 'import_existing' checkbox is set, a Tripal job is queued that
   * runs tripal_phylogeny_ncbi_taxonomy_import() on behalf of the current
   * user. Nothing happens when the checkbox is not set.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state array, passed by reference.
   */
  function tripal_phylogeny_taxonomy_load_form_submit($form, &$form_state) {
    global $user;

    // Nothing to queue unless the import was confirmed.
    if (!$form_state['values']['import_existing']) {
      return;
    }

    // The import callback takes no arguments of its own.
    $job_args = array();
    tripal_add_job(
      "Import NCBI Taxonomy",
      'tripal_phylogeny',
      'tripal_phylogeny_ncbi_taxonomy_import',
      $job_args,
      $user->uid
    );
  }
  /**
   * Retrieves a URL and parses the response body as XML.
   *
   * Private helper for tripal_phylogeny_ncbi_taxonomy_import().
   *
   * @param $url
   *   The URL to retrieve.
   *
   * @return
   *   A SimpleXMLElement built from the response body.
   *
   * @throws Exception
   *   If the URL cannot be opened or read, or (from the SimpleXMLElement
   *   constructor) if the body is not well-formed XML.
   */
  function _tripal_phylogeny_taxonomy_fetch_xml($url) {
    $rfh = fopen($url, "r");
    // fopen() returns FALSE on failure. Reading from FALSE would never reach
    // end-of-file, so fail loudly instead.
    if (!$rfh) {
      throw new Exception("Unable to open a connection to: $url");
    }
    $xml_text = stream_get_contents($rfh);
    fclose($rfh);
    if ($xml_text === FALSE) {
      throw new Exception("Unable to read the response from: $url");
    }
    return new SimpleXMLElement($xml_text);
  }

  /**
   * Imports NCBI taxonomy details for every organism and rebuilds the tree.
   *
   * For each record in the organism table the genus and species are searched
   * for in the NCBI taxonomy database. When a taxonomy ID is found, the taxon
   * record is fetched, a set of organism properties is stored, and the
   * organism's lineage is merged into an in-memory tree. After all organisms
   * are processed the tree is imported into the taxonomy phylotree and the
   * phylotree is synced.
   *
   * All work happens inside a database transaction that is rolled back if an
   * exception is thrown.
   *
   * @param $job_id
   *   The ID of the Tripal job running this import. Not used by this function.
   */
  function tripal_phylogeny_ncbi_taxonomy_import($job_id) {

    print "\nNOTE: Importing of NCBI taxonomy data is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // TODO: there should be an API function named tripal_insert_analysis().
      // But until then we have to insert the analysis manually.

      // Get the version of this module for the analysis record.
      $info = system_get_info('module', 'tripal_phylogeny');
      $version = $info['version'];
      $analysis_name = 'NCBI Taxonomy Tree Import';

      // If the analysis record already exists then don't add it again.
      $analysis = chado_select_record('analysis', array('*'), array('name' => $analysis_name));
      if (count($analysis) == 0) {
        $values = array(
          'name' => $analysis_name,
          'description' => 'Used to import NCBI taxonomy details for organisms in this database.',
          'program' => 'Tripal Phylogeny Module NCBI Taxonomy Importer',
          'programversion' => $version,
          'sourcename' => 'NCBI Taxonomy',
          'sourceuri' => 'http://www.ncbi.nlm.nih.gov/taxonomy',
        );
        $analysis = chado_insert_record('analysis', $values);
        if (!$analysis) {
          throw new Exception("Cannot add NCBI Taxonomy Tree Import Analysis.");
        }
        // The record is read below with object syntax ($analysis->analysis_id).
        // Normalise the inserted record so that works whether the insert
        // returned an array or an object.
        $analysis = (object) $analysis;
      }
      else {
        $analysis = $analysis[0];
      }

      // If the tree already exists then don't insert it again.
      // NOTE(review): $site_name is declared global here but nothing in this
      // file assigns it; if it is unset the tree name is just 'Taxonomy Tree'.
      // The name is left as-is so that a previously imported tree is still
      // found by name. Confirm before changing.
      global $site_name;
      $tree_name = $site_name . 'Taxonomy Tree';
      $phylotree = chado_select_record('phylotree', array('*'), array('name' => $tree_name));
      if (count($phylotree) == 0) {
        // Add the taxonomic tree.
        $options = array(
          'name' => $tree_name,
          'description' => 'The taxonomic tree of species present on this site. Click a species name for more details.',
          'leaf_type' => 'taxonomy',
          'analysis_id' => $analysis->analysis_id,
          'tree_file' => '/dev/null',
          'format' => 'taxonomy',
          'no_load' => TRUE,
        );
        $errors = array();
        $warnings = array();
        $success = tripal_insert_phylotree($options, $errors, $warnings);
        if (!$success) {
          throw new Exception("Cannot add the Taxonomy Tree record.");
        }
        // Presumably tripal_insert_phylotree() adds 'phylotree_id' to $options;
        // the code below depends on it. Verify against that function.
        $phylotree = (object) $options;
      }
      else {
        $phylotree = $phylotree[0];
      }

      // Clean out the phylotree in the event this is a reload.
      chado_delete_record('phylonode', array('phylotree_id' => $phylotree->phylotree_id));

      // The taxonomic tree must have a root, so create that first.
      $tree = array(
        'name' => 'root',
        'depth' => 0,
        'is_root' => 1,
        'is_leaf' => 0,
        'is_internal' => 0,
        'left_index' => 0,
        'right_index' => 0,
        'branch_set' => array(),
      );

      // Get the "rank" cvterm from the tripal_phylogeny vocabulary. Every node
      // stores its taxonomic rank under this term, so it must exist.
      $rank_cvterm = tripal_get_cvterm(array(
        'name' => 'rank',
        'cv_id' => array('name' => 'tripal_phylogeny'),
      ));
      if (!$rank_cvterm) {
        throw new Exception("Cannot find the 'rank' term in the 'tripal_phylogeny' vocabulary.");
      }

      // Maps the element names found under <OtherNames> to the organism
      // property term used to store them. 'Name' stanzas are skipped on
      // purpose and anything else is reported and skipped.
      $name_terms = array(
        'GenbankCommonName' => 'genbank_common_name',
        'Synonym' => 'synonym',
        'CommonName' => 'other_name',
        'Includes' => 'other_name',
        'EquivalentName' => 'equivalent_name',
        'Anamorph' => 'anamorph',
      );

      // Get the list of organisms.
      $organisms = chado_query("SELECT O.* FROM {organism} O");
      while ($organism = $organisms->fetchObject()) {

        // Search NCBI for this species to get its taxonomy ID.
        $term = urlencode($organism->genus . ' ' . $organism->species);
        $search_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?" .
          "db=taxonomy" .
          "&term=$term";
        $xml = _tripal_phylogeny_taxonomy_fetch_xml($search_url);
        if (!$xml) {
          continue;
        }
        $taxid = (string) $xml->IdList->Id;
        if (!$taxid) {
          continue;
        }
        print "$taxid\t$organism->genus $organism->species\n";

        // If we have a taxonomy ID we can now get the details.
        $fetch_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?" .
          "db=taxonomy" .
          "&id=$taxid";
        $xml = _tripal_phylogeny_taxonomy_fetch_xml($fetch_url);
        if (!$xml) {
          continue;
        }
        $taxon = $xml->Taxon;

        // Add in the single-valued organism properties.
        $lineage = (string) $taxon->Lineage;
        $properties = array(
          'lineage' => $lineage,
          'genetic_code' => (string) $taxon->GeneticCode->GCId,
          'genetic_code_name' => (string) $taxon->GeneticCode->GCName,
          'mitochondrial_genetic_code' => (string) $taxon->MitoGeneticCode->MGCId,
          'mitochondrial_genetic_code_name' => (string) $taxon->MitoGeneticCode->MGCName,
          'division' => (string) $taxon->Division,
        );
        foreach ($properties as $term_name => $value) {
          tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, $term_name, $value);
        }

        // Add in the alternate names. The rank counter is kept per property
        // term rather than per XML element name: 'CommonName' and 'Includes'
        // are both stored as 'other_name', and a rank of zero makes the
        // property helper delete existing values of that term first.
        $name_ranks = array();
        if (isset($taxon->OtherNames)) {
          foreach ($taxon->OtherNames->children() as $child) {
            $type = $child->getName();
            if ($type == 'Name') {
              // Skip the Name stanza.
              continue;
            }
            if (!array_key_exists($type, $name_terms)) {
              print "NOTICE: Skipping unrecognzed name type: $type\n";
              continue;
            }
            $term_name = $name_terms[$type];
            if (!array_key_exists($term_name, $name_ranks)) {
              $name_ranks[$term_name] = 0;
            }
            tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, $term_name, (string) $child, $name_ranks[$term_name]);
            $name_ranks[$term_name]++;
          }
        }

        // Generate a nested array structure that can be used for importing
        // the tree.
        $rank = (string) $taxon->Rank;
        $sci_name = (string) $taxon->ScientificName;
        $lineage_depth = preg_split('/;\s*/', $lineage);

        // Walk the lineage from the root down, adding one internal node per
        // ancestor. Each node records the name of the node above it.
        $parent_name = $tree['name'];
        $i = 1;
        if (isset($taxon->LineageEx)) {
          foreach ($taxon->LineageEx->children() as $child) {
            $node = array(
              'name' => (string) $child->ScientificName,
              'depth' => $i,
              'is_root' => 0,
              'is_leaf' => 0,
              'is_internal' => 1,
              'left_index' => 0,
              'right_index' => 0,
              'parent' => $parent_name,
              'branch_set' => array(),
              'properties' => array(
                $rank_cvterm->cvterm_id => (string) $child->Rank,
              ),
            );
            $parent_name = $node['name'];
            tripal_phylogeny_taxonomy_import_add_node($tree, $node, $lineage_depth);
            $i++;
          }
        }

        // Now add in the leaf node for the organism itself.
        $node = array(
          'name' => $sci_name,
          'depth' => $i,
          'is_root' => 0,
          'is_leaf' => 1,
          'is_internal' => 0,
          'left_index' => 0,
          'right_index' => 0,
          'parent' => $parent_name,
          'organism_id' => $organism->organism_id,
          'properties' => array(
            $rank_cvterm->cvterm_id => $rank,
          ),
        );
        tripal_phylogeny_taxonomy_import_add_node($tree, $node, $lineage_depth);

        // Set the indices for the tree. This is done after every organism,
        // as before.
        tripal_phylogeny_assign_tree_indices($tree);
      }

      // Now add the tree.
      $options = array('taxonomy' => 1);
      tripal_phylogeny_import_tree($tree, $phylotree, $options);

      // Always sync the imported tree.
      $ids = array($phylotree->phylotree_id);
      chado_node_sync_records('phylotree', FALSE, FALSE, array(), $ids);
    }
    catch (Exception $e) {
      $transaction->rollback();
      // Make sure we start errors on a new line.
      print "\n";
      watchdog_exception('tripal_phylogeny', $e);
      print "FAILED: Rolling back database changes...\n";
    }
  }
  /**
   * Adds a node to the in-memory taxonomy tree at the position given by its
   * lineage.
   *
   * Starting at the root's branch set, the tree is descended one level per
   * unit of the node's depth by following the lineage name for that level.
   * If a node with the same name and depth is met on the way, the node is
   * already present and nothing is added. Otherwise the node is appended to
   * the last branch set reached.
   *
   * @param $tree
   *   The tree array, passed by reference. Must have a 'branch_set' key.
   * @param $node
   *   The node array to add. Its 'name' and 'depth' keys are read.
   * @param $lineage_depth
   *   A zero-indexed array of lineage names, where index N holds the name of
   *   the ancestor expected at depth N + 1.
   */
  function tripal_phylogeny_taxonomy_import_add_node(&$tree, $node, $lineage_depth) {
    // Get the branch set for the tree root.
    $branch_set = &$tree['branch_set'];

    // Iterate through the tree up until the depth where this node will
    // be placed.
    $node_depth = $node['depth'];
    for ($i = 1; $i <= $node_depth; $i++) {
      // A leaf sits one level below the last lineage entry, so there may be
      // no lineage name for the deepest level.
      $lineage_name = isset($lineage_depth[$i - 1]) ? $lineage_depth[$i - 1] : NULL;

      // Iterate through any existing nodes in the branch set to see if
      // the node name matches the correct name for the lineage at this
      // depth. If it matches then it is inside of this branch set that
      // we will place the node.
      for ($j = 0; $j < count($branch_set); $j++) {
        // If this node already exists in the tree then return. The depth
        // must be compared, not assigned: an assignment here would overwrite
        // the existing node's depth and match any node with the same name.
        if ($branch_set[$j]['name'] == $node['name'] and
            $branch_set[$j]['depth'] == $node['depth']) {
          return;
        }
        // Otherwise, set the branch to be the current branch and continue.
        if ($lineage_name !== NULL and $branch_set[$j]['name'] == $lineage_name) {
          // Leaf nodes are created without a branch set; give the node one
          // before descending so the next level always works on an array.
          if (!isset($branch_set[$j]['branch_set'])) {
            $branch_set[$j]['branch_set'] = array();
          }
          $branch_set = &$branch_set[$j]['branch_set'];
          break;
        }
      }
    }

    // Add the node to the last branch set. This should be where this node goes.
    $branch_set[] = $node;
  }
  /**
   * Stores a property value for an organism.
   *
   * Nothing is stored when the value is empty. When the rank is zero,
   * chado_delete_property() is called for the same record and property before
   * the new value is inserted.
   *
   * @param $organism_id
   *   The ID of the organism the property belongs to.
   * @param $term_name
   *   The name of the property term, looked up in the 'organism_property'
   *   vocabulary.
   * @param $value
   *   The property value. Any value that PHP treats as false (including an
   *   empty string or the string '0') is skipped.
   * @param $rank
   *   The position of this value among values of the same term. Defaults to
   *   0. Only used here to decide whether to delete before inserting.
   */
  function tripal_phylogeny_taxonomy_add_organism_property($organism_id, $term_name, $value, $rank = 0) {
    if (!$value) {
      return;
    }

    $record = array(
      'table' => 'organism',
      'id' => $organism_id,
    );
    $property = array(
      'type_name' => $term_name,
      // The vocabulary name must be a quoted string. Unquoted it is read as
      // an undefined constant, which is a fatal error on PHP 8.
      'cv_name' => 'organism_property',
      'value' => $value,
    );

    // Delete all properties of this type if the rank is zero.
    if ($rank == 0) {
      chado_delete_property($record, $property);
    }
    chado_insert_property($record, $property);
  }