function tripal_phylogeny_ncbi_taxonomy_import
2.x tripal_phylogeny.taxonomy.inc | tripal_phylogeny_ncbi_taxonomy_import($job_id) |
Parameters
unknown $job_id: Job identifier supplied by the caller; the function body shown below does not read it.
1 string reference to 'tripal_phylogeny_ncbi_taxonomy_import'
- tripal_phylogeny_taxonomy_load_form_submit in tripal_phylogeny/
includes/ tripal_phylogeny.taxonomy.inc - _state
File
- tripal_phylogeny/
includes/ tripal_phylogeny.taxonomy.inc, line 102
Code
/**
 * Imports NCBI Taxonomy data for every organism and rebuilds the taxonomy tree.
 *
 * For each record in the Chado organism table the NCBI E-utilities are
 * queried (esearch to resolve "genus species" to a taxonomy ID, then efetch
 * for the full record). Organism properties (lineage, genetic codes,
 * division and alternative names) are stored, and the lineage is merged into
 * a nested array that is finally imported as the phylotree and synced.
 *
 * All database work happens inside a single transaction. Any exception
 * (including a failed NCBI request or unparsable XML) rolls the transaction
 * back, is logged with watchdog_exception() and is reported on standard
 * output; the exception is not re-thrown.
 *
 * @param $job_id
 *   Job identifier supplied by the caller. It is not used by this function.
 */
function tripal_phylogeny_ncbi_taxonomy_import($job_id) {
  print "\nNOTE: Importing of NCBI taxonomy data is performed using a database transaction. \n" .
    "If the load fails or is terminated prematurely then the entire set of \n" .
    "insertions/updates is rolled back and will not be found in the database\n\n";

  $transaction = db_transaction();
  try {
    // TODO: there should be an API function named tripal_insert_analysis().
    // But until then we have to insert the analysis manually.
    // Get the version of this module for the analysis record:
    $info = system_get_info('module', 'tripal_phylogeny');
    $version = $info['version'];
    $analysis_name = 'NCBI Taxonomy Tree Import';

    // If the analysis record already exists then don't add it again.
    $analysis = chado_select_record('analysis', array('*'), array('name' => $analysis_name));
    if (count($analysis) == 0) {
      $values = array(
        'name' => $analysis_name,
        'description' => 'Used to import NCBI taxonomy details for organisms in this database.',
        'program' => 'Tripal Phylogeny Module NCBI Taxonomy Importer',
        'programversion' => $version,
        'sourcename' => 'NCBI Taxonomy',
        'sourceuri' => 'http://www.ncbi.nlm.nih.gov/taxonomy',
      );
      $analysis = chado_insert_record('analysis', $values);
      if (!$analysis) {
        throw new Exception("Cannot add NCBI Taxonomy Tree Import Analysis.");
      }
      // chado_insert_record() hands back the inserted values as an array
      // while the select branch below yields an object. Normalise to an
      // object so that $analysis->analysis_id works in both cases (the cast
      // is a no-op if an object was returned).
      $analysis = (object) $analysis;
    }
    else {
      $analysis = $analysis[0];
    }

    // If the tree already exists then don't insert it again.
    // NOTE(review): $site_name is read from the global scope. If nothing
    // defines that global it is NULL and the tree is simply named
    // 'Taxonomy Tree' -- confirm. The name is deliberately left unchanged so
    // that trees created by earlier runs are still found.
    global $site_name;
    $tree_name = $site_name . 'Taxonomy Tree';
    $phylotree = chado_select_record('phylotree', array('*'), array('name' => $tree_name));
    if (count($phylotree) == 0) {
      // Add the taxonomic tree.
      $options = array(
        'name' => $tree_name,
        'description' => 'The taxonomic tree of species present on this site. Click a species name for more details.',
        'leaf_type' => 'taxonomy',
        'analysis_id' => $analysis->analysis_id,
        'tree_file' => '/dev/null',
        'format' => 'taxonomy',
        'no_load' => TRUE,
      );
      $errors = array();
      $warnings = array();
      $success = tripal_insert_phylotree($options, $errors, $warnings);
      if (!$success) {
        throw new Exception("Cannot add the Taxonomy Tree record.");
      }
      // presumably tripal_insert_phylotree() adds 'phylotree_id' to $options
      // (it is read from $phylotree below) -- verify against that function.
      $phylotree = (object) $options;
    }
    else {
      $phylotree = $phylotree[0];
    }

    // Clean out the phylotree in the event this is a reload.
    chado_delete_record('phylonode', array('phylotree_id' => $phylotree->phylotree_id));

    // The taxonomic tree must have a root, so create that first.
    $tree = array(
      'name' => 'root',
      'depth' => 0,
      'is_root' => 1,
      'is_leaf' => 0,
      'is_internal' => 0,
      'left_index' => 0,
      'right_index' => 0,
      'branch_set' => array(),
    );

    // Get the "rank" cvterm from the 'tripal_phylogeny' vocabulary. Every
    // node stores its taxonomic rank under this term, so fail early (and
    // roll back) if it is missing rather than writing properties with an
    // empty type.
    $rank_cvterm = tripal_get_cvterm(array(
      'name' => 'rank',
      'cv_id' => array('name' => 'tripal_phylogeny'),
    ));
    if (!$rank_cvterm) {
      throw new Exception("Cannot find the 'rank' term in the 'tripal_phylogeny' vocabulary.");
    }

    // Maps the element names found under <OtherNames> to the organism
    // property used to store them. 'Name' stanzas are skipped on purpose.
    $name_properties = array(
      'GenbankCommonName' => 'genbank_common_name',
      'Synonym' => 'synonym',
      'CommonName' => 'other_name',
      'Includes' => 'other_name',
      'EquivalentName' => 'equivalent_name',
      'Anamorph' => 'anamorph',
    );

    // NCBI requires HTTPS for all E-utilities requests.
    $eutils = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';

    // Get the list of organisms.
    $organisms = chado_query("SELECT O.* FROM {organism} O");
    while ($organism = $organisms->fetchObject()) {
      $organism_id = $organism->organism_id;

      // Search NCBI for the taxonomy ID of this species.
      $term = urlencode($organism->genus . ' ' . $organism->species);
      $xml = _tripal_phylogeny_ncbi_taxonomy_fetch_xml($eutils . "esearch.fcgi?db=taxonomy&term=$term");
      if (!$xml) {
        continue;
      }
      $taxid = (string) $xml->IdList->Id;
      if (!$taxid) {
        // NCBI does not know this organism; leave it out of the tree.
        continue;
      }
      print "$taxid\t$organism->genus $organism->species\n";

      // With a taxonomy ID we can now get the details.
      $xml = _tripal_phylogeny_ncbi_taxonomy_fetch_xml($eutils . "efetch.fcgi?db=taxonomy&id=$taxid");
      if (!$xml) {
        continue;
      }
      $taxon = $xml->Taxon;

      // Add in the organism properties.
      $lineage = (string) $taxon->Lineage;
      $properties = array(
        'lineage' => $lineage,
        'genetic_code' => (string) $taxon->GeneticCode->GCId,
        'genetic_code_name' => (string) $taxon->GeneticCode->GCName,
        'mitochondrial_genetic_code' => (string) $taxon->MitoGeneticCode->MGCId,
        'mitochondrial_genetic_code_name' => (string) $taxon->MitoGeneticCode->MGCName,
        'division' => (string) $taxon->Division,
      );
      foreach ($properties as $property_name => $property_value) {
        tripal_phylogeny_taxonomy_add_organism_property($organism_id, $property_name, $property_value);
      }

      // Add the alternative names. Ranks are counted per target property
      // (not per XML element) so that 'CommonName' and 'Includes' entries,
      // which are both stored as 'other_name', never share a rank.
      $name_ranks = array();
      if (isset($taxon->OtherNames)) {
        foreach ($taxon->OtherNames->children() as $child) {
          $type = $child->getName();
          $name = (string) $child;
          if (isset($name_properties[$type])) {
            $property_name = $name_properties[$type];
            if (!array_key_exists($property_name, $name_ranks)) {
              $name_ranks[$property_name] = 0;
            }
            tripal_phylogeny_taxonomy_add_organism_property($organism_id, $property_name, $name, $name_ranks[$property_name]);
            $name_ranks[$property_name]++;
          }
          elseif ($type != 'Name') {
            // Do nothing for unrecognized types except tell the user.
            print "NOTICE: Skipping unrecognzed name type: $type\n";
          }
        }
      }

      // Generate a nested array structure that can be used for importing
      // the tree: one internal node per lineage entry, then the leaf.
      $rank = (string) $taxon->Rank;
      $sci_name = (string) $taxon->ScientificName;
      $lineage_depth = preg_split('/;\s*/', $lineage);
      $parent_name = $tree['name'];
      $depth = 1;
      if (isset($taxon->LineageEx)) {
        foreach ($taxon->LineageEx->children() as $child) {
          $node = array(
            'name' => (string) $child->ScientificName,
            'depth' => $depth,
            'is_root' => 0,
            'is_leaf' => 0,
            'is_internal' => 1,
            'left_index' => 0,
            'right_index' => 0,
            'parent' => $parent_name,
            'branch_set' => array(),
            'properties' => array(
              $rank_cvterm->cvterm_id => (string) $child->Rank,
            ),
          );
          $parent_name = $node['name'];
          tripal_phylogeny_taxonomy_import_add_node($tree, $node, $lineage_depth);
          $depth++;
        }
      }

      // Now add in the leaf node, which is the organism itself.
      $node = array(
        'name' => $sci_name,
        'depth' => $depth,
        'is_root' => 0,
        'is_leaf' => 1,
        'is_internal' => 0,
        'left_index' => 0,
        'right_index' => 0,
        'parent' => $parent_name,
        'organism_id' => $organism_id,
        'properties' => array(
          $rank_cvterm->cvterm_id => $rank,
        ),
      );
      tripal_phylogeny_taxonomy_import_add_node($tree, $node, $lineage_depth);

      // Set the indices for the tree.
      tripal_phylogeny_assign_tree_indices($tree);
    }

    // Now add the tree.
    $options = array('taxonomy' => 1);
    tripal_phylogeny_import_tree($tree, $phylotree, $options);

    // The tree is always synced; there is currently no option to skip it.
    chado_node_sync_records('phylotree', FALSE, FALSE,
      array(), array($phylotree->phylotree_id));
  }
  catch (Exception $e) {
    $transaction->rollback();
    print "\n"; // make sure we start errors on new line
    watchdog_exception('tripal_phylogeny', $e);
    print "FAILED: Rolling back database changes...\n";
  }
}

/**
 * Downloads an NCBI E-utilities response and parses it as XML.
 *
 * @param $url
 *   The fully built E-utilities request URL.
 *
 * @return
 *   A SimpleXMLElement for the response document.
 *
 * @throws Exception
 *   If the URL cannot be read, or (from SimpleXMLElement) if the response
 *   is not well-formed XML.
 */
function _tripal_phylogeny_ncbi_taxonomy_fetch_xml($url) {
  $xml_text = file_get_contents($url);
  if ($xml_text === FALSE) {
    // Without this check a failed connection would leave the caller
    // looping forever on an invalid stream handle.
    throw new Exception("Could not retrieve data from NCBI using the URL: $url");
  }
  return new SimpleXMLElement($xml_text);
}