tripal_feature.api.inc

  1. 2.x tripal_feature/api/tripal_feature.api.inc
  2. 1.x tripal_feature/api/tripal_feature.api.inc

Provides an application programming interface (API) for working with features

File

tripal_feature/api/tripal_feature.api.inc
View source
  1. <?php
  2. /**
  3. * @file
  4. * Provides an application programming interface (API) for working with features
  5. */
  6. /**
  7. * @defgroup tripal_feature_api Feature API
  8. * @ingroup tripal_api
  9. * @{
  10. * Provides an application programming interface (API) for working with features
  11. * @}
  12. */
  /**
   * Computes the reverse complement of a nucleotide sequence.
   *
   * The input is upper-cased and reversed, then every base is exchanged for
   * its complement. The IUPAC ambiguity codes Y/R, K/M, D/H and V/B are
   * exchanged pairwise; W and S are their own complement. Any other character
   * (for example N) is kept as is, in upper case.
   *
   * @param $sequence
   *   The nucleotide sequence.
   *
   * @return
   *   The upper-case reverse complemented sequence.
   *
   * @ingroup tripal_feature_api
   */
  function tripal_reverse_compliment_sequence($sequence) {
    $reversed = strrev(strtoupper($sequence));

    // A single-pass translation table: the n-th character of the first string
    // is replaced by the n-th character of the second one. W and S map onto
    // themselves and therefore need no entry.
    return strtr($reversed, 'ATGCYRKMDVHB', 'TACGRYMKHBDV');
  }
  /**
   * Retrieves the sequences for a given feature.
   *
   * If a feature has multiple alignments or multiple relationships then
   * multiple sequences will be returned.
   *
   * @param $feature
   *   An associative array describing the feature. Valid keys include:
   *   - feature_id: The feature_id of the feature for which the sequence will
   *     be retrieved.
   *   - name: The feature name. It is used to build the name handed to the
   *     lookup of related features when a relationship is requested.
   *   - parent_id: (optional) only retrieve a sequence if 'derive_from_parent'
   *     is true and the parent matches this ID.
   *   - featureloc_id: (optional) only retrieve a sequence if
   *     'derive_from_parent' is true and the alignment is defined with this
   *     featureloc_id.
   * @param $options
   *   An associative array of options. Valid keys include:
   *   - width: Indicate the number of bases to use per line. A new line will
   *     be added after the specified number of bases on each line. Defaults
   *     to 50.
   *   - is_html: Set to '1' if the sequence is meant to be displayed on a web
   *     page. This will cause a <br> tag to separate lines of the FASTA
   *     sequence. Otherwise lines are separated by "\r\n".
   *   - derive_from_parent: Set to '1' if the sequence should be obtained from
   *     the parent to which this feature is aligned.
   *   - aggregate: Set to '1' if the sequence should only contain sub features,
   *     excluding the sequence between the sub features. For example, set this
   *     option to obtain just the coding sequence of an mRNA.
   *   - upstream: An integer specifying the number of upstream bases to
   *     include in the output.
   *   - downstream: An integer specifying the number of downstream bases to
   *     include in the output.
   *   - sub_feature_types: Only include sub features (or child features) of
   *     the types provided in the array.
   *   - relationship_type: If a relationship name is provided (e.g.
   *     sequence_of) then the sequences of the features that are in a
   *     relationship of this type with the given feature are returned.
   *   - relationship_part: If a relationship is provided in the preceding
   *     option then this must be either 'object' or 'subject' to indicate
   *     which side of the relationship the given feature belongs to.
   *
   * @return
   *   An array of matching sequences (an empty array if 'sub_feature_types' is
   *   not an array). Each sequence is an array with the following keys:
   *   - 'types': an array of the sub feature types that were used to derive
   *     the sequence (e.g. from an aggregated sequence). When the sequence is
   *     taken directly from the feature record this is the name of the
   *     feature type as a string.
   *   - 'upstream': the number of upstream bases included in the sequence.
   *   - 'downstream': the number of downstream bases included in the sequence.
   *   - 'defline': the definition line used to create a FASTA sequence.
   *   - 'residues': the residues, wrapped to the requested width.
   *   - 'featureloc_id': the featureloc_id if the sequence is from an
   *     alignment (only present when derived from the parent).
   *   - 'length': the number of residues before line wrapping.
   *
   * @ingroup tripal_feature_api
   */
  function tripal_get_feature_sequences($feature, $options) {

    // Default values for finding the feature.
    $feature_id = array_key_exists('feature_id', $feature) ? $feature['feature_id'] : 0;
    $parent_id = array_key_exists('parent_id', $feature) ? $feature['parent_id'] : 0;
    $featureloc_id = array_key_exists('featureloc_id', $feature) ? $feature['featureloc_id'] : 0;
    $feature_name = array_key_exists('name', $feature) ? $feature['name'] : '';

    // Default values for building the sequence.
    $num_bases_per_line = array_key_exists('width', $options) ? $options['width'] : 50;
    $derive_from_parent = array_key_exists('derive_from_parent', $options) ? $options['derive_from_parent'] : 0;
    $aggregate = array_key_exists('aggregate', $options) ? $options['aggregate'] : 0;
    $upstream = array_key_exists('upstream', $options) ? $options['upstream'] : 0;
    $downstream = array_key_exists('downstream', $options) ? $options['downstream'] : 0;
    $sub_features = array_key_exists('sub_feature_types', $options) ? $options['sub_feature_types'] : array();
    $relationship = array_key_exists('relationship_type', $options) ? $options['relationship_type'] : '';
    $rel_part = array_key_exists('relationship_part', $options) ? $options['relationship_part'] : '';
    $is_html = array_key_exists('is_html', $options) ? $options['is_html'] : 0;

    // Empty values (NULL, '', FALSE) must reach the SQL below as the number 0.
    if (!$upstream) {
      $upstream = 0;
    }
    if (!$downstream) {
      $downstream = 0;
    }

    // Make sure the sub_features variable is an array.
    if (!is_array($sub_features)) {
      tripal_report_error('tripal_feature', TRIPAL_ERROR,
        "'sub_feature_types' option must be an array for function tripal_get_feature_sequences().",
        array()
      );
      return array();
    }

    // If a relationship was specified then retrieve the features that have
    // the given relationship and recurse to extract the appropriate sequences.
    if ($rel_part == "object" or $rel_part == "subject") {
      if ($rel_part == "subject") {
        // The given feature is the subject: look up the objects.
        $sql = '
          SELECT FO.feature_id, FO.name, FO.uniquename, CVTO.name as feature_type, O.genus, O.species
          FROM {feature} FS
            INNER JOIN {feature_relationship} FR ON FR.subject_id   = FS.feature_id
            INNER JOIN {cvterm} CVTFR            ON CVTFR.cvterm_id = FR.type_id
            INNER JOIN {feature} FO              ON FO.feature_id   = FR.object_id
            INNER JOIN {cvterm} CVTO             ON CVTO.cvterm_id  = FO.type_id
            INNER JOIN {organism} O              ON O.organism_id   = FO.organism_id
          WHERE
            FS.feature_id = :feature_id AND
            CVTFR.name    = :relationship
        ';
      }
      else {
        // The given feature is the object: look up the subjects.
        $sql = '
          SELECT FS.feature_id, FS.name, FS.uniquename, CVTO.name as feature_type, O.genus, O.species
          FROM {feature} FO
            INNER JOIN {feature_relationship} FR ON FR.object_id    = FO.feature_id
            INNER JOIN {cvterm} CVTFR            ON CVTFR.cvterm_id = FR.type_id
            INNER JOIN {feature} FS              ON FS.feature_id   = FR.subject_id
            INNER JOIN {cvterm} CVTO             ON CVTO.cvterm_id  = FS.type_id
            INNER JOIN {organism} O              ON O.organism_id   = FS.organism_id
          WHERE
            FO.feature_id = :feature_id AND
            CVTFR.name    = :relationship
        ';
      }
      $features = chado_query($sql, array(':feature_id' => $feature_id, ':relationship' => $relationship));

      // Collect the sequences of every related feature. A separate loop
      // variable is used so the $feature argument is not overwritten.
      $related_sequences = array();
      $found_related = FALSE;
      while ($related = $features->fetchObject()) {
        $found_related = TRUE;
        if ($rel_part == "subject") {
          $defline = "$feature_name, $relationship, $related->uniquename $related->feature_type ($related->genus $related->species)";
        }
        else {
          $defline = "$related->uniquename $related->feature_type ($related->genus $related->species), $relationship, $feature_name";
        }

        // The relationship options are deliberately not forwarded so the
        // recursion stops after one level. Everything else, including the
        // sub feature filter and the HTML flag, applies to the related
        // feature as well.
        $related_sequences = array_merge($related_sequences, tripal_get_feature_sequences(
          array(
            'feature_id' => $related->feature_id,
            'name' => $defline,
            'parent_id' => $parent_id,
          ),
          array(
            'width' => $num_bases_per_line,
            'derive_from_parent' => $derive_from_parent,
            'aggregate' => $aggregate,
            'upstream' => $upstream,
            'downstream' => $downstream,
            'sub_feature_types' => $sub_features,
            'is_html' => $is_html,
          )
        ));
      }

      // Without any related feature fall through and return the sequence of
      // the feature itself.
      if ($found_related) {
        return $related_sequences;
      }
    }

    // Prepare the queries we're going to use later during the render phase.
    // This SQL statement uses conditionals in the select clause to handle
    // cases where the alignment is in the reverse direction and when
    // the upstream and downstream extensions go beyond the length of the
    // parent sequence.
    $parent_sql = '
      SELECT featureloc_id, srcname, srcfeature_id, strand, srctypename, typename,
        fmin, fmax, upstream, downstream, adjfmin, adjfmax,
        substring(residues from (cast(adjfmin as int4) + 1) for cast((upstream + (fmax - fmin) + downstream) as int4)) as residues,
        genus, species
      FROM (
        SELECT
          FL.featureloc_id, OF.name srcname, FL.srcfeature_id, FL.strand,
          OCVT.name as srctypename, SCVT.name as typename,
          FL.fmin, FL.fmax, OO.genus, OO.species,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                 WHEN FL.fmin - :upstream <= 0 THEN 0
                 ELSE FL.fmin - :upstream
              END
            WHEN FL.strand < 0 THEN
              CASE
                 WHEN FL.fmin - :downstream <= 0 THEN 0
                 ELSE FL.fmin - :downstream
              END
          END as adjfmin,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                WHEN FL.fmax + :downstream > OF.seqlen THEN OF.seqlen
                ELSE FL.fmax + :downstream
              END
            WHEN FL.strand < 0 THEN
              CASE
                WHEN FL.fmax + :upstream > OF.seqlen THEN OF.seqlen
                ELSE FL.fmax + :upstream
              END
          END as adjfmax,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                 WHEN FL.fmin - :upstream <= 0 THEN FL.fmin
                 ELSE :upstream
              END
            ELSE
              CASE
                 WHEN FL.fmax + :upstream > OF.seqlen THEN OF.seqlen - FL.fmax
                 ELSE :upstream
              END
          END as upstream,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                 WHEN FL.fmax + :downstream > OF.seqlen THEN OF.seqlen - FL.fmax
                 ELSE :downstream
              END
            ELSE
              CASE
                 WHEN FL.fmin - :downstream <= 0 THEN FL.fmin
                 ELSE :downstream
              END
          END as downstream,
          OF.residues
        FROM {featureloc} FL
          INNER JOIN {feature} SF   on FL.feature_id    = SF.feature_id
          INNER JOIN {cvterm}  SCVT on SF.type_id       = SCVT.cvterm_id
          INNER JOIN {feature} OF   on FL.srcfeature_id = OF.feature_id
          INNER JOIN {cvterm}  OCVT on OF.type_id       = OCVT.cvterm_id
          INNER JOIN {organism} OO  on OF.organism_id   = OO.organism_id
        WHERE SF.feature_id = :feature_id and NOT (OF.residues = \'\' or OF.residues IS NULL)) as tbl1
    ';

    // This query is meant to get all of the sub features of any given
    // feature (arg #1) and order them as they appear on the reference
    // feature (arg #2).
    $sfsql = '
      SELECT SF.feature_id, CVT.name as type_name, SF.type_id
      FROM {feature_relationship} FR
        INNER JOIN {feature} SF    ON SF.feature_id = FR.subject_id
        INNER JOIN {cvterm} CVT    ON CVT.cvterm_id = SF.type_id
        INNER JOIN {featureloc} FL ON FL.feature_id = FR.subject_id
        INNER JOIN {feature} PF    ON PF.feature_id = FL.srcfeature_id
      WHERE FR.object_id = :feature_id and PF.feature_id = :srcfeature_id
      ORDER BY FL.fmin ASC
    ';

    // For counting the number of children.
    $fsql = '
      SELECT count(*) as num_children
      FROM {feature_relationship} FR
        INNER JOIN {feature} SF    ON SF.feature_id = FR.subject_id
        INNER JOIN {cvterm} CVT    ON CVT.cvterm_id = SF.type_id
        INNER JOIN {featureloc} FL ON FL.feature_id = FR.subject_id
        INNER JOIN {feature} PF    ON PF.feature_id = FL.srcfeature_id
      WHERE FR.object_id = :feature_id and PF.feature_id = :srcfeature_id
    ';

    // The array to be returned.
    $sequences = array();

    // If we need to get the sequence from the parent then do so now.
    if ($derive_from_parent) {

      // Execute the query to get the sequence from the parent.
      $parents = chado_query($parent_sql, array(':upstream' => $upstream, ':downstream' => $downstream, ':feature_id' => $feature_id));
      while ($parent = $parents->fetchObject()) {

        // If the user specified a particular parent and this one doesn't
        // match then skip it.
        if ($parent_id and $parent_id != $parent->srcfeature_id) {
          continue;
        }

        // If the user specified a particular featureloc_id and this one
        // doesn't match then skip it.
        if ($featureloc_id and $featureloc_id != $parent->featureloc_id) {
          continue;
        }

        // Initialize the sequence for each parent.
        $seq = '';
        $notes = '';
        $types = array();

        // If we are to aggregate then we will ignore the feature returned
        // by the query above and rebuild it using the sub features.
        if ($aggregate) {

          // Now get the sub features that are located on the parent.
          $children = chado_query($sfsql, array(':feature_id' => $feature_id, ':srcfeature_id' => $parent->srcfeature_id));
          $num_children = chado_query($fsql, array(':feature_id' => $feature_id, ':srcfeature_id' => $parent->srcfeature_id))->fetchField();
          $is_forward = ($parent->strand >= 0);

          // Iterate through the sub features and concat their sequences. They
          // should already be in order.
          // NOTE(review): $i counts children that are skipped by the type
          // filter too, so when the first (or last) child is filtered out no
          // included child receives the flanking bases. Confirm whether that
          // is intended.
          $i = 0;
          while ($child = $children->fetchObject()) {

            // If the caller has specified that only certain sub features
            // should be included then skip this child if it is not one of
            // the allowed sub features.
            if (count($sub_features) > 0 and !in_array($child->type_name, $sub_features)) {
              $i++;
              continue;
            }

            // Keep up with the types.
            if (!in_array($child->type_name, $types)) {
              $types[] = $child->type_name;
            }

            // Only the outermost sub features get flanking bases. On the
            // parent the first child (lowest fmin) carries the upstream bases
            // of a forward feature but the downstream bases of a reverse
            // feature; the last child is the mirror image. Internal sub
            // features get neither.
            $child_upstream = 0;
            $child_downstream = 0;
            if ($i == 0) {
              if ($is_forward) {
                $child_upstream = $upstream;
              }
              else {
                $child_downstream = $downstream;
              }
            }
            elseif ($i == $num_children - 1) {
              if ($is_forward) {
                $child_downstream = $downstream;
              }
              else {
                $child_upstream = $upstream;
              }
            }
            $q = chado_query($parent_sql, array(':upstream' => $child_upstream, ':downstream' => $child_downstream, ':feature_id' => $child->feature_id));

            while ($subseq = $q->fetchObject()) {
              // Concatenate the sequences of all the sub features that are
              // aligned to the current parent.
              if ($subseq->srcfeature_id == $parent->srcfeature_id) {
                $seq .= $subseq->residues;
              }
              if ($subseq->upstream > 0) {
                $notes .= "Includes " . $subseq->upstream . " bases upstream.  ";
              }
              if ($subseq->downstream > 0) {
                $notes .= "Includes " . $subseq->downstream . " bases downstream.  ";
              }
            }
            $i++;
          }
        }
        // If this isn't an aggregate then use the parent residues.
        else {
          $seq = $parent->residues;
          if ($parent->upstream > 0) {
            $notes .= "Includes " . $parent->upstream . " bases upstream.  ";
          }
          if ($parent->downstream > 0) {
            $notes .= "Includes " . $parent->downstream . " bases downstream.  ";
          }
        }

        // The length is taken before line breaks are inserted.
        $length = strlen($seq);

        // Get the reverse complement if the feature is on the reverse strand.
        if ($parent->strand < 0) {
          $seq = tripal_reverse_compliment_sequence($seq);
        }

        // Now format for display.
        if ($is_html) {
          $seq = wordwrap($seq, $num_bases_per_line, "<br>", TRUE);
        }
        else {
          $seq = wordwrap($seq, $num_bases_per_line, "\r\n", TRUE);
        }
        if (!$seq) {
          $notes .= "No sequence available.";
        }

        if (count($types) > 0) {
          $notes .= "Excludes all bases but those of type(s): " . implode(', ', $types) . ". ";
        }

        // Construct the definition line for this feature. To construct the
        // defline we need a featureloc record, so we'll create one using
        // the information we have.
        $featureloc = new stdClass;
        $featureloc->feature_id = $feature_id;
        $featureloc->fmin = $parent->adjfmin;
        $featureloc->fmax = $parent->adjfmax;
        $featureloc->strand = $parent->strand;
        $featureloc->srcfeature_id = new stdClass;
        $featureloc->srcfeature_id->name = $parent->srcname;
        $featureloc->srcfeature_id->type_id = $parent->srctypename;
        $featureloc->srcfeature_id->organism_id = new stdClass;
        $featureloc->srcfeature_id->organism_id->genus = $parent->genus;
        $featureloc->srcfeature_id->organism_id->species = $parent->species;

        // Get a proper feature object.
        $f = chado_generate_var('feature', array('feature_id' => $feature_id));
        $defline = tripal_get_fasta_defline($f, $notes, $featureloc, '', $length);

        $sequences[] = array(
          'types' => $types,
          'upstream' => $parent->upstream,
          'downstream' => $parent->downstream,
          'defline' => $defline,
          'residues' => $seq,
          'featureloc_id' => $parent->featureloc_id,
          'length' => $length,
        );
      }
    }
    // If we are not getting the sequence from the parent sequence then
    // use what comes through from the feature record.
    else {
      $f = chado_generate_var('feature', array('feature_id' => $feature_id));
      $f = chado_expand_var($f, 'field', 'feature.residues');

      $residues = $f->residues;
      $length = strlen($residues);
      if ($is_html) {
        $residues = wordwrap($residues, $num_bases_per_line, "<br>", TRUE);
      }
      else {
        $residues = wordwrap($residues, $num_bases_per_line, "\r\n", TRUE);
      }

      // Get the definition line for this feature.
      $defline = tripal_get_fasta_defline($f, '', NULL, '', $length);

      // Add to the sequence array.
      $sequences[] = array(
        'types' => $f->type_id->name,
        'upstream' => 0,
        'downstream' => 0,
        'defline' => $defline,
        'residues' => $residues,
        'length' => $length,
      );
    }

    return $sequences;
  }
  /**
   * Retrieves the sequences of all features that match a set of criteria.
   *
   * The matching features are looked up first and then the sequences of each
   * one are retrieved with tripal_get_feature_sequences(), which receives the
   * complete $options array.
   *
   * @param $options
   *   An associative array of options for selecting a feature. At least one
   *   of 'type', 'feature_name' or 'genus' must be provided. Valid keys
   *   include:
   *   - org_commonname: The common name of the organism for which sequences
   *     should be retrieved.
   *   - genus: The genus of the organism for which sequences should be
   *     retrieved.
   *   - species: The species of the organism for which sequences should be
   *     retrieved.
   *   - analysis_name: The name of an analysis to which sequences belong. Only
   *     those that are associated with the analysis will be retrieved.
   *   - type: The type of feature (a sequence ontology term).
   *   - feature_name: the name of the feature. Can be an array of feature
   *     names.
   *   - feature_uname: the uniquename of the feature. Can be an array of
   *     feature unique names.
   *   - upstream: An integer specifying the number of upstream bases to
   *     include in the output.
   *   - downstream: An integer specifying the number of downstream bases to
   *     include in the output.
   *   - derive_from_parent: Set to '1' if the sequence should be obtained from
   *     the parent to which this feature is aligned.
   *   - aggregate: Set to '1' if the sequence should only contain sub features,
   *     excluding the sequence between the sub features. For example, set this
   *     option to obtain just the coding sequence of an mRNA.
   *   - sub_feature_types: Only include sub features (or child features) of
   *     the types provided in the array.
   *   - relationship_type: If a relationship name is provided (e.g.
   *     sequence_of) then any sequences that are in relationships of this type
   *     with matched sequences are also included.
   *   - relationship_part: If a relationship is provided in the preceding
   *     option then this must be either 'object' or 'subject' to indicate
   *     which side of the relationship the matched features belong to.
   *   - width: Indicate the number of bases to use per line. A new line will
   *     be added after the specified number of bases on each line.
   *   - is_html: Set to '1' if the sequence is meant to be displayed on a
   *     web page. This will cause a <br> tag to separate lines of the FASTA
   *     sequence.
   *
   * @return
   *   An array of sequences as returned by tripal_get_feature_sequences().
   *   If none of 'type', 'feature_name' and 'genus' is provided a message is
   *   printed and nothing (NULL) is returned.
   *
   * @ingroup tripal_feature_api
   */
  function tripal_get_bulk_feature_sequences($options) {

    // Only the selection criteria are read here. The options that control how
    // a sequence is built are used by tripal_get_feature_sequences().
    $org_commonname = array_key_exists('org_commonname', $options) ? $options['org_commonname'] : '';
    $genus = array_key_exists('genus', $options) ? $options['genus'] : '';
    $species = array_key_exists('species', $options) ? $options['species'] : '';
    $analysis_name = array_key_exists('analysis_name', $options) ? $options['analysis_name'] : '';
    $type = array_key_exists('type', $options) ? $options['type'] : '';
    $feature_name = array_key_exists('feature_name', $options) ? $options['feature_name'] : '';
    $feature_uname = array_key_exists('feature_uname', $options) ? $options['feature_uname'] : '';

    if (!$type and !$feature_name and !$genus) {
      print "Please provide a type, feature name or genus\n";
      return;
    }

    // Get the list of features.
    $vars = array();
    $sql = "
      SELECT DISTINCT F.feature_id, F.name, F.uniquename,
        O.genus, O.species, CVT.name as feature_type
      FROM {feature} F
        INNER JOIN {organism} O on O.organism_id = F.organism_id
        INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
    ";
    if ($analysis_name) {
      $sql .= "
        INNER JOIN {analysisfeature} AF on AF.feature_id = F.feature_id
        INNER JOIN {analysis} A on AF.analysis_id = A.analysis_id
      ";
    }

    // Every condition ends with a space so that the next one can never be
    // glued onto the preceding placeholder name.
    $sql .= "WHERE (1=1) ";
    if ($org_commonname) {
      $sql .= "AND O.common_name = :common_name ";
      $vars[':common_name'] = $org_commonname;
    }
    if ($genus) {
      $sql .= "AND O.genus = :genus ";
      $vars[':genus'] = $genus;
    }
    if ($species) {
      $sql .= "AND O.species = :species ";
      $vars[':species'] = $species;
    }
    if ($type) {
      $sql .= "AND CVT.name = :cvtname ";
      $vars[':cvtname'] = $type;
    }

    // The name and the uniquename filter accept a single value or a list.
    $name_filters = array(
      'fname' => array('F.name', $feature_name),
      'funame' => array('F.uniquename', $feature_uname),
    );
    foreach ($name_filters as $prefix => $filter) {
      list($column, $value) = $filter;
      if (!$value) {
        continue;
      }
      if (is_array($value)) {
        // Number the placeholders ourselves: the keys of the supplied array
        // need not be valid placeholder names.
        $placeholders = array();
        foreach (array_values($value) as $i => $item) {
          $placeholders[] = ":$prefix$i";
          $vars[":$prefix$i"] = $item;
        }
        $sql .= "AND $column IN (" . implode(', ', $placeholders) . ") ";
      }
      else {
        $sql .= "AND $column = :$prefix ";
        $vars[":$prefix"] = $value;
      }
    }

    if ($analysis_name) {
      $sql .= "AND A.name = :aname";
      $vars[':aname'] = $analysis_name;
    }

    $q = chado_query($sql, $vars);
    $sequences = array();
    while ($feature = $q->fetchObject()) {
      // Get the sequences of this feature and append them to the result.
      $seqs = tripal_get_feature_sequences(array('feature_id' => $feature->feature_id), $options);
      $sequences = array_merge($sequences, $seqs);
    }

    return $sequences;
  }
  /**
   * Builds a definition line that can be used in a FASTA file.
   *
   * If the organism of the feature, or the source feature or its organism of
   * the featureloc, are given as IDs rather than objects they are loaded with
   * chado_generate_var() and stored back on the object that was passed in.
   *
   * @param $feature
   *   A single feature object containing all the fields from the chado.feature
   *   table. Best case is to provide an object generated by the
   *   chado_generate_var() function.
   * @param $notes
   *   Optional: additional notes to be added to the definition line.
   * @param $featureloc
   *   Optional: a single featureloc object generated using chado_generate_var
   *   that contains a record from the chado.featureloc table. Provide this if
   *   the sequence was obtained by using the alignment rather than from the
   *   feature.residues column.
   * @param $type
   *   Optional: the type of sequence. By default the feature type is used.
   * @param $length
   *   Optional: the length of the sequence. It is only added to the
   *   definition line when it is greater than zero.
   *
   * @return
   *   The unique name of the feature, a space and a list of fields separated
   *   by "|": ID, Name, organism and type, followed by length, location (for
   *   an alignment) and Notes where available. For example:
   *   uniquename ID=uniquename|Name=name|organism=genus species|type=type
   */
  function tripal_get_fasta_defline($feature, $notes = '', $featureloc = NULL, $type = '', $length = 0) {

    // The source feature of an alignment and the organism of that source
    // feature must be objects. Load them if only their IDs were supplied.
    if ($featureloc) {
      if (!is_object($featureloc->srcfeature_id)) {
        $featureloc->srcfeature_id = chado_generate_var('feature', array('feature_id' => $featureloc->srcfeature_id));
      }
      if (!is_object($featureloc->srcfeature_id->organism_id)) {
        $featureloc->srcfeature_id->organism_id = chado_generate_var('organism', array('organism_id' => $featureloc->srcfeature_id->organism_id));
      }
    }

    // The same holds for the organism of the feature itself.
    if (!is_object($feature->organism_id)) {
      $feature->organism_id = chado_generate_var('organism', array('organism_id' => $feature->organism_id));
    }

    // Collect the fields of the definition line in order. The mandatory ones
    // come first; the type of the feature is the fallback for the type.
    $organism = $feature->organism_id;
    $fields = array(
      'ID=' . $feature->uniquename,
      'Name=' . $feature->name,
      'organism=' . $organism->genus . " " . $organism->species,
      'type=' . ($type ? $type : $feature->type_id->name),
    );
    if ($length > 0) {
      $fields[] = "length=" . $length . "bp";
    }
    if ($featureloc) {
      $src_organism = $featureloc->srcfeature_id->organism_id;
      $fields[] = "location=Sequence derived from alignment at " . tripal_get_location_string($featureloc)
        . " (" . $src_organism->genus . " " . $src_organism->species . ")";
    }
    if ($notes) {
      $fields[] = "Notes=$notes";
    }

    return $feature->uniquename . " " . implode("|", $fields);
  }
  /**
   * Returns a string representing a feature location in an alignment.
   *
   * @param $featureloc
   *   A single featureloc object generated using chado_generate_var that
   *   contains a record from the chado.featureloc table. Only the strand,
   *   fmin and fmax fields and the name of the srcfeature_id object are read,
   *   so a hand-built object with just these members works as well.
   *
   * @return
   *   A string of the form "srcfeature_name:start..end" followed by "+" for
   *   strand 1, "-" for strand -1 and nothing for any other strand. The start
   *   is fmin + 1 and the end is fmax.
   */
  function tripal_get_location_string($featureloc) {
    // Any strand value other than 1 or -1 yields no strand symbol.
    $strand = '';
    if ($featureloc->strand == 1) {
      $strand = '+';
    }
    elseif ($featureloc->strand == -1) {
      $strand = '-';
    }

    return $featureloc->srcfeature_id->name . ":" . ($featureloc->fmin + 1) . ".." . $featureloc->fmax . $strand;
  }
  680. }