function tripal_feature_fasta_load_form

2.x tripal_feature.fasta_loader.inc	`tripal_feature_fasta_load_form()`
1.x fasta_loader.inc	`tripal_feature_fasta_load_form()`
The form used to submit a FASTA loading job.
File

tripal_feature/includes/tripal_feature.fasta_loader.inc, line 23: Provides fasta loading functionality. Creates features based on their specification in a fasta file.
Code

/**
 * Form builder: the form used to submit a FASTA loading job.
 *
 * Collects the path of the FASTA file, the organism, the sequence type, the
 * insert/update method, the name match type and the analysis, plus optional
 * "advanced" settings: regular expressions for the name, unique name,
 * external database accession and parent feature, an external database, a
 * relationship type and a parent type.
 *
 * The organism, analysis and external database select lists are populated
 * from the Chado {organism}, {analysis} and {db} tables. Each list starts
 * with an empty entry.
 *
 * @return
 *   The form array.
 */
function tripal_feature_fasta_load_form() {
  $form['fasta_file'] = array('#type' => 'textfield', '#title' => t('FASTA File'),
    '#description' => t('Please enter the full system path for the FASTA file, or a path within the Drupal
                           installation (e.g. /sites/default/files/xyz.obo).  The path must be accessible to the
                           server on which this Drupal instance is running.'), '#required' => TRUE
  );

  // Get the list of organisms, keyed by organism_id.
  $sql = "SELECT * FROM {organism} ORDER BY genus, species";
  $org_rset = chado_query($sql);
  $organisms = array();
  $organisms[''] = '';
  while ($organism = $org_rset->fetchObject()) {
    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  }
  // Note: '#type' is a machine name and must never be passed through t().
  $form['organism_id'] = array('#title' => t('Organism'), '#type' => 'select',
    '#description' => t("Choose the organism to which these sequences are associated"),
    '#required' => TRUE, '#options' => $organisms
  );

  // Look up the sequence ontology CV ID (used for the term autocomplete).
  $values = array('name' => 'sequence');
  $cv = chado_select_record('cv', array('cv_id'), $values);

  $form['seqtype'] = array('#type' => 'textfield', '#title' => t('Sequence Type'),
    '#required' => TRUE,
    '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, polypeptide, etc...)'),
  );
  // Only attach the autocomplete when the 'sequence' vocabulary was found;
  // otherwise $cv[0] does not exist and the path would carry an empty id.
  if (!empty($cv)) {
    $cv_id = $cv[0]->cv_id;
    $form['seqtype']['#autocomplete_path'] = "admin/tripal/chado/tripal_cv/cvterm/auto_name/$cv_id";
  }

  // Options are keyed 0, 1, 2; the default (2) is "Insert and update".
  $form['method'] = array('#type' => 'radios', '#title' => t('Method'), '#required' => TRUE,
    '#options' => array(t('Insert only'), t('Update only'), t('Insert and update')
    ),
    '#description' => t('Select how features in the FASTA file are handled.
       Select "Insert only" to insert the new features. If a feature already
       exists with the same name or unique name and type then it is skipped.
       Select "Update only" to only update featues that already exist in the
       database.  Select "Insert and Update" to insert features that do
       not exist and upate those that do.'), '#default_value' => 2
  );
  // Options are keyed 0, 1; the default (1) is "Unique name".
  $form['match_type'] = array('#type' => 'radios', '#title' => t('Name Match Type'), '#required' => TRUE,
    '#options' => array(t('Name'), t('Unique name')
    ),
    '#description' => t('Used for "updates only" or "insert and update" methods. Not required if method type is "insert".
      Feature data is stored in Chado with both a human-readable
      name and a unique name. If the features in your FASTA file are uniquely identified using
      a human-readable name then select the "Name" button. If your features are
      uniquely identified using the unique name then select the "Unique name" button.  If you
      loaded your features first using the GFF loader then the unique name of each
      features were indicated by the "ID=" attribute and the name by the "Name=" attribute.
      By default, the FASTA loader will use the first word (character string
      before the first space) as  the name for your feature. If
      this does not uniquely identify your feature consider specifying a regular expression in the advanced section below.
      Additionally, you may import both a name and a unique name for each sequence using the advanced options.'),
    '#default_value' => 1
  );

  $form['analysis'] = array('#type' => 'fieldset', '#title' => t('Analysis Used to Derive Features'),
    '#collapsed' => TRUE
  );
  $form['analysis']['desc'] = array(
    '#markup' => t("Why specify an analysis for a data load?  All data comes
       from some place, even if downloaded from Genbank. By specifying
       analysis details for all data uploads, it allows an end user to reproduce the
       data set, but at least indicates the source of the data.")
  );

  // Get the list of analyses, keyed by analysis_id.
  $sql = "SELECT * FROM {analysis} ORDER BY name";
  $analysis_rset = chado_query($sql);
  $analyses = array();
  $analyses[''] = '';
  while ($analysis = $analysis_rset->fetchObject()) {
    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
  }
  $form['analysis']['analysis_id'] = array('#title' => t('Analysis'), '#type' => 'select',
    '#description' => t("Choose the analysis to which these features are associated"),
    '#required' => TRUE, '#options' => $analyses
  );

  // Advanced Options
  $form['advanced'] = array('#type' => 'fieldset', '#title' => t('Advanced Options'),
    '#collapsible' => TRUE, '#collapsed' => TRUE
  );
  // The help text is supplied as '#markup' so the 'item' element displays it.
  $form['advanced']['re_help'] = array('#type' => 'item',
    '#markup' => t('A regular expression is an advanced method for extracting information from a string of text.
                   Your FASTA file may contain both a human-readable name and a unique name for each sequence.
                   If you want to import
                   both the name and unique name for all sequences, then you must provide regular expressions
                   so that the loader knows how to separate them.
                   Otherwise the name and uniquename will be the same.
                   By default, this loader will use the first word in the definition
                   lines of the FASTA file
                   as the name or unique name of the feature.')
  );
  $form['advanced']['re_name'] = array('#type' => 'textfield',
    '#title' => t('Regular expression for the name'), '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature name from the FASTA definition line. For example, for a
       defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the name would be, "^(.*?)\|.*$".  All FASTA
       definition lines begin with the ">" symbol.  You do not need to incldue
       this symbol in your regular expression.')
  );
  $form['advanced']['re_uname'] = array('#type' => 'textfield',
    '#title' => t('Regular expression for the unique name'), '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature name from the FASTA definition line. For example, for a
       defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the unique name would be "^.*?\|(.*)$").  All FASTA
       definition lines begin with the ">" symbol.  You do not need to incldue
       this symbol in your regular expression.')
  );

  // Advanced database cross-reference options
  $form['advanced']['db'] = array('#type' => 'fieldset',
    '#title' => t('External Database Reference'), '#weight' => 6, '#collapsed' => TRUE
  );
  $form['advanced']['db']['re_accession'] = array('#type' => 'textfield',
    '#title' => t('Regular expression for the accession'), '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
    '#weight' => 2
  );

  // Get the list of databases, keyed by db_id.
  $sql = "SELECT * FROM {db} ORDER BY name";
  $db_rset = chado_query($sql);
  $dbs = array();
  $dbs[''] = '';
  while ($db = $db_rset->fetchObject()) {
    $dbs[$db->db_id] = "$db->name";
  }
  $form['advanced']['db']['db_id'] = array('#title' => t('External Database'),
    '#type' => 'select',
    '#description' => t("Plese choose an external database for which these sequences have a cross reference."),
    '#required' => FALSE, '#options' => $dbs, '#weight' => 1
  );

  $form['advanced']['relationship'] = array('#type' => 'fieldset', '#title' => t('Relationships'),
    '#weight' => 6, '#collapsed' => TRUE
  );
  // Relationship types offered to the user; the keys are the submitted values.
  $rels = array();
  $rels[''] = '';
  $rels['part_of'] = 'part of';
  $rels['derives_from'] = 'produced by (derives from)';

  // Advanced references options
  $form['advanced']['relationship']['rel_type'] = array('#title' => t('Relationship Type'),
    '#type' => 'select',
    '#description' => t("Use this option to create associations, or relationships between the
                        features of this FASTA file and existing features in the database. For
                        example, to associate a FASTA file of peptides to existing genes or transcript sequence,
                        select the type 'produced by'. For a CDS sequences select the type 'part of'"),
    '#required' => FALSE, '#options' => $rels, '#weight' => 5
  );
  $form['advanced']['relationship']['re_subject'] = array('#type' => 'textfield',
    '#title' => t('Regular expression for the parent'), '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the unique
                         name needed to identify the existing sequence for which the
                         relationship type selected above will apply.'), '#weight' => 6
  );
  $form['advanced']['relationship']['parent_type'] = array('#type' => 'textfield',
    '#title' => t('Parent Type'), '#required' => FALSE,
    '#description' => t('Please enter the Sequence Ontology term for the parent.  For example
                         if the FASTA file being loaded is a set of proteins that are
                         products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
                         this type must match the type for already loaded features.'),
    '#weight' => 7
  );

  $form['button'] = array('#type' => 'submit', '#value' => t('Import FASTA file'), '#weight' => 10
  );
  return $form;
}
Tripal Search

function tripal_feature_fasta_load_form

Related topics

File

Code