public function GFF3Importer::form
3.x GFF3Importer.inc | public GFF3Importer::form($form, &$form_state) |
Overrides TripalImporter::form
See also
File
- tripal_chado/
includes/ TripalImporter/ GFF3Importer.inc, line 49
Class
Code
/**
 * Builds the GFF3-specific elements of the importer form.
 *
 * Adds a required organism selector and an "Additional Options" fieldset
 * holding the start line, landmark type, alternate ID attribute, protein
 * naming, transaction, import mode, organism creation and alignment target
 * settings.
 *
 * @param array $form
 *   The form array to which the importer elements are added.
 * @param array $form_state
 *   The form state, passed by reference. It is not read or modified here.
 *
 * @return array
 *   The form array with the GFF3 importer elements added.
 *
 * @see TripalImporter::form()
 */
public function form($form, &$form_state) {
  // Build the organism options, keyed by organism_id. The leading empty
  // option lets the optional target-organism selector stay unset.
  $sql = "SELECT * FROM {organism} ORDER BY genus, species";
  $org_rset = chado_query($sql);
  $organisms = array();
  $organisms[''] = '';
  while ($organism = $org_rset->fetchObject()) {
    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  }

  // Note: '#type' values are machine names and must never be passed through
  // t(); a translated type would not match any registered form element.
  $form['organism_id'] = array(
    '#title' => t('Organism'),
    '#type' => 'select',
    '#description' => t("Choose the organism to which these sequences are associated"),
    '#required' => TRUE,
    '#options' => $organisms,
  );

  // Advanced Options
  $form['advanced'] = array(
    '#type' => 'fieldset',
    '#title' => t('Additional Options'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );

  $form['advanced']['line_number'] = array(
    '#type' => 'textfield',
    '#title' => t('Start Line Number'),
    '#description' => t('Enter the line number in the GFF file where you would like to begin processing. The
first line is line number 1. This option is useful for examining loading problems with large GFF files.'),
    '#size' => 10,
  );

  $form['advanced']['landmark_type'] = array(
    '#title' => t('Landmark Type'),
    '#type' => 'textfield',
    '#description' => t("Optional. Use this field to specify a Sequence Ontology type
for the landmark sequences in the GFF file (e.g. 'chromosome'). If the GFF file
contains a '##sequence-region' line that describes the landmark sequences to
which all others are aligned and a type is provided here then the features
will be created if they do not already exist. If they do exist then this
field is not used."),
  );

  $form['advanced']['alt_id_attr'] = array(
    '#title' => t('ID Attribute'),
    '#type' => 'textfield',
    '#description' => t("Optional. Sometimes lines in the GFF file are missing the
required ID attribute that specifies the unique name of the feature, but there
may be another attribute that can uniquely identify the feature. If so,
you may specify the name of the attribute to use for the name."),
  );

  // Protein naming: a regular expression applied to the mRNA unique name
  // and the replacement string used to derive the protein name.
  $form['advanced']['protein_names'] = array(
    '#type' => 'fieldset',
    '#title' => t('Protein Names'),
    '#collapsible' => TRUE,
    '#collapsed' => FALSE,
    '#weight' => 5,
  );

  $form['advanced']['protein_names']['re_help'] = array(
    '#type' => 'item',
    '#markup' => t('A regular expression is an advanced method for extracting information from a string of text.
If your GFF3 file does not contain polypeptide (or protein) features, but contains CDS features, proteins will be automatically created.
By default the loader will give each protein a name based on the name of the corresponding mRNA followed by the "-protein" suffix.
If you want to customize the name of the created protein, you can use the following regex.'),
  );

  $form['advanced']['protein_names']['re_mrna'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the mRNA name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract portions of
the mRNA unique name. For example, for a
mRNA with a unique name finishing by -RX (e.g. SPECIES0000001-RA),
the regular expression would be, "^(.*?)-R([A-Z]+)$".'),
  );

  // The example pattern quoted here must match the one shown for 're_mrna'.
  $form['advanced']['protein_names']['re_protein'] = array(
    '#type' => 'textfield',
    '#title' => t('Replacement string for the protein name'),
    '#required' => FALSE,
    '#description' => t('Enter the replacement string that will be used to create
the protein name based on the mRNA regular expression. For example, for a
mRNA regular expression "^(.*?)-R([A-Z]+)$", the corresponding protein regular
expression would be "$1-P$2".'),
  );

  $form['advanced']['use_transaction'] = array(
    '#type' => 'checkbox',
    '#title' => t('Use a transaction'),
    '#required' => FALSE,
    '#description' => t('Use a database transaction when loading the GFF file. If an error occurs
the entire dataset loaded prior to the failure will be rolled back and will not be available
in the database. If this option is unchecked and failure occurs all records up to the point
of failure will be present in the database.'),
    '#default_value' => 1,
  );

  $form['advanced']['add_only'] = array(
    '#type' => 'checkbox',
    '#title' => t('Import only new features'),
    '#required' => FALSE,
    '#description' => t('The job will skip features in the GFF file that already
exist in the database and import only new features.'),
  );

  // Checked by default. A checkbox default is a single value, so it is
  // declared once only.
  $form['advanced']['update'] = array(
    '#type' => 'checkbox',
    '#title' => t('Import all and update'),
    '#required' => FALSE,
    '#description' => t('Existing features will be updated and new features will be added. Attributes
for a feature that are not present in the GFF but which are present in the
database will not be altered.'),
    '#default_value' => 1,
  );

  // SPF: there are bugs in refreshing and removing features. The bugs arise
  // if a feature in the GFF does not have a uniquename. GenSAS will auto
  // generate this uniquename and it will not be the same as a previous
  // load because it uses the date. This causes orphaned CDS/exons, UTRs
  // to be left behind during a delete or refresh. So, the short term
  // fix is to remove these options.
  // $form['import_options']['refresh']= array(
  // '#type' => 'checkbox',
  // '#title' => t('Import all and replace'),
  // '#required' => FALSE,
  // '#description' => t('Existing features will be updated and feature properties not
  // present in the GFF file will be removed.'),
  // );
  // $form['import_options']['remove']= array(
  // '#type' => 'checkbox',
  // '#title' => t('Delete features'),
  // '#required' => FALSE,
  // '#description' => t('Features present in the GFF file that exist in the database
  // will be removed rather than imported'),
  // );

  $form['advanced']['create_organism'] = array(
    '#type' => 'checkbox',
    '#title' => t('Create organism'),
    '#required' => FALSE,
    '#description' => t('The Tripal GFF loader supports the "organism" attribute. This allows features of a
different organism to be aligned to the landmark sequence of another species. The format of the
attribute is "organism=[genus]:[species]", where [genus] is the organism\'s genus and [species] is the
species name. Check this box to automatically add the organism to the database if it does not already exist.
Otherwise lines with an organism attribute where the organism is not present in the database will be skipped.'),
  );

  // Settings for the target sequences of alignment (match / match_part)
  // features.
  $form['advanced']['targets'] = array(
    '#type' => 'fieldset',
    '#title' => t('Targets'),
    '#collapsible' => TRUE,
    '#collapsed' => FALSE,
    '#weight' => 1,
  );

  $form['advanced']['targets']['adesc'] = array(
    '#markup' => t("When alignments are represented in the GFF file (e.g. such as
alignments of cDNA sequences to a whole genome, or blast matches), they are
represented using two feature types: 'match' (or cDNA_match, EST_match, etc.)
and 'match_part'. These features may also have a 'Target' attribute to
specify the sequence that is being aligned.
However, the organism to which the aligned sequence belongs may not be present in the
GFF file. Here you can specify the organism and feature type of the target sequences.
The options here will apply to all targets unless the organism and type are explicitly
set in the GFF file using the 'target_organism' and 'target_type' attributes."),
  );

  // Reuses the organism list built above; not required, so the empty option
  // remains a valid choice.
  $form['advanced']['targets']['target_organism_id'] = array(
    '#title' => t('Target Organism'),
    '#type' => 'select',
    '#description' => t("Optional. Choose the organism to which target sequences belong.
Select this only if target sequences belong to a different organism than the
one specified above. And only choose an organism here if all of the target sequences
belong to the same species. If the targets in the GFF file belong to multiple
different species then the organism must be specified using the 'target_organism=genus:species'
attribute in the GFF file."),
    '#options' => $organisms,
  );

  $form['advanced']['targets']['target_type'] = array(
    '#title' => t('Target Type'),
    '#type' => 'textfield',
    '#description' => t("Optional. If the unique name for a target sequence is not unique (e.g. a protein
and an mRNA have the same name) then you must specify the type for all targets in the GFF file. If
the targets are of different types then the type must be specified using the 'target_type=type' attribute
in the GFF file. This must be a valid Sequence Ontology (SO) term."),
  );

  $form['advanced']['targets']['create_target'] = array(
    '#type' => 'checkbox',
    '#title' => t('Create Target'),
    '#required' => FALSE,
    '#description' => t("If the target feature cannot be found, create one using the organism and type specified above, or
using the 'target_organism' and 'target_type' fields specified in the GFF file. Values specified in the
GFF file take precedence over those specified above."),
  );

  return $form;
}