function process_data_array_for_line

2.x tripal_bulk_loader.loader.inc process_data_array_for_line($priority, &$data, &$default_data, $addt)
3.x tripal_bulk_loader.loader.inc process_data_array_for_line($priority, &$data, &$default_data, $addt)
1.x tripal_bulk_loader.loader.inc process_data_array_for_line($priority, &$data, &$default_data, $addt)

Process the data array for a given line

Parameters

$addt: Requires: 'field2column', 'record2priority', 'line', 'line_num', 'group_index', 'node', 'nid'

Related topics

1 call to process_data_array_for_line()
tripal_bulk_loader_load_data in tripal_bulk_loader/includes/tripal_bulk_loader.loader.inc
Tripal Bulk Loader

File

tripal_bulk_loader/includes/tripal_bulk_loader.loader.inc, line 491
Handles the actual loading of data.

Code

/**
 * Process the data array for a given line.
 *
 * Resolves the values for the record stored at $data[$priority], validates
 * them against the template and the Chado table schema, and then performs
 * the select, insert or update that the record's 'mode' asks for. The
 * resulting values are written back into $data[$priority]['values_array'].
 *
 * @param $priority
 *   The key of the record to process within $data and $default_data.
 * @param $data
 *   The array of records for the current line, keyed by priority. The entry
 *   for $priority must provide 'table', 'values_array', 'required', 'mode'
 *   and 'record_id'. The keys 'need_further_processing', 'optional',
 *   'select_optional', 'select_if_duplicate', 'update_if_duplicate',
 *   'inserted' and 'selected' are read when present. On return the entry
 *   may have 'values_array', 'is_duplicate' and 'error' updated.
 * @param $default_data
 *   The template records, keyed by priority. Receives the 'inserted' flag
 *   for insert_once records and the 'selected' flag plus 'values_default'
 *   for select_once records.
 * @param $addt
 *   An array (cast to an object internally) that requires the keys:
 *   'field2column', 'record2priority', 'line', 'line_num', 'group_index',
 *   'node' and 'nid'.
 *
 * @return
 *   TRUE if the record was processed without errors, FALSE otherwise.
 */
function process_data_array_for_line($priority, &$data, &$default_data, $addt) {
  $table_data = $data[$priority];
  $addt = (object) $addt;
  $no_errors = TRUE;

  $table = $table_data['table'];
  $values = $table_data['values_array'];

  // These flags are not guaranteed to be present on every record, so read
  // them once with a default rather than indexing the array directly.
  $is_optional = !empty($table_data['optional']);
  $select_optional = isset($table_data['select_optional']) ? $table_data['select_optional'] : 0;

  // Populate the values array with real values, either from the input data
  // file line or from the foreign key / referral record.
  if (array_key_exists('need_further_processing', $table_data) and $table_data['need_further_processing']) {
    if (array_key_exists($priority, $addt->field2column)) {
      $values = tripal_bulk_loader_add_spreadsheetdata_to_values($values, $addt->line, $addt->field2column[$priority]);
    }
    $values = tripal_bulk_loader_add_foreignkey_to_values($table_data, $values, $data, $addt->record2priority, $addt->nid, $priority, $default_data);
  }

  $values = tripal_bulk_loader_regex_tranform_values($values, $table_data, $addt->line);

  // Get the table description. Without it nothing below can be validated.
  $table_desc = chado_get_schema($table);
  if (!$table_desc) {
    $msg = "Tripal does not know about the table named '%table'. If this is a custom table,
      please define it first";
    tripal_bulk_loader_throw_error($msg, array('%table' => $table), TRIPAL_ERROR);
    $data[$priority]['error'] = TRUE;
    // Return an explicit FALSE so that every exit path yields a boolean.
    return FALSE;
  }

  // Check that the template required fields are present. If a required field
  // is missing and this is an optional record then the record is skipped,
  // otherwise an error is raised.
  $skip_optional = 0;
  foreach ($table_data['required'] as $field => $required) {
    if (!$required) {
      continue;
    }
    // The field is missing if it has no value or is an empty array.
    $is_missing = !isset($values[$field]) or
      (is_array($values[$field]) and count($values[$field]) == 0);
    if (!isset($values[$field]) or
      (is_array($values[$field]) and count($values[$field]) == 0)) {
      // For backwards compatibility a 'mode' containing 'optional' also marks
      // the record as optional.
      if ($is_optional or preg_match('/optional/', $table_data['mode']) or $select_optional) {
        $skip_optional = 1;
        // Empty the values array: no select/insert can be done, and the
        // values must not leak into a later FK relationship on this record.
        $values = array();
      }
      else {
        $msg = "Line " . $addt->line_num . ' "' . $table_data['record_id'] .
          '" (' . $table_data['mode'] . ') Missing template required value: ' . $table . '.' . $field;
        tripal_bulk_loader_throw_error($msg, array(), TRIPAL_WARNING);
        $data[$priority]['error'] = TRUE;
        $no_errors = FALSE;
      }
    }
  }

  // For an insert, check that all database required fields are present in the
  // values array. 'optional' is checked in the mode for backwards
  // compatibility since it used to be a type of insert.
  if (!$skip_optional and (preg_match('/insert/', $table_data['mode']) or
      preg_match('/optional/', $table_data['mode']))) {
    foreach ($table_desc['fields'] as $field => $def) {
      // A field is considered missing if it cannot be null, no value was
      // supplied, it has no default and it is not a 'serial' column.
      if (array_key_exists('not null', $def) and $def['not null'] == 1 and
        !array_key_exists($field, $values) and
        !array_key_exists('default', $def) and
        strcmp($def['type'], 'serial') != 0) {
        $msg = "Line " . $addt->line_num . ' ' . $table_data['record_id'] .
          ' (' . $table_data['mode'] . ') Missing Database Required Value: ' . $table . '.' . $field;
        tripal_bulk_loader_throw_error($msg, array(), TRIPAL_ERROR);
        $data[$priority]['error'] = TRUE;
        $no_errors = FALSE;
      }
    }
  }

  // Add the updated values array into the data array.
  $data[$priority]['values_array'] = $values;

  // If there was an error already then do not touch the database.
  if (array_key_exists('error', $data[$priority]) and $data[$priority]['error']) {
    tripal_bulk_loader_throw_error('Skipping processing of %table due to previous errors', array('%table' => $table), TRIPAL_NOTICE);
    return $no_errors;
  }

  // Skipping an optional record is an expected state, not an error, so it is
  // deliberately not reported.
  if ($skip_optional) {
    return $no_errors;
  }

  // Already inserted (insert_once): nothing more to do, and not reported.
  if (array_key_exists('inserted', $table_data) and $table_data['inserted']) {
    return $no_errors;
  }

  // Already selected (select_once): reuse the values kept in $default_data.
  // NOTE(review): the select_once branch at the end of this function stores
  // its result under 'values_default' while this reads 'values_array' --
  // confirm which key is intended.
  if (array_key_exists('selected', $table_data) and $table_data['selected']) {
    $data[$priority]['values_array'] = $default_data[$priority]['values_array'];
    return $no_errors;
  }

  // Make sure the duplicate-handling options always have a value.
  if (!array_key_exists('select_if_duplicate', $table_data)) {
    $table_data['select_if_duplicate'] = 0;
  }
  if (!array_key_exists('update_if_duplicate', $table_data)) {
    $table_data['update_if_duplicate'] = 0;
  }

  // If duplicate handling is enabled then look for an existing record.
  // "insert_unique" is checked for backwards compatibility although that mode
  // no longer exists.
  $data[$priority]['is_duplicate'] = 0;
  if (preg_match('/insert_unique/', $table_data['mode']) or
    $table_data['select_if_duplicate'] == 1 or
    $table_data['update_if_duplicate'] == 1) {
    $options = array('is_duplicate' => TRUE, 'print_errors' => TRUE);
    $duplicate = chado_select_record($table, array_keys($table_desc['fields']), $values, $options);

    // If exactly one duplicate exists then substitute its values so that
    // later records which depend on this one can get the values they need.
    // TODO: what to do if more than one record is returned when checking for
    // a duplicate?
    if ($duplicate and is_array($duplicate) and count($duplicate) == 1) {
      $dup_record = $duplicate[0];
      // Save the duplicate for later: an update_if_duplicate uses it as the
      // match for the update.
      $data[$priority]['is_duplicate'] = (array) $dup_record;

      // With a single field just use the bare value rather than an array.
      // This prevents the chado select/insert/update functions from
      // recursing on foreign keys and makes the loader faster.
      if (count((array) $dup_record) == 1) {
        foreach ($dup_record as $value) {
          $data[$priority]['values_array'] = $value;
        }
      }
      // With multiple fields convert the record into a plain array.
      else {
        $new_values = array();
        foreach ($dup_record as $key => $value) {
          $new_values[$key] = $value;
        }
        $data[$priority]['values_array'] = $new_values;
      }

      // A select_if_duplicate is finished once the duplicate is found.
      if ($table_data['select_if_duplicate'] == 1) {
        return $no_errors;
      }
    }
  }

  if (!preg_match('/select/', $table_data['mode'])) {
    // Insert or update the record.
    $options = array();
    // Use a prepared statement?
    if (variable_get('tripal_bulk_loader_prepare', TRUE)) {
      $options['statement_name'] = 'record_' . $addt->nid . '_' . $priority;
    }
    // Skip chado_insert_record() built-in validation?
    if (variable_get('tripal_bulk_loader_skip_validation', FALSE)) {
      $options['skip_validation'] = TRUE;
    }

    // Stays FALSE when no insert/update could be attempted, so the failure
    // check below never reads an undefined variable.
    $record = FALSE;

    if ($table_data['update_if_duplicate'] == 1) {
      if (array_key_exists('statement_name', $options)) {
        $options['statement_name'] = 'upd_' . $options['statement_name'];
      }
      // This should have been set by the duplicate check above.
      $match = $data[$priority]['is_duplicate'];
      // If it was not, build a match array from the key fields of the record.
      if (empty($match)) {
        $match = array();
        // First check to see if we have values for the primary key.
        foreach ($table_desc['primary key'] as $k_field) {
          if (!empty($values[$k_field])) {
            $match[$k_field] = $values[$k_field];
          }
        }
        // Otherwise use the fields that are part of the unique keys.
        if (empty($match)) {
          foreach ($table_desc['unique keys'] as $u_keys) {
            foreach ($u_keys as $u_field) {
              if (!empty($values[$u_field])) {
                $match[$u_field] = $values[$u_field];
              }
            }
          }
        }
      }

      if (!empty($match)) {
        // Check whether the record already exists via a select.
        $results = chado_select_record($table, array_keys($table_desc['fields']), $match, array('print_errors' => TRUE));
        $options['print_errors'] = TRUE;
        if (empty($results)) {
          // It does not exist, so insert it. The statement name is only
          // present when prepared statements are enabled.
          if (array_key_exists('statement_name', $options)) {
            $options['statement_name'] = 'ins_' . $options['statement_name'];
          }
          $record = chado_insert_record($table, $values, $options);
        }
        else {
          $options['return_record'] = TRUE;
          $record = chado_update_record($table, $match, $values, $options);
        }
      }
      else {
        $msg = "\nLine " . $addt->line_num . ' ' . $table_data['record_id'] . ' (' .
          $table_data['mode'] . ') Unable to update record since none of the unique key or primary key fields were available ' .
          ' where values:' . print_r($values, TRUE);

        tripal_bulk_loader_throw_error($msg, array(), TRIPAL_ERROR);
        $data[$priority]['error'] = TRUE;
        $no_errors = FALSE;
      }
    }
    else {
      $options['print_errors'] = TRUE;
      $record = chado_insert_record($table, $values, $options);
    }

    if (!$record) {
      // The insert/update was not successful.
      $msg = "\nLine " . $addt->line_num . ' ' . $table_data['record_id'] . ' (' .
        $table_data['mode'] . ') Unable to insert record into ' . $table .
        ' where values:' . print_r($values, TRUE);

      tripal_bulk_loader_throw_error($msg, array(), TRIPAL_ERROR);
      $data[$priority]['error'] = TRUE;
      $no_errors = FALSE;
    }
    else {
      // If mode=insert_once then ensure we only insert it once.
      if (preg_match('/insert_once/', $table_data['mode'])) {
        $default_data[$priority]['inserted'] = TRUE;
      }

      // Keep track of the inserted ids in tripal_bulk_loader_inserted.
      if ($addt->node->keep_track_inserted) {
        $insert_record = db_query(
          "SELECT * FROM {tripal_bulk_loader_inserted} WHERE table_inserted_into=:table AND nid=:nid",
          array(
            ':table' => $table,
            ':nid' => $addt->nid,
          ))->fetchObject();
        if ($insert_record) {
          // Append to the existing comma-separated list of ids.
          $insert_record->ids_inserted .= ',' . $record[$table_desc['primary key'][0]];
          drupal_write_record('tripal_bulk_loader_inserted', $insert_record, 'tripal_bulk_loader_inserted_id');
        }
        else {
          $insert_record = array(
            'nid' => $addt->nid,
            'table_inserted_into' => $table,
            'table_primary_key' => $table_desc['primary key'][0],
            'ids_inserted' => $record[$table_desc['primary key'][0]],
          );
          drupal_write_record('tripal_bulk_loader_inserted', $insert_record);
        }
      }

      // Substitute the values array with the primary key value if the table
      // has a single-field primary key.
      if (array_key_exists('primary key', $table_desc)) {
        if (count($table_desc['primary key']) == 1) {
          $pkey_field = $table_desc['primary key'][0];
          $data[$priority]['values_array'] = $record[$pkey_field];
        }
      }
      else {
        // No primary key: keep the whole record as the values array.
        $data[$priority]['values_array'] = $record;
        $values = $record;
      }
    }
  }
  else {
    // Perform a select and get the matches for it.
    $matches = array();
    if (is_array($values) and count($values) > 0) {
      $matches = chado_select_record($table, array_keys($table_desc['fields']), $values, array('print_errors' => TRUE));
    }
    // A failed select does not return an array. Treat it as "no matches" so
    // that it is reported below instead of breaking count().
    if (!is_array($matches)) {
      $matches = array();
    }
    $num_matches = count($matches);

    if ($num_matches == 0) {
      // No record on select: an error unless the select is optional.
      if ($select_optional != 1) {
        $msg = "\nLine " . $addt->line_num . ' ' . $table_data['record_id'] . ' (' . $table_data['mode'] . ') No Matching record in ' . $table . ' where values:' . print_r($values, TRUE);
        tripal_bulk_loader_throw_error($msg, array(), TRIPAL_ERROR);
        $data[$priority]['error'] = TRUE;
        $no_errors = FALSE;
      }
      // Optional select without a match: empty the values for any records
      // with an FK relationship on this one.
      else {
        $data[$priority]['values_array'] = NULL;
      }
    }
    elseif ($num_matches > 1) {
      // More than one record matches: fail unless the select is optional.
      if ($select_optional != 1) {
        $msg = "\nLine " . $addt->line_num . ' ' . $table_data['record_id'] . ' (' . $table_data['mode'] . ') Too many matching records in ' . $table . ' where values:' . print_r($values, TRUE);
        tripal_bulk_loader_throw_error($msg, array(), TRIPAL_WARNING);
        $data[$priority]['error'] = TRUE;
        $no_errors = FALSE;
      }
      // Optional select with too many matches: empty the values for any
      // records with an FK relationship on this one.
      else {
        $data[$priority]['values_array'] = NULL;
      }
    }

    // If mode=select_once then ensure we only select it once.
    if (preg_match('/select_once/', $table_data['mode'])) {
      $default_data[$priority]['selected'] = TRUE;

      // Save the first matching record; stays empty when nothing matched.
      if (array_key_exists('primary key', $table_desc)) {
        $new_values = array();
        if ($num_matches > 0) {
          foreach ($matches[0] as $key => $value) {
            $new_values[$key] = $value;
          }
        }
        $default_data[$priority]['values_default'] = $new_values;
      }
    }
  }

  return $no_errors;
}