function chado_node_sync_records

2.x tripal_core.chado_nodes.api.inc	`chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id = FALSE, $types = array(), $ids = array(), $linking_table = FALSE, $node_type = FALSE, $job_id = NULL)`
3.x tripal_core.chado_nodes.api.inc	`chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id = FALSE, $types = array(), $ids = array(), $linking_table = FALSE, $node_type = FALSE, $job_id = NULL)`

Generic function for syncing records in Chado with Drupal nodes.

Parameters

$base_table: The name of the Chado table containing the record that should be synced

$max_sync: Optional: A numeric value to indicate the maximum number of records to sync.

$organism_id: Optional: Limit the list of records to be synced to only those that are associated with this organism_id. If the record is not associated with an organism then this field is not needed.

$types: Optional: Limit the list of records to be synced to only those that match the types listed in this array.

$ids: Optional: Limit the list of records to be synced to only those whose primary key value matches one of the IDs provided in this array.

$linking_table: Optional: Tripal maintains "linking" tables in the Drupal schema to link Drupal nodes with Chado records. By default these tables are named as 'chado_' . $base_table. But if for some reason the linking table is not named in this way then it can be provided by this argument.

$node_type: Optional: Tripal maintains "linking" tables in the Drupal schema to link Drupal nodes with Chado records. By default, Tripal expects that the node_type and linking table are named the same. However, if this is not the case, you can provide the node type name here.

$job_id: Optional. Used by the Tripal Jobs system when running this function as a job. It is not needed otherwise.

File

tripal_core/api/tripal_core.chado_nodes.api.inc, line 787: API to handle much of the common functionality implemented when creating a drupal node type.

Code

/**
 * Generic function for syncing records in Chado with Drupal nodes.
 *
 * Selects every record of $base_table that has no row in the linking table
 * (optionally narrowed by type, primary key and organism), then creates and
 * saves one Drupal node per record. Progress and errors are printed to
 * standard output; nothing is returned.
 *
 * @param $base_table
 *   The name of the Chado table containing the records to sync. Its primary
 *   key is assumed to be named "{$base_table}_id".
 * @param $max_sync
 *   Optional. Maximum number of records to sync. Cast to an integer; values
 *   that are not greater than zero mean "no limit".
 * @param $organism_id
 *   Optional. Only sync records associated with this organism_id.
 * @param $types
 *   Optional. Array of cvterm names; only records whose type_id resolves to
 *   one of these names are synced.
 * @param $ids
 *   Optional. Array of primary key values; only these records are synced.
 * @param $linking_table
 *   Optional. Name of the Drupal table linking nodes to Chado records.
 *   Defaults to 'chado_' . $base_table.
 * @param $node_type
 *   Optional. The node type to create. Defaults to 'chado_' . $base_table.
 * @param $job_id
 *   Optional. Tripal job ID; when set, job progress is reported roughly every
 *   1% of the records.
 */
function chado_node_sync_records($base_table, $max_sync = FALSE,
  $organism_id = FALSE, $types = array(), $ids = array(),
  $linking_table = FALSE, $node_type = FALSE, $job_id = NULL) {

  global $user;
  $base_table_id = $base_table . '_id';

  if (!$linking_table) {
    $linking_table = 'chado_' . $base_table;
  }
  if (!$node_type) {
    $node_type = 'chado_' . $base_table;
  }

  print "\nSync'ing $base_table records.  ";

  // Pieces of the SELECT statement. $where_clauses and $where_args are keyed
  // by category: clauses inside a category are OR'ed together, categories are
  // AND'ed with each other.
  $select = array("$base_table.*");
  $joins = array();
  $where_clauses = array();
  $where_args = array();

  // Human readable summary of the filters in effect.
  $restrictions = '';

  // If types are supplied then restrict by cvterm name.
  if (!empty($types)) {
    $restrictions .= "  Type(s): " . implode(', ', $types) . "\n";

    $select[] = 'cvterm.name as cvtname';
    $joins[] = "LEFT JOIN {cvterm} cvterm ON $base_table.type_id = cvterm.cvterm_id";
    // Placeholders are numbered rather than derived from the type name: a
    // name may contain characters that are not valid in a placeholder, and
    // two different names could otherwise map onto the same placeholder.
    foreach (array_values($types) as $index => $type) {
      $placeholder = ':type_name_' . $index;
      $where_clauses['type'][] = "cvterm.name = $placeholder";
      $where_args['type'][$placeholder] = $type;
    }
  }

  // If IDs have been supplied then restrict by primary key.
  if ($ids) {
    $restrictions .= "  Specific Records: " . count($ids) . " recored(s) specified.\n";
    // Numbered placeholders for the same reason as the type names above.
    foreach (array_values($ids) as $index => $id) {
      $placeholder = ':id_' . $index;
      $where_clauses['id'][] = "$base_table.$base_table_id = $placeholder";
      $where_args['id'][$placeholder] = $id;
    }
  }

  // If an organism is supplied then restrict to it.
  if ($organism_id) {
    $organism = chado_select_record('organism', array('*'), array('organism_id' => $organism_id));
    // The lookup is only used for the summary; do not assume it found a row.
    if (!empty($organism)) {
      $restrictions .= "  Organism: " . $organism[0]->genus . " " . $organism[0]->species . "\n";
    }
    else {
      $restrictions .= "  Organism ID: $organism_id\n";
    }

    $select[] = 'organism.*';
    $joins[] = "LEFT JOIN {organism} organism ON organism.organism_id = $base_table.organism_id";
    $where_clauses['organism'][] = 'organism.organism_id = :organism_id';
    $where_args['organism'][':organism_id'] = $organism_id;
  }

  // Allow the module that owns the node type to alter the query pieces.
  $hook_query_alter = $node_type . '_chado_node_sync_select_query';
  if (function_exists($hook_query_alter)) {
    $query_parts = array(
      'select' => $select,
      'joins' => $joins,
      'where_clauses' => $where_clauses,
      'where_args' => $where_args,
    );
    $update = call_user_func($hook_query_alter, $query_parts);
    // Now add in any new changes. Keys the hook left out of its return value
    // keep the values that were passed in.
    if ($update and is_array($update)) {
      $update += $query_parts;
      $select = $update['select'];
      $joins = $update['joins'];
      $where_clauses = $update['where_clauses'];
      $where_args = $update['where_args'];
    }
  }

  // Build Query, we do a left join on the chado_xxxx table in the Drupal schema
  // so that if no criteria are specified we only get those items that have not
  // yet been synced. The linking table is written in square brackets, which
  // is the chado_query() notation for a table in the Drupal schema.
  // @todo: re-write to support external chado databases.
  $query = 'SELECT ' . implode(', ', $select) . ' ' .
    'FROM {' . $base_table . '} ' . $base_table . ' ' . implode(' ', $joins) . ' ' .
    "  LEFT JOIN [$linking_table] CT ON CT.$base_table_id = $base_table.$base_table_id " .
    "WHERE CT.$base_table_id IS NULL ";

  // Extend the where clause if needed.
  $sql_args = array();
  foreach ($where_clauses as $category => $items) {
    if (empty($items)) {
      // An empty group would produce invalid SQL ("AND ()").
      continue;
    }
    $query .= ' AND (' . implode(' OR ', $items) . ') ';
    if (!empty($where_args[$category])) {
      $sql_args = array_merge($sql_args, $where_args[$category]);
    }
  }

  // Order by the base table's primary key so that a LIMIT selects a
  // predictable subset. The column is qualified because the linking table
  // has a column with the same name.
  $query .= " ORDER BY $base_table.$base_table_id";

  // If a maximum number to sync is supplied. The value is cast to an integer
  // because it is placed directly into the SQL text.
  $max_sync = (int) $max_sync;
  if ($max_sync > 0) {
    $query .= " LIMIT $max_sync";
    $restrictions .= "  Limited to $max_sync records.\n";
  }

  if ($restrictions) {
    print "Records matching these criteria will be synced: \n$restrictions";
  }
  else {
    print "\n";
  }

  // Execute the query.
  $results = chado_query($query, $sql_args);

  // Report progress roughly every 1% of the records, but at least once per
  // record for small result sets.
  $count = $results->rowCount();
  $interval = max(1, intval($count * 0.01));

  print "\n$count $base_table records found.\n";

  $i = 0;
  // The catch block below rolls this transaction back, so it has to exist
  // before the first node is saved.
  $transaction = db_transaction();
  print "\nNOTE: Syncing is performed using a database transaction. \n" .
    "If the sync fails or is terminated prematurely then the entire set of \n" .
    "synced items is rolled back and will not be found in the database\n\n";
  try {
    $percent = 0;
    foreach ($results as $record) {
      // Update the job status every 1% of the records.
      if ($job_id and $i % $interval == 0) {
        $percent = sprintf("%.2f", (($i + 1) / $count) * 100);
        print "Syncing $base_table " . ($i + 1) . " of $count (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
        tripal_set_job_progress($job_id, intval(($i / $count) * 100));
      }

      // Check if the record is already in the chado linking table
      // (ie: check to see if it is already linked to a node).
      $result = db_select($linking_table, 'lnk')
        ->fields('lnk', array('nid'))
        ->condition($base_table_id, $record->{$base_table_id}, '=')
        ->execute()
        ->fetchObject();

      if (empty($result)) {
        // Create generic new node.
        $new_node = new stdClass();
        $new_node->type = $node_type;
        $new_node->uid = $user->uid;
        $new_node->{$base_table_id} = $record->{$base_table_id};
        $new_node->$base_table = $record;
        $new_node->language = LANGUAGE_NONE;

        // TODO: should we get rid of this hook and use hook_node_presave() instead?
        // Allow the base module to set additional fields as needed.
        $hook_create_new_node = $node_type . '_chado_node_sync_create_new_node';
        if (function_exists($hook_create_new_node)) {
          $new_node = call_user_func($hook_create_new_node, $new_node, $record);
        }

        // Validate and save the new node.
        $form = array();
        $form_state = array();
        node_validate($new_node, $form, $form_state);

        if (!form_get_errors()) {
          $node = node_submit($new_node);
          // If there are memory leaks on the node_save it is probably
          // caused by the hook_node_insert() function.
          node_save($node);
        }
        else {
          // The title is only set if the create-new-node hook provided one.
          $title = isset($new_node->title) ? $new_node->title : '';
          throw new Exception(t('Failed to insert @table: %title', array('@table' => $base_table, '%title' => $title)));
        }
      }
      $i++;
    }
    print "\n\nComplete!\n";
  }
  catch (Exception $e) {
    $transaction->rollback();
    print "\n"; // make sure we start errors on new line
    watchdog_exception('trp-fsync', $e);
    print "FAILED: Rolling back database changes...\n";
  }
}

Tripal Search

function chado_node_sync_records

Parameters

Related topics

File

Code