function tripal_pub_remote_search_AGL

2.x tripal_pub.AGL.inc tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page)
3.x tripal_chado.pub_importer_AGL.inc tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page)
1.x AGL.inc tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id)

A hook for performing the search on the AGL database.

Parameters

$search_array: An array containing the search criteria for the search

$num_to_retrieve: Indicates the maximum number of publications to retrieve from the remote database

$page: Indicates the page to retrieve. This corresponds to a paged table, where each page has $num_to_retrieve publications.

Return value

An array of publications.

File

tripal_chado/includes/loaders/tripal_chado.pub_importer_AGL.inc, line 98
This file provides support for importing and parsing of results from the USDA National Agricultural Library (AGL) database. The functions here are used by both the publication importer setup form and the publication importer. The USDA AGL database…

Code

/**
 * Performs a search on the AGL database over Z39.50 using a CCL query.
 *
 * The criteria in $search_array are grouped by scope (title, author,
 * abstract, journal, id or any) and by whether they are negated, converted
 * to a single CCL search string, parsed into RPN by YAZ and then used to
 * count and retrieve the matching publications.
 *
 * @param $search_array
 *   An array containing the search criteria. The keys read here are
 *   'num_criteria', the optional 'days' (used as the publication year for
 *   AGL) and 'criteria', a 1-indexed list of arrays each holding
 *   'search_terms', 'scope', 'is_phrase' and 'operation'.
 * @param $num_to_retrieve
 *   The maximum number of publications to retrieve from the remote database.
 * @param $page
 *   The page to retrieve. This corresponds to a paged table where each page
 *   has $num_to_retrieve publications.
 *
 * @return
 *   The value returned by tripal_pub_AGL_range(). If the search string cannot
 *   be parsed, an array with 'total_records' => 0, 'search_str' => '' and an
 *   empty 'pubs' array is returned instead.
 */
function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
  // Get some values from the search array.
  $num_criteria = $search_array['num_criteria'];
  $days = array_key_exists('days', $search_array) ? $search_array['days'] : '';

  // Set some defaults. $search_array is passed by value, so this only
  // changes the local copy.
  $search_array['limit'] = $num_to_retrieve;

  // Maps each supported scope to the CCL qualifier that prefixes its group in
  // the search string. The 'any' scope is searched without a qualifier.
  $qualifiers = array(
    'title' => 'title=',
    'author' => 'author=',
    'abstract' => 'abstract=',
    'journal' => 'journal=',
    'id' => 'id=',
    'any' => '',
  );

  // To build the CCL search string we want a single entry for each scope and
  // also for the corresponding negated scope. But the search form allows the
  // user to have multiple rows of the same type. So each scope (and its
  // negated counterpart, used when NOT is selected as the op) is collected
  // into its own group, and the groups are joined at the end. The first
  // criterion of a group carries no op inside the group: its op is pushed out
  // to separate the group from the preceding one. The op of any second or
  // third criterion of the same group is included within the group.
  //
  // $groups is keyed by scope (prefixed with 'negate_' for negated scopes) and
  // PHP preserves insertion order, so the groups appear in the final string in
  // the order in which each was first seen.
  $groups = array();
  for ($i = 1; $i <= $num_criteria; $i++) {
    $criterion = $search_array['criteria'][$i];
    $search_terms = trim($criterion['search_terms']);
    $scope = $criterion['scope'];
    $is_phrase = $criterion['is_phrase'];
    $op = $criterion['operation'];
    if ($op) {
      $op = strtolower($op);
    }

    // If this is not a phrase then make sure the AND and OR are lower-case,
    // otherwise make sure the search terms are surrounded by quotes.
    if (!$is_phrase) {
      $search_terms = preg_replace('/ OR /', ' or ', $search_terms);
      $search_terms = preg_replace('/ AND /', ' and ', $search_terms);
    }
    else {
      $search_terms = "\"$search_terms\"";
    }

    // A 'not' operation moves the criterion into the negated group of its
    // scope. Inside that group the terms are joined with 'or', because the
    // whole group is negated with a single leading 'not'.
    $negated = ($op == 'not');
    if ($negated) {
      $op = 'or';
    }

    // Criteria with a scope that is not supported contribute nothing.
    if (!is_string($scope) || !isset($qualifiers[$scope])) {
      continue;
    }

    // Identifiers may be supplied with an 'AGL:' prefix, which is stripped.
    if ($scope == 'id') {
      $search_terms = preg_replace('/AGL:([^\s]*)/', '$1', $search_terms);
    }

    $key = $negated ? "negate_$scope" : $scope;
    if (!isset($groups[$key])) {
      $groups[$key] = array(
        'prefix' => $negated ? 'not' : $op,
        'qualifier' => $qualifiers[$scope],
        'terms' => "($search_terms) ",
      );
    }
    else {
      $groups[$key]['terms'] .= "$op ($search_terms) ";
    }
  }

  // Now build the CCL string in order.
  $ccl = '';
  foreach ($groups as $group) {
    $ccl .= $group['prefix'] . ' ' . $group['qualifier'] . '(' . $group['terms'] . ') ';
  }

  // For AGL the 'days' form element was converted to represent the year.
  if ($days) {
    $ccl .= "and year=($days)";
  }

  // Remove any preceding 'and' or 'or'.
  $ccl = preg_replace('/^\s*(and|or)/', '', $ccl);

  // yaz_connect() prepares for a connection to a Z39.50 server. This function
  // is non-blocking and does not attempt to establish a connection - it merely
  // prepares a connect to be performed later when yaz_wait() is called.
  // NAL Catalog
  //$yazc = yaz_connect('agricola.nal.usda.gov:7090/voyager');
  // NAL Article Citation Database
  $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager');

  // Use the USMARC record type. But OPAC is also supported by Agricola.
  yaz_syntax($yazc, "usmarc");

  // The search query is built using CCL, we need to first
  // configure it so it can map the attributes to defined identifiers.
  // The attribute set used by AGL can be found at the bottom of this page:
  // http://agricola.nal.usda.gov/help/z3950.html
  //
  // More in depth details:  http://www.loc.gov/z3950/agency/bib1.html
  //
  // CCL Syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
  //
  $fields = array(
    "title" => "u=4",
    "author" => "u=1003",
    "abstract" => "u=62",
    "id" => "u=12",
    "year" => "u=30 r=o",
    "journal" => "u=1033"
  );
  yaz_ccl_conf($yazc, $fields);

  if (!yaz_ccl_parse($yazc, $ccl, $cclresult)) {
    drupal_set_message('Error parsing search string: ' . $cclresult["errorstring"], "error");
    watchdog('tpub_import', 'Error: %errstr', array('%errstr' => $cclresult["errorstring"]), WATCHDOG_ERROR);
    // Release the connection before bailing out so it is not leaked.
    yaz_close($yazc);
    return array(
      'total_records' => 0,
      'search_str' => '',
      'pubs' => array(),
    );
  }
  $search_str = $cclresult["rpn"];

  // Get the total number of records.
  $total_records = tripal_pub_AGL_count($yazc, $search_str);

  // Get the pubs in the specified range.
  $start = $page * $num_to_retrieve;
  $results = tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records);

  // Close the connection.
  yaz_close($yazc);

  return $results;
}