function tripal_pub_remote_search_AGL

2.x tripal_pub.AGL.inc tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page)
3.x tripal_chado.pub_importer_AGL.inc tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page)
1.x AGL.inc tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id)

A hook for performing the search on the AGL database.

Parameters

$search_array: An array containing the search criteria for the search

$num_to_retrieve: Indicates the maximum number of publications to retrieve from the remote database

$page: Indicates the page to retrieve. This corresponds to a paged table, where each page has $num_to_retrieve publications.

Return value

An array of publications.

Related topics

File

tripal_pub/includes/importers/tripal_pub.AGL.inc, line 103
Importer for the USDA Agricultural Library (Agricola).

Code

/**
 * A hook for performing the search on the AGL database.
 *
 * Converts the criteria rows of the search form into a single CCL query,
 * has YAZ parse it into RPN, and then retrieves the requested page of
 * results through tripal_pub_AGL_count() and tripal_pub_AGL_range().
 *
 * @param $search_array
 *   An array containing the search criteria for the search. The keys read
 *   here are 'num_criteria', the optional 'days' (used as the publication
 *   year for AGL), and 'criteria', which is indexed from 1 and whose entries
 *   each provide 'search_terms', 'scope', 'is_phrase' and 'operation'.
 * @param $num_to_retrieve
 *   Indicates the maximum number of publications to retrieve from the remote
 *   database.
 * @param $page
 *   Indicates the page to retrieve. This corresponds to a paged table, where
 *   each page has $num_to_retrieve publications.
 *
 * @return
 *   The value returned by tripal_pub_AGL_range() for the requested page. If
 *   the connection cannot be prepared or the search string cannot be parsed,
 *   an array with the keys 'total_records' (0), 'search_str' ('') and 'pubs'
 *   (an empty array) is returned instead.
 */
function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $page) {
  // Get some values from the search array.
  $num_criteria = $search_array['num_criteria'];
  $days = array_key_exists('days', $search_array) ? $search_array['days'] : '';

  // The supported scopes, each mapped to the CCL qualifier that prefixes its
  // group in the query. The 'any' scope is searched without a qualifier.
  $qualifiers = array(
    'abstract' => 'abstract=',
    'author' => 'author=',
    'journal' => 'journal=',
    'id' => 'id=',
    'title' => 'title=',
    'any' => '',
  );

  // To build the CCL search string we want to have a single entry for each
  // scope, and also a single entry for the negated ('not') form of each
  // scope. But the search form allows the user to have multiple rows of the
  // same type. So, the rows are collected into one group per scope, keyed by
  // the scope name (prefixed with 'negate_' when NOT is the op). The first
  // row of a group carries no op inside the group; its op is instead used to
  // join the whole group to the groups before it. The op of any later row of
  // the same group is included within the group. PHP arrays keep insertion
  // order, so the groups are emitted in the order they first appeared.
  $groups = array();
  for ($i = 1; $i <= $num_criteria; $i++) {
    $criterion = $search_array['criteria'][$i];
    $search_terms = trim($criterion['search_terms']);
    $scope = $criterion['scope'];
    $is_phrase = $criterion['is_phrase'];
    $op = $criterion['operation'];

    if ($op) {
      $op = strtolower($op);
    }

    // If this is not a phrase then make sure the AND and OR are lower-case.
    if (!$is_phrase) {
      $search_terms = str_replace(array(' OR ', ' AND '), array(' or ', ' and '), $search_terms);
    }
    // Else make sure the search terms are surrounded by quotes.
    else {
      $search_terms = "\"$search_terms\"";
    }

    // A 'not' row goes into the negated group of its scope. Within a negated
    // group the terms are joined with 'or', because the whole group is
    // emitted as "not field=(a or b)", which excludes a record matching
    // either term.
    $group_key = $scope;
    if ($op === 'not') {
      $group_key = "negate_$scope";
      $op = 'or';
    }
    $negated = (strpos($group_key, 'negate_') === 0);
    $field = $negated ? substr($group_key, strlen('negate_')) : $group_key;

    // Rows with a scope that has no CCL mapping are ignored.
    if (!isset($qualifiers[$field])) {
      continue;
    }

    // Identifiers may be entered as 'AGL:12345'; only the part after the
    // prefix is sent to the server.
    if ($field == 'id') {
      $search_terms = preg_replace('/AGL:([^\s]*)/', '$1', $search_terms);
    }

    if (!isset($groups[$group_key])) {
      $groups[$group_key] = array(
        'op' => $op,
        'negated' => $negated,
        'qualifier' => $qualifiers[$field],
        'terms' => "($search_terms) ",
      );
    }
    else {
      $groups[$group_key]['terms'] .= "$op ($search_terms) ";
    }
  }

  // Now build the CCL string in order.
  $ccl = '';
  foreach ($groups as $group) {
    $joiner = $group['negated'] ? 'not' : $group['op'];
    $ccl .= $joiner . ' ' . $group['qualifier'] . '(' . $group['terms'] . ') ';
  }

  // For AGL the 'days' form element was converted to represent the year.
  if ($days) {
    $ccl .= "and year=($days)";
  }

  // Remove any preceding 'and' or 'or'.
  $ccl = preg_replace('/^\s*(and|or)/', '', $ccl);

  // The value returned when no search could be performed.
  $no_results = array(
    'total_records' => 0,
    'search_str' => '',
    'pubs' => array(),
  );

  // yaz_connect() prepares for a connection to a Z39.50 server. This function
  // is non-blocking and does not attempt to establish a connection - it merely
  // prepares a connect to be performed later when yaz_wait() is called.

  // NAL Catalog
  // $yazc = yaz_connect('agricola.nal.usda.gov:7090/voyager');

  // NAL Article Citation Database
  $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager');
  if (!$yazc) {
    // Without a connection resource none of the yaz_* calls below can work.
    drupal_set_message('Unable to prepare a connection to the AGL database.', "error");
    watchdog('tpub_import', 'Error: unable to prepare a connection to the AGL database.', array(), WATCHDOG_ERROR);
    return $no_results;
  }

  // Use the USMARC record type.  But OPAC is also supported by Agricola.
  yaz_syntax($yazc, "usmarc");

  // The search query is built using CCL, we need to first
  // configure it so it can map the attributes to defined identifiers
  // The attribute set used by AGL can be found at the bottom of this page:
  // http://agricola.nal.usda.gov/help/z3950.html
  //
  // More in depth details:  http://www.loc.gov/z3950/agency/bib1.html
  //
  // CCL Syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
  //
  $fields = array(
    "title" => "u=4",
    "author" => "u=1003",
    "abstract" => "u=62",
    "id" => "u=12",
    "year" => "u=30 r=o",
    "journal" => "u=1033"
  );
  yaz_ccl_conf($yazc, $fields);

  if (!yaz_ccl_parse($yazc, $ccl, $cclresult)) {
    drupal_set_message('Error parsing search string: ' . $cclresult["errorstring"], "error");
    watchdog('tpub_import', 'Error: %errstr', array('%errstr' => $cclresult["errorstring"]), WATCHDOG_ERROR);
    // Release the connection before bailing out.
    yaz_close($yazc);
    return $no_results;
  }
  $search_str = $cclresult["rpn"];

  // Get the total number of records.
  $total_records = tripal_pub_AGL_count($yazc, $search_str);

  // Get the pubs in the specified range.
  $start = $page * $num_to_retrieve;
  $results = tripal_pub_AGL_range($yazc, $search_str, $start, $num_to_retrieve, $total_records);

  // Close the connection.
  yaz_close($yazc);

  return $results;
}