function tripal_pub_remote_search_PMID

2.x tripal_pub.PMID.inc tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $page)
3.x tripal_chado.pub_importer_PMID.inc tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $page)
1.x PMID.inc tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $pager_id)

A hook for performing the search on the PubMed database.

Parameters

$search_array: An array containing the search criteria for the search

$num_to_retrieve: Indicates the maximum number of publications to retrieve from the remote database

$page: Indicates the page to retrieve. This corresponds to a paged table, where each page has $num_to_retrieve publications.

Return value

An array of publications.

Related topics

File

tripal_pub/includes/importers/tripal_pub.PMID.inc, line 80
This file provides support for importing and parsing of results from the NCBI PubMed database. The functions here are used by both the publication importer setup form and the publication importer.

Code

/**
 * Searches the PubMed database and retrieves one page of publications.
 *
 * Builds a PubMed query string from the supplied criteria, initializes the
 * search, fetches the list of PMIDs for the requested page and then fetches
 * and parses every publication record individually.
 *
 * @param $search_array
 *   An array containing the search criteria. The keys read here are:
 *   - num_criteria: the number of entries in 'criteria' (indexed from 1).
 *   - days: if non-empty, a "[Date - Create]" range is added to the query that
 *     starts this many days before REQUEST_TIME.
 *   - criteria: an array indexed from 1 to num_criteria. Each element may have
 *     the keys 'search_terms', 'scope' (title, author, abstract, journal or
 *     id), 'is_phrase' and 'operation' (text placed in front of the
 *     criterion, e.g. a boolean operator).
 * @param $num_to_retrieve
 *   The maximum number of publications to retrieve from the remote database.
 * @param $page
 *   The page to retrieve. It is multiplied by $num_to_retrieve to obtain the
 *   starting offset handed to the fetch.
 *
 * @return
 *   An array with the keys:
 *   - total_records: the record count reported by the search initialization.
 *   - search_str: the query string that was built.
 *   - pubs: an array of parsed publications (empty if there are no records).
 */
function tripal_pub_remote_search_PMID($search_array, $num_to_retrieve, $page) {
  $num_criteria = isset($search_array['num_criteria']) ? $search_array['num_criteria'] : 0;
  $days = isset($search_array['days']) ? $search_array['days'] : 0;

  // The field tag that is appended to each term for every supported scope.
  // Any other scope results in no tag at all.
  $scope_tags = array(
    'title' => '[Title]',
    'author' => '[Author]',
    'abstract' => '[Title/Abstract]',
    'journal' => '[Journal]',
    'id' => '[Uid]',
  );

  $search_str = '';

  for ($i = 1; $i <= $num_criteria; $i++) {
    $has_criterion = isset($search_array['criteria'][$i]) && is_array($search_array['criteria'][$i]);
    $criterion = $has_criterion ? $search_array['criteria'][$i] : array();
    // Fill in anything the caller left out so no key lookup below can fail.
    $criterion += array(
      'search_terms' => '',
      'scope' => '',
      'is_phrase' => FALSE,
      'operation' => '',
    );

    $search_terms = trim($criterion['search_terms']);
    $scope = $criterion['scope'];
    $tag = (is_string($scope) && isset($scope_tags[$scope])) ? $scope_tags[$scope] : '';

    // Identifiers may be supplied as 'PMID:12345'. Strip the prefix from this
    // criterion only so that other criteria are left untouched.
    if ($scope == 'id') {
      $search_terms = preg_replace('/PMID:([^\s]*)/', '$1', $search_terms);
    }

    if ($criterion['operation']) {
      $search_str .= $criterion['operation'] . ' ';
    }

    // If this is a phrase make sure the search terms are surrounded by quotes.
    if ($criterion['is_phrase']) {
      $search_str .= "(\"$search_terms\" $tag)";
      continue;
    }

    // Not a phrase: separate the terms on a stand-alone 'and' or 'or' so that
    // every element becomes its own tagged criteria. The operator must be a
    // whole word surrounded by whitespace; otherwise terms that merely
    // contain the letters (e.g. 'Anderson', 'sorghum') would be mistaken for
    // an operator.
    if (preg_match('/\s+and\s+/i', $search_terms)) {
      $elements = preg_split('/\s+and\s+/i', $search_terms);
      $glue = ' AND ';
    }
    elseif (preg_match('/\s+or\s+/i', $search_terms)) {
      $elements = preg_split('/\s+or\s+/i', $search_terms);
      $glue = ' OR ';
    }
    else {
      $elements = array($search_terms);
      $glue = '';
    }

    $clauses = array();
    foreach ($elements as $element) {
      $clauses[] = "($element $tag)";
    }
    $search_str .= '(' . implode($glue, $clauses) . ')';
  }

  if ($days) {
    // Get the date of the day suggested and search from then onwards.
    $past_timestamp = REQUEST_TIME - ($days * 86400);
    $past_date = getdate($past_timestamp);
    $from_date = sprintf("%04d/%02d/%02d", $past_date['year'], $past_date['mon'], $past_date['mday']);
    $date_clause = "(\"" . $from_date . "\"[Date - Create] : \"3000\"[Date - Create])";
    // Only join with AND when there is something to join to.
    $search_str .= ($search_str === '') ? $date_clause : ' AND ' . $date_clause;
  }

  // Now initialize the query.
  $results = tripal_pub_PMID_search_init($search_str, $num_to_retrieve);
  $total_records = isset($results['Count']) ? $results['Count'] : 0;
  $query_key = isset($results['QueryKey']) ? $results['QueryKey'] : NULL;
  $web_env = isset($results['WebEnv']) ? $results['WebEnv'] : NULL;

  // If we have no records then return an empty list of publications.
  if ($total_records == 0) {
    return array(
      'total_records' => $total_records,
      'search_str' => $search_str,
      'pubs' => array(),
    );
  }

  // Initialize the pager.
  $start = $page * $num_to_retrieve;

  // Now get the list of PMIDs from the initialized search.
  $pmids_txt = tripal_pub_PMID_fetch($query_key, $web_env, 'uilist', 'text', $start, $num_to_retrieve);

  // One PMID is expected per line. Drop blank lines so that an empty response
  // does not trigger a fetch for an empty identifier.
  $pmids = array_values(array_filter(array_map('trim', explode("\n", (string) $pmids_txt)), 'strlen'));

  // Iterate through each PMID and get the publication record. This requires
  // a new fetch per publication.
  $pubs = array();
  foreach ($pmids as $pmid) {
    $pub_xml = tripal_pub_PMID_fetch($query_key, $web_env, 'null', 'xml', 0, 1, array('id' => $pmid));
    $pubs[] = tripal_pub_PMID_parse_pubxml($pub_xml);
  }

  return array(
    'total_records' => $total_records,
    'search_str' => $search_str,
    'pubs' => $pubs,
  );
}