AGL.inc

File

tripal_pub/includes/importers/AGL.inc
View source
  1. <?php
  2. /**
  3. *
  4. */
  5. function tripal_pub_remote_alter_form_AGL($form, $form_state) {
  6. $num_criteria = $form['num_criteria']['#default_value'];
  7. // So far we haven't been able to get AGL to filter results to only
  8. // include pubs by the XX number days in the past. So, we will
  9. // change the 'days' element to be the year to query
  10. $form['days']['#title'] = t('Year');
  11. $form['days']['#description'] = t('Please enter a year to limit records by the year they were published, created or modified in the database.');
  12. // The Journal Name filter doesn't seem to work, so remove it
  13. for($i = 1; $i <= $num_criteria; $i++) {
  14. unset($form['criteria'][$i]["scope-$i"]['#options']['journal']);
  15. }
  16. return $form;
  17. }
  18. /**
  19. *
  20. */
  21. function tripal_pub_remote_validate_form_AGL($form, $form_state) {
  22. $days = trim($form_state['values']["days"]);
  23. $num_criteria = $form['num_criteria']['#default_value'];
  24. if ($days and !preg_match('/^\d\d\d\d$/', $days)) {
  25. form_set_error("days", "Please enter a four digit year.");
  26. }
  27. $num_ids = 0;
  28. for ($i = 1; $i <= $num_criteria; $i++) {
  29. $search_terms = trim($form_state['values']["search_terms-$i"]);
  30. $scope = $form_state['values']["scope-$i"];
  31. if ($scope == 'id' and !preg_match('/^AGL:\d+$/', $search_terms)) {
  32. form_set_error("search_terms-$i", "The AGL accession be a numeric value, prefixed with 'AGL:' (e.g. AGL:3890740).");
  33. }
  34. if ($scope == 'id') {
  35. $num_ids++;
  36. }
  37. if($num_ids > 1) {
  38. form_set_error("search_terms-$i", "Unfortuantely, the AGL importer can only support a single accession at a time. Please remove the others.");
  39. }
  40. }
  41. return $form;
  42. }
  43. /**
  44. *
  45. */
  46. function tripal_pub_remote_search_AGL($search_array, $num_to_retrieve, $pager_id) {
  47. // get some values from the serach array
  48. $num_criteria = $search_array['num_criteria'];
  49. $days = $search_array['days'];
  50. // set some defaults
  51. $search_array['limit'] = $num_to_retrieve;
  52. // To build the CCL search string we want to have a single entry for 'author', 'title', 'abstract'
  53. // or 'id', and also the corresponding 'not for each of those.
  54. // But the search form allows the user to have multiple rows of the same type. So, we will build the
  55. // search string separately for each category and it's negative category (if NOT is selected as the op)
  56. // and at the end we will put them together into a single search string. We need to keep
  57. // track of the first entry of any category because it will not have an op (e.g. 'or' or 'and') but the
  58. // operation will be pushed out to separate the categories. The op for any second or third instance of
  59. // the same category will be included within the search string for the catgory.
  60. $ccl = '';
  61. $title = '';
  62. $author = '';
  63. $abstract = '';
  64. $id = '';
  65. $any = '';
  66. $negate_title = '';
  67. $negate_author = '';
  68. $negate_abstract = '';
  69. $negate_id = '';
  70. $negate_any = '';
  71. $order = array();
  72. $first_abstract = 1;
  73. $first_author = 1;
  74. $first_title = 1;
  75. $first_id = 1;
  76. $first_any = 1;
  77. $first_negate_abstract = 1;
  78. $first_negate_author = 1;
  79. $first_negate_title = 1;
  80. $first_negate_id = 1;
  81. $first_negate_any = 1;
  82. for ($i = 1; $i <= $num_criteria; $i++) {
  83. $search_terms = trim($search_array['criteria'][$i]['search_terms']);
  84. $scope = $search_array['criteria'][$i]['scope'];
  85. $is_phrase = $search_array['criteria'][$i]['is_phrase'];
  86. $op = $search_array['criteria'][$i]['operation'];
  87. if ($op) {
  88. $op = strtolower($op);
  89. }
  90. $search_terms = trim($search_terms);
  91. // if this is not a phrase then make sure the AND and OR are lower-case
  92. if (!$is_phrase) {
  93. $search_terms = preg_replace('/ OR /', ' or ', $search_terms);
  94. $search_terms = preg_replace('/ AND /', ' and ', $search_terms);
  95. }
  96. // else make sure the search terms are surrounded by quotes
  97. else {
  98. $search_terms = "\"$search_terms\"";
  99. }
  100. // if this is a 'not' operation then we want to change it to an
  101. // and
  102. $negate = '';
  103. if ($op == 'not') {
  104. $scope = "negate_$scope";
  105. $op = 'or';
  106. }
  107. $order[] = array('scope' => $scope, 'op' => $op);
  108. // build each category
  109. if ($scope == 'title') {
  110. if ($first_title) {
  111. $title .= "($search_terms) ";
  112. $first_title = 0;
  113. }
  114. else {
  115. $title .= "$op ($search_terms) ";
  116. }
  117. }
  118. if ($scope == 'negate_title') {
  119. if ($first_negate_title) {
  120. $negate_title .= "($search_terms) ";
  121. $first_negate_title = 0;
  122. }
  123. else {
  124. $negate_title .= "$op ($search_terms) ";
  125. }
  126. }
  127. elseif ($scope == 'author') {
  128. if ($first_author) {
  129. $author .= "($search_terms) ";
  130. $first_author = 0;
  131. }
  132. else {
  133. $author .= "$op ($search_terms) ";
  134. }
  135. }
  136. elseif ($scope == 'negate_author') {
  137. if ($first_negate_author) {
  138. $negate_author .= "($search_terms) ";
  139. $first_negate_author = 0;
  140. }
  141. else {
  142. $negate_author .= "$op ($search_terms) ";
  143. }
  144. }
  145. elseif ($scope == 'abstract') {
  146. if ($first_abstract) {
  147. $abstract .= "($search_terms) ";
  148. $first_abstract = 0;
  149. }
  150. else {
  151. $abstract .= "$op ($search_terms) ";
  152. }
  153. }
  154. elseif ($scope == 'negate_abstract') {
  155. if ($first_negate_abstract) {
  156. $negate_abstract .= "($search_terms) ";
  157. $first_negate_abstract = 0;
  158. }
  159. else {
  160. $negate_abstract .= "$op ($search_terms) ";
  161. }
  162. }
  163. elseif ($scope == 'journal') {
  164. if ($first_journal) {
  165. $journal .= "($search_terms) ";
  166. $first_jounral = 0;
  167. }
  168. else {
  169. $journal .= "$op ($search_terms) ";
  170. }
  171. }
  172. elseif ($scope == 'negate_journal') {
  173. if ($first_negate_journal) {
  174. $negate_journal .= "($search_terms) ";
  175. $first_negate_journal = 0;
  176. }
  177. else {
  178. $negate_journal .= "$op ($search_terms) ";
  179. }
  180. }
  181. elseif ($scope == 'id') {
  182. if ($first_id) {
  183. $id .= "(" . preg_replace('/AGL:([^\s]*)/', '$1', $search_terms) . ") ";
  184. $first_id = 0;
  185. }
  186. else {
  187. $id .= "$op (" . preg_replace('/AGL:([^\s]*)/', '$1', $search_terms) . ") ";
  188. }
  189. }
  190. elseif ($scope == 'negate_id') {
  191. if ($first_negate_id) {
  192. $negate_id .= "(" . preg_replace('/AGL:([^\s]*)/', '$1', $search_terms) . ") ";
  193. $first_negate_id = 0;
  194. }
  195. else {
  196. $negate_id .= "$op (" . preg_replace('/AGL:([^\s]*)/', '$1', $search_terms) . ") ";
  197. }
  198. }
  199. elseif ($scope == 'any'){
  200. if ($first_any) {
  201. $any .= "($search_terms) ";
  202. $first_any = 0;
  203. }
  204. else {
  205. $any .= "$op ($search_terms) ";
  206. }
  207. }
  208. elseif ($scope == 'negate_any'){
  209. if ($first_negate_any) {
  210. $negate_any .= "($search_terms) ";
  211. $first_any = 0;
  212. }
  213. else {
  214. $negate_any .= "$op ($search_terms) ";
  215. }
  216. }
  217. }
  218. // now build the CCL string in order
  219. $abstract_done = 0;
  220. $author_done = 0;
  221. $journal_done = 0;
  222. $title_done = 0;
  223. $id_done = 0;
  224. $any_done = 0;
  225. $negate_abstract_done = 0;
  226. $negate_journal_done = 0;
  227. $negate_author_done = 0;
  228. $negate_title_done = 0;
  229. $negate_id_done = 0;
  230. $negate_any_done = 0;
  231. for ($i = 0; $i < count($order) ; $i++) {
  232. if ($order[$i]['scope'] == 'abstract' and !$abstract_done) {
  233. $op = $order[$i]['op'];
  234. $ccl .= "$op abstract=($abstract) ";
  235. $abstract_done = 1;
  236. }
  237. if ($order[$i]['scope'] == 'negate_abstract' and !$negate_abstract_done) {
  238. $ccl .= "not abstract=($negate_abstract) ";
  239. $negate_abstract_done = 1;
  240. }
  241. if ($order[$i]['scope'] == 'author' and !$author_done) {
  242. $op = $order[$i]['op'];
  243. $ccl .= "$op author=($author) ";
  244. $author_done = 1;
  245. }
  246. if ($order[$i]['scope'] == 'negate_author' and !$negate_author_done) {
  247. $ccl .= "not author=($negate_author) ";
  248. $negate_author_done = 1;
  249. }
  250. if ($order[$i]['scope'] == 'journal' and !$journal_done) {
  251. $op = $order[$i]['op'];
  252. $ccl .= "$op journal=($journal) ";
  253. $journal_done = 1;
  254. }
  255. if ($order[$i]['scope'] == 'negate_journal' and !$negate_journal_done) {
  256. $ccl .= "not author=($negate_journal) ";
  257. $negate_journal_done = 1;
  258. }
  259. if ($order[$i]['scope'] == 'id' and !$id_done) {
  260. $op = $order[$i]['op'];
  261. $ccl .= "$op id=($id) ";
  262. $id_done = 1;
  263. }
  264. if ($order[$i]['scope'] == 'negate_id' and !$negate_id_done) {
  265. $ccl .= "not id=($negate_id) ";
  266. $negate_id_done = 1;
  267. }
  268. if ($order[$i]['scope'] == 'title' and !$title_done) {
  269. $op = $order[$i]['op'];
  270. $ccl .= "$op title=($title) ";
  271. $title_done = 1;
  272. }
  273. if ($order[$i]['scope'] == 'negate_title' and !$negate_title_done) {
  274. $ccl .= "not title=($negate_title) ";
  275. $negate_title_done = 1;
  276. }
  277. if ($order[$i]['scope'] == 'any' and !$any_done) {
  278. $op = $order[$i]['op'];
  279. $ccl .= "$op ($any) ";
  280. $any_done = 1;
  281. }
  282. if ($order[$i]['scope'] == 'negate_any' and !$negate_any_done) {
  283. $ccl .= "not ($negate_any) ";
  284. $negate_any_done = 1;
  285. }
  286. }
  287. // for AGL the 'days' form element was converted to represent the year
  288. if ($days) {
  289. $ccl .= "and year=($days)";
  290. }
  291. // remove any preceeding 'and' or 'or'
  292. $ccl = preg_replace('/^\s*(and|or)/', '', $ccl);
  293. // yaz_connect() prepares for a connection to a Z39.50 server. This function is non-blocking
  294. // and does not attempt to establish a connection - it merely prepares a connect to be
  295. // performed later when yaz_wait() is called.
  296. //$yazc = yaz_connect('agricola.nal.usda.gov:7090/voyager'); // NAL Catalog
  297. $yazc = yaz_connect('agricola.nal.usda.gov:7190/voyager'); // NAL Article Citation Database
  298. // use the USMARC record type. But OPAC is also supported by Agricola
  299. yaz_syntax($yazc, "usmarc");
  300. // the search query is built using CCL, we need to first
  301. // configure it so it can map the attributes to defined identifiers
  302. // The attribute set used by AGL can be found at the bottom of this page:
  303. // http://agricola.nal.usda.gov/help/z3950.html
  304. //
  305. // More in depth details: http://www.loc.gov/z3950/agency/bib1.html
  306. //
  307. // CCL Syntax: http://www.indexdata.com/yaz/doc/tools.html#CCL
  308. //
  309. $fields = array(
  310. "title" => "u=4",
  311. "author" => "u=1003",
  312. "abstract" => "u=62",
  313. "id" => "u=12",
  314. "year" => "u=30 r=o",
  315. "journal" => "u=1033"
  316. );
  317. yaz_ccl_conf($yazc, $fields);
  318. if (!yaz_ccl_parse($yazc, $ccl, $cclresult)) {
  319. drupal_set_message('Error parsing search string: ' . $cclresult["errorstring"], "error");
  320. watchdog('tpub_import', 'Error: %errstr', array('%errstr' => $cclresult["errorstring"]), WATCHDOG_ERROR);
  321. return array();
  322. }
  323. $search_str = $cclresult["rpn"];
  324. $search_array['search_string'] = $search_str;
  325. // save the YAZ connection in the session for use by other functions
  326. $_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'] = $yazc;
  327. //dpm($search_array);
  328. // we want to get the list of pubs using the search terms but using a Drupal style pager
  329. $pubs = tripal_pager_callback('tripal_pub_AGL_range', $num_to_retrieve, $pager_id,
  330. 'tripal_pub_AGL_count', $search_array);
  331. // close the connection
  332. unset($_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection']);
  333. yaz_close($yazc);
  334. return $pubs;
  335. }
  336. /*
  337. * This function is used as the callback function when used with the
  338. * tripal_pager_callback function. This function returns a count of
  339. * the dataset to be paged.
  340. */
  341. function tripal_pub_AGL_count($search_array) {
  342. $search_str = $search_array['search_string'];
  343. $days = $search_array['days'];
  344. $limit = $search_array['limit'];
  345. $yazc = $_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'];
  346. //yaz_sort($yazc, "1=31 id"); // sort by publication date descending
  347. if (!yaz_search($yazc, "rpn", $search_str)){
  348. $error_no = yaz_errno($yazc);
  349. $error_msg = yaz_error($yazc);
  350. $additional = yaz_addinfo($yazc);
  351. if ($additional != $error_msg) {
  352. $error_msg .= " $additional";
  353. }
  354. drupal_set_message("ERROR preparing search at AGL: ($error_no) $error_msg", "error");
  355. watchdog('tpub_import', "ERROR preparing search at AGL: (%error_no) %error_msg",
  356. array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
  357. return 0;
  358. }
  359. if (!yaz_wait()) {
  360. $error_no = yaz_errno($yazc);
  361. $error_msg = yaz_error($yazc);
  362. $additional = yaz_addinfo($yazc);
  363. if ($additional != $error_msg) {
  364. $error_msg .= " $additional";
  365. }
  366. drupal_set_message("ERROR waiting on search at AGL: ($error_no) $error_msg", "error");
  367. watchdog('tpub_import', "ERROR waiting on search at AGL: (%error_no) %error_msg",
  368. array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
  369. return 0;
  370. }
  371. // get the total number of results from the serach
  372. $count = yaz_hits($yazc);
  373. $_SESSION['tripal_pub_AGL_query'][$search_str]['Count'] = $count;
  374. return $count;
  375. }
  376. /*
  377. * This function is used as the callback function when used with the
  378. * tripal_pager_callback function. This function returns the results
  379. * within the specified range
  380. */
  381. function tripal_pub_AGL_range($search_array, $start = 0, $limit = 10) {
  382. $pubs = array();
  383. $search_str = $search_array['search_string'];
  384. $days = $search_array['days'];
  385. $limit = $search_array['limit'];
  386. $yazc = $_SESSION['tripal_pub_AGL_query'][$search_str]['yaz_connection'];
  387. $count = $_SESSION['tripal_pub_AGL_query'][$search_str]['Count'];
  388. yaz_range($yazc, 1, $num_pubs);
  389. if (!yaz_present($yazc)) {
  390. $error_no = yaz_errno($yazc);
  391. $error_msg = yaz_error($yazc);
  392. $additional = yaz_addinfo($yazc);
  393. if ($additional != $error_msg) {
  394. $error_msg .= " $additional";
  395. }
  396. drupal_set_message("ERROR waiting on search at AGL: ($error_no) $error_msg", "error");
  397. watchdog('tpub_import', "ERROR waiting on search at AGL: (%error_no) %error_msg",
  398. array('%error_no' => $error_no, '%error_msg' => $error_msg), WATCHDOG_ERROR);
  399. return $pubs;
  400. }
  401. if ($start + $limit > $count) {
  402. $limit = $count - $start;
  403. }
  404. for($i = $start; $i < $start + $limit; $i++) {
  405. $pub_xml = yaz_record($yazc, $i + 1, 'xml; charset=marc-8,utf-8');
  406. $pub = tripal_pub_AGL_parse_pubxml($pub_xml);
  407. $pubs[] = $pub;
  408. }
  409. return $pubs;
  410. }
  411. /*
  412. * Description of XML format:
  413. * http://www.loc.gov/marc/bibliographic/bdsummary.html
  414. *
  415. */
  416. function tripal_pub_AGL_parse_pubxml($pub_xml) {
  417. $pub = array();
  418. // we will set the default publication type as a journal article. The NAL
  419. // dataset doesn't specify an article type so we'll have to glean the type
  420. // from other information (e.g. series name has 'Proceedings' in it)
  421. $pub['Publication Type'][0] = 'Journal Article';
  422. if (!$pub_xml) {
  423. return $pub;
  424. }
  425. // read the XML and iterate through it.
  426. $xml = new XMLReader();
  427. $xml->xml(trim($pub_xml));
  428. while ($xml->read()) {
  429. $element = $xml->name;
  430. if ($xml->nodeType == XMLReader::ELEMENT and $element == 'controlfield') {
  431. $tag = $xml->getAttribute('tag');
  432. $xml->read();
  433. $value = $xml->value;
  434. switch ($tag) {
  435. case '001': // control number
  436. $pub['Publication Accession'] = $value;
  437. break;
  438. case '003': // control number identifier
  439. break;
  440. case '005': // datea nd time of latest transaction
  441. break;
  442. case '006': // fixed-length data elemetns
  443. break;
  444. case '007': // physical description fixed field
  445. break;
  446. case '008': // fixed length data elements
  447. $month = array(
  448. '01' => 'Jan', '02' => 'Feb', '03' => 'Mar',
  449. '04' => 'Apr', '05' => 'May', '06' => 'Jun',
  450. '07' => 'Jul', '08' => 'Aug', '09' => 'Sep',
  451. '10' => 'Oct', '11' => 'Nov', '12' => 'Dec'
  452. );
  453. $date0 = substr($value, 0, 6); // date entered on file
  454. $date1 = substr($value, 7, 4); // year of publication
  455. $date2 = substr($value, 11, 4); // month of publication
  456. $place = substr($value, 15, 3);
  457. $lang = substr($value, 35, 3);
  458. if (preg_match('/\d\d\d\d/', $date1)) {
  459. $pub['Year'] = $date1;
  460. $pub['Publication Date'] = $date1;
  461. }
  462. if (preg_match('/\d\d/', $date2)) {
  463. $pub['Publication Date'] = $date1 . " " . $month[substr($date2, 0, 2)] . " " . substr($date2, 3, 2);
  464. }
  465. if (!preg_match('/\s+/', $place)) {
  466. $pub['Published Location'] = $place;
  467. }
  468. if (!preg_match('/\s+/', $lang)) {
  469. $pub['Language Abbr'] = $lang;
  470. }
  471. break;
  472. default: // unhandled tag
  473. break;
  474. }
  475. }
  476. elseif ($xml->nodeType == XMLReader::ELEMENT and $element == 'datafield') {
  477. $tag = $xml->getAttribute('tag');
  478. $ind1 = $xml->getAttribute('ind1');
  479. $ind2 = $xml->getAttribute('ind2');
  480. switch ($tag) {
  481. case '16': // National Bibliographic Agency Control Number
  482. break;
  483. case '35': // System Control Number
  484. $author = array();
  485. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  486. foreach ($codes as $code => $value) {
  487. switch ($code) {
  488. case 'a': // System control number
  489. $pub['Publication Accession'] = $value;
  490. break;
  491. }
  492. }
  493. case '40': // Cataloging Source (NR)
  494. $author = array();
  495. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  496. foreach ($codes as $code => $value) {
  497. switch ($code) {
  498. case 'a': // original cataolging agency
  499. $pub['Publication Database'] = $value;
  500. break;
  501. }
  502. }
  503. break;
  504. case '72': // Subject Category Code
  505. break;
  506. case '100': // main entry-personal name
  507. $author = tripal_pub_remote_search_AGL_get_author($xml, $ind1);
  508. $pub['Author List'][] = $author;
  509. break;
  510. case '110': // main entry-corporate nmae
  511. $author = array();
  512. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  513. foreach ($codes as $code => $value) {
  514. switch ($code) {
  515. case 'a': // Corporate name or jurisdiction name as entry elemen
  516. $author['Collective'] = $value;
  517. break;
  518. case 'b': // Subordinate unit
  519. $author['Collective'] .= ' ' . $value;
  520. break;
  521. }
  522. }
  523. $pub['Author List'][] = $author;
  524. break;
  525. case '111': // main entry-meeting name
  526. break;
  527. case '130': // main entry-uniform title
  528. break;
  529. case '210': // abbreviated title
  530. break;
  531. case '222': // key title
  532. break;
  533. case '240': // uniform title
  534. break;
  535. case '242': // translation of title by cataloging agency
  536. break;
  537. case '243': // collective uniform title
  538. break;
  539. case '245': // title statement
  540. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  541. foreach ($codes as $code => $value) {
  542. switch ($code) {
  543. case 'a':
  544. $pub['Title'] = trim(preg_replace('/\.$/', '', $value));
  545. break;
  546. case 'b':
  547. $pub['Title'] .= ' ' . $value;
  548. break;
  549. case 'h':
  550. $pub['Publication Model'] = $value;
  551. break;
  552. }
  553. }
  554. break;
  555. case '246': // varying form of title
  556. break;
  557. case '247': // former title
  558. break;
  559. case '250': // edition statement
  560. break;
  561. case '254': // musicla presentation statement
  562. break;
  563. case '255': // cartographic mathematical data
  564. break;
  565. case '256': // computer file characteristics
  566. break;
  567. case '257': // country of producing entity
  568. break;
  569. case '258': // philatelic issue data
  570. break;
  571. case '260': // publication, distribution ,etc (imprint)
  572. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  573. foreach ($codes as $code => $value) {
  574. switch ($code) {
  575. case 'a':
  576. $pub['Published Location'] = $value;
  577. break;
  578. case 'b':
  579. $pub['Publisher'] = $value;
  580. break;
  581. case 'c':
  582. $pub['Publication Date'] = $value;
  583. break;
  584. }
  585. }
  586. break;
  587. case '263': // projected publication date
  588. break;
  589. case '264': // production, publication, distribution, manufacture and copyright notice
  590. break;
  591. case '270': // Address
  592. break;
  593. case '300': // Address
  594. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  595. foreach ($codes as $code => $value) {
  596. switch ($code) {
  597. case 'a':
  598. $pages = $value;
  599. $pages = preg_replace('/^p\. /', '', $pages);
  600. $pages = preg_replace('/\.$/', '' , $pages);
  601. if(preg_match('/p$/', $pages)) {
  602. // skip this, it's the number of pages not the page numbers
  603. }
  604. else {
  605. $pub['Pages'] = $pages;
  606. }
  607. break;
  608. }
  609. }
  610. break;
  611. case '500': // series statements
  612. $pub['Notes'] = $value;
  613. break;
  614. case '504': // Bibliography, Etc. Note
  615. break;
  616. case '520': // Summary, etc
  617. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  618. foreach ($codes as $code => $value) {
  619. switch ($code) {
  620. case 'a':
  621. $pub['Abstract'] = $value;
  622. break;
  623. }
  624. }
  625. break;
  626. case '650': // Subject Added Entry-Topical Term
  627. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  628. foreach ($codes as $code => $value) {
  629. switch ($code) {
  630. case 'a':
  631. $pub['Keywords'][] = $value;
  632. break;
  633. }
  634. }
  635. break;
  636. case '653': // Index Term-Uncontrolled
  637. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  638. foreach ($codes as $code => $value) {
  639. switch ($code) {
  640. case 'a':
  641. $pub['Keywords'][] = $value;
  642. break;
  643. }
  644. }
  645. break;
  646. case '700': // Added Entry-Personal Name
  647. $author = tripal_pub_remote_search_AGL_get_author($xml, $ind1);
  648. $pub['Author List'][] = $author;
  649. break;
  650. case '710': // Added Entry-Corporate Name
  651. $author = array();
  652. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  653. foreach ($codes as $code => $value) {
  654. switch ($code) {
  655. case 'a': // Corporate name or jurisdiction name as entry elemen
  656. $author['Collective'] = $value;
  657. break;
  658. case 'b': // Subordinate unit
  659. $author['Collective'] .= ' ' . $value;
  660. break;
  661. }
  662. }
  663. $pub['Author List'][] = $author;
  664. break;
  665. case '773': // host item entry
  666. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  667. foreach ($codes as $code => $value) {
  668. switch ($code) {
  669. case 'a':
  670. if (preg_match('/Proceedings/i', $value)) {
  671. $pub['Series Name'] = preg_replace('/\.$/', '', $value);
  672. $pub['Publication Type'][0] = 'Conference Proceedings';
  673. }
  674. else {
  675. $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
  676. }
  677. break;
  678. case 't':
  679. if (preg_match('/Proceedings/i', $value)) {
  680. $pub['Series Name'] = preg_replace('/\.$/', '', $value);
  681. $pub['Publication Type'][0] = 'Conference Proceedings';
  682. }
  683. $pub['Journal Name'] = preg_replace('/\.$/', '', $value);
  684. break;
  685. case 'g':
  686. $matches = array();
  687. if (preg_match('/^(\d\d\d\d)/', $value, $matches)) {
  688. $pub['Publication Date'] = $matches[1];
  689. }
  690. elseif (preg_match('/(.*?)(\.|\s+)\s*(\d+),\s(\d\d\d\d)/', $value, $matches)) {
  691. $year = $matches[4];
  692. $month = $matches[1];
  693. $day = $matches[3];
  694. $pub['Publication Date'] = "$year $month $day";
  695. }
  696. elseif (preg_match('/\((.*?)(\.|\s+)(\d\d\d\d)\)/', $value, $matches)) {
  697. $year = $matches[3];
  698. $month = $matches[1];
  699. $pub['Publication Date'] = "$year $month";
  700. }
  701. elseif (preg_match('/^(.*?) (\d\d\d\d)/', $value, $matches)) {
  702. $year = $matches[2];
  703. $month = $matches[1];
  704. $pub['Publication Date'] = "$year $month";
  705. }
  706. if (preg_match('/v\. (.*?)(,|\s+)/', $value, $matches)) {
  707. $pub['Volume'] = $matches[1];
  708. }
  709. if (preg_match('/v\. (.*?)(,|\s+)\((.*?)\)/', $value, $matches)) {
  710. $pub['Volume'] = $matches[1];
  711. $pub['Issue'] = $matches[3];
  712. }
  713. if (preg_match('/no\. (.*?)(\s|$)/', $value, $matches)) {
  714. $pub['Issue'] = $matches[1];
  715. }
  716. break;
  717. case 'p':
  718. $pub['Journal Abbreviation'] = $value;
  719. break;
  720. case 'z':
  721. $pub['ISBN'] = $value;
  722. break;
  723. }
  724. }
  725. break;
  726. case '852': // Location (Where is the publication held)
  727. break;
  728. case '856': // Electronic Location and Access
  729. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  730. foreach ($codes as $code => $value) {
  731. switch ($code) {
  732. case 'u':
  733. $pub['URL'] = $value;
  734. break;
  735. }
  736. }
  737. break;
  738. default:
  739. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  740. $unhandled[$tag][] = $codes;
  741. break;
  742. }
  743. }
  744. }
  745. //dpm($unhandled);
  746. // build the Dbxref
  747. if ($pub['Publication Database'] != 'AGL') {
  748. }
  749. if ($pub['Publication Accession'] and $pub['Publication Database']) {
  750. $pub['Publication Dbxref'] = $pub['Publication Database'] . ":" . $pub['Publication Accession'];
  751. unset($pub['Publication Accession']);
  752. unset($pub['Publication Database']);
  753. }
  754. // build the full authors list
  755. if (is_array($pub['Author List'])) {
  756. foreach ($pub['Author List'] as $author) {
  757. if ($author['valid'] == 'N') {
  758. // skip non-valid entries. A non-valid entry should have
  759. // a corresponding corrected entry so we can saftely skip it.
  760. continue;
  761. }
  762. if ($author['Collective']) {
  763. $authors .= $author['Collective'] . ', ';
  764. }
  765. else {
  766. $authors .= $author['Surname'] . ' ' . $author['First Initials'] . ', ';
  767. }
  768. }
  769. $authors = substr($authors, 0, -2);
  770. $pub['Authors'] = $authors;
  771. }
  772. else {
  773. $pub['Authors'] = $pub['Author List'];
  774. }
  775. // build the citation
  776. $pub['Citation'] = tripal_pub_create_citation($pub);
  777. $pub['raw'] = $pub_xml;
  778. return $pub;
  779. }
  780. /*
  781. *
  782. *
  783. */
  784. function tripal_pub_remote_search_AGL_get_subfield($xml) {
  785. $codes = array();
  786. while ($xml->read()) {
  787. $sub_element = $xml->name;
  788. // when we've reached the end of the datafield element then break out of the while loop
  789. if ($xml->nodeType == XMLReader::END_ELEMENT and $sub_element == 'datafield') {
  790. return $codes;
  791. }
  792. // if inside the subfield element then get the code
  793. if ($xml->nodeType == XMLReader::ELEMENT and $sub_element == 'subfield') {
  794. $code = $xml->getAttribute('code');
  795. $xml->read();
  796. $value = $xml->value;
  797. $codes[$code] = $value;
  798. }
  799. }
  800. return $codes;
  801. }
  802. /*
  803. *
  804. *
  805. */
  806. function tripal_pub_remote_search_AGL_get_author($xml, $ind1) {
  807. $author = array();
  808. $codes = tripal_pub_remote_search_AGL_get_subfield($xml);
  809. foreach ($codes as $code => $value) {
  810. switch ($code) {
  811. case 'a':
  812. // remove any trailing commas
  813. $value = preg_replace('/,$/', '', $value);
  814. if ($ind1 == 0) { // Given Name is first
  815. $author['Given Name'] = $names[0];
  816. }
  817. if ($ind1 == 1) { // Surname is first
  818. // split the parts of the name using a comma
  819. $names = explode(',', $value);
  820. $author['Surname'] = $names[0];
  821. $author['Given Name'] = '';
  822. unset($names[0]);
  823. foreach($names as $index => $name) {
  824. $author['Given Name'] .= $name . ' ';
  825. }
  826. $first_names = explode(' ', $author['Given Name']);
  827. $author['First Initials'] = '';
  828. foreach ($first_names as $index => $name) {
  829. $author['First Initials'] .= substr($name, 0, 1);
  830. }
  831. }
  832. if ($ind1 == 3) { // A family name
  833. }
  834. break;
  835. }
  836. }
  837. return $author;
  838. }