search.module

  1. 7.x drupal-7.x/modules/search/search.module
  2. 6.x drupal-6.x/modules/search/search.module

Enables site-wide keyword searching.

File

drupal-6.x/modules/search/search.module
View source
  1. <?php
  2. /**
  3. * @file
  4. * Enables site-wide keyword searching.
  5. */
  /**
   * Matches Unicode character classes to exclude from the search index.
   *
   * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
   *
   * The index only contains the following character classes:
   * Lu Letter, Uppercase
   * Ll Letter, Lowercase
   * Lt Letter, Titlecase
   * Lo Letter, Other
   * Nd Number, Decimal Digit
   * No Number, Other
   *
   * The value is a list of code points and code point ranges, not a complete
   * pattern: search_simplify() wraps it in square brackets and applies the /u
   * modifier, replacing every run of matching characters with a single space.
   */
  define('PREG_CLASS_SEARCH_EXCLUDE',
    '\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
    '\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
    '\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
    '\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
    '\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
    '\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
    '\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
    '\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
    '\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
    '\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
    '\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
    '\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
    '\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
    '\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
    '\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
    '\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
    '\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
    '\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
    '\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
    '\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
    '\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
    '\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
    '\x{3099}-\x{309e}\x{30a0}\x{30fb}\x{30fd}\x{30fe}\x{3190}-\x{319f}\x{31c0}-'.
    '\x{31cf}\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}'.
    '\x{a806}\x{a80b}\x{a823}-\x{a82b}\x{e000}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}'.
    '\x{fd3f}\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}'.
    '\x{ff5b}-\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
  /**
   * Matches all 'N' Unicode character classes (numbers)
   *
   * The value is a list of code points and ranges meant to be placed inside a
   * regular expression character class. search_simplify() uses it, together
   * with PREG_CLASS_PUNCTUATION, to join groups of numeric characters that are
   * separated only by punctuation.
   */
  define('PREG_CLASS_NUMBERS',
    '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'.
    '\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'.
    '\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'.
    '\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'.
    '\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'.
    '\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'.
    '\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'.
    '\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'.
    '\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}');
  /**
   * Matches all 'P' Unicode character classes (punctuation)
   *
   * The value is a list of code points and ranges meant to be placed inside a
   * regular expression character class. search_simplify() uses it to detect
   * the punctuation that sits between two groups of numeric characters.
   */
  define('PREG_CLASS_PUNCTUATION',
    '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'.
    '\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'.
    '\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'.
    '\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'.
    '\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'.
    '\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'.
    '\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'.
    '\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'.
    '\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'.
    '\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'.
    '\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'.
    '\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'.
    '\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'.
    '\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'.
    '\x{ff65}');
  /**
   * Matches all CJK characters that are candidates for auto-splitting
   * (Chinese, Japanese, Korean).
   * Contains kana and BMP ideographs.
   *
   * The value is a list of code point ranges meant to be placed inside a
   * regular expression character class. search_simplify() passes every run of
   * matching characters to search_expand_cjk() when the 'overlap_cjk' variable
   * is enabled.
   */
  define('PREG_CLASS_CJK', '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}'.
    '\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}');
  /**
   * Implementation of hook_help().
   *
   * Returns translated help text for the search module's help page, for the
   * search settings page and for the "no results" message. Any other path
   * yields no value.
   *
   * @param $path
   *   The help path being requested.
   * @param $arg
   *   Unused by this implementation.
   */
  function search_help($path, $arg) {
    if ($path == 'admin/help#search') {
      // Collect the paragraphs first; they are wrapped in <p> tags at the end.
      $paragraphs = array();
      $paragraphs[] = t('The search module adds the ability to search for content by keywords. Search is often the only practical way to find content on a large site, and is useful for finding both users and posts.');
      $links = array('@cron' => url('admin/reports/status'), '@searchsettings' => url('admin/settings/search'));
      $paragraphs[] = t('To provide keyword searching, the search engine maintains an index of words found in your site\'s content. To build and maintain this index, a correctly configured <a href="@cron">cron maintenance task</a> is required. Indexing behavior can be adjusted using the <a href="@searchsettings">search settings page</a>; for example, the <em>Number of items to index per cron run</em> sets the maximum number of items indexed in each pass of a <a href="@cron">cron maintenance task</a>. If necessary, reduce this number to prevent timeouts and memory errors when indexing.', $links);
      $paragraphs[] = t('For more information, see the online handbook entry for <a href="@search">Search module</a>.', array('@search' => 'http://drupal.org/handbook/modules/search/'));
      return '<p>'. implode('</p><p>', $paragraphs) .'</p>';
    }

    if ($path == 'admin/settings/search') {
      $cron_link = array('@cron' => url('admin/reports/status'));
      return '<p>'. t('The search engine maintains an index of words found in your site\'s content. To build and maintain this index, a correctly configured <a href="@cron">cron maintenance task</a> is required. Indexing behavior can be adjusted using the settings below.', $cron_link) .'</p>';
    }

    if ($path == 'search#noresults') {
      return t('<ul>
<li>Check if your spelling is correct.</li>
<li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
<li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
</ul>');
    }
  }
  /**
   * Implementation of hook_theme().
   *
   * Declares the two search form templates and the two result templates. The
   * result templates name search.pages.inc as their include file.
   */
  function search_theme() {
    $hooks = array();

    // Search forms: theme form and block form.
    $hooks['search_theme_form'] = array(
      'arguments' => array('form' => NULL),
      'template' => 'search-theme-form',
    );
    $hooks['search_block_form'] = array(
      'arguments' => array('form' => NULL),
      'template' => 'search-block-form',
    );

    // A single result and the list of results.
    $hooks['search_result'] = array(
      'arguments' => array('result' => NULL, 'type' => NULL),
      'file' => 'search.pages.inc',
      'template' => 'search-result',
    );
    $hooks['search_results'] = array(
      'arguments' => array('results' => NULL, 'type' => NULL),
      'file' => 'search.pages.inc',
      'template' => 'search-results',
    );

    return $hooks;
  }
  /**
   * Implementation of hook_perm().
   *
   * @return
   *   The list of permission names defined by the search module.
   */
  function search_perm() {
    $permissions = array();
    $permissions[] = 'search content';
    $permissions[] = 'use advanced search';
    $permissions[] = 'administer search';
    return $permissions;
  }
  /**
   * Implementation of hook_block().
   *
   * Lists the search form block and, for users with the 'search content'
   * permission, renders it. Any other operation yields no value.
   *
   * @param $op
   *   The block operation; 'list' and 'view' are handled.
   * @param $delta
   *   Unused by this implementation.
   */
  function search_block($op = 'list', $delta = 0) {
    if ($op == 'list') {
      return array(
        0 => array(
          'info' => t('Search form'),
          // Not worth caching.
          'cache' => BLOCK_NO_CACHE,
        ),
      );
    }

    if ($op == 'view' && user_access('search content')) {
      return array(
        'content' => drupal_get_form('search_block_form'),
        'subject' => t('Search'),
      );
    }
  }
  /**
   * Implementation of hook_menu().
   *
   * Registers the search page, the search administration pages, the search
   * phrase report and one local task below 'search' for every module that
   * implements hook_search().
   */
  function search_menu() {
    $items = array(
      'search' => array(
        'title' => 'Search',
        'page callback' => 'search_view',
        'access arguments' => array('search content'),
        'type' => MENU_SUGGESTED_ITEM,
        'file' => 'search.pages.inc',
      ),
      'admin/settings/search' => array(
        'title' => 'Search settings',
        'description' => 'Configure relevance settings for search and other indexing options',
        'page callback' => 'drupal_get_form',
        'page arguments' => array('search_admin_settings'),
        'access arguments' => array('administer search'),
        'type' => MENU_NORMAL_ITEM,
        'file' => 'search.admin.inc',
      ),
      'admin/settings/search/wipe' => array(
        'title' => 'Clear index',
        'page callback' => 'drupal_get_form',
        'page arguments' => array('search_wipe_confirm'),
        'access arguments' => array('administer search'),
        'type' => MENU_CALLBACK,
        'file' => 'search.admin.inc',
      ),
      // The report page is served by a callback in the dblog module's
      // include file.
      'admin/reports/search' => array(
        'title' => 'Top search phrases',
        'description' => 'View most popular search phrases.',
        'page callback' => 'dblog_top',
        'page arguments' => array('search'),
        'access arguments' => array('access site reports'),
        'file' => 'dblog.admin.inc',
        'file path' => drupal_get_path('module', 'dblog'),
      ),
    );

    // One tab per search-providing module; the tab title is obtained by
    // invoking the module's hook_search() with the 'name' operation.
    $providers = module_implements('search');
    foreach ($providers as $module) {
      $tab_path = 'search/'. $module .'/%menu_tail';
      $items[$tab_path] = array(
        'title callback' => 'module_invoke',
        'title arguments' => array($module, 'search', 'name', TRUE),
        'page callback' => 'search_view',
        'page arguments' => array($module),
        'access callback' => '_search_menu',
        'access arguments' => array($module),
        'type' => MENU_LOCAL_TASK,
        'parent' => 'search',
        'file' => 'search.pages.inc',
      );
    }

    return $items;
  }
  /**
   * Menu access callback for the per-module search tabs.
   *
   * @param $name
   *   The name of the module providing the search.
   * @return
   *   TRUE when the user has the 'search content' permission and the module's
   *   hook_search() 'name' operation returns a non-empty value, FALSE
   *   otherwise.
   */
  function _search_menu($name) {
    if (!user_access('search content')) {
      return FALSE;
    }
    return (bool) module_invoke($name, 'search', 'name');
  }
  /**
   * Wipes a part of or the entire search index.
   *
   * Called without $sid and $type, this invokes hook_search() with the 'reset'
   * operation in all modules. Otherwise the rows for the given item are
   * deleted from the search_dataset and search_index tables and, unless
   * $reindex is set, from search_node_links as well.
   *
   * @param $sid
   *   (optional) The SID of the item to wipe. If specified, $type must be
   *   passed too.
   * @param $type
   *   (optional) The type of item to wipe.
   * @param $reindex
   *   (optional) TRUE to keep the item's rows in search_node_links, for use
   *   when the item is about to be indexed again. Defaults to FALSE.
   */
  function search_wipe($sid = NULL, $type = NULL, $reindex = FALSE) {
    $wipe_everything = ($type == NULL && $sid == NULL);
    if ($wipe_everything) {
      module_invoke_all('search', 'reset');
      return;
    }

    db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
    db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);

    // Don't remove links if re-indexing.
    if ($reindex) {
      return;
    }
    db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
  }
  /**
   * Marks a word as dirty, or retrieves the list of dirty words.
   *
   * This is used during indexing (cron). Words which are dirty have outdated
   * total counts in the search_total table, and need to be recounted.
   *
   * @param $word
   *   (optional) The word to mark as dirty. Omit it to fetch the list instead.
   * @return
   *   When called without a word: an array whose keys are the words marked so
   *   far during this request. Nothing when a word is being marked.
   */
  function search_dirty($word = NULL) {
    static $dirty = array();

    // Getter mode.
    if ($word === NULL) {
      return $dirty;
    }

    // Setter mode: the word is stored as a key, so duplicates collapse.
    $dirty[$word] = TRUE;
  }
  /**
   * Implementation of hook_cron().
   *
   * Fires hook_update_index() in all modules and schedules the clean-up of
   * dirty words (see search_dirty()) for the end of the request.
   */
  function search_cron() {
    // The totals are refreshed from a shutdown function so that search_total
    // is brought up to date even when indexing does not run to completion.
    register_shutdown_function('search_update_totals');

    // Update word index
    $modules = module_list();
    foreach ($modules as $name) {
      module_invoke($name, 'update_index');
    }
  }
  /**
   * Brings the search_total table in line with the search_index table.
   *
   * This function is called on shutdown to ensure that search_total is always
   * up to date (even if cron times out or otherwise fails). It recomputes the
   * stored count of every word reported by search_dirty() and removes words
   * that no longer occur in search_index.
   */
  function search_update_totals() {
    // Update word IDF (Inverse Document Frequency) counts for new/changed words
    foreach (array_keys(search_dirty()) as $dirty_word) {
      // Sum of the scores of this word over all indexed items.
      $score_sum = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $dirty_word));
      // Apply Zipf's law to equalize the probability distribution
      $weight = log10(1 + 1 / max(1, $score_sum));

      // Update the existing row; insert one only when nothing was updated.
      db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $weight, $dirty_word);
      if (!db_affected_rows()) {
        @db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $dirty_word, $weight);
      }
    }

    // Find words that were deleted from search_index, but are still in
    // search_total. We use a LEFT JOIN between the two tables and keep only the
    // rows which fail to join.
    $orphans = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
    while ($row = db_fetch_object($orphans)) {
      db_query("DELETE FROM {search_total} WHERE word = '%s'", $row->realword);
    }
  }
  /**
   * Simplifies a string according to indexing rules.
   *
   * @param $text
   *   The text to simplify.
   * @return
   *   The text with entities decoded, lowercased, run through the
   *   preprocessing hooks, optionally CJK-expanded, with punctuated numbers
   *   joined, dots/underscores/dashes removed and every run of excluded
   *   characters replaced by a single space.
   */
  function search_simplify($text) {
    // Decode entities to UTF-8, then lowercase.
    $text = drupal_strtolower(decode_entities($text));

    // Call an external processor for word handling.
    search_invoke_preprocess($text);

    // Simple CJK handling
    if (variable_get('overlap_cjk', TRUE)) {
      $cjk_run = '/['. PREG_CLASS_CJK .']+/u';
      $text = preg_replace_callback($cjk_run, 'search_expand_cjk', $text);
    }

    // To improve searching for numerical data such as dates, IP addresses
    // or version numbers, we consider a group of numerical characters
    // separated only by punctuation characters to be one piece.
    // This also means that searching for e.g. '20/03/1984' also returns
    // results with '20-03-1984' in them.
    // Readable regexp: ([number]+)[punctuation]+(?=[number])
    $number = '['. PREG_CLASS_NUMBERS .']';
    $punctuated_number = '/('. $number .'+)['. PREG_CLASS_PUNCTUATION .']+(?='. $number .')/u';
    $text = preg_replace($punctuated_number, '\1', $text);

    // The dot, underscore and dash are simply removed. This allows meaningful
    // search behavior with acronyms and URLs.
    $text = preg_replace('/[._-]+/', '', $text);

    // With the exception of the rules above, we consider all punctuation,
    // marks, spacers, etc, to be a word boundary.
    $boundary = '/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u';
    return preg_replace($boundary, ' ', $text);
  }
  /**
   * Basic CJK tokenizer, used as a preg_replace_callback() callback.
   *
   * Splits the matched string into consecutive, overlapping sequences of
   * 'minimum_word_size' characters, separated and surrounded by spaces.
   * Strings that are not longer than that size are returned whole, padded
   * with a space on each side.
   *
   * @param $matches
   *   The match array; only element 0 (the full match) is used.
   * @return
   *   The space-separated tokens.
   */
  function search_expand_cjk($matches) {
    $size = variable_get('minimum_word_size', 3);
    $remaining = $matches[0];
    $length = drupal_strlen($remaining);

    // Passthrough short words
    if ($length <= $size) {
      return ' '. $remaining .' ';
    }

    $output = ' ';
    // Sliding window holding the characters of the current token.
    $window = array();
    $position = 0;
    while ($position < $length) {
      // Take the next character off the front of the string. The remainder is
      // cut by byte length because a character may span several bytes.
      $char = drupal_substr($remaining, 0, 1);
      $remaining = substr($remaining, strlen($char));
      $window[] = $char;

      // Once the window is full, emit it and drop its oldest character.
      if ($position >= $size - 1) {
        $output .= implode('', $window) .' ';
        array_shift($window);
      }
      $position++;
    }
    return $output;
  }
  /**
   * Splits a string into tokens for indexing.
   *
   * The result of the most recent call is remembered, so that splitting the
   * same text twice in a row does not repeat the work.
   *
   * @param $text
   *   The text to split.
   * @return
   *   An array of simplified words, each truncated by
   *   _search_index_truncate(). The array may contain empty strings where the
   *   simplified text has leading, trailing or repeated spaces.
   */
  function search_index_split($text) {
    static $last = NULL;
    static $lastsplit = NULL;

    // The cache is keyed on the raw input and compared strictly. A loose
    // comparison treats NULL as equal to '' (returning the uninitialized cache
    // for an empty string) and treats numeric-looking strings such as '1e3'
    // and '1000' as equal.
    if ($last !== NULL && $last === $text) {
      return $lastsplit;
    }

    // Process words
    $words = explode(' ', search_simplify($text));
    array_walk($words, '_search_index_truncate');

    // Save last keyword result
    $last = $text;
    $lastsplit = $words;

    return $words;
  }
  /**
   * Helper function for array_walk() in search_index_split().
   *
   * @param $text
   *   The word to shorten, passed by reference and replaced with the result
   *   of truncate_utf8() for a limit of 50.
   */
  function _search_index_truncate(&$text) {
    $limit = 50;
    $text = truncate_utf8($text, $limit);
  }
  /**
   * Invokes hook_search_preprocess() in modules.
   *
   * @param $text
   *   The text to preprocess, passed by reference. Each implementing module
   *   receives the output of the previous one, and the final result replaces
   *   the original value.
   */
  function search_invoke_preprocess(&$text) {
    $implementations = module_implements('search_preprocess');
    foreach ($implementations as $name) {
      $processed = module_invoke($name, 'search_preprocess', $text);
      $text = $processed;
    }
  }
  /**
   * Update the full-text search index for a particular item.
   *
   * The text is split into HTML tags and plain text. Words score higher when
   * they appear inside the tags listed in $tags, and words inside links to
   * nodes on this site are additionally recorded as captions for the target
   * node. The item's previous index rows are then wiped and replaced, and the
   * search_node_links table is brought in line with the links found.
   *
   * @param $sid
   *   A number identifying this particular item (e.g. node id).
   *
   * @param $type
   *   A string defining this type of item (e.g. 'node')
   *
   * @param $text
   *   The content of this item. Must be a piece of HTML text.
   *
   * @ingroup search
   */
  function search_index($sid, $type, $text) {
    $minimum_word_size = variable_get('minimum_word_size', 3);

    // Link matching
    global $base_url;
    $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/|'. preg_quote(base_path(), '@') .')(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';

    // Multipliers for scores of words inside certain HTML tags.
    // Note: 'a' must be included for link ranking to work.
    $tags = array('h1' => 25,
                  'h2' => 18,
                  'h3' => 15,
                  'h4' => 12,
                  'h5' => 9,
                  'h6' => 6,
                  'u' => 3,
                  'b' => 3,
                  'i' => 3,
                  'strong' => 3,
                  'em' => 3,
                  'a' => 10);

    // Strip off all ignored tags to speed up processing, but insert space before/after
    // them to keep word boundaries.
    $text = str_replace(array('<', '>'), array(' <', '> '), $text);
    $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

    // Split HTML tags from plain text.
    $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    // Note: PHP ensures the array consists of alternating delimiters and literals
    // and begins and ends with a literal (inserting $null as required).

    $tag = FALSE; // Odd/even counter. Tag or no tag.
    $link = FALSE; // State variable for link analyser
    $score = 1; // Starting score per word
    $accum = ' '; // Accumulator for cleaned up data
    $tagstack = array(); // Stack with open tags
    $tagwords = 0; // Counter for consecutive words
    $focus = 1; // Focus state

    // Element 0 maps each word of this item to its accumulated score. Every
    // other key is the nid of a linked node and holds the list of words found
    // in links pointing at that node.
    $results = array(0 => array()); // Accumulator for words for index

    foreach ($split as $value) {
      if ($tag) {
        // Increase or decrease score per word based on tag
        list($tagname) = explode(' ', $value, 2);
        $tagname = drupal_strtolower($tagname);
        // Closing or opening tag?
        if ($tagname[0] == '/') {
          $tagname = substr($tagname, 1);
          // If we encounter unexpected tags, reset score to avoid incorrect boosting.
          if (!count($tagstack) || $tagstack[0] != $tagname) {
            $tagstack = array();
            $score = 1;
          }
          else {
            // Remove from tag stack and decrement score
            $score = max(1, $score - $tags[array_shift($tagstack)]);
          }
          if ($tagname == 'a') {
            $link = FALSE;
          }
        }
        else {
          if (isset($tagstack[0]) && $tagstack[0] == $tagname) {
            // None of the tags we look for make sense when nested identically.
            // If they are, it's probably broken HTML.
            $tagstack = array();
            $score = 1;
          }
          else {
            // Add to open tag stack and increment score
            array_unshift($tagstack, $tagname);
            $score += $tags[$tagname];
          }
          if ($tagname == 'a') {
            // Check if link points to a node on this site
            if (preg_match($node_regexp, $value, $match)) {
              $path = drupal_get_normal_path($match[1]);
              if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
                $linknid = $match[1];
                if ($linknid > 0) {
                  // Note: ignore links to uncachable nodes to avoid redirect bugs.
                  // NOTE(review): the query result is used without checking
                  // that a row was found; presumably a link to a node id that
                  // does not exist leaves $node as FALSE here - confirm and
                  // consider guarding the property access.
                  $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
                  if (filter_format_allowcache($node->format)) {
                    $link = TRUE;
                    $linktitle = $node->title;
                  }
                }
              }
            }
          }
        }
        // A tag change occurred, reset counter.
        $tagwords = 0;
      }
      else {
        // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
        if ($value != '') {
          if ($link) {
            // Check to see if the node link text is its URL. If so, we use the target node title instead.
            if (preg_match('!^https?://!i', $value)) {
              $value = $linktitle;
            }
          }
          $words = search_index_split($value);
          foreach ($words as $word) {
            // Add word to accumulator
            $accum .= $word .' ';
            $num = is_numeric($word);
            // Check wordlength
            if ($num || drupal_strlen($word) >= $minimum_word_size) {
              // Normalize numbers
              if ($num) {
                $word = (int)ltrim($word, '-0');
              }

              // Links score mainly for the target.
              if ($link) {
                if (!isset($results[$linknid])) {
                  $results[$linknid] = array();
                }
                $results[$linknid][] = $word;
                // Reduce score of the link caption in the source.
                $focus *= 0.2;
              }
              // Fall-through
              if (!isset($results[0][$word])) {
                $results[0][$word] = 0;
              }
              $results[0][$word] += $score * $focus;

              // Focus is a decaying value in terms of the amount of unique words up to this point.
              // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
              $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
            }
            $tagwords++;
            // Too many words inside a single tag probably mean a tag was accidentally left open.
            if (count($tagstack) && $tagwords >= 15) {
              $tagstack = array();
              $score = 1;
            }
          }
        }
      }
      // The split array alternates between text and tags, so flip the state.
      $tag = !$tag;
    }

    // Remove the item's previous dataset and index rows. Its link rows are
    // kept (third argument) so they can be compared with the new links below.
    search_wipe($sid, $type, TRUE);

    // Insert cleaned up data into dataset
    db_query("INSERT INTO {search_dataset} (sid, type, data, reindex) VALUES (%d, '%s', '%s', %d)", $sid, $type, $accum, 0);

    // Insert results into search index
    foreach ($results[0] as $word => $score) {
      // Try inserting first because this will succeed most times, but because
      // the database collates similar words (accented and non-accented), the
      // insert can fail, in which case we need to add the word scores together.
      @db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %f)", $word, $sid, $type, $score);
      if (!db_affected_rows()) {
        db_query("UPDATE {search_index} SET score = score + %f WHERE word = '%s' AND sid = %d AND type = '%s'", $score, $word, $sid, $type);
      }
      // Flag the word so that its total is recounted (see search_dirty()).
      search_dirty($word);
    }
    // Only the per-node link captions remain in $results after this.
    unset($results[0]);

    // Get all previous links from this item.
    $result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
    $links = array();
    while ($link = db_fetch_object($result)) {
      $links[$link->nid] = $link->caption;
    }

    // Now store links to nodes.
    foreach ($results as $nid => $words) {
      $caption = implode(' ', $words);
      if (isset($links[$nid])) {
        if ($links[$nid] != $caption) {
          // Update the existing link and mark the node for reindexing.
          db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid);
          search_touch_node($nid);
        }
        // Unset the link to mark it as processed.
        unset($links[$nid]);
      }
      else {
        // Insert the new link and mark the node for reindexing.
        db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid);
        search_touch_node($nid);
      }
    }
    // Any left-over links in $links no longer exist. Delete them and mark the nodes for reindexing.
    foreach ($links as $nid => $caption) {
      db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid);
      search_touch_node($nid);
    }
  }
  /**
   * Flags a node's search dataset entry for reindexing.
   *
   * Sets the 'reindex' column of the node's row in the search_dataset table
   * to the current time.
   *
   * @param $nid
   *   The nid of the node that needs reindexing.
   */
  function search_touch_node($nid) {
    $now = time();
    db_query("UPDATE {search_dataset} SET reindex = %d WHERE sid = %d AND type = 'node'", $now, $nid);
  }
  /**
   * Implementation of hook_nodeapi().
   *
   * On 'update index', returns the captions of all stored links pointing at
   * the node, wrapped in an <a> element, so that they are indexed as part of
   * the node. On 'update', flags the node for reindexing.
   *
   * @param $node
   *   The node being acted upon.
   * @param $op
   *   The operation; 'update index' and 'update' are handled.
   * @param $teaser
   *   Unused by this implementation.
   * @param $page
   *   Unused by this implementation.
   */
  function search_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) {
    if ($op == 'update index') {
      // Transplant links to a node into the target node.
      $captions = array();
      $result = db_query("SELECT caption FROM {search_node_links} WHERE nid = %d", $node->nid);
      while ($row = db_fetch_object($result)) {
        $captions[] = $row->caption;
      }
      if (count($captions)) {
        return '<a>('. implode(', ', $captions) .')</a>';
      }
    }
    elseif ($op == 'update') {
      // Reindex the node when it is updated. The node is automatically indexed
      // when it is added, simply by being added to the node table.
      search_touch_node($node->nid);
    }
  }
  /**
   * Implementation of hook_comment().
   *
   * Flags the commented node for reindexing whenever one of its comments is
   * added, changed, deleted, published or unpublished.
   *
   * @param $a1
   *   The comment, either as an array or as an object; its 'nid' value
   *   identifies the node.
   * @param $op
   *   The comment operation.
   */
  function search_comment($a1, $op) {
    $reindex_operations = array('insert', 'update', 'delete', 'publish', 'unpublish');
    if (!in_array($op, $reindex_operations)) {
      return;
    }

    if (is_array($a1)) {
      $nid = $a1['nid'];
    }
    else {
      $nid = $a1->nid;
    }
    search_touch_node($nid);
  }
  /**
   * Extract a module-specific search option from a search query. e.g. 'type:book'
   *
   * @param $keys
   *   The search query string.
   * @param $option
   *   The name of the option to look for. It is matched literally and
   *   case-insensitively.
   * @return
   *   The value following 'option:' up to the next space (possibly an empty
   *   string), or NULL when the option is not present.
   */
  function search_query_extract($keys, $option) {
    // Quote the option name so that characters such as '.' or '/' are matched
    // literally instead of altering or breaking the pattern.
    $pattern = '/(^| )'. preg_quote($option, '/') .':([^ ]*)( |$)/i';
    if (preg_match($pattern, $keys, $matches)) {
      return $matches[2];
    }
    return NULL;
  }
  /**
   * Return a query with the given module-specific search option inserted in.
   * e.g. 'type:book'.
   *
   * @param $keys
   *   The search query string.
   * @param $option
   *   The name of the option. It is matched literally and case-insensitively.
   * @param $value
   *   (optional) The new value. When empty, an existing option is removed and
   *   nothing is appended.
   * @return
   *   The query with any previous occurrence of the option removed and, when
   *   a value is given, 'option:value' appended.
   */
  function search_query_insert($keys, $option, $value = '') {
    $quoted = preg_quote($option, '/');

    // Test for the presence of the option itself rather than the truthiness of
    // its current value: an existing 'option:0' or empty 'option:' must be
    // removed too, otherwise the option ends up in the query twice.
    if (preg_match('/(^| )'. $quoted .':[^ ]*( |$)/i', $keys)) {
      $keys = trim(preg_replace('/(^| )'. $quoted .':[^ ]*/i', '', $keys));
    }
    if ($value != '') {
      $keys .= ' '. $option .':'. $value;
    }
    return $keys;
  }
  /**
   * Parse a search query into SQL conditions.
   *
   * We build two queries that matches the dataset bodies. @See do_search for
   * more about these.
   *
   * @param $text
   *   The search keys.
   * @return
   *   NULL when the text contains no keywords. Otherwise a list of seven
   *   elements:
   *    * A series of statements AND'd together which will be used to provide
   *      all possible matches.
   *    * Arguments for this query part.
   *    * A series of exact word matches OR'd together.
   *    * Arguments for this query part.
   *    * The number of matches counted while parsing: the new score words of
   *      each AND keyword, plus one for each OR group that contributed any.
   *    * A bool indicating whether this is a simple query or not. Phrases,
   *      negative terms and presence of both AND / OR make this FALSE.
   *    * A string holding the last 'AND', 'and' or lowercase 'or' token seen,
   *      or an empty string. Maybe the user wanted to use OR.
   */
  function search_parse_query($text) {
    $keys = array('positive' => array(), 'negative' => array());

    // Tokenize query string
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);

    if (count($matches) < 1) {
      return NULL;
    }

    // Classify tokens
    $or = FALSE;
    $warning = '';
    $simple = TRUE;
    foreach ($matches as $match) {
      $phrase = FALSE;
      // Strip off phrase quotes. Square brackets are used for the string
      // offset: the curly-brace form is deprecated as of PHP 7.4 and no longer
      // parses in PHP 8.
      if ($match[2][0] == '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $simple = FALSE;
      }
      // Simplify keyword according to indexing rules and external preprocessors
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when matching a phrase
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
      // Negative matches
      if ($match[1] == '-') {
        $keys['negative'] = array_merge($keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] == 'OR' && count($keys['positive'])) {
        $last = array_pop($keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $keys['positive'][] = $last;
        $or = TRUE;
        continue;
      }
      // AND operator: implied, so just ignore it
      elseif ($match[2] == 'AND' || $match[2] == 'and') {
        $warning = $match[2];
        continue;
      }
      // Plain keyword
      else {
        if ($match[2] == 'or') {
          $warning = $match[2];
        }
        if ($or) {
          // Add to last element (which is an array)
          $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words);
        }
        else {
          $keys['positive'] = array_merge($keys['positive'], $words);
        }
      }
      $or = FALSE;
    }

    // Convert keywords into SQL statements.
    $query = array();
    $arguments = array();
    $arguments2 = array();
    $matches = 0;
    $simple_and = FALSE;
    $simple_or = FALSE;
    // Positive matches
    foreach ($keys['positive'] as $key) {
      // Group of ORed terms
      if (is_array($key) && count($key)) {
        $simple_or = TRUE;
        $queryor = array();
        $any = FALSE;
        foreach ($key as $term) {
          list($q, $num_new_scores) = _search_parse_query($term, $arguments2);
          $any |= $num_new_scores;
          if ($q) {
            $queryor[] = $q;
            $arguments[] = $term;
          }
        }
        if (count($queryor)) {
          $query[] = '('. implode(' OR ', $queryor) .')';
          // A group of OR keywords only needs to match once
          $matches += ($any > 0);
        }
      }
      // Single ANDed term
      else {
        $simple_and = TRUE;
        list($q, $num_new_scores, $num_valid_words) = _search_parse_query($key, $arguments2);
        if ($q) {
          $query[] = $q;
          $arguments[] = $key;
          if (!$num_valid_words) {
            $simple = FALSE;
          }
          // Each AND keyword needs to match at least once
          $matches += $num_new_scores;
        }
      }
    }
    if ($simple_and && $simple_or) {
      $simple = FALSE;
    }
    // Negative matches
    foreach ($keys['negative'] as $key) {
      list($q) = _search_parse_query($key, $arguments2, TRUE);
      if ($q) {
        $query[] = $q;
        $arguments[] = $key;
        $simple = FALSE;
      }
    }
    $query = implode(' AND ', $query);

    // Build word-index conditions for the first pass
    $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4);

    return array($query, $arguments, $query2, $arguments2, $matches, $simple, $warning);
  }
  /**
   * Helper function for search_parse_query().
   *
   * Collects the score words contained in one keyword or phrase and returns the
   * SQL condition template used to match that term against the search dataset.
   *
   * @param $word
   *   A single keyword, or a phrase of space-separated words. Taken by
   *   reference, but not modified here.
   * @param $scores
   *   Array of score words collected so far, keyed by the word itself. Words
   *   from $word that are not yet present are added to it.
   * @param $not
   *   (optional) TRUE if the term is a negative match. Negative terms add no
   *   score words and yield a NOT LIKE condition. Defaults to FALSE.
   *
   * @return
   *   An array with three elements: the SQL condition template (containing a
   *   '%s' placeholder for the term), the number of words newly added to
   *   $scores, and the number of words in $word that qualify as score words.
   */
  function _search_parse_query(&$word, &$scores, $not = FALSE) {
    $added = 0;
    $valid = 0;

    // Only positive terms contribute score words.
    if (!$not) {
      foreach (explode(' ', $word) as $token) {
        $is_number = is_numeric($token);
        // Non-numeric words shorter than the configured minimum are skipped;
        // numbers qualify regardless of their length.
        if (!$is_number && drupal_strlen($token) < variable_get('minimum_word_size', 3)) {
          continue;
        }
        if ($is_number) {
          // Normalize numbers: drop leading minus signs and zeros, then cast.
          $token = (int) ltrim($token, '-0');
        }
        if (!isset($scores[$token])) {
          $scores[$token] = $token;
          $added++;
        }
        $valid++;
      }
    }

    // Matching snippet plus the counters gathered above.
    $condition = $not ? "d.data NOT LIKE '%% %s %%'" : "d.data LIKE '%% %s %%'";
    return array($condition, $added, $valid);
  }
  /**
   * Do a query on the full-text search index for a word or words.
   *
   * This function is normally only called by modules that support the indexed
   * search (and thus implement hook_update_index()).
   *
   * Results are retrieved in two logical passes. However, the two passes are
   * joined together into a single query, and for most simple queries the second
   * pass is not even used.
   *
   * The first pass selects a set of all possible matches, which has the benefit
   * of also providing the exact result set for simple "AND" or "OR" searches.
   *
   * The second portion of the query further refines this set by verifying
   * advanced text conditions (such as negative or phrase matches).
   *
   * If the keywords contain no usable positive keyword, a form error is set on
   * 'keys'. If a lowercase "or" was used, a hint message is shown to the user.
   *
   * @param $keywords
   *   A search string as entered by the user.
   *
   * @param $type
   *   A string identifying the calling module.
   *
   * @param $join1
   *   (optional) Inserted into the JOIN part of the first SQL query.
   *   For example "INNER JOIN {node} n ON n.nid = i.sid".
   *
   * @param $where1
   *   (optional) Inserted into the WHERE part of the first SQL query.
   *   For example "(n.status > %d)".
   *
   * @param $arguments1
   *   (optional) Extra SQL arguments belonging to the first query.
   *
   * @param $columns2
   *   (optional) Inserted into the SELECT part of the second query. Must
   *   contain a column selected as 'score'. Any occurrence of 'i.relevance' is
   *   replaced by the normalized relevance expression.
   *   Defaults to 'i.relevance AS score'.
   *
   * @param $join2
   *   (optional) Inserted into the JOIN part of the second SQL query.
   *   For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid".
   *
   * @param $arguments2
   *   (optional) Extra SQL arguments belonging to the second query parameter.
   *
   * @param $sort_parameters
   *   (optional) SQL arguments for sorting the final results.
   *   Default: 'ORDER BY score DESC'.
   *
   * @return
   *   An array of objects for the search results (one pager page of 10). An
   *   empty array is returned when the query is unusable or nothing matches.
   *
   * @ingroup search
   */
  function do_search($keywords, $type, $join1 = '', $where1 = '1 = 1', $arguments1 = array(), $columns2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') {
    $query = search_parse_query($keywords);
    // Unpack the parsed query: dataset (text) conditions and their arguments,
    // word-index conditions and their arguments, the minimum number of word
    // matches, the "simple query" flag and an optional operator warning.
    list($text_where, $text_args, $word_where, $word_args, $min_matches, $is_simple, $warning) = $query;

    if ($word_where == '') {
      form_set_error('keys', t('You must include at least one positive keyword with @count characters or more.', array('@count' => variable_get('minimum_word_size', 3))));
    }
    if ($warning && $warning == 'or') {
      drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
    }
    if ($query === NULL || $text_where == '' || $word_where == '') {
      return array();
    }

    // Build query for keyword normalization.
    $join = "INNER JOIN {search_total} t ON i.word = t.word $join1";
    $conditions = "$where1 AND ($word_where) AND i.type = '%s'";
    $arguments1 = array_merge($arguments1, $word_args, array($type));
    if (!$is_simple) {
      // Non-simple queries must also be verified against the full dataset text.
      $join .= " INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type";
      $conditions .= " AND ($text_where)";
      $arguments1 = array_merge($arguments1, $text_args);
    }
    // Argument for the trailing "HAVING COUNT(*) >= %d" of both queries.
    $having_args = array($min_matches);

    // Calculate maximum keyword relevance, to normalize it.
    $normalize_select = "SELECT SUM(i.score * t.count) AS score FROM {search_index} i $join WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d ORDER BY score DESC";
    $normalize = db_result(db_query_range($normalize_select, array_merge($arguments1, $having_args), 0, 1));
    if (!$normalize) {
      return array();
    }
    $columns2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * SUM(i.score * t.count))', $columns2);

    // Build query to retrieve results.
    $select = "SELECT i.type, i.sid, $columns2 FROM {search_index} i $join $join2 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d";
    $count_select = "SELECT COUNT(*) FROM ($select) n1";

    // Do actual search query.
    $result = pager_query("$select $sort_parameters", 10, 0, $count_select, array_merge($arguments2, $arguments1, $having_args));
    $results = array();
    while ($item = db_fetch_object($result)) {
      $results[] = $item;
    }
    return $results;
  }
  /**
   * Helper function for grabbing search keys.
   *
   * The keys are taken from the remainder of the request path after its second
   * slash (e.g. "search/node/KEYS"). When the path has no such remainder, the
   * 'keys' request parameter is used instead, which keeps links in the old GET
   * format working. The result is computed once and then served from a static
   * cache.
   *
   * @return
   *   The search keys of the current request, or an empty string if there are
   *   none.
   */
  function search_get_keys() {
    static $return;
    if (isset($return)) {
      return $return;
    }

    // Split into at most three parts so that slashes inside the keys survive.
    $parts = explode('/', $_GET['q'], 3);
    if (count($parts) == 3) {
      $return = $parts[2];
    }
    else {
      // Note: support old GET format of searches for existing links.
      $return = empty($_REQUEST['keys']) ? '' : $_REQUEST['keys'];
    }
    return $return;
  }
  911. /**
  912. * @defgroup search Search interface
  913. * @{
  914. * The Drupal search interface manages a global search mechanism.
  915. *
  916. * Modules may plug into this system to provide searches of different types of
  917. * data. Most of the system is handled by search.module, so this must be enabled
  918. * for all of the search features to work.
  919. *
  920. * There are three ways to interact with the search system:
  921. * - Specifically for searching nodes, you can implement
  922. * hook_nodeapi('update index') and hook_nodeapi('search result'). However,
  923. * note that the search system already indexes all visible output of a node,
  924. * i.e. everything displayed normally by hook_view() and hook_nodeapi('view').
  925. * This is usually sufficient. You should only use this mechanism if you want
  926. * additional, non-visible data to be indexed.
  927. * - Implement hook_search(). This will create a search tab for your module on
  928. * the /search page with a simple keyword search form.
  929. * - Implement hook_update_index(). This allows your module to use Drupal's
  930. * HTML indexing mechanism for searching full text efficiently.
  931. *
  932. * If your module needs to provide a more complicated search form, then you need
  933. * to implement it yourself without hook_search(). In that case, you should
  934. * define it as a local task (tab) under the /search page (e.g. /search/mymodule)
  935. * so that users can easily find it.
  936. */
  /**
   * Render a search form.
   *
   * @param $form_state
   *   The form state array (not used by this builder).
   * @param $action
   *   (optional) Form action. When empty, url('search/' . $type) is used.
   * @param $keys
   *   (optional) The search string entered by the user, containing keywords for
   *   the search. Used as default value of the keyword field.
   * @param $type
   *   (optional) The type of search the form is rendered for; should be the
   *   name of a module which implements hook_search(). It is appended to the
   *   default action and stored in the form's 'module' value.
   * @param $prompt
   *   (optional) A piece of text to put before the form. When NULL, the
   *   translated text "Enter your keywords" is used. An empty prompt makes the
   *   keyword field narrower (20 instead of 40).
   *
   * @return
   *   A Form API array for the search form.
   */
  function search_form(&$form_state, $action = '', $keys = '', $type = NULL, $prompt = NULL) {
    // Add CSS
    drupal_add_css(drupal_get_path('module', 'search') .'/search.css', 'module', 'all', FALSE);

    $action = $action ? $action : url('search/'. $type);
    $prompt = is_null($prompt) ? t('Enter your keywords') : $prompt;

    return array(
      '#action' => $action,
      '#attributes' => array('class' => 'search-form'),
      'module' => array('#type' => 'value', '#value' => $type),
      'basic' => array(
        '#type' => 'item',
        '#title' => $prompt,
        '#id' => 'edit-keys',
        'inline' => array(
          '#prefix' => '<div class="container-inline">',
          '#suffix' => '</div>',
          'keys' => array(
            '#type' => 'textfield',
            '#title' => '',
            '#default_value' => $keys,
            '#size' => $prompt ? 40 : 20,
            '#maxlength' => 255,
          ),
          // processed_keys is used to coordinate keyword passing between other
          // forms that hook into the basic search form.
          'processed_keys' => array('#type' => 'value', '#value' => array()),
          'submit' => array('#type' => 'submit', '#value' => t('Search')),
        ),
      ),
    );
  }
  /**
   * Form builder; Output a search form for the search block and the theme's search box.
   *
   * The keyword text field is keyed by the form ID, so that
   * search_box_form_submit() can look the submitted keywords up under the ID
   * of whichever form was submitted.
   *
   * @param $form_state
   *   The form state array (not used by this builder).
   * @param $form_id
   *   The ID of the form being built; also used as key of the keyword field.
   *
   * @return
   *   A Form API array holding the keyword field and a submit button.
   *
   * @ingroup forms
   * @see search_box_form_submit()
   * @see search-block-form.tpl.php
   */
  function search_box(&$form_state, $form_id) {
    $keyword_field = array(
      '#title' => t('Search this site'),
      '#type' => 'textfield',
      '#size' => 15,
      '#default_value' => '',
      '#attributes' => array('title' => t('Enter the terms you wish to search for.')),
    );
    $button = array('#type' => 'submit', '#value' => t('Search'));

    $form = array();
    $form[$form_id] = $keyword_field;
    $form['submit'] = $button;
    $form['#submit'][] = 'search_box_form_submit';
    return $form;
  }
  /**
   * Process a block search form submission.
   *
   * Redirects to 'search/node/' followed by the trimmed keywords, which are
   * read from the submitted value keyed by the form's own ID.
   *
   * @param $form
   *   The submitted form; its 'form_id' element names the keyword field.
   * @param $form_state
   *   The form state; 'values' is read and 'redirect' is set.
   */
  function search_box_form_submit($form, &$form_state) {
    // The search form relies on control of the redirect destination for its
    // functionality, so we override any static destination set in the request,
    // for example by drupal_access_denied() or drupal_not_found()
    // (see http://drupal.org/node/292565).
    unset($_REQUEST['destination']);
    if (isset($_REQUEST['edit']['destination'])) {
      unset($_REQUEST['edit']['destination']);
    }

    $keys = trim($form_state['values'][$form['form_id']['#value']]);
    $form_state['redirect'] = 'search/node/'. $keys;
  }
  /**
   * Process variables for search-theme-form.tpl.php.
   *
   * The $variables array contains the following arguments:
   * - $form
   *
   * The following variables are added:
   * - $search: Rendered form elements keyed by form key, plus 'hidden' holding
   *   all rendered hidden and token elements concatenated.
   * - $search_form: All of the above concatenated into one string.
   *
   * @see search-theme-form.tpl.php
   */
  function template_preprocess_search_theme_form(&$variables) {
    $variables['search'] = array();
    $hidden = array();
    // Provide variables named after form keys so themers can print each element independently.
    foreach (element_children($variables['form']) as $key) {
      // A child element is not guaranteed to declare '#type'; treat a missing
      // type as "not hidden" instead of reading an undefined index.
      $type = isset($variables['form'][$key]['#type']) ? $variables['form'][$key]['#type'] : '';
      if ($type == 'hidden' || $type == 'token') {
        $hidden[] = drupal_render($variables['form'][$key]);
      }
      else {
        $variables['search'][$key] = drupal_render($variables['form'][$key]);
      }
    }
    // Hidden form elements have no value to themers. No need for separation.
    $variables['search']['hidden'] = implode('', $hidden);
    // Collect all form elements to make it easier to print the whole form.
    $variables['search_form'] = implode('', $variables['search']);
  }
  /**
   * Process variables for search-block-form.tpl.php.
   *
   * The $variables array contains the following arguments:
   * - $form
   *
   * The following variables are added:
   * - $search: Rendered form elements keyed by form key, plus 'hidden' holding
   *   all rendered hidden and token elements concatenated.
   * - $search_form: All of the above concatenated into one string.
   *
   * @see search-block-form.tpl.php
   */
  function template_preprocess_search_block_form(&$variables) {
    $variables['search'] = array();
    $hidden = array();
    // Provide variables named after form keys so themers can print each element independently.
    foreach (element_children($variables['form']) as $key) {
      // A child element is not guaranteed to declare '#type'; treat a missing
      // type as "not hidden" instead of reading an undefined index.
      $type = isset($variables['form'][$key]['#type']) ? $variables['form'][$key]['#type'] : '';
      if ($type == 'hidden' || $type == 'token') {
        $hidden[] = drupal_render($variables['form'][$key]);
      }
      else {
        $variables['search'][$key] = drupal_render($variables['form'][$key]);
      }
    }
    // Hidden form elements have no value to themers. No need for separation.
    $variables['search']['hidden'] = implode('', $hidden);
    // Collect all form elements to make it easier to print the whole form.
    $variables['search_form'] = implode('', $variables['search']);
  }
  /**
   * Perform a standard search on the given keys, and return the formatted results.
   *
   * @param $keys
   *   (optional) The search keywords. Nothing is searched when this is NULL.
   * @param $type
   *   (optional) The module whose hook_search() implementation is invoked with
   *   the 'search' operation. Defaults to 'node'.
   *
   * @return
   *   The output of the module's hook_search_page() implementation if it has
   *   one, otherwise the themed 'search_results' output. NULL is returned when
   *   no keys are given, the module does not implement hook_search(), or the
   *   search yields no results.
   */
  function search_data($keys = NULL, $type = 'node') {
    if (!isset($keys) || !module_hook($type, 'search')) {
      return;
    }

    $results = module_invoke($type, 'search', 'search', $keys);
    if (!is_array($results) || !count($results)) {
      return;
    }

    // A module may take over the presentation of its own results.
    if (module_hook($type, 'search_page')) {
      return module_invoke($type, 'search_page', $results);
    }
    return theme('search_results', $results, $type);
  }
  /**
   * Returns snippets from a piece of text, with certain keywords highlighted.
   * Used for formatting search results.
   *
   * Fragments of text around keyword occurrences are collected until roughly
   * 256 bytes have been gathered or no further occurrences are found.
   * Overlapping fragments are merged, joined with ' ... ', and every keyword in
   * the result is wrapped in <strong> tags. If no keyword occurs in the text,
   * the truncated beginning of the text is returned instead.
   *
   * @param $keys
   *   A string containing a search query.
   *
   * @param $text
   *   The text to extract fragments from.
   *
   * @return
   *   A string containing HTML for the excerpt.
   */
  function search_excerpt($keys, $text) {
    // We highlight around non-indexable or CJK characters.
    $edge_class = '['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .']';
    $boundary = '(?:(?<='. $edge_class .')|(?='. $edge_class .'))';

    // Extract positive keywords and phrases. Unmatched alternatives leave
    // empty strings behind; those are discarded in the loop below.
    preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' '. $keys, $matches);
    $keys = array_merge($matches[2], $matches[3]);

    // Prepare text: pad tags with spaces before stripping them, and surround
    // the text with spaces so that a fragment edge can always be located.
    $text = str_replace(array('<', '>'), array(' <', '> '), $text);
    $text = ' '. strip_tags($text) .' ';

    // Escape the keywords for use inside a regular expression.
    array_walk($keys, '_search_excerpt_replace');
    $pending = $keys;

    // Collect ranges of text around each keyword, starting/ending at spaces.
    // If the sum of all fragments is too short, further passes look for later
    // occurrences of the keywords still pending.
    $ranges = array();
    $resume_at = array();
    $length = 0;
    while ($length < 256 && count($pending)) {
      foreach ($pending as $k => $key) {
        if (strlen($key) == 0) {
          unset($pending[$k], $keys[$k]);
          continue;
        }
        if ($length >= 256) {
          break;
        }
        // Remember where to resume for this key, so that an occurrence which
        // was already used is skipped when more occurrences are desired.
        if (!isset($resume_at[$key])) {
          $resume_at[$key] = 0;
        }

        // Locate a keyword (position $p), then locate a space in front
        // (position $q) and behind it (offset $s relative to $p).
        $found = FALSE;
        if (preg_match('/'. $boundary . $key . $boundary .'/iu', $text, $match, PREG_OFFSET_CAPTURE, $resume_at[$key])) {
          $p = $match[0][1];
          $q = strpos($text, ' ', max(0, $p - 60));
          if ($q !== FALSE) {
            $s = strrpos(substr($text, $p, 80), ' ');
            if ($s !== FALSE) {
              $ranges[$q] = $p + $s;
              $length += $p + $s - $q;
              $resume_at[$key] = $p + 1;
              $found = TRUE;
            }
          }
        }
        // A key that yields no further usable fragment is dropped.
        if (!$found) {
          unset($pending[$k]);
        }
      }
    }

    // If we didn't find anything, return the beginning.
    if (count($ranges) == 0) {
      return truncate_utf8($text, 256) .' ...';
    }

    // Sort the text ranges by starting position.
    ksort($ranges);

    // Collapse overlapping text ranges into one. The sorting makes it O(n).
    $merged = array();
    $start = NULL;
    $stop = NULL;
    foreach ($ranges as $from => $to) {
      if (!isset($start)) {
        $start = $from;
        $stop = $to;
      }
      elseif ($from <= $stop) {
        $stop = max($stop, $to);
      }
      else {
        $merged[$start] = $stop;
        $start = $from;
        $stop = $to;
      }
    }
    $merged[$start] = $stop;

    // Fetch text
    $out = array();
    foreach ($merged as $from => $to) {
      $out[] = substr($text, $from, $to - $from);
    }
    // A leading ellipsis is only added when the excerpt does not start at the
    // very beginning of the text.
    $lead = isset($merged[0]) ? '' : '... ';
    $text = $lead . implode(' ... ', $out) .' ...';

    // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
    $highlight = '/'. $boundary .'('. implode('|', $keys) .')'. $boundary .'/iu';
    return preg_replace($highlight, '<strong>\0</strong>', $text);
  }
  1190. /**
  1191. * @} End of "defgroup search".
  1192. */
  /**
   * Helper function for array_walk() in search_excerpt().
   *
   * Escapes a keyword in place so that it can be embedded literally in a
   * regular expression delimited by '/'.
   *
   * @param $text
   *   The keyword to escape; modified by reference.
   */
  function _search_excerpt_replace(&$text) {
    $escaped = preg_quote($text, '/');
    $text = $escaped;
  }
  /**
   * Implementation of hook_forms().
   *
   * Maps the form IDs of the theme search box and the search block onto the
   * shared search_box() form builder, passing the form ID along as callback
   * argument.
   *
   * @return
   *   An array keyed by form ID, each entry naming the builder callback and its
   *   callback arguments.
   */
  function search_forms() {
    $forms = array();
    foreach (array('search_theme_form', 'search_block_form') as $form_id) {
      $forms[$form_id] = array(
        'callback' => 'search_box',
        'callback arguments' => array($form_id),
      );
    }
    return $forms;
  }