summaryrefslogtreecommitdiff
path: root/kolab.org/www/drupal-7.26/modules/search/search.extender.inc
diff options
context:
space:
mode:
Diffstat (limited to 'kolab.org/www/drupal-7.26/modules/search/search.extender.inc')
-rw-r--r--kolab.org/www/drupal-7.26/modules/search/search.extender.inc536
1 files changed, 536 insertions, 0 deletions
diff --git a/kolab.org/www/drupal-7.26/modules/search/search.extender.inc b/kolab.org/www/drupal-7.26/modules/search/search.extender.inc
new file mode 100644
index 0000000..6709466
--- /dev/null
+++ b/kolab.org/www/drupal-7.26/modules/search/search.extender.inc
@@ -0,0 +1,536 @@
+<?php
+
+/**
+ * @file
+ * Search query extender and helper functions.
+ */
+
+/**
+ * Do a query on the full-text search index for a word or words.
+ *
+ * This function is normally only called by each module that supports the
+ * indexed search (and thus, implements hook_update_index()).
+ *
+ * Results are retrieved in two logical passes. However, the two passes are
+ * joined together into a single query. And in the case of most simple
+ * queries the second pass is not even used.
+ *
+ * The first pass selects a set of all possible matches, which has the benefit
+ * of also providing the exact result set for simple "AND" or "OR" searches.
+ *
+ * The second portion of the query further refines this set by verifying
+ * advanced text conditions (such as negative or phrase matches).
+ *
+ * The used query object has the tag 'search_$module' and can be further
+ * extended with hook_query_alter().
+ */
+class SearchQuery extends SelectQueryExtender {
+ /**
+ * The search query that is used for searching.
+ *
+ * @var string
+ */
+ protected $searchExpression;
+
+ /**
+ * Type of search (search module).
+ *
+ * This maps to the value of the type column in search_index, and is equal
+ * to the machine-readable name of the module that implements
+ * hook_search_info().
+ *
+ * @var string
+ */
+ protected $type;
+
+ /**
+ * Positive and negative search keys.
+ *
+ * @var array
+ */
+ protected $keys = array('positive' => array(), 'negative' => array());
+
+ /**
+ * Indicates whether the first pass query requires complex conditions (LIKE).
+ *
+ * @var boolean.
+ */
+ protected $simple = TRUE;
+
+ /**
+ * Conditions that are used for exact searches.
+ *
+ * This is always used for the second pass query but not for the first pass,
+ * unless $this->simple is FALSE.
+ *
+ * @var DatabaseCondition
+ */
+ protected $conditions;
+
+ /**
+ * Indicates how many matches for a search query are necessary.
+ *
+ * @var int
+ */
+ protected $matches = 0;
+
+ /**
+ * Array of search words.
+ *
+ * These words have to match against {search_index}.word.
+ *
+ * @var array
+ */
+ protected $words = array();
+
+ /**
+ * Multiplier for the normalized search score.
+ *
+ * This value is calculated by the first pass query and multiplied with the
+ * actual score of a specific word to make sure that the resulting calculated
+ * score is between 0 and 1.
+ *
+ * @var float
+ */
+ protected $normalize;
+
+ /**
+ * Indicates whether the first pass query has been executed.
+ *
+ * @var boolean
+ */
+ protected $executedFirstPass = FALSE;
+
+ /**
+ * Stores score expressions.
+ *
+ * @var array
+ *
+ * @see addScore()
+ */
+ protected $scores = array();
+
+ /**
+ * Stores arguments for score expressions.
+ *
+ * @var array
+ */
+ protected $scoresArguments = array();
+
+ /**
+ * Stores multipliers for score expressions.
+ *
+ * @var array
+ */
+ protected $multiply = array();
+
+ /**
+ * Whether or not search expressions were ignored.
+ *
+ * The maximum number of AND/OR combinations exceeded can be configured to
+ * avoid Denial-of-Service attacks. Expressions beyond the limit are ignored.
+ *
+ * @var boolean
+ */
+ protected $expressionsIgnored = FALSE;
+
+ /**
+ * Sets up the search query expression.
+ *
+ * @param $query
+ * A search query string, which can contain options.
+ * @param $module
+ * The search module. This maps to {search_index}.type in the database.
+ *
+ * @return
+ * The SearchQuery object.
+ */
+ public function searchExpression($expression, $module) {
+ $this->searchExpression = $expression;
+ $this->type = $module;
+
+ return $this;
+ }
+
+ /**
+ * Applies a search option and removes it from the search query string.
+ *
+ * These options are in the form option:value,value2,value3.
+ *
+ * @param $option
+ * Name of the option.
+ * @param $column
+ * Name of the database column to which the value should be applied.
+ *
+ * @return
+ * TRUE if a value for that option was found, FALSE if not.
+ */
+ public function setOption($option, $column) {
+ if ($values = search_expression_extract($this->searchExpression, $option)) {
+ $or = db_or();
+ foreach (explode(',', $values) as $value) {
+ $or->condition($column, $value);
+ }
+ $this->condition($or);
+ $this->searchExpression = search_expression_insert($this->searchExpression, $option);
+ return TRUE;
+ }
+ return FALSE;
+ }
+
+ /**
+ * Parses the search query into SQL conditions.
+ *
+ * We build two queries that match the dataset bodies.
+ */
+ protected function parseSearchExpression() {
+ // Matchs words optionally prefixed by a dash. A word in this case is
+ // something between two spaces, optionally quoted.
+ preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression , $keywords, PREG_SET_ORDER);
+
+ if (count($keywords) == 0) {
+ return;
+ }
+
+ // Classify tokens.
+ $or = FALSE;
+ $warning = '';
+ $limit_combinations = variable_get('search_and_or_limit', 7);
+ // The first search expression does not count as AND.
+ $and_count = -1;
+ $or_count = 0;
+ foreach ($keywords as $match) {
+ if ($or_count && $and_count + $or_count >= $limit_combinations) {
+ // Ignore all further search expressions to prevent Denial-of-Service
+ // attacks using a high number of AND/OR combinations.
+ $this->expressionsIgnored = TRUE;
+ break;
+ }
+ $phrase = FALSE;
+ // Strip off phrase quotes.
+ if ($match[2]{0} == '"') {
+ $match[2] = substr($match[2], 1, -1);
+ $phrase = TRUE;
+ $this->simple = FALSE;
+ }
+ // Simplify keyword according to indexing rules and external
+ // preprocessors. Use same process as during search indexing, so it
+ // will match search index.
+ $words = search_simplify($match[2]);
+ // Re-explode in case simplification added more words, except when
+ // matching a phrase.
+ $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
+ // Negative matches.
+ if ($match[1] == '-') {
+ $this->keys['negative'] = array_merge($this->keys['negative'], $words);
+ }
+ // OR operator: instead of a single keyword, we store an array of all
+ // OR'd keywords.
+ elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
+ $last = array_pop($this->keys['positive']);
+ // Starting a new OR?
+ if (!is_array($last)) {
+ $last = array($last);
+ }
+ $this->keys['positive'][] = $last;
+ $or = TRUE;
+ $or_count++;
+ continue;
+ }
+ // AND operator: implied, so just ignore it.
+ elseif ($match[2] == 'AND' || $match[2] == 'and') {
+ $warning = $match[2];
+ continue;
+ }
+
+ // Plain keyword.
+ else {
+ if ($match[2] == 'or') {
+ $warning = $match[2];
+ }
+ if ($or) {
+ // Add to last element (which is an array).
+ $this->keys['positive'][count($this->keys['positive']) - 1] = array_merge($this->keys['positive'][count($this->keys['positive']) - 1], $words);
+ }
+ else {
+ $this->keys['positive'] = array_merge($this->keys['positive'], $words);
+ $and_count++;
+ }
+ }
+ $or = FALSE;
+ }
+
+ // Convert keywords into SQL statements.
+ $this->conditions = db_and();
+ $simple_and = FALSE;
+ $simple_or = FALSE;
+ // Positive matches.
+ foreach ($this->keys['positive'] as $key) {
+ // Group of ORed terms.
+ if (is_array($key) && count($key)) {
+ $simple_or = TRUE;
+ $any = FALSE;
+ $queryor = db_or();
+ foreach ($key as $or) {
+ list($num_new_scores) = $this->parseWord($or);
+ $any |= $num_new_scores;
+ $queryor->condition('d.data', "% $or %", 'LIKE');
+ }
+ if (count($queryor)) {
+ $this->conditions->condition($queryor);
+ // A group of OR keywords only needs to match once.
+ $this->matches += ($any > 0);
+ }
+ }
+ // Single ANDed term.
+ else {
+ $simple_and = TRUE;
+ list($num_new_scores, $num_valid_words) = $this->parseWord($key);
+ $this->conditions->condition('d.data', "% $key %", 'LIKE');
+ if (!$num_valid_words) {
+ $this->simple = FALSE;
+ }
+ // Each AND keyword needs to match at least once.
+ $this->matches += $num_new_scores;
+ }
+ }
+ if ($simple_and && $simple_or) {
+ $this->simple = FALSE;
+ }
+ // Negative matches.
+ foreach ($this->keys['negative'] as $key) {
+ $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
+ $this->simple = FALSE;
+ }
+
+ if ($warning == 'or') {
+ drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
+ }
+ }
+
+ /**
+ * Helper function for parseQuery().
+ */
+ protected function parseWord($word) {
+ $num_new_scores = 0;
+ $num_valid_words = 0;
+ // Determine the scorewords of this word/phrase.
+ $split = explode(' ', $word);
+ foreach ($split as $s) {
+ $num = is_numeric($s);
+ if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) {
+ if (!isset($this->words[$s])) {
+ $this->words[$s] = $s;
+ $num_new_scores++;
+ }
+ $num_valid_words++;
+ }
+ }
+ // Return matching snippet and number of added words.
+ return array($num_new_scores, $num_valid_words);
+ }
+
+ /**
+ * Executes the first pass query.
+ *
+ * This can either be done explicitly, so that additional scores and
+ * conditions can be applied to the second pass query, or implicitly by
+ * addScore() or execute().
+ *
+ * @return
+ * TRUE if search items exist, FALSE if not.
+ */
+ public function executeFirstPass() {
+ $this->parseSearchExpression();
+
+ if (count($this->words) == 0) {
+ form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'));
+ return FALSE;
+ }
+ if ($this->expressionsIgnored) {
+ drupal_set_message(t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', array('@count' => variable_get('search_and_or_limit', 7))), 'warning');
+ }
+ $this->executedFirstPass = TRUE;
+
+ if (!empty($this->words)) {
+ $or = db_or();
+ foreach ($this->words as $word) {
+ $or->condition('i.word', $word);
+ }
+ $this->condition($or);
+ }
+ // Build query for keyword normalization.
+ $this->join('search_total', 't', 'i.word = t.word');
+ $this
+ ->condition('i.type', $this->type)
+ ->groupBy('i.type')
+ ->groupBy('i.sid')
+ ->having('COUNT(*) >= :matches', array(':matches' => $this->matches));
+
+ // Clone the query object to do the firstPass query;
+ $first = clone $this->query;
+
+ // For complex search queries, add the LIKE conditions to the first pass query.
+ if (!$this->simple) {
+ $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
+ $first->condition($this->conditions);
+ }
+
+ // Calculate maximum keyword relevance, to normalize it.
+ $first->addExpression('SUM(i.score * t.count)', 'calculated_score');
+ $this->normalize = $first
+ ->range(0, 1)
+ ->orderBy('calculated_score', 'DESC')
+ ->execute()
+ ->fetchField();
+
+ if ($this->normalize) {
+ return TRUE;
+ }
+ return FALSE;
+ }
+
+ /**
+ * Adds a custom score expression to the search query.
+ *
+ * Score expressions are used to order search results. If no calls to
+ * addScore() have taken place, a default keyword relevance score will be
+ * used. However, if at least one call to addScore() has taken place, the
+ * keyword relevance score is not automatically added.
+ *
+ * Also note that if you call orderBy() directly on the query, search scores
+ * will not automatically be used to order search results. Your orderBy()
+ * expression can reference 'calculated_score', which will be the total
+ * calculated score value.
+ *
+ * @param $score
+ * The score expression, which should evaluate to a number between 0 and 1.
+ * The string 'i.relevance' in a score expression will be replaced by a
+ * measure of keyword relevance between 0 and 1.
+ * @param $arguments
+ * Query arguments needed to provide values to the score expression.
+ * @param $multiply
+ * If set, the score is multiplied with this value. However, all scores
+ * with multipliers are then divided by the total of all multipliers, so
+ * that overall, the normalization is maintained.
+ *
+ * @return object
+ * The updated query object.
+ */
+ public function addScore($score, $arguments = array(), $multiply = FALSE) {
+ if ($multiply) {
+ $i = count($this->multiply);
+ // Modify the score expression so it is multiplied by the multiplier,
+ // with a divisor to renormalize.
+ $score = "CAST(:multiply_$i AS DECIMAL) * COALESCE(( " . $score . "), 0) / CAST(:total_$i AS DECIMAL)";
+ // Add an argument for the multiplier. The :total_$i argument is taken
+ // care of in the execute() method, which is when the total divisor is
+ // calculated.
+ $arguments[':multiply_' . $i] = $multiply;
+ $this->multiply[] = $multiply;
+ }
+
+ $this->scores[] = $score;
+ $this->scoresArguments += $arguments;
+
+ return $this;
+ }
+
+ /**
+ * Executes the search.
+ *
+ * If not already done, this executes the first pass query. Then the complex
+ * conditions are applied to the query including score expressions and
+ * ordering.
+ *
+ * @return
+ * FALSE if the first pass query returned no results, and a database result
+ * set if there were results.
+ */
+ public function execute()
+ {
+ if (!$this->executedFirstPass) {
+ $this->executeFirstPass();
+ }
+ if (!$this->normalize) {
+ return new DatabaseStatementEmpty();
+ }
+
+ // Add conditions to query.
+ $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
+ $this->condition($this->conditions);
+
+ if (empty($this->scores)) {
+ // Add default score.
+ $this->addScore('i.relevance');
+ }
+
+ if (count($this->multiply)) {
+ // Re-normalize scores with multipliers by dividing by the total of all
+ // multipliers. The expressions were altered in addScore(), so here just
+ // add the arguments for the total.
+ $i = 0;
+ $sum = array_sum($this->multiply);
+ foreach ($this->multiply as $total) {
+ $this->scoresArguments[':total_' . $i] = $sum;
+ $i++;
+ }
+ }
+
+ // Replace the pseudo-expression 'i.relevance' with a measure of keyword
+ // relevance in all score expressions, using string replacement. Careful
+ // though! If you just print out a float, some locales use ',' as the
+ // decimal separator in PHP, while SQL always uses '.'. So, make sure to
+ // set the number format correctly.
+ $relevance = number_format((1.0 / $this->normalize), 10, '.', '');
+ $this->scores = str_replace('i.relevance', '(' . $relevance . ' * i.score * t.count)', $this->scores);
+
+ // Add all scores together to form a query field.
+ $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);
+
+ // If an order has not yet been set for this query, add a default order
+ // that sorts by the calculated sum of scores.
+ if (count($this->getOrderBy()) == 0) {
+ $this->orderBy('calculated_score', 'DESC');
+ }
+
+ // Add tag and useful metadata.
+ $this
+ ->addTag('search_' . $this->type)
+ ->addMetaData('normalize', $this->normalize)
+ ->fields('i', array('type', 'sid'));
+
+ return $this->query->execute();
+ }
+
+ /**
+ * Builds the default count query for SearchQuery.
+ *
+ * Since SearchQuery always uses GROUP BY, we can default to a subquery. We
+ * also add the same conditions as execute() because countQuery() is called
+ * first.
+ */
+ public function countQuery() {
+ // Clone the inner query.
+ $inner = clone $this->query;
+
+ // Add conditions to query.
+ $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
+ $inner->condition($this->conditions);
+
+ // Remove existing fields and expressions, they are not needed for a count
+ // query.
+ $fields =& $inner->getFields();
+ $fields = array();
+ $expressions =& $inner->getExpressions();
+ $expressions = array();
+
+ // Add the sid as the only field and count them as a subquery.
+ $count = db_select($inner->fields('i', array('sid')), NULL, array('target' => 'slave'));
+
+ // Add the COUNT() expression.
+ $count->addExpression('COUNT(*)');
+
+ return $count;
+ }
+}