%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /proc/1857783/root/var/www/cwg/wp-content/plugins/searchwp/includes/Logic/
Upload File :
Create Path :
Current File : //proc/1857783/root/var/www/cwg/wp-content/plugins/searchwp/includes/Logic/FuzzyMatches.php

<?php

/**
 * SearchWP fuzzy matches logic.
 *
 * @package SearchWP
 * @author  Jon Christopher
 */

namespace SearchWP\Logic;

use SearchWP\Utils;
use SearchWP\Tokens;

/**
 * Class FuzzyMatches is responsible for finding fuzzy matches for search strings.
 *
 * @since 4.0
 */
class FuzzyMatches {

	/**
	 * Partial match handler.
	 *
	 * @since 4.0
	 */
	private $partial;

	/**
	 * Fuzzy tokens.
	 *
	 * @since 4.0
	 * @var array
	 */
	private $tokens = [];

	/**
	 * Tokens from the original search string.
	 *
	 * @since 4.0
	 * @var array
	 */
	private $keywords_tokens;

	private $exact_matches;

	private $query;

	/**
	 * Fuzzy constructor.
	 *
	 * @param array $tokens Incoming tokens.
	 * @param array $args   Arguments for partial match tokens.
	 * @since 4.0
	 */
	function __construct( array $partial_match_tokens, array $args ) {
		// FUTURE: $partial_match_tokens not used now, but could they be useful. @see PartialMatches@find where this class is instantiated for usage.
		// We want to ensure we're working with the original search string, not anything filtered.
		$this->partial   = $args['partial'];
		$keywords_tokens = $args['keyword_tokens'];
		$exact_matches   = $args['exact_matches'];
		$this->query     = $args['query'];

		// Remove exact match tokens.
		$this->exact_matches = $exact_matches;
		$this->keywords_tokens = array_diff( $keywords_tokens, $exact_matches );

		if ( ! empty( $this->keywords_tokens ) ) {
			$this->tokens = $this->get_fuzzy_tokens( $this->keywords_tokens );
		}
	}

	/**
	 * Finds the best fuzzy match for any non-exact token matches.
	 *
	 * @since 4.0
	 * @return array Revised suggested search based on best fuzzy match(es).
	 */
	public function did_you_mean() {
		$suggested_terms        = [];
		$original_search_tokens = Utils::tokenize( $this->partial->get_query()->get_keywords() )->get();

		foreach ( $original_search_tokens as $search_token ) {
			$suggested_term = '';
			$shortest       = -1;

			// If this is an exact match, put it in place and continue.
			if ( in_array( $search_token, $this->exact_matches, true ) ) {
				$suggested_terms[] = $search_token;
				continue;
			}

			// Loop through the fuzzy tokens to find the best match.
			foreach ( $this->tokens as $fuzzy_token ) {
				$lev = levenshtein( $fuzzy_token, $search_token );

				if ( $lev <= $shortest || $shortest < 0 ) {
					$suggested_term  = $fuzzy_token;
					$shortest = $lev;
				}
			}

			if ( ! empty( $suggested_term ) ) {
				$suggested_terms[] = $suggested_term;
			}

			$suggested_term = '';
			$shortest = -1;
		}

		// Tell the Query that we've made a suggestion.
		if ( ! empty( $suggested_terms ) && ! empty( array_diff( $suggested_terms, $original_search_tokens ) ) ) {
			$this->partial->get_query()->set_suggested_search( new Tokens( implode( ' ', $suggested_terms ) ) );
		}

		return $suggested_terms;
	}

	/**
	 * Finds fuzzy matches for tokens.
	 *
	 * @since 4.0
	 * @return array Tokens with fuzzy matches.
	 */
	public function find( array $partial_match_tokens, array $args ) {
		// Maybe bail out if finding fuzzy matches isn't applicable.
		if ( ! empty( $partial_match_tokens ) && ! apply_filters( 'searchwp\query\partial_matches\fuzzy\force', false, [
			'partial_match_tokens' => $partial_match_tokens,
			'query' => $args['query'],
		] ) ) {
			return $partial_match_tokens;
		}

		// Making it this far means partial matching is enabled and it didn't find anything yet (or fuzzy is forced).
		if ( ! apply_filters( 'searchwp\query\partial_matches\fuzzy', true, [
			'partial_match_tokens' => $partial_match_tokens,
			'query' => $args['query'],
		] ) ) {
			return $partial_match_tokens;
		}

		// If there were no fuzzy tokens found, there's nothing to do.
		if ( empty( $this->tokens ) ) {
			return $partial_match_tokens;
		}

		// Limit the tokens to those that meet the minimum similarity threshold.
		return $this->refine_fuzzy_tokens( $this->tokens, $this->keywords_tokens );
	}

	/**
	 * Refines (very) fuzzy tokens to only those that meet a minimum similarity threshold.
	 *
	 * @since 4.0
	 * @param array $fuzzy_tokens Very fuzzy tokens needing refinement.
	 * @param array $search_tokens Original search tokens.
	 * @return mixed Refined tokens that meet our minimum similarity threshold.
	 */
	private function refine_fuzzy_tokens( array $fuzzy_tokens, array $search_tokens ) {
		$threshold = absint( apply_filters( 'searchwp\query\partial_matches\fuzzy\threshold', 70 ) );

		$fuzzy_tokens = call_user_func_array(
			'array_merge',
			array_map(
				function( $search_term ) use ( $fuzzy_tokens, $threshold ) {
					return array_filter(
						$fuzzy_tokens,
						function( $fuzzy_token ) use ( $search_term, $threshold ) {
							similar_text(
								$fuzzy_token,
								$search_term,
								$similarity_percentage
							);

							return $similarity_percentage >= $threshold;
						}
					);
				},
				$search_tokens
			)
		);

		return $fuzzy_tokens;
	}

	/**
	 * Retrieves a (very) fuzzy set of potential matches.
	 *
	 * @since 4.0
	 * @param array $keywords_tokens Original search tokens.
	 * @return array Extremely fuzzy token matches.
	 */
	private function get_fuzzy_tokens( array $keywords_tokens ) {
		global $wpdb;

		$index = \SearchWP::$index;

		// FUTURE: Do stems need to be considered here, or is the SOUNDEX too far off anyway?
		return $wpdb->get_col( $wpdb->prepare(
			"SELECT token
			FROM {$index->get_tables()['tokens']->table_name}
			WHERE "
				. implode( ' OR ',
					array_fill( 0, count( $keywords_tokens ), "SOUNDEX(token) LIKE SOUNDEX(%s)" )
				) . " AND {$this->get_boundaries_sql()}",
			$keywords_tokens
		) );
	}

	/**
	 * Generates SQL clause to limit the length boundaries of tokens to find.
	 *
	 * @since 4.0
	 * @return string The gereated SQL clause.
	 */
	private function get_boundaries_sql() {
		global $wpdb;

		$min_length = absint( apply_filters(
			'searchwp\query\partial_matches\fuzzy\minimum_length',
			$this->partial->get_tokens()->get_minimum_length()
		) );

		$max_length = absint( apply_filters( 'searchwp\query\partial_matches\fuzzy\maximum_length', 0 ) );

		$boundaries = [];

		// Site limiter.
		if ( 'all' !== $this->query->get_args()['site'] ) {
			$index_table  = \SearchWP::$index->get_tables()['index']->table_name;
			$boundaries[] = $wpdb->prepare( "id IN (
				SELECT token
				FROM {$index_table}
				WHERE site IN (" .
					implode( ', ', array_fill( 0, count( $this->query->get_args()['site'] ), '%d' ) )
					. ') )',
				$this->query->get_args()['site'] );
		}

		if ( $min_length ) { $boundaries[] = "CHAR_LENGTH(token) >= {$min_length}"; }
		if ( $max_length ) { $boundaries[] = "CHAR_LENGTH(token) <= {$max_length}"; }

		return empty( $boundaries ) ? '' : '(' . implode( ' AND ', $boundaries ) . ')';
	}
}

Zerion Mini Shell 1.0