diff options
author | Christophe Coevoet <stof@notk.org> | 2012-01-09 18:44:41 +0100 |
---|---|---|
committer | Christophe Coevoet <stof@notk.org> | 2012-01-09 19:21:32 +0100 |
commit | 1effc4b8dece7f8906e0a21f9942252e6aefb2b7 (patch) | |
tree | 1b15a68569b40a9f573a8add89e534bbce2c2842 | |
parent | c8594d6018dd2386e67952a99ee8d672007028f5 (diff) |
Added more phpdoc
-rw-r--r-- | src/Riu/Search/BooleanEngine.php | 18 | ||||
-rw-r--r-- | src/Riu/Search/Document.php | 8 | ||||
-rw-r--r-- | src/Riu/Search/EngineInterface.php | 7 | ||||
-rw-r--r-- | src/Riu/Search/FrequencyWeighter.php | 14 | ||||
-rw-r--r-- | src/Riu/Search/Indexer.php | 9 | ||||
-rw-r--r-- | src/Riu/Search/IndexerInterface.php | 9 | ||||
-rw-r--r-- | src/Riu/Search/Loader/CacmLoader.php | 5 | ||||
-rw-r--r-- | src/Riu/Search/Loader/LoaderInterface.php | 5 | ||||
-rw-r--r-- | src/Riu/Search/Matcher.php | 76 | ||||
-rw-r--r-- | src/Riu/Search/MatcherInterface.php | 14 | ||||
-rw-r--r-- | src/Riu/Search/ProbabilityEngine.php | 11 | ||||
-rw-r--r-- | src/Riu/Search/TfIdfWeighter.php | 14 | ||||
-rw-r--r-- | src/Riu/Search/Tokenizer/SimpleTokenizer.php | 6 | ||||
-rw-r--r-- | src/Riu/Search/Tokenizer/TokenizerInterface.php | 5 | ||||
-rw-r--r-- | src/Riu/Search/VectorEngine.php | 8 | ||||
-rw-r--r-- | src/Riu/Search/WeighterInterface.php | 11 |
16 files changed, 186 insertions, 34 deletions
diff --git a/src/Riu/Search/BooleanEngine.php b/src/Riu/Search/BooleanEngine.php index d823f8f..fccb556 100644 --- a/src/Riu/Search/BooleanEngine.php +++ b/src/Riu/Search/BooleanEngine.php @@ -4,18 +4,26 @@ namespace Riu\Search; use Riu\Search\Exception\InvalidQueryException; -class BooleanEngine implements EngineInterface +/** + * Engine implementing a boolean search model + */ +class BooleanEngine implements EngineInterface { private $matcher; + /** + * @param MatcherInterface $matcher + */ public function __construct(MatcherInterface $matcher) { $this->matcher = $matcher; } /** + * Searches for the given query and returns an array of document ids. + * * @param string $query - * @return \Riu\Search\Document[] + * @return array */ public function search($query) { @@ -29,6 +37,12 @@ class BooleanEngine implements EngineInterface return array_unique($result); } + /** + * Handles a clause without any OR in it. + * + * @param string $query + * @return array + */ private function handleAndQuery($query) { $result = null; diff --git a/src/Riu/Search/Document.php b/src/Riu/Search/Document.php index 453dd00..4394871 100644 --- a/src/Riu/Search/Document.php +++ b/src/Riu/Search/Document.php @@ -47,6 +47,9 @@ class Document implements \Serializable return $this->indexed; } + /** + * Mark the document as indexed + */ public function markAsIndexed() { $this->indexed = true; @@ -94,6 +97,11 @@ class Document implements \Serializable $this->length += 1; } + /** + * Returns the length of the document. + * + * @return integer + */ public function getLength() { return $this->length; diff --git a/src/Riu/Search/EngineInterface.php b/src/Riu/Search/EngineInterface.php index 0eae9a6..cea5715 100644 --- a/src/Riu/Search/EngineInterface.php +++ b/src/Riu/Search/EngineInterface.php @@ -2,11 +2,16 @@ namespace Riu\Search; +/** + * Interface implemented by all search engines. + */ interface EngineInterface { /** + * Searches for the given query and returns an array of document ids. 
+ * @param string $query - * @return \Riu\Search\Document[] + * @return array */ function search($query); } diff --git a/src/Riu/Search/FrequencyWeighter.php b/src/Riu/Search/FrequencyWeighter.php index 7092fb3..95adbd2 100644 --- a/src/Riu/Search/FrequencyWeighter.php +++ b/src/Riu/Search/FrequencyWeighter.php @@ -2,20 +2,28 @@ namespace Riu\Search; +/** + * Weighter based on the word frequency + */ class FrequencyWeighter implements WeighterInterface { private $matcher; + /** + * @param MatcherInterface $matcher + */ public function __construct(MatcherInterface $matcher) { $this->matcher = $matcher; } /** + * Gets the weights for the words in all documents. + * * @param array $words * @return array */ - public function getDocumentWeights($words) + public function getDocumentWeights(array $words) { $weight = array(); @@ -36,10 +44,12 @@ class FrequencyWeighter implements WeighterInterface } /** + * Gets the weights for the words in the query. + * * @param array $words * @return array */ - public function getSelfWeights($words) + public function getSelfWeights(array $words) { $weight = array(); $max = max($words); diff --git a/src/Riu/Search/Indexer.php b/src/Riu/Search/Indexer.php index 2a0f6a9..32c4627 100644 --- a/src/Riu/Search/Indexer.php +++ b/src/Riu/Search/Indexer.php @@ -9,12 +9,17 @@ class Indexer implements IndexerInterface private $tokenizer; private $stopList = array(); + /** + * @param Tokenizer\TokenizerInterface $tokenizer + */ public function __construct(TokenizerInterface $tokenizer) { $this->tokenizer = $tokenizer; } /** + * Sets the stop list. + * * @param array $stopList */ public function setStopList(array $stopList) @@ -23,6 +28,8 @@ class Indexer implements IndexerInterface } /** + * Indexes a collection of documents and returns the uses for each word. 
+ * * @param \Riu\Search\Document[] $documents * @return array */ @@ -37,6 +44,8 @@ class Indexer implements IndexerInterface } /** + * Indexes a document and returns the uses for each word. + * * @param \Riu\Search\Document $document * @param array $wordList * @return array diff --git a/src/Riu/Search/IndexerInterface.php b/src/Riu/Search/IndexerInterface.php index 3e5e493..86bde7b 100644 --- a/src/Riu/Search/IndexerInterface.php +++ b/src/Riu/Search/IndexerInterface.php @@ -2,20 +2,29 @@ namespace Riu\Search; +/** + * Interface implemented by the indexer + */ interface IndexerInterface { /** + * Sets the stop list. + * * @param array $stopList */ function setStopList(array $stopList); /** + * Indexes a collection of documents and returns the uses for each word. + * * @param \Riu\Search\Document[] $documents * @return array */ function index(array $documents); /** + * Indexes a document and returns the uses for each word. + * * @param \Riu\Search\Document $document * @param array $wordList * @return array diff --git a/src/Riu/Search/Loader/CacmLoader.php b/src/Riu/Search/Loader/CacmLoader.php index 2e21e45..d51c017 100644 --- a/src/Riu/Search/Loader/CacmLoader.php +++ b/src/Riu/Search/Loader/CacmLoader.php @@ -4,9 +4,14 @@ namespace Riu\Search\Loader; use Riu\Search\Document; +/** + * Loader for the CACM collection + */ class CacmLoader implements LoaderInterface { /** + * Loads a resource. + * * @param string $resource * @return \Riu\Search\Document[] */ diff --git a/src/Riu/Search/Loader/LoaderInterface.php b/src/Riu/Search/Loader/LoaderInterface.php index 244afe1..b186896 100644 --- a/src/Riu/Search/Loader/LoaderInterface.php +++ b/src/Riu/Search/Loader/LoaderInterface.php @@ -2,9 +2,14 @@ namespace Riu\Search\Loader; +/** + * Interface implemented by all loaders + */ interface LoaderInterface { /** + * Loads a resource. 
+ * * @param string $resource * @return \Riu\Search\Document[] */ diff --git a/src/Riu/Search/Matcher.php b/src/Riu/Search/Matcher.php index 899af8d..aee71c5 100644 --- a/src/Riu/Search/Matcher.php +++ b/src/Riu/Search/Matcher.php @@ -14,6 +14,12 @@ class Matcher implements MatcherInterface private $documents; private $words; + /** + * @param \Pimple $container + * @param string $resource The name of the file containing the collection + * @param string $stopList The name of the file containing the stop list + * @param string $cacheDir + */ public function __construct(\Pimple $container, $resource, $stopList, $cacheDir) { $this->container = $container; @@ -22,28 +28,68 @@ class Matcher implements MatcherInterface $this->cacheDir = $cacheDir; } + /** + * Gets a document by its id + * + * @param string $id + * @return \Riu\Search\Document + * @throws \InvalidArgumentException if the id is invalid + */ public function getDocument($id) { - $documents = $this->getDocuments(); - if (!isset($documents[$id])) { + $this->load(); + if (!isset($this->documents[$id])) { throw new \InvalidArgumentException(sprintf('The document "%s" does not exist.', $id)); } - return $documents[$id]; + return $this->documents[$id]; } + /** + * Gets the uses of a word. + * + * The return value is an array of ($docId => $number). + * + * @param string $word + * @return array + */ public function getWordUses($word) { + $this->load(); $word = mb_strtolower($word); - $words = $this->getWords(); - return isset($words[$word]) ? $words[$word] : array(); + return isset($this->words[$word]) ? $this->words[$word] : array(); } + /** + * Gets all documents. + * + * @return \Riu\Search\Document[] + */ public function getDocuments() { - if (null !== $this->documents) { - return $this->documents; + $this->load(); + + return $this->documents; + } + + /** + * Gets all document ids. 
+ * @return array + */ + public function getDocumentIds() + { + return array_keys($this->getDocuments()); + } + + /** + * Loads the data from the cache (and refreshes it if needed) + */ + private function load() + { + if (null !== $this->documents && null !== $this->words) { + return; } $cacheFile = $this->cacheDir.'/indexed_documents.cache'; @@ -61,21 +107,5 @@ class Matcher implements MatcherInterface } list($this->documents, $this->words) = unserialize(file_get_contents($cacheFile)); - - return $this->documents; - } - - public function getDocumentIds() - { - return array_keys($this->getDocuments()); - } - - private function getWords() - { - if (null === $this->words) { - $this->getDocuments(); - } - - return $this->words; } } diff --git a/src/Riu/Search/MatcherInterface.php b/src/Riu/Search/MatcherInterface.php index 019cfd3..8a68a69 100644 --- a/src/Riu/Search/MatcherInterface.php +++ b/src/Riu/Search/MatcherInterface.php @@ -2,26 +2,40 @@ namespace Riu\Search; +/** + * Interface implemented by the matcher + */ interface MatcherInterface { /** + * Gets a document by its id + * * @param string $id * @return \Riu\Search\Document + * @throws \InvalidArgumentException if the id is invalid */ function getDocument($id); /** + * Gets the uses of a word. + * + * The return value is an array of ($docId => $number). + * * @param string $word * @return array */ function getWordUses($word); /** + * Gets all documents. + * * @return \Riu\Search\Document[] */ function getDocuments(); /** + * Gets all document ids. 
+ * * @return array */ function getDocumentIds(); diff --git a/src/Riu/Search/ProbabilityEngine.php b/src/Riu/Search/ProbabilityEngine.php index b03b79d..3344d7c 100644 --- a/src/Riu/Search/ProbabilityEngine.php +++ b/src/Riu/Search/ProbabilityEngine.php @@ -4,11 +4,18 @@ namespace Riu\Search; use Riu\Search\Exception\InvalidQueryException; +/** + * Engine implementing a probabilistic search model + */ class ProbabilityEngine implements EngineInterface { private $matcher; private $indexer; + /** + * @param MatcherInterface $matcher + * @param IndexerInterface $indexer + */ public function __construct(MatcherInterface $matcher, IndexerInterface $indexer) { $this->matcher = $matcher; @@ -16,8 +23,10 @@ class ProbabilityEngine implements EngineInterface } /** + * Searches for the given query and returns an array of document ids. + * * @param string $query - * @return \Riu\Search\Document[] + * @return array */ public function search($query) { diff --git a/src/Riu/Search/TfIdfWeighter.php b/src/Riu/Search/TfIdfWeighter.php index 2027b94..e95a144 100644 --- a/src/Riu/Search/TfIdfWeighter.php +++ b/src/Riu/Search/TfIdfWeighter.php @@ -2,20 +2,28 @@ namespace Riu\Search; +/** + * Weighter based on the Tf-Idf + */ class TfIdfWeighter implements WeighterInterface { private $matcher; + /** + * @param MatcherInterface $matcher + */ public function __construct(MatcherInterface $matcher) { $this->matcher = $matcher; } /** + * Gets the weights for the words in all documents. + * * @param array $words * @return array */ - public function getDocumentWeights($words) + public function getDocumentWeights(array $words) { $documents = $this->matcher->getDocuments(); $weight = array(); @@ -44,10 +52,12 @@ class TfIdfWeighter implements WeighterInterface } /** + * Gets the weights for the words in the query. 
+ * * @param array $words * @return array */ - function getSelfWeights($words) + public function getSelfWeights($words) { $documents = $this->matcher->getDocuments(); $sum = array_sum($words); diff --git a/src/Riu/Search/Tokenizer/SimpleTokenizer.php b/src/Riu/Search/Tokenizer/SimpleTokenizer.php index f7ebc27..9a76fec 100644 --- a/src/Riu/Search/Tokenizer/SimpleTokenizer.php +++ b/src/Riu/Search/Tokenizer/SimpleTokenizer.php @@ -2,9 +2,15 @@ namespace Riu\Search\Tokenizer; +/** + * The simple tokenizer splits the content into words without + * normalizing similar words. + */ class SimpleTokenizer implements TokenizerInterface { /** + * Tokenizes a content. + * * @param string $content * @return array */ diff --git a/src/Riu/Search/Tokenizer/TokenizerInterface.php b/src/Riu/Search/Tokenizer/TokenizerInterface.php index 199fb5b..1f4ca8b 100644 --- a/src/Riu/Search/Tokenizer/TokenizerInterface.php +++ b/src/Riu/Search/Tokenizer/TokenizerInterface.php @@ -2,9 +2,14 @@ namespace Riu\Search\Tokenizer; +/** + * Interface implemented by all tokenizers. + */ interface TokenizerInterface { /** + * Tokenizes a content. + * * @param string $content * @return array */ diff --git a/src/Riu/Search/VectorEngine.php b/src/Riu/Search/VectorEngine.php index dff1acd..471d5e1 100644 --- a/src/Riu/Search/VectorEngine.php +++ b/src/Riu/Search/VectorEngine.php @@ -2,6 +2,9 @@ namespace Riu\Search; +/** + * Engine implementing a vectorial search model + */ class VectorEngine implements EngineInterface { private $indexer; @@ -19,9 +22,12 @@ class VectorEngine implements EngineInterface $this->matcher = $matcher; $this->weighter = $weighter; } + /** + * Searches for the given query and returns an array of document ids. 
+ * * @param string $query - * @return \Riu\Search\Document[] + * @return array */ public function search($query) { diff --git a/src/Riu/Search/WeighterInterface.php b/src/Riu/Search/WeighterInterface.php index 9cc1dc6..d3b2b1a 100644 --- a/src/Riu/Search/WeighterInterface.php +++ b/src/Riu/Search/WeighterInterface.php @@ -2,17 +2,24 @@ namespace Riu\Search; +/** + * Interface implemented by the weighter used for the vectorial engine + */ interface WeighterInterface { /** + * Gets the weights for the words in all documents. + * * @param array $words * @return array */ - function getDocumentWeights ($words); + function getDocumentWeights(array $words); /** + * Gets the weights for the words in the query. + * * @param array $words * @return array */ - function getSelfWeights ($words); + function getSelfWeights(array $words); } |