summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristophe Coevoet <stof@notk.org>2012-01-09 18:44:41 +0100
committerChristophe Coevoet <stof@notk.org>2012-01-09 19:21:32 +0100
commit1effc4b8dece7f8906e0a21f9942252e6aefb2b7 (patch)
tree1b15a68569b40a9f573a8add89e534bbce2c2842
parentc8594d6018dd2386e67952a99ee8d672007028f5 (diff)
Added more phpdoc
-rw-r--r--src/Riu/Search/BooleanEngine.php18
-rw-r--r--src/Riu/Search/Document.php8
-rw-r--r--src/Riu/Search/EngineInterface.php7
-rw-r--r--src/Riu/Search/FrequencyWeighter.php14
-rw-r--r--src/Riu/Search/Indexer.php9
-rw-r--r--src/Riu/Search/IndexerInterface.php9
-rw-r--r--src/Riu/Search/Loader/CacmLoader.php5
-rw-r--r--src/Riu/Search/Loader/LoaderInterface.php5
-rw-r--r--src/Riu/Search/Matcher.php76
-rw-r--r--src/Riu/Search/MatcherInterface.php14
-rw-r--r--src/Riu/Search/ProbabilityEngine.php11
-rw-r--r--src/Riu/Search/TfIdfWeighter.php14
-rw-r--r--src/Riu/Search/Tokenizer/SimpleTokenizer.php6
-rw-r--r--src/Riu/Search/Tokenizer/TokenizerInterface.php5
-rw-r--r--src/Riu/Search/VectorEngine.php8
-rw-r--r--src/Riu/Search/WeighterInterface.php11
16 files changed, 186 insertions, 34 deletions
diff --git a/src/Riu/Search/BooleanEngine.php b/src/Riu/Search/BooleanEngine.php
index d823f8f..fccb556 100644
--- a/src/Riu/Search/BooleanEngine.php
+++ b/src/Riu/Search/BooleanEngine.php
@@ -4,18 +4,26 @@ namespace Riu\Search;
use Riu\Search\Exception\InvalidQueryException;
-class BooleanEngine implements EngineInterface
+/**
+ * Engine implementing a boolean search model
+ */
+class BooleanEngine implements EngineInterface
{
private $matcher;
+ /**
+ * @param MatcherInterface $matcher
+ */
public function __construct(MatcherInterface $matcher)
{
$this->matcher = $matcher;
}
/**
+ * Searches for the given query and returns an array of document ids.
+ *
* @param string $query
- * @return \Riu\Search\Document[]
+ * @return array
*/
public function search($query)
{
@@ -29,6 +37,12 @@ class BooleanEngine implements EngineInterface
return array_unique($result);
}
+ /**
+ * Handles a clause without any OR in it.
+ *
+ * @param string $query
+ * @return array
+ */
private function handleAndQuery($query)
{
$result = null;
diff --git a/src/Riu/Search/Document.php b/src/Riu/Search/Document.php
index 453dd00..4394871 100644
--- a/src/Riu/Search/Document.php
+++ b/src/Riu/Search/Document.php
@@ -47,6 +47,9 @@ class Document implements \Serializable
return $this->indexed;
}
+ /**
+ * Marks the document as indexed
+ */
public function markAsIndexed()
{
$this->indexed = true;
@@ -94,6 +97,11 @@ class Document implements \Serializable
$this->length += 1;
}
+ /**
+ * Returns the length of the document.
+ *
+ * @return integer
+ */
public function getLength()
{
return $this->length;
diff --git a/src/Riu/Search/EngineInterface.php b/src/Riu/Search/EngineInterface.php
index 0eae9a6..cea5715 100644
--- a/src/Riu/Search/EngineInterface.php
+++ b/src/Riu/Search/EngineInterface.php
@@ -2,11 +2,16 @@
namespace Riu\Search;
+/**
+ * Interface implemented by all search engines.
+ */
interface EngineInterface
{
/**
+ * Searches for the given query and returns an array of document ids.
+ *
* @param string $query
- * @return \Riu\Search\Document[]
+ * @return array
*/
function search($query);
}
diff --git a/src/Riu/Search/FrequencyWeighter.php b/src/Riu/Search/FrequencyWeighter.php
index 7092fb3..95adbd2 100644
--- a/src/Riu/Search/FrequencyWeighter.php
+++ b/src/Riu/Search/FrequencyWeighter.php
@@ -2,20 +2,28 @@
namespace Riu\Search;
+/**
+ * Weighter based on the word frequency
+ */
class FrequencyWeighter implements WeighterInterface
{
private $matcher;
+ /**
+ * @param MatcherInterface $matcher
+ */
public function __construct(MatcherInterface $matcher)
{
$this->matcher = $matcher;
}
/**
+ * Gets the weights for the words in all documents.
+ *
* @param array $words
* @return array
*/
- public function getDocumentWeights($words)
+ public function getDocumentWeights(array $words)
{
$weight = array();
@@ -36,10 +44,12 @@ class FrequencyWeighter implements WeighterInterface
}
/**
+ * Gets the weights for the words in the query.
+ *
* @param array $words
* @return array
*/
- public function getSelfWeights($words)
+ public function getSelfWeights(array $words)
{
$weight = array();
$max = max($words);
diff --git a/src/Riu/Search/Indexer.php b/src/Riu/Search/Indexer.php
index 2a0f6a9..32c4627 100644
--- a/src/Riu/Search/Indexer.php
+++ b/src/Riu/Search/Indexer.php
@@ -9,12 +9,17 @@ class Indexer implements IndexerInterface
private $tokenizer;
private $stopList = array();
+ /**
+ * @param Tokenizer\TokenizerInterface $tokenizer
+ */
public function __construct(TokenizerInterface $tokenizer)
{
$this->tokenizer = $tokenizer;
}
/**
+ * Sets the stop list.
+ *
* @param array $stopList
*/
public function setStopList(array $stopList)
@@ -23,6 +28,8 @@ class Indexer implements IndexerInterface
}
/**
+ * Indexes a collection of documents and returns the uses for each word.
+ *
* @param \Riu\Search\Document[] $documents
* @return array
*/
@@ -37,6 +44,8 @@ class Indexer implements IndexerInterface
}
/**
+ * Indexes a document and returns the uses for each word.
+ *
* @param \Riu\Search\Document $document
* @param array $wordList
* @return array
diff --git a/src/Riu/Search/IndexerInterface.php b/src/Riu/Search/IndexerInterface.php
index 3e5e493..86bde7b 100644
--- a/src/Riu/Search/IndexerInterface.php
+++ b/src/Riu/Search/IndexerInterface.php
@@ -2,20 +2,29 @@
namespace Riu\Search;
+/**
+ * Interface implemented by the indexer
+ */
interface IndexerInterface
{
/**
+ * Sets the stop list.
+ *
* @param array $stopList
*/
function setStopList(array $stopList);
/**
+ * Indexes a collection of documents and returns the uses for each word.
+ *
* @param \Riu\Search\Document[] $documents
* @return array
*/
function index(array $documents);
/**
+ * Indexes a document and returns the uses for each word.
+ *
* @param \Riu\Search\Document $document
* @param array $wordList
* @return array
diff --git a/src/Riu/Search/Loader/CacmLoader.php b/src/Riu/Search/Loader/CacmLoader.php
index 2e21e45..d51c017 100644
--- a/src/Riu/Search/Loader/CacmLoader.php
+++ b/src/Riu/Search/Loader/CacmLoader.php
@@ -4,9 +4,14 @@ namespace Riu\Search\Loader;
use Riu\Search\Document;
+/**
+ * Loader for the CACM collection
+ */
class CacmLoader implements LoaderInterface
{
/**
+ * Loads a resource.
+ *
* @param string $resource
* @return \Riu\Search\Document[]
*/
diff --git a/src/Riu/Search/Loader/LoaderInterface.php b/src/Riu/Search/Loader/LoaderInterface.php
index 244afe1..b186896 100644
--- a/src/Riu/Search/Loader/LoaderInterface.php
+++ b/src/Riu/Search/Loader/LoaderInterface.php
@@ -2,9 +2,14 @@
namespace Riu\Search\Loader;
+/**
+ * Interface implemented by all loaders
+ */
interface LoaderInterface
{
/**
+ * Loads a resource.
+ *
* @param string $resource
* @return \Riu\Search\Document[]
*/
diff --git a/src/Riu/Search/Matcher.php b/src/Riu/Search/Matcher.php
index 899af8d..aee71c5 100644
--- a/src/Riu/Search/Matcher.php
+++ b/src/Riu/Search/Matcher.php
@@ -14,6 +14,12 @@ class Matcher implements MatcherInterface
private $documents;
private $words;
+ /**
+ * @param \Pimple $container
+ * @param string $resource The name of the file containing the collection
+ * @param string $stopList The name of the file containing the stop list
+ * @param string $cacheDir
+ */
public function __construct(\Pimple $container, $resource, $stopList, $cacheDir)
{
$this->container = $container;
@@ -22,28 +28,68 @@ class Matcher implements MatcherInterface
$this->cacheDir = $cacheDir;
}
+ /**
+ * Gets a document by its id
+ *
+ * @param string $id
+ * @return \Riu\Search\Document
+ * @throws \InvalidArgumentException if the id is invalid
+ */
public function getDocument($id)
{
- $documents = $this->getDocuments();
- if (!isset($documents[$id])) {
+ $this->load();
+ if (!isset($this->documents[$id])) {
throw new \InvalidArgumentException(sprintf('The document "%s" does not exist.', $id));
}
- return $documents[$id];
+ return $this->documents[$id];
}
+ /**
+ * Gets the uses of a word.
+ *
+ * The return value is an array of ($docId => $number).
+ *
+ * @param string $word
+ * @return array
+ */
public function getWordUses($word)
{
+ $this->load();
$word = mb_strtolower($word);
- $words = $this->getWords();
- return isset($words[$word]) ? $words[$word] : array();
+ return isset($this->words[$word]) ? $this->words[$word] : array();
}
+ /**
+ * Gets all documents.
+ *
+ * @return \Riu\Search\Document[]
+ */
public function getDocuments()
{
- if (null !== $this->documents) {
- return $this->documents;
+ $this->load();
+
+ return $this->documents;
+ }
+
+ /**
+ * Gets all document ids.
+ *
+ * @return array
+ */
+ public function getDocumentIds()
+ {
+ return array_keys($this->getDocuments());
+ }
+
+ /**
+ * Loads the data from the cache (and refreshes it if needed)
+ */
+ private function load()
+ {
+ if (null !== $this->documents && null !== $this->words) {
+ return;
}
$cacheFile = $this->cacheDir.'/indexed_documents.cache';
@@ -61,21 +107,5 @@ class Matcher implements MatcherInterface
}
list($this->documents, $this->words) = unserialize(file_get_contents($cacheFile));
-
- return $this->documents;
- }
-
- public function getDocumentIds()
- {
- return array_keys($this->getDocuments());
- }
-
- private function getWords()
- {
- if (null === $this->words) {
- $this->getDocuments();
- }
-
- return $this->words;
}
}
diff --git a/src/Riu/Search/MatcherInterface.php b/src/Riu/Search/MatcherInterface.php
index 019cfd3..8a68a69 100644
--- a/src/Riu/Search/MatcherInterface.php
+++ b/src/Riu/Search/MatcherInterface.php
@@ -2,26 +2,40 @@
namespace Riu\Search;
+/**
+ * Interface implemented by the matcher
+ */
interface MatcherInterface
{
/**
+ * Gets a document by its id
+ *
* @param string $id
* @return \Riu\Search\Document
+ * @throws \InvalidArgumentException if the id is invalid
*/
function getDocument($id);
/**
+ * Gets the uses of a word.
+ *
+ * The return value is an array of ($docId => $number).
+ *
* @param string $word
* @return array
*/
function getWordUses($word);
/**
+ * Gets all documents.
+ *
* @return \Riu\Search\Document[]
*/
function getDocuments();
/**
+ * Gets all document ids.
+ *
* @return array
*/
function getDocumentIds();
diff --git a/src/Riu/Search/ProbabilityEngine.php b/src/Riu/Search/ProbabilityEngine.php
index b03b79d..3344d7c 100644
--- a/src/Riu/Search/ProbabilityEngine.php
+++ b/src/Riu/Search/ProbabilityEngine.php
@@ -4,11 +4,18 @@ namespace Riu\Search;
use Riu\Search\Exception\InvalidQueryException;
+/**
+ * Engine implementing a probabilistic search model
+ */
class ProbabilityEngine implements EngineInterface
{
private $matcher;
private $indexer;
+ /**
+ * @param MatcherInterface $matcher
+ * @param IndexerInterface $indexer
+ */
public function __construct(MatcherInterface $matcher, IndexerInterface $indexer)
{
$this->matcher = $matcher;
@@ -16,8 +23,10 @@ class ProbabilityEngine implements EngineInterface
}
/**
+ * Searches for the given query and returns an array of document ids.
+ *
* @param string $query
- * @return \Riu\Search\Document[]
+ * @return array
*/
public function search($query)
{
diff --git a/src/Riu/Search/TfIdfWeighter.php b/src/Riu/Search/TfIdfWeighter.php
index 2027b94..e95a144 100644
--- a/src/Riu/Search/TfIdfWeighter.php
+++ b/src/Riu/Search/TfIdfWeighter.php
@@ -2,20 +2,28 @@
namespace Riu\Search;
+/**
+ * Weighter based on the Tf-Idf
+ */
class TfIdfWeighter implements WeighterInterface
{
private $matcher;
+ /**
+ * @param MatcherInterface $matcher
+ */
public function __construct(MatcherInterface $matcher)
{
$this->matcher = $matcher;
}
/**
+ * Gets the weights for the words in all documents.
+ *
* @param array $words
* @return array
*/
- public function getDocumentWeights($words)
+ public function getDocumentWeights(array $words)
{
$documents = $this->matcher->getDocuments();
$weight = array();
@@ -44,10 +52,12 @@ class TfIdfWeighter implements WeighterInterface
}
/**
+ * Gets the weights for the words in the query.
+ *
* @param array $words
* @return array
*/
- function getSelfWeights($words)
+ public function getSelfWeights($words)
{
$documents = $this->matcher->getDocuments();
$sum = array_sum($words);
diff --git a/src/Riu/Search/Tokenizer/SimpleTokenizer.php b/src/Riu/Search/Tokenizer/SimpleTokenizer.php
index f7ebc27..9a76fec 100644
--- a/src/Riu/Search/Tokenizer/SimpleTokenizer.php
+++ b/src/Riu/Search/Tokenizer/SimpleTokenizer.php
@@ -2,9 +2,15 @@
namespace Riu\Search\Tokenizer;
+/**
+ * The simple tokenizer splits the content into words without
+ * normalizing similar words.
+ */
class SimpleTokenizer implements TokenizerInterface
{
/**
+ * Tokenizes a content.
+ *
* @param string $content
* @return array
*/
diff --git a/src/Riu/Search/Tokenizer/TokenizerInterface.php b/src/Riu/Search/Tokenizer/TokenizerInterface.php
index 199fb5b..1f4ca8b 100644
--- a/src/Riu/Search/Tokenizer/TokenizerInterface.php
+++ b/src/Riu/Search/Tokenizer/TokenizerInterface.php
@@ -2,9 +2,14 @@
namespace Riu\Search\Tokenizer;
+/**
+ * Interface implemented by all tokenizers.
+ */
interface TokenizerInterface
{
/**
+ * Tokenizes a content.
+ *
* @param string $content
* @return array
*/
diff --git a/src/Riu/Search/VectorEngine.php b/src/Riu/Search/VectorEngine.php
index dff1acd..471d5e1 100644
--- a/src/Riu/Search/VectorEngine.php
+++ b/src/Riu/Search/VectorEngine.php
@@ -2,6 +2,9 @@
namespace Riu\Search;
+/**
+ * Engine implementing a vectorial search model
+ */
class VectorEngine implements EngineInterface
{
private $indexer;
@@ -19,9 +22,12 @@ class VectorEngine implements EngineInterface
$this->matcher = $matcher;
$this->weighter = $weighter;
}
+
/**
+ * Searches for the given query and returns an array of document ids.
+ *
* @param string $query
- * @return \Riu\Search\Document[]
+ * @return array
*/
public function search($query)
{
diff --git a/src/Riu/Search/WeighterInterface.php b/src/Riu/Search/WeighterInterface.php
index 9cc1dc6..d3b2b1a 100644
--- a/src/Riu/Search/WeighterInterface.php
+++ b/src/Riu/Search/WeighterInterface.php
@@ -2,17 +2,24 @@
namespace Riu\Search;
+/**
+ * Interface implemented by the weighter used for the vectorial engine
+ */
interface WeighterInterface
{
/**
+ * Gets the weights for the words in all documents.
+ *
* @param array $words
* @return array
*/
- function getDocumentWeights ($words);
+ function getDocumentWeights(array $words);
/**
+ * Gets the weights for the words in the query.
+ *
* @param array $words
* @return array
*/
- function getSelfWeights ($words);
+ function getSelfWeights(array $words);
}