InDefero

Sign in or create your account | Project List | Help

InDefero Git Source Tree

Root/src/IDF/Search.php

1<?php
2/* -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3/*
4# ***** BEGIN LICENSE BLOCK *****
5# This file is part of InDefero, an open source project management application.
6# Copyright (C) 2008 Céondo Ltd and contributors.
7#
8# InDefero is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# InDefero is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21#
22# Based on work under GNU LGPL copyright, from the Pluf Framework
23# Copyright (C) 2001-2007 Loic d'Anterroches and contributors.
24#
25# ***** END LICENSE BLOCK ***** */
26
/**
 * Class implementing the search engine.
 *
 * It is a modified version of the Pluf_Search class to be able to
 * cluster the results by project: each occurrence row
 * (IDF_Search_Occ) also records the project of the indexed document.
 */
class IDF_Search extends Pluf_Search
{
    /**
     * Search.
     *
     * Cleans, tokenizes and optionally stems the query, resolves each
     * resulting word to its id and delegates to mySearchDocuments().
     *
     * The rows returned carry model_class, model_id and score, and
     * are already sorted by score descending. You can then filter
     * the list as you wish with another set of weights.
     *
     * An empty result is returned as soon as one word of the query is
     * unknown to the index, because a document must contain every
     * word of the query to match.
     *
     * @param string $query Query string.
     * @param IDF_Project $project Project to limit the results; it
     *                    is handed unchanged to mySearchDocuments(),
     *                    which reads its id (null)
     * @param string $model Model class to limit the results (null)
     * @param string $stemmer Stemmer class, a null value disables
     *                        stemming ('Pluf_Text_Stemmer_Porter')
     * @return array Results
     */
    public static function mySearch($query, $project=null, $model=null, $stemmer='Pluf_Text_Stemmer_Porter')
    {
        $query = Pluf_Text::cleanString(html_entity_decode($query, ENT_QUOTES, 'UTF-8'));
        $words = Pluf_Text::tokenize($query);
        if ($stemmer != null) {
            $words = self::stem($words, $stemmer);
        }
        // The words are the keys of the array, only the keys are
        // needed to look up the ids.
        $words_flat = array();
        foreach ($words as $word=>$c) {
            $words_flat[] = $word;
        }
        $word_ids = self::getWordIds($words_flat);
        // A missing word is flagged with a real null (index() relies
        // on the same convention), so the comparison is strict.
        if (count($word_ids) == 0 or in_array(null, $word_ids, true)) {
            return array();
        }
        return self::mySearchDocuments($word_ids, $project, $model);
    }

    /**
     * Search documents.
     *
     * Only the total of the weighted occurrences is used to sort the
     * results. A document is returned only if it has one occurrence
     * row for each of the given word ids (HAVING COUNT(*) equals the
     * number of ids).
     *
     * @param array $wids Word ids.
     * @param IDF_Project $project Project to limit the search, or null.
     * @param string $model Model class to limit the search, or null.
     * @return array Sorted by score, returns model_class, model_id and score.
     */
    public static function mySearchDocuments($wids, $project, $model)
    {
        if (empty($wids)) {
            // Without any word the WHERE clause would be empty and
            // the query invalid; nothing can match anyway.
            return array();
        }
        $db =& Pluf::db();
        $gocc = new IDF_Search_Occ();
        $where = array();
        foreach ($wids as $id) {
            $where[] = $db->qn('word').'='.(int)$id;
        }
        $prj = (is_null($project)) ? '' : ' AND project='.(int)$project->id;
        $md = (is_null($model)) ? '' : ' AND model_class='.$db->esc($model);
        // The OR group must be wrapped in parentheses: AND binds
        // tighter than OR, so without them the project and model
        // filters would only apply to the last word of the list.
        $select = 'SELECT model_class, model_id, SUM(pondocc) AS score FROM '.$gocc->getSqlTable()
            .' WHERE ('.implode(' OR ', $where).')'.$prj.$md
            .' GROUP BY model_class, model_id HAVING COUNT(*)='.count($wids)
            .' ORDER BY score DESC';
        return $db->select($select);
    }

    /**
     * Index a document.
     *
     * See Pluf_Search for the disclaimer and information.
     *
     * The previous occurrence rows of the document are deleted, then
     * one IDF_Search_Occ row is created per distinct word id, and the
     * Pluf_Search_Stats entry of the document is created or its
     * indexation counter incremented.
     *
     * @param Pluf_Model $doc Document to index. It must provide the
     *                   _toIndex() and get_project() methods.
     * @param string $stemmer Stemmer used, a null value disables
     *                        stemming ('Pluf_Text_Stemmer_Porter')
     * @return array Statistics: 'total' (sum of the occurrences of
     *               all the words), 'new' (words added to the word
     *               table) and 'unique' (number of distinct words).
     */
    public static function index($doc, $stemmer='Pluf_Text_Stemmer_Porter')
    {
        $words = Pluf_Text::tokenize($doc->_toIndex());
        if ($stemmer != null) {
            $words = self::stem($words, $stemmer);
        }
        // Get the total number of words; $words maps each word to
        // its number of occurrences.
        $total = 0.0;
        $words_flat = array();
        foreach ($words as $word => $occ) {
            $total += (float) $occ;
            $words_flat[] = $word;
        }
        // Drop the last indexation.
        $gocc = new IDF_Search_Occ();
        $sql = new Pluf_SQL('DELETE FROM '.$gocc->getSqlTable().' WHERE model_class=%s AND model_id=%s', array($doc->_model, $doc->id));
        $db =& Pluf::db();
        $db->execute($sql->gen());
        // Get the ids for each word, null for the words not yet in
        // the word table.
        $ids = self::getWordIds($words_flat);
        // Insert a new word for the missing words and add the occ.
        $n = count($ids);
        $new_words = 0;
        // Word ids already stored for this document, to write at
        // most one occurrence row per word id.
        $done = array();
        for ($i=0;$i<$n;$i++) {
            if ($ids[$i] === null) {
                $word = new Pluf_Search_Word();
                $word->word = $words_flat[$i];
                try {
                    $word->create();
                    $new_words++;
                    $ids[$i] = $word->id;
                } catch (Exception $e) {
                    // 100% of the time, the word has been created
                    // by another process in the background, so try
                    // to read its id again.
                    $r_ids = self::getWordIds(array($word->word));
                    if ($r_ids[0]) {
                        $ids[$i] = $r_ids[0];
                    } else {
                        // give up for this word
                        continue;
                    }
                }
            }
            if (isset($done[$ids[$i]])) {
                continue;
            }
            $done[$ids[$i]] = true;
            $occ = new IDF_Search_Occ();
            $occ->word = new Pluf_Search_Word($ids[$i]);
            $occ->model_class = $doc->_model;
            $occ->model_id = $doc->id;
            $occ->project = $doc->get_project();
            $occ->occ = $words[$words_flat[$i]];
            // Weighted occurrence: share of this word in the total
            // number of words of the document.
            $occ->pondocc = $words[$words_flat[$i]]/$total;
            $occ->create();
        }
        // update the stats
        $sql = new Pluf_SQL('model_class=%s AND model_id=%s',
                            array($doc->_model, $doc->id));
        $last_index = Pluf::factory('Pluf_Search_Stats')->getList(array('filter' => $sql->gen()));
        if ($last_index->count() == 0) {
            $stats = new Pluf_Search_Stats();
            $stats->model_class = $doc->_model;
            $stats->model_id = $doc->id;
            $stats->indexations = 1;
            $stats->create();
        } else {
            $last_index[0]->indexations += 1;
            $last_index[0]->update();
        }
        return array('total' => $total, 'new' => $new_words, 'unique'=>$n);
    }

    /**
     * Remove an item from the index.
     *
     * You must call this function when you delete items which are
     * indexed. Just add the call:
     *
     * IDF_Search::remove($this);
     *
     * in the preDelete() method of your object.
     *
     * Every IDF_Search_Occ row matching the id and the model of the
     * item is deleted. Nothing is done for an item without a
     * positive id.
     *
     * @param mixed $item Item to be removed
     * @return bool Success, always true
     */
    public static function remove($item)
    {
        if ($item->id > 0) {
            $sql = new Pluf_SQL('model_id=%s AND model_class=%s',
                                array($item->id, $item->_model));
            $items = Pluf::factory('IDF_Search_Occ')->getList(array('filter'=>$sql->gen()));
            foreach ($items as $tl) {
                $tl->delete();
            }
        }
        return true;
    }

}

Archive Download this file

Branches:
dev
master
newdiff
svn