root / xap / tags / 0.1.5 / searcher.py

Revision 226:7bbf27312bd7, 2.9 kB (checked in by Lafaye Philippe (RAGE2000) <lafaye@…>, 8 months ago)

Add a new version

Line 
1#!/usr/bin/python
2# -*- coding: UTF-8 -*-
3#
4# Copyright (c) 2007 Tarek Ziadé
5#
6# Authors:
7#   Tarek Ziadé <tarek@ziade.org>
8#
9# This program is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License
11# as published by the Free Software Foundation; either version 2
12# of the License, or (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, write to the Free Software
21# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22""" Searcher
23"""
24
25import os
26import logging
27
28import xapian
29from tokenizer import tokenize
30from settings import DB_FILE
31from model import statistics
32
def read_only():
    """Return a database handle opened with ``xapian.flint_open`` on DB_FILE.

    Every call opens a fresh handle; nothing is cached at module level.
    """
    database = xapian.flint_open(DB_FILE)
    return database
35
def corpus_size():
    """Return the number of documents reported by the database."""
    return read_only().get_doccount()
40
def _get_document_internal_id(uid):
    """Return the Xapian docid of the document carrying the term ``Q<uid>``.

    Only the first match is requested from the database. Returns None
    when no document matches.
    """
    lookup = xapian.Enquire(read_only())
    lookup.set_query(xapian.Query('Q%s' % uid))
    matches = list(lookup.get_mset(0, 1))
    if matches:
        return matches[0].docid
    return None
50
def document_exists(uid):
    """Tell whether a document is indexed under ``uid``."""
    docid = _get_document_internal_id(uid)
    return docid is not None
54
def document_terms(uid):
    """Return the terms of the document identified by ``uid``.

    The result is a generator over the document's term strings, with the
    ``Q<uid>`` identifier term itself filtered out. Returns None when no
    document matches ``uid``.
    """
    docid = _get_document_internal_id(uid)
    if docid is None:
        return None
    # Open the database once, and only when there is a document to read
    # (previously an extra, unused handle was opened on every call).
    id_term = 'Q%s' % uid
    termlist = read_only().get_document(docid).termlist()
    return (el.term for el in termlist if el.term != id_term)
63
def _record_query(query):
    """Increment the counter kept for ``query`` in the statistics table.

    Inserts a row with a count of 1 the first time a query is seen.
    """
    stat = statistics.select(statistics.c.query==query).execute().fetchone()
    if stat is None:
        statistics.insert().execute(query=query, count=1)
        return
    count = stat.count
    stat.close()
    # NOTE(review): the counter is read and then written back, so two
    # concurrent searches for the same query may lose an increment.
    statistics.update(statistics.c.query==query).execute(count=count+1)


def _extract_uid(result):
    """Return the uid stored in the first ``Q``-prefixed term of a match.

    The leading ``Q`` is stripped. Returns None when the matched document
    has no such term.
    """
    for t in result.document.termlist():
        if t.term.startswith('Q'):
            # Stop at the first identifier term instead of collecting all.
            return t.term[1:]
    return None


def search(query, or_=False, language=None):
    """Search the database for ``query`` and return the matching uids.

    ``query`` is tokenized with ``tokenize`` and the resulting terms are
    combined with OP_OR when ``or_`` is true, OP_AND otherwise. When
    ``language`` is not None it is passed to the tokenizer as the ``lang``
    option.

    At most the first 100 matches are requested. The return value is a
    generator yielding, for each match, its uid (or None if the matched
    document has no ``Q`` term); uids are extracted lazily as the
    generator is consumed.

    As a side effect, the per-query counter in the statistics table is
    incremented (or created) on every call.
    """
    # Deferred %-args: the message is only formatted if it is emitted.
    logging.debug('searching for "%s"', query)

    db = read_only()
    # 'treshold' (sic) is the key as spelled here; presumably it is what
    # tokenize expects -- confirm before renaming.
    options = {'treshold': 2}
    if language is not None:
        options['lang'] = language

    tquery = tokenize(query, options=options)
    enquire = xapian.Enquire(db)
    if or_:
        op = xapian.Query.OP_OR
    else:
        op = xapian.Query.OP_AND

    enquire.set_query(xapian.Query(op, tquery))
    res = enquire.get_mset(0, 100)

    logging.debug('searching for "%s" is over', query)

    _record_query(query)

    return (_extract_uid(el) for el in res)
104
Note: See TracBrowser for help on using the browser.