root / atomisator / filters / bayes.py

Revision 102:917b0f7d3576, 2.8 kB (checked in by Tarek Ziadé <tarek@…>, 15 months ago)

fixes

Line 
1#!/usr/bin/python
2# -*- coding: UTF-8 -*-
3#
4# Copyright (c) 2007 Tarek Ziadé
5#
6# Authors:
7#   Tarek Ziadé <tarek@ziade.org>
8#
9# This program is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License
11# as published by the Free Software Foundation; either version 2
12# of the License, or (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, write to the Free Software
21# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22# $Id: tables.py 1518 2007-05-21 12:35:44Z rage $
23""" will filter entries, with a bayesian filter
24
25two modes:
26    - interactive: training
27    - direct
28"""
29import os
30import sys
31
32from filtering import register_filter
33
34from BayesCore.classifier import BayesClassifier
35from BayesCore.storage import SQLStorage
36from BayesCore.tokenizer import AllFilters
37
# Language code handed to BayesClassifier by the functions in this module.
# XXX todo: conf file for language
LANG = 'fr'

# Default storage URI, used whenever a caller passes no `sqluri`.
SQLURI = 'sqlite:///filters/BayesCore/data/bayes.db'

# Directory part of this module's path; dirname() gives '' when __file__
# has no directory component, and '.' is substituted in that case.
current_dir = os.path.dirname(__file__) or '.'

# NOTE(review): this sets an attribute called `curdir` on the sys module.
# Nothing visible in this file reads it back -- confirm it is still needed.
sys.curdir = current_dir
50
def bayesian(entry, entries, sqluri=None):
    """Tell whether `entry` should be kept, using bayesian inference.

    `entry` is indexed by key: 'content' is required, 'title' is optional.
    Both values are UTF-8 encoded and joined into the text to classify.

    `entries` is accepted but not used by this filter.

    `sqluri` is the storage URI handed to SQLStorage; the module-level
    SQLURI is used when it is None.

    Returns True when the classifier comes back with no guess, otherwise
    True only when the label of the first guess is 'nojunk'.
    """
    content = entry['content'].encode('utf8')

    # Membership is tested on the mapping itself; no key list is built.
    if 'title' in entry:
        title = entry['title'].encode('utf8')
    else:
        title = ''

    data = '%s %s' % (content, title)

    if sqluri is None:
        sqluri = SQLURI

    classifier = BayesClassifier(LANG, SQLStorage(sqluri), AllFilters())

    # let's test the entry
    guesses = classifier.guess(data)

    # No guess at all, whatever empty value comes back: let the entry
    # through instead of failing on the index lookup below.
    if not guesses:
        return True

    # Only the label of the first guess is looked at.
    return guesses[0][0] == 'nojunk'


# Hand the filter to register_filter (imported from the filtering module).
register_filter(bayesian)
77
def bayesian_learn(entry, sqluri=None, answer=None):
    """Train the bayesian classifier with a single entry.

    `entry` is indexed by key: 'content' is required, 'title' is optional.
    Both values are UTF-8 encoded and joined into the text that is learned.

    `sqluri` is the storage URI handed to SQLStorage; the module-level
    SQLURI is used when it is None.

    `answer` is the verdict for the entry. When it is None the title is
    printed and the verdict is read from the console; typing 'm' prints
    the content and asks once more. An answer of 'y' or 'yes' (ignoring
    case and surrounding whitespace) is learned as 'nojunk'; anything
    else is learned as 'junk'.
    """
    body = entry['content'].encode('utf8')

    title = ''
    if 'title' in entry.keys():
        title = entry['title'].encode('utf8')

    storage_uri = sqluri
    if storage_uri is None:
        storage_uri = SQLURI

    text = '%s %s' % (body, title)

    classifier = BayesClassifier(LANG, SQLStorage(storage_uri), AllFilters())

    if answer is None:
        # Interactive mode. print is given one parenthesised argument, which
        # behaves the same as the statement form.
        print('title : %s' % title)

        answer = raw_input("Interesting (type 'm' for more) ? (y/n)  ")
        answer = answer.strip().lower()

        if answer == 'm':
            # Show the content, then ask again.
            print(body)
            answer = raw_input("Interesting ? (y/n)  ")

        print('Learning...')

    # The answer is normalised here as well, so a caller-supplied value or
    # the second prompt's raw reply is handled like the first prompt's.
    if answer.strip().lower() in ('y', 'yes'):
        label = 'nojunk'
    else:
        label = 'junk'

    classifier.learn(text, label)
111
Note: See TracBrowser for help on using the browser.