#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Copyright (c) 2007 Tarek Ziadé
#
# Authors:
#   Tarek Ziadé <tarek@ziade.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# $Id: tables.py 1518 2007-05-21 12:35:44Z rage $
"""Filter entries with a bayesian filter.

Two modes:
    - interactive: training
    - direct
"""
import os
import sys

from filtering import register_filter

from BayesCore.classifier import BayesClassifier
from BayesCore.storage import SQLStorage
from BayesCore.tokenizer import AllFilters

# XXX todo: read the language from a configuration file
LANG = 'fr'

# Default storage URI, used when a caller does not supply `sqluri`.
SQLURI = 'sqlite:///filters/BayesCore/data/bayes.db'

# Directory part of this module's path; os.path.dirname() gives '' when
# __file__ has no directory component, in which case '.' is used instead.
current_dir = os.path.dirname(__file__) or '.'

# NOTE(review): `curdir` is not a standard attribute of the `sys` module;
# presumably other modules read it -- confirm before changing.
sys.curdir = current_dir

def bayesian(entry, entries, sqluri=None):
    """Tell whether `entry` passes the bayesian classifier.

    `entry` is a mapping with a 'content' value and an optional 'title'
    value; both are encoded to UTF-8 and joined as "content title" before
    being submitted to the classifier.

    `entries` is accepted but not used by this filter.

    `sqluri` is the storage URI given to SQLStorage; when None, the
    module-level SQLURI is used.

    Returns True when the classifier returns an empty list, or when the
    first element of its first guess is 'nojunk'; False otherwise.
    """
    content = entry['content'].encode('utf8')

    # the title is optional: fall back to an empty string
    title = ''
    if 'title' in entry.keys():
        title = entry['title'].encode('utf8')

    text = '%s %s' % (content, title)

    if sqluri is None:
        sqluri = SQLURI

    storage = SQLStorage(sqluri)
    classifier = BayesClassifier(LANG, storage, AllFilters())

    # no guess at all means the entry is kept
    guesses = classifier.guess(text)

    return guesses == [] or guesses[0][0] == 'nojunk'

register_filter(bayesian)

def bayesian_learn(entry, sqluri=None, answer=None):
    """Teach the bayesian classifier whether `entry` is junk.

    `entry` is a mapping with a 'content' value and an optional 'title'
    value; both are encoded to UTF-8 and joined as "content title" to
    build the text that is learned.

    `sqluri` is the storage URI given to SQLStorage; when None, the
    module-level SQLURI is used.

    `answer` is the verdict for the entry. When None, the title is
    printed and the user is asked interactively; typing 'm' prints the
    content and asks once more. An answer of 'y' or 'yes' (ignoring case
    and surrounding whitespace) is learned as 'nojunk', anything else as
    'junk'.
    """
    content = entry['content'].encode('utf8')

    # the title is optional: fall back to an empty string
    title = ''
    if 'title' in entry.keys():
        title = entry['title'].encode('utf8')

    text = '%s %s' % (content, title)

    if sqluri is None:
        sqluri = SQLURI

    storage = SQLStorage(sqluri)
    classifier = BayesClassifier(LANG, storage, AllFilters())

    if answer is None:
        # interactive mode: ask the user
        print('title : %s' % title)

        prompt = "Interesting (type 'm' for more) ? (y/n)  "
        answer = raw_input(prompt).strip().lower()

        if answer == 'm':
            # show the whole content, then ask again
            print(content)
            answer = raw_input("Interesting ? (y/n)  ")

        print('Learning...')

    category = 'junk'
    if answer.strip().lower() in ('y', 'yes'):
        category = 'nojunk'

    classifier.learn(text, category)


