#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Copyright (c) 2007 Tarek Ziadé
#
# Authors:
#   Tarek Ziadé <tarek@ziade.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# $Id: tables.py 1518 2007-05-21 12:35:44Z rage $
""" will remove html entities
"""
import os
import sys
import htmlentitydefs
import re
import cgi

from filtering import register_filter

# Flags shared by every pattern compiled in this module.
re_options = re.UNICODE | re.MULTILINE

# A named entity reference such as "&eacute;"; group 1 is the bare name.
# Written as a raw string so the backslash in "\w" reaches the regex engine
# untouched instead of relying on Python passing an unknown escape through.
entities = re.compile(r"&(\w+?);", re_options)

try:
    _code_to_char = unichr  # Python 2: builds a unicode character
except NameError:
    _code_to_char = chr  # interpreters without unichr: chr() returns text


def descape_entity(m, defs=htmlentitydefs.entitydefs):
    """Substitution callback: translate one named entity to its character.

    m -- match object whose group(1) is the entity name and whose group(0)
         is the complete reference, e.g. "&eacute;"
    defs -- mapping of entity names to their replacement text

    Returns the replacement as unicode text, or the matched text unchanged
    when the name is not a key of ``defs``.
    """
    try:
        value = defs[m.group(1)]
    except KeyError:
        return m.group(0)  # unknown entity: use as is

    if not isinstance(value, type(u'')):
        # htmlentitydefs documents its replacement text as ISO Latin-1
        # (iso8859-1). Decoding it as iso8859-15 gave the wrong character
        # for the eight code points where the two charsets differ, e.g.
        # "&frac12;" (0xBD) and "&curren;" (0xA4).
        value = value.decode('iso8859-1')

    # Entities that do not fit in Latin-1 are stored as a decimal character
    # reference ("&#8364;"); resolve it so no reference is left behind.
    if value.startswith(u'&#') and value.endswith(u';'):
        try:
            return _code_to_char(int(value[2:-1]))
        except (ValueError, OverflowError):
            pass  # not a usable decimal reference: keep the text as it is
    return value

# Any markup tag, from "<" up to the nearest ">"; group 1 is the text between.
# "[^>]" is used instead of "." so that a tag broken across several lines is
# matched too: "." stops at a newline, and re.MULTILINE only affects ^ and $.
tags = re.compile(r'<([^>]*)>', re_options)

def detag(m):
    """Substitution callback: replace whatever was matched with one space.

    m -- the match object supplied by the substitution; it is not inspected

    Returns a single unicode space.
    """
    replacement = u' '
    return replacement

# Substitution passes, as (compiled pattern, replacement callback) pairs,
# listed in the order descape applies them.
# NOTE(review): entities are decoded before tags are stripped, so escaped
# markup such as "&lt;b&gt;" becomes "<b>" and is then removed as a tag --
# confirm this ordering is intended.
exprs = (
    (entities, descape_entity),
    (tags, detag),
)

def descape(entry, entries):
    """Filter: run every substitution pass of ``exprs`` over an entry's text.

    Each (pattern, callback) pair is applied, in order, to
    ``entry['content']`` and, when the key exists, to ``entry['title']``.
    The entry is modified in place.

    entry -- mapping with a mandatory 'content' key and an optional 'title'
             key (values are presumably text -- confirm against the caller)
    entries -- not used by this filter

    Returns True unconditionally (presumably read by the filtering
    framework -- confirm against ``filtering``).
    Raises KeyError when ``entry`` has no 'content' key.
    """
    # Test membership on the mapping itself rather than on entry.keys(),
    # and only once: the substitutions never add or remove keys.
    has_title = 'title' in entry

    for expr, func in exprs:
        if has_title:
            entry['title'] = expr.sub(func, entry['title'])
        entry['content'] = expr.sub(func, entry['content'])

    return True

# Pass descape to filtering.register_filter as soon as this module is imported.
register_filter(descape)

