root / atomisator / filters / unhtml.py

Revision 104:1d060e571377, 1.7 kB (checked in by Tarek Ziadé <tarek@…>, 15 months ago)

initial commit for jpf slides

Line 
1#!/usr/bin/python
2# -*- coding: UTF-8 -*-
3#
4# Copyright (c) 2007 Tarek Ziadé
5#
6# Authors:
7#   Tarek Ziadé <tarek@ziade.org>
8#
9# This program is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License
11# as published by the Free Software Foundation; either version 2
12# of the License, or (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, write to the Free Software
21# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22# $Id: tables.py 1518 2007-05-21 12:35:44Z rage $
23""" will remove html entities
24"""
25import os
26import sys
27import htmlentitydefs
28import re
29import cgi
30
31from filtering import register_filter
32
# Flags shared by the patterns compiled in this module.
re_options = re.UNICODE | re.MULTILINE

# Matches a named entity reference such as ``&amp;``; group 1 is the bare
# entity name.  ``\w`` does not match ``#``, so numeric references such as
# ``&#233;`` are not matched.  The raw string keeps ``\w`` from being parsed
# as a (invalid) string escape.
entities = re.compile(r"&(\w+?);", re_options)
36
def descape_entity(m, defs=htmlentitydefs.entitydefs):
    """Return the replacement text for one matched named entity.

    Callback for ``re.sub``.

    ``m`` is a match object whose group 1 is the entity name (without the
    leading ``&`` and trailing ``;``).  ``defs`` maps entity names to their
    replacement text and defaults to the standard library entity table.

    Byte-string values are decoded as ISO 8859-1 (Latin-1), the encoding the
    standard library table is documented to use; values that are already
    unicode are returned unchanged.  A name missing from ``defs`` yields the
    matched text untouched.

    NOTE(review): in the Python 2 standard library table, entities outside
    Latin-1 appear to map to numeric character references (e.g. ``&#8364;``),
    which this function returns as such -- confirm whether that is wanted.
    """
    try:
        value = defs[m.group(1)]
    except KeyError:
        return m.group(0)  # unknown entity: use as is
    if isinstance(value, bytes):
        # Decoding as ISO 8859-15 would turn eight Latin-1 bytes into
        # different characters (e.g. the currency sign into the euro sign).
        return value.decode('iso8859-1')
    return value
43
# Matches one markup tag, from ``<`` to the nearest following ``>``.
# re.DOTALL lets ``.`` cross newlines, so a tag whose attributes wrap over
# several lines is still matched.
tags = re.compile('<(.*?)>', re_options | re.DOTALL)
45
def detag(m):
    """Return the replacement for a matched tag: a single unicode space.

    Callback for ``re.sub``; the match object ``m`` is not inspected.
    """
    replacement = u' '
    return replacement
48
# (compiled pattern, substitution callback) pairs, in application order:
# named entities are replaced first, then tags.
exprs = (
    (entities, descape_entity),
    (tags, detag),
)
50
def descape(entry, entries):
    """Rewrite an entry's text fields in place using the ``exprs`` pairs.

    Each (pattern, callback) pair in ``exprs`` is applied, in order, to
    ``entry['content']`` and, when the key is present, to ``entry['title']``.

    ``entry`` is a mapping; ``'content'`` must be present (a missing key
    raises KeyError), while ``'title'`` is optional.  ``entries`` is not
    used by this function.
    NOTE(review): presumably ``entries`` is part of the signature expected
    by ``register_filter`` -- confirm against the ``filtering`` module.

    Always returns True.
    """
    # The substitutions never add or remove keys, so test membership once
    # (directly on the mapping, without building a key list).
    has_title = 'title' in entry

    for expr, func in exprs:
        if has_title:
            entry['title'] = expr.sub(func, entry['title'])
        entry['content'] = expr.sub(func, entry['content'])

    return True
59
# Hand descape to the filtering module's registry when this module is
# imported.
register_filter(descape)
Note: See TracBrowser for help on using the browser.