| 1 | #!/usr/bin/python |
|---|
| 2 | # -*- coding: UTF-8 -*- |
|---|
| 3 | # |
|---|
| 4 | # Copyright (c) 2007 Tarek Ziadé |
|---|
| 5 | # |
|---|
| 6 | # Authors: |
|---|
| 7 | # Tarek Ziadé <tarek@ziade.org> |
|---|
| 8 | # |
|---|
| 9 | # This program is free software; you can redistribute it and/or |
|---|
| 10 | # modify it under the terms of the GNU General Public License |
|---|
| 11 | # as published by the Free Software Foundation; either version 2 |
|---|
| 12 | # of the License, or (at your option) any later version. |
|---|
| 13 | # |
|---|
| 14 | # This program is distributed in the hope that it will be useful, |
|---|
| 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 17 | # GNU General Public License for more details. |
|---|
| 18 | # |
|---|
| 19 | # You should have received a copy of the GNU General Public License |
|---|
| 20 | # along with this program; if not, write to the Free Software |
|---|
| 21 | # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|---|
| 22 | # $Id: tables.py 1518 2007-05-21 12:35:44Z rage $ |
|---|
"""Filter that translates HTML entities and replaces HTML tags with spaces.
"""
|---|
| 25 | import os |
|---|
| 26 | import sys |
|---|
| 27 | import htmlentitydefs |
|---|
| 28 | import re |
|---|
| 29 | import cgi |
|---|
| 30 | |
|---|
| 31 | from filtering import register_filter |
|---|
| 32 | |
|---|
# Flags shared by every pattern compiled in this module.
re_options = re.UNICODE | re.MULTILINE

# A named HTML entity such as "&amp;"; group 1 captures the bare name.
# Numeric references ("&#233;") are not matched, because '#' is not a
# word character.  The pattern is a raw string so that "\w" reaches the
# regex engine instead of being read as a string escape.
entities = re.compile(r"&(\w+?);", re_options)
|---|
| 36 | |
|---|
def descape_entity(m, defs=htmlentitydefs.entitydefs):
    """Regex callback: translate one named HTML entity to unicode text.

    `m` is a match object whose group(1) is the bare entity name and
    whose group(0) is the whole entity (e.g. "eacute" and "&eacute;").
    `defs` maps entity names to their replacement text.

    Returns the replacement as unicode, or the matched text unchanged
    when the name is not a key of `defs`.
    """
    try:
        value = defs[m.group(1)]
    except KeyError:
        return m.group(0)  # unknown entity: use as is

    if not isinstance(value, type(u'')):
        # Byte strings in the standard table are ISO Latin-1 (8859-1).
        # Decoding them as ISO 8859-15 would turn &curren; into the
        # euro sign and &frac12; into the "oe" ligature.
        value = value.decode('iso8859-1')

    # Characters outside Latin-1 are stored in the Python 2 table as
    # decimal character references ("&#8364;" for &euro;).  Resolve
    # them here rather than leaking the reference into the output.
    if value.startswith(u'&#') and value.endswith(u';'):
        digits = value[2:-1]
        if digits.isdigit():
            try:
                return unichr(int(digits))
            except (ValueError, OverflowError):
                pass  # not a representable code point: keep the text
    return value
|---|
| 43 | |
|---|
# Anything from a '<' up to the nearest following '>' on the same line
# is treated as a tag; group 1 captures the text between the brackets.
tags = re.compile('<(.*?)>', flags=re_options)
|---|
| 45 | |
|---|
def detag(m):
    """Regex callback for `tags`: replace any matched tag by one space.

    The match object `m` is ignored; the replacement is always the same.
    """
    blank = u' '
    return blank
|---|
| 48 | |
|---|
# (pattern, replacement callback) pairs, listed in the order descape
# applies them: entities are translated first, then tags are replaced
# by spaces.
exprs = (
    (entities, descape_entity),
    (tags, detag),
)
|---|
| 50 | |
|---|
def descape(entry, entries):
    """Clean the text fields of `entry` in place.

    Every (pattern, callback) pair of `exprs` is applied in turn to
    entry['content'], and to entry['title'] when that key is present.

    `entry` must support item access and provide a 'content' key; a
    missing 'content' raises KeyError.
    `entries` is not used here -- presumably it is part of the
    signature register_filter expects; confirm against the filtering
    module.

    Always returns True.
    """
    # The loop never adds or removes keys, so test for the optional
    # title once, directly on the mapping, instead of building a key
    # list on every pass.
    has_title = 'title' in entry

    for pattern, callback in exprs:
        if has_title:
            entry['title'] = pattern.sub(callback, entry['title'])
        entry['content'] = pattern.sub(callback, entry['content'])

    return True
|---|
| 59 | |
|---|
# Runs at import time: hands descape to register_filter, which is
# imported from the filtering module.
register_filter(descape)
|---|