Changeset 92:c307d90395db
- Timestamp:
- 05/28/07 15:49:13 (17 months ago)
- Author:
- Tarek Ziadé <tarek@…>
- Message:
-
overall fix
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
r81
|
r92
|
|
| 5 | 5 | |
| 6 | 6 | def test_one(self): |
| 7 | | self.assertEquals(division(4, 2) , 3) |
| | 7 | self.assertEquals(division(4, 2) , 2) |
| 8 | 8 | |
| 9 | 9 | def test_two(self): |
-
|
r81
|
r92
|
|
| 515 | 515 | ==================== |
| 516 | 516 | |
| 517 | | XXX |
| | 517 | - punctuation signs to underline sections |
| | 518 | - text enhancements |
| | 519 | |
| | 520 | - ``*emphasis*`` |
| | 521 | - ``**strong emphasis**`` |
| | 522 | - ```interpreted text``` |
| | 523 | - ````inline literal text```` |
| | 524 | |
| | 525 | reST syntax overview |
| | 526 | ==================== |
| | 527 | |
| | 528 | - bullet list: use +, * or - |
| | 529 | - enumerated list: use n. (1., 2., etc.) or #. (automatic) |
| 518 | 530 | |
| 519 | 531 | Part 2 |
-
|
r87
|
r92
|
|
| 39 | 39 | |
| 40 | 40 | >>> from filters import levenshtein |
| 41 | | >>> all = entries.select_entries() |
| | 41 | >>> all = entries |
| 42 | 42 | |
| 43 | 43 | Text 2 should be blocked because a similar entry is in:: |
-
|
r91
|
r92
|
|
| 16 | 16 | |
| 17 | 17 | >>> from outputs import write_output |
| 18 | | >>> write_output('rss', entries) |
| | 18 | >>> print write_output('rss', entries) |
| | 19 | <?xml version="1.0" encoding="utf-8"?> |
| | 20 | ... |
| 19 | 21 | |
| 20 | 22 | |
| 21 | | |
-
|
r85
|
r92
|
|
| 24 | 24 | """ |
| 25 | 25 | from datetime import datetime |
| | 26 | from time import strftime |
| 26 | 27 | from sqlalchemy import * |
| | 28 | from sqlalchemy.exceptions import * |
| 27 | 29 | |
| 28 | 30 | _connector = None |
| | 31 | TIME_FMT = '%Y-%m-%d %H:%M:%S' |
| 29 | 32 | |
| 30 | 33 | class Entries(object): |
| … |
… |
|
| 32 | 35 | def __init__(self, sqluri): |
| 33 | 36 | self._mapper = self._getEntriesMapper(sqluri) |
| 34 | | self._mapper.create() |
| | 37 | try: |
| | 38 | self._mapper.create() |
| | 39 | except SQLError: |
| | 40 | pass # already there |
| 35 | 41 | |
| 36 | 42 | def getConnector(self): |
| … |
… |
|
| 50 | 56 | Column('creation_date', DateTime)) |
| 51 | 57 | |
| 52 | | def insert_entry(self, url, title, content, date=datetime.now(), |
| 53 | | creation_date=datetime.now()): |
| | 58 | def insert_entry(self, url, title, content, date=strftime(TIME_FMT), |
| | 59 | creation_date=strftime(TIME_FMT)): |
| 54 | 60 | """inserts entry""" |
| 55 | 61 | inserter = self._mapper.insert() |
| 56 | 62 | inserter.execute(url=url, title=title, content=content, date=date, |
| 57 | 63 | creation_date=creation_date) |
| | 64 | |
| | 65 | def insert_entries(self, entries): |
| | 66 | """insert entries""" |
| | 67 | for entry in entries: |
| | 68 | if 'url' in entry: |
| | 69 | url = entry['url'] |
| | 70 | else: |
| | 71 | url = entry['link'] |
| | 72 | |
| | 73 | if 'creation_date' not in entry: |
| | 74 | entry['creation_date'] = strftime(TIME_FMT) |
| | 75 | |
| | 76 | content = entry['content'] |
| | 77 | |
| | 78 | entry_date = entry['date'] |
| | 79 | if isinstance(entry_date, basestring): |
| | 80 | try: |
| | 81 | entry_date = datetime.strptime(entry_date, |
| | 82 | '%Y-%m-%dT%H:%M:%S+00:00') |
| | 83 | strftime(TIME_FMT, entry_date.timetuple()) |
| | 84 | except ValueError: |
| | 85 | pass |
| | 86 | |
| | 87 | self.insert_entry(url, entry['title'], content, |
| | 88 | entry_date, entry['creation_date']) |
| | 89 | |
| 58 | 90 | |
| 59 | 91 | def select_entries(self, *args): |
-
|
r88
|
r92
|
|
| | 1 | from filtering import run_filters |
| | 2 | |
| 1 | 3 | from levenshtein import levenshtein |
| 2 | 4 | |
-
|
r88
|
r92
|
|
| 32 | 32 | """ |
| 33 | 33 | content = entry['content'] |
| 34 | | for stored_entry in entries: |
| | 34 | for stored_entry in entries.select_entries(): |
| 35 | 35 | if ratio(content, stored_entry.content) > 0.7: |
| 36 | 36 | return False |
-
|
r91
|
r92
|
|
| 31 | 31 | def write_output(type_, entries): |
| 32 | 32 | """Returns entries from all sources""" |
| 33 | | _outputs[type_](entries) |
| | 33 | return _outputs[type_](entries) |
| 34 | 34 | |
-
|
r91
|
r92
|
|
| 67 | 67 | |
| 68 | 68 | for item in entries: |
| 69 | | if 'link' in item: |
| | 69 | if 'link' in item.keys(): |
| 70 | 70 | url = escape(item['link']) |
| | 71 | elif 'url' in item.keys(): |
| | 72 | url = escape(item['url']) |
| 71 | 73 | else: |
| 72 | 74 | url = 'no link' |
| 73 | 75 | |
| 74 | 76 | header_text += RSS_ITEM_LI % {'item_id': url} |
| 75 | | if 'modified_parsed' in item: |
| | 77 | if 'modified_parsed' in item.keys(): |
| 76 | 78 | item_time = item['modified_parsed'] |
| 77 | 79 | else: |
| … |
… |
|
| 94 | 96 | summary = summary[:i] |
| 95 | 97 | summary += '...' |
| | 98 | elif 'description' in item.keys(): |
| | 99 | summary = item['description'] |
| | 100 | elif 'content' in item.keys(): |
| | 101 | summary = item['content'] |
| 96 | 102 | else: |
| 97 | 103 | summary = '?' |
| 98 | 104 | |
| 99 | | if 'title' in item: |
| | 105 | if 'title' in item.keys(): |
| 100 | 106 | title = item['title'] |
| 101 | 107 | else: |
-
|
r89
|
r92
|
|
| | 1 | from sourcing import get_entries |
| 1 | 2 | |
| 2 | 3 | import rss |
-
|
r89
|
r92
|
|
| 31 | 31 | for entry in entries: |
| 32 | 32 | if 'content' not in entry: |
| 33 | | entry['content'] = entry['summary'] |
| | 33 | if 'summary' in entry: |
| | 34 | entry['content'] = entry['summary'] |
| | 35 | elif 'title_detail' in entry: |
| | 36 | entry['content'] = entry['title_detail']['value'] |
| | 37 | else: |
| | 38 | entry['content'] = u'' |
| | 39 | |
| | 40 | content = entry['content'] |
| | 41 | |
| | 42 | # better parsing to be done here |
| | 43 | if isinstance(content, list): |
| | 44 | content = content[0] |
| | 45 | |
| | 46 | if not isinstance(content, basestring): |
| | 47 | if isinstance(content, dict) and 'value' in content: |
| | 48 | entry['content'] = content['value'] |
| | 49 | else: |
| | 50 | entry['content'] = u'' |
| | 51 | |
| 34 | 52 | return entries |
| 35 | 53 | |