root / logilab.pylintinstaller / logilab / common / textutils.py

Revision 202:d67e86292521, 11.9 kB (checked in by tziade@…, 9 months ago)

added logilab.pylintinstaller

Line 
1# This program is free software; you can redistribute it and/or modify it under
2# the terms of the GNU General Public License as published by the Free Software
3# Foundation; either version 2 of the License, or (at your option) any later
4# version.
5#
6# This program is distributed in the hope that it will be useful, but WITHOUT
7# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
8# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
9#
10# You should have received a copy of the GNU General Public License along with
11# this program; if not, write to the Free Software Foundation, Inc.,
12# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
13"""Some text manipulation utility functions.
14
15:author:    Logilab
16:copyright: 2003-2008 LOGILAB S.A. (Paris, FRANCE)
17:contact:   http://www.logilab.fr/ -- mailto:python-projects@logilab.org
18
19:group text formatting: normalize_text, normalize_paragraph, pretty_match,\
20unquote, colorize_ansi
21:group text manipulation: searchall, get_csv
22:sort: text formatting, text manipulation
23
24
25
26:type ANSI_STYLES: dict(str)
27:var ANSI_STYLES: dictionary mapping style identifier to ANSI terminal code
28
29:type ANSI_COLORS: dict(str)
30:var ANSI_COLORS: dictionary mapping color identifier to ANSI terminal code
31
32:type ANSI_PREFIX: str
33:var ANSI_PREFIX:
34  ANSI terminal code notifying the start of an ANSI escape sequence
35 
36:type ANSI_END: str
37:var ANSI_END:
38  ANSI terminal code notifying the end of an ANSI escape sequence
39 
40:type ANSI_RESET: str
41:var ANSI_RESET:
42  ANSI terminal code resetting format defined by a previous ANSI escape sequence
43"""
44
45__docformat__ = "restructuredtext en"
46
47import re
48from unicodedata import normalize as _uninormalize
49from os import linesep
50
51
# Characters that have no canonical decomposition onto an ASCII base letter
# and therefore need a hand-written ASCII transliteration.
MANUAL_UNICODE_MAP = {
    u'\xa1': u'!',    # INVERTED EXCLAMATION MARK
    u'\u0142': u'l',  # LATIN SMALL LETTER L WITH STROKE
    u'\u2044': u'/',  # FRACTION SLASH
    u'\xc6': u'AE',   # LATIN CAPITAL LETTER AE
    u'\xa9': u'(c)',  # COPYRIGHT SIGN
    u'\xab': u'"',    # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    u'\xe6': u'ae',   # LATIN SMALL LETTER AE
    u'\xae': u'(r)',  # REGISTERED SIGN
    u'\u0153': u'oe', # LATIN SMALL LIGATURE OE
    u'\u0152': u'OE', # LATIN CAPITAL LIGATURE OE
    u'\xd8': u'O',    # LATIN CAPITAL LETTER O WITH STROKE
    u'\xf8': u'o',    # LATIN SMALL LETTER O WITH STROKE
    u'\xbb': u'"',    # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    u'\xdf': u'ss',   # LATIN SMALL LETTER SHARP S
    }


def unormalize(ustring, ignorenonascii=False):
    """replace diacritical characters with their corresponding ascii characters

    Characters listed in `MANUAL_UNICODE_MAP` are replaced by their mapped
    value. Any other character is canonically decomposed (NFD) and only its
    base character is kept, so that accents and other combining marks are
    dropped.

    :type ustring: unicode
    :param ustring: the string to convert

    :type ignorenonascii: bool
    :param ignorenonascii:
      if true, characters which have no ascii base are silently dropped
      instead of raising an exception

    :raise ValueError:
      if a character has no ascii base and `ignorenonascii` is false

    :rtype: unicode
    :return: the input string with every character replaced by ascii
    """
    res = []
    for letter in ustring:
        try:
            replacement = MANUAL_UNICODE_MAP[letter]
        except KeyError:
            # Decompose first and test the *result*: what matters is whether
            # the base character is ascii, not the code point of the input
            # (e.g. u'\u015b' decomposes to u's' + a combining acute accent).
            replacement = _uninormalize('NFD', letter)[0]
            if ord(replacement) >= 2**7:
                if ignorenonascii:
                    continue
                raise ValueError("can't deal with non-ascii based characters")
        res.append(replacement)
    return u''.join(res)
84
def unquote(string):
    """remove optional quotes (simple or double) from the string

    A leading quote and a trailing quote are removed independently of each
    other: they neither have to be both present nor to be of the same kind.

    :type string: str or unicode
    :param string: an optionally quoted string

    :rtype: str or unicode
    :return: the unquoted string (or the input string if it wasn't quoted)
    """
    if not string:
        return string
    if string[0] in '"\'':
        string = string[1:]
    # The string is empty at this point when the input was a single quote
    # character, so it must be re-checked before looking at its last char.
    if string and string[-1] in '"\'':
        string = string[:-1]
    return string
101
102
# Two consecutive line breaks (each one LF or CRLF), i.e. a blank line.
_BLANKLINES_RGX = re.compile(r'\r?\n\r?\n')
# Any run of whitespace characters. Raw string: '\s' is not a valid string
# escape and must reach the regular expression engine untouched.
_NORM_SPACES_RGX = re.compile(r'\s+')
105
def normalize_text(text, line_len=80, indent='', rest=False):
    """wrap a text made of several paragraphs to a maximum line size

    The text is split into paragraphs on blank lines; each paragraph is
    normalized on its own and the paragraphs are joined back, separated by
    a line holding only the indentation string. The indentation string
    may be used to insert a comment (#) or a quoting (>) mark for instance.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :type rest: bool
    :param rest:
      if true, paragraphs are processed with `normalize_rest_paragraph`
      instead of `normalize_paragraph`

    :rtype: str or unicode
    :return:
      the input text normalized to fit on lines with a maximized size
      inferior to `line_len`, and optionally prefixed by an
      indentation string
    """
    normp = normalize_paragraph
    if rest:
        normp = normalize_rest_paragraph
    paragraphs = [normp(paragraph, line_len, indent)
                  for paragraph in _BLANKLINES_RGX.split(text)]
    separator = '%s%s%s' % (linesep, indent, linesep)
    return separator.join(paragraphs)
135
136
def normalize_paragraph(text, line_len=80, indent=''):
    """wrap a single paragraph to a maximum line size

    Every run of whitespace (line jumps included) is first collapsed to a
    single space, then the text is cut into lines with `splittext`. Each
    resulting line is prefixed by the indentation string, which may be
    used to insert a comment mark for instance.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :rtype: str or unicode
    :return:
      the input text normalized to fit on lines with a maximized size
      inferior to `line_len`, and optionally prefixed by an
      indentation string
    """
    remaining = _NORM_SPACES_RGX.sub(' ', text)
    # the indentation is part of the line, so it eats into the usable width
    width = line_len - len(indent)
    wrapped = []
    while remaining:
        head, remaining = splittext(remaining.strip(), width)
        wrapped.append(indent + head)
    return linesep.join(wrapped)
165   
def normalize_rest_paragraph(text, line_len=80, indent=''):
    """wrap a ReST paragraph to a maximum line size

    Unlike `normalize_paragraph`, the input line structure is kept: each
    input line is stripped, has its inner whitespace collapsed, and is
    only split (with `splittext`) when it is longer than the available
    width. Each output line is prefixed by the indentation string, which
    may be used to insert a comment mark for instance.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :rtype: str or unicode
    :return:
      the input text normalized to fit on lines with a maximized size
      inferior to `line_len`, and optionally prefixed by an
      indentation string
    """
    # the indentation is part of the line, so it eats into the usable width
    width = line_len - len(indent)
    wrapped = []
    for source_line in text.splitlines():
        pending = _NORM_SPACES_RGX.sub(' ', source_line.strip())
        while len(pending) > width:
            # too long line, need split
            head, tail = splittext(pending, width)
            wrapped.append(indent + head)
            if tail:
                pending = tail + ' '
            else:
                pending = ''
        # what is left fits on one line; empty input lines are dropped
        if pending:
            wrapped.append(indent + pending.strip())
    return linesep.join(wrapped)
205
def splittext(text, line_len):
    """split the given text on space according to the given max line size

    The split is done on the last space found at an index lower than or
    equal to `line_len` (index 0 excluded). If there is no such space, the
    split is done on the first space found from index `line_len`, or not
    at all if no space is left: the first line is then longer than
    `line_len`.

    :type text: str or unicode
    :param text: the text to split

    :type line_len: int
    :param line_len:
      maximum size of the first line; a negative value is handled as 0

    :rtype: tuple
    :return:
      a 2-uple:
      * a line <= line_len if possible
      * the rest of the text which has to be reported on another line
    """
    # A negative width would otherwise be used as an index relative to the
    # end of the string and the "rest" could be the whole text again, which
    # makes callers looping until the rest is empty never terminate.
    line_len = max(line_len, 0)
    if len(text) <= line_len:
        return text, ''
    # from here len(text) > line_len, so line_len is a valid index
    pos = text.rfind(' ', 1, line_len + 1)
    if pos == -1:
        # no space to split on before the limit: overflow up to the next one
        pos = text.find(' ', line_len)
        if pos == -1:
            pos = len(text)
    return text[:pos], text[pos+1:].strip()
223
224
def get_csv(string, sep=','):
    """return the list of non empty values found in a csv formatted line

    Each value is stripped from its surrounding whitespace; values which
    are empty once stripped are left out of the result.

    >>> get_csv('a, b, c   ,  4')
    ['a', 'b', 'c', '4']
    >>> get_csv('a')
    ['a']
    >>> get_csv('a,,b')
    ['a', 'b']

    :type string: str or unicode
    :param string: a csv line

    :type sep: str or unicode
    :param sep: field separator, default to the comma (',')

    :rtype: list
    :return: the stripped, non empty values of the line, in order
    """
    values = []
    for word in string.split(sep):
        word = word.strip()
        if word:
            values.append(word)
    return values
244
245
# A single line break, whatever the platform convention it was written with.
_LINE_RGX = re.compile('\r\n|\r+|\n')


def pretty_match(match, string, underline_char='^'):
    """return a string with the match location underlined:

    >>> import re
    >>> print(pretty_match(re.search('mange', 'il mange du bacon'), 'il mange du bacon'))
    il mange du bacon
       ^^^^^

    Line breaks of the input string are replaced by `os.linesep` in the
    result, and trailing whitespace of the result is stripped.

    :type match: _sre.SRE_match
    :param match: object returned by re.match, re.search or re.finditer

    :type string: str or unicode
    :param string:
      the string on which the regular expression has been applied to
      obtain the `match` object

    :type underline_char: str or unicode
    :param underline_char:
      character to use to underline the matched section, default to the
      caret '^'

    :rtype: str or unicode
    :return:
      the original string with an inserted line to underline the match
      location
    """
    # The match offsets refer to the string as given, but the layout below
    # is computed on the string with normalized line breaks, whose length
    # may differ. Translate both offsets by normalizing the text preceding
    # them.
    start = len(_LINE_RGX.sub(linesep, string[:match.start()]))
    end = len(_LINE_RGX.sub(linesep, string[:match.end()]))
    string = _LINE_RGX.sub(linesep, string)
    # beginning of the line on which the match starts
    start_line_pos = string.rfind(linesep, 0, start)
    if start_line_pos == -1:
        start_line_pos = 0
        result = []
    else:
        # everything before that line is emitted unchanged
        result = [string[:start_line_pos]]
        start_line_pos += len(linesep)
    underline = ' ' * (start - start_line_pos) + underline_char * (end - start)
    # end of the line on which the match ends
    end_line_pos = string.find(linesep, end)
    if end_line_pos == -1:
        result.append(string[start_line_pos:])
        result.append(underline)
    else:
        result.append(string[start_line_pos:end_line_pos])
        result.append(underline)
        # the lines following the match come after the underline
        result.append(string[end_line_pos + len(linesep):])
    return linesep.join(result).rstrip()
299
300
301# Ansi colorization ###########################################################
302
# An escape sequence is ANSI_PREFIX, then ';' separated codes, then ANSI_END.
ANSI_PREFIX = '\033['
ANSI_END = 'm'
# Sequence made of the single code "0".
ANSI_RESET = ANSI_PREFIX + '0' + ANSI_END

# style identifier -> ANSI code
ANSI_STYLES = dict(
    reset="0",
    bold="1",
    italic="3",
    underline="4",
    blink="5",
    inverse="7",
    strike="9",
)

# color identifier -> ANSI code
ANSI_COLORS = dict(
    reset="0",
    black="30",
    red="31",
    green="32",
    yellow="33",
    blue="34",
    magenta="35",
    cyan="36",
    white="37",
)
326
327
def _get_ansi_code(color=None, style=None):
    """build the ansi escape code selecting the given color and style

    :type color: str or None
    :param color:
      the color identifier (see `ANSI_COLORS` for available values)

    :type style: str or None
    :param style:
      style string (see `ANSI_STYLES` for available values). To get
      several style effects at the same time, use a comma as separator.

    :raise KeyError: if a nonexistent color or style identifier is given

    :rtype: str
    :return:
      the built escape code, or an empty string if neither a color nor a
      style is given
    """
    codes = []
    # style codes come first, in the order they were given
    if style:
        codes.extend([ANSI_STYLES[effect] for effect in get_csv(style)])
    if color:
        codes.append(ANSI_COLORS[color])
    if not codes:
        return ''
    return ANSI_PREFIX + ';'.join(codes) + ANSI_END
355
def colorize_ansi(msg, color=None, style=None):
    """wrap a message with the ansi escape codes selecting color and style

    :type msg: str or unicode
    :param msg: the message string to colorize

    :type color: str or None
    :param color:
      the color identifier (see `ANSI_COLORS` for available values)

    :type style: str or None
    :param style:
      style string (see `ANSI_STYLES` for available values). To get
      several style effects at the same time, use a comma as separator.

    :raise KeyError: if a nonexistent color or style identifier is given

    :rtype: str or unicode
    :return:
      the message preceded by the escape code and followed by the reset
      code, or the message itself if there is no escape code to apply
    """
    escape_code = ''
    # nothing to look up when neither a color nor a style was asked for
    if color is not None or style is not None:
        escape_code = _get_ansi_code(color, style)
    # an empty escape code means there is nothing to apply: leave msg as is
    if not escape_code:
        return msg
    return '%s%s%s' % (escape_code, msg, ANSI_RESET)
Note: See TracBrowser for help on using the browser.