root / logilab.pylintinstaller / logilab / common / fileutils.py

Revision 202:d67e86292521, 15.2 kB (checked in by tziade@…, 9 months ago)

added logilab.pylintinstaller

Line 
1# Copyright (c) 2003-2006 LOGILAB S.A. (Paris, FRANCE).
2# http://www.logilab.fr/ -- mailto:contact@logilab.fr
3#
4# This program is free software; you can redistribute it and/or modify it under
5# the terms of the GNU General Public License as published by the Free Software
6# Foundation; either version 2 of the License, or (at your option) any later
7# version.
8#
9# This program is distributed in the hope that it will be useful, but WITHOUT
10# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License along with
14# this program; if not, write to the Free Software Foundation, Inc.,
15# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
"""Some file / file path manipulation utilities.

:group path manipulation: first_level_directory, relative_path, is_binary,\
get_by_ext, remove_dead_links
:group file manipulation: norm_read, norm_open, lines, stream_lines,\
write_open_mode, ensure_fs_mode, export
:sort: path manipulation, file manipulation
"""
24
25__docformat__ = "restructuredtext en"
26
27import sys
28import shutil
29import mimetypes
30from os.path import isabs, isdir, islink, split, exists, walk, normpath, join
31from os.path import abspath
32from os import sep, mkdir, remove, listdir, stat, chmod
33from stat import ST_MODE, S_IWRITE
34from cStringIO import StringIO
35
36from logilab.common import STD_BLACKLIST as BASE_BLACKLIST, IGNORED_EXTENSIONS
37from logilab.common.shellutils import find
38
def first_level_directory(path):
    """return the first level directory of a path

    >>> first_level_directory('home/syt/work')
    'home'
    >>> first_level_directory('/home/syt/work')
    '/'
    >>> first_level_directory('work')
    'work'
    >>> first_level_directory('home/syt/')
    'home'

    :type path: str
    :param path: the path for which we want the first level directory

    :rtype: str
    :return:
      the first level directory appearing in `path`, or the file system
      root if `path` is absolute
    """
    head, tail = split(path)
    if not tail and head != path:
        # `path` ended with a separator ('home/syt/'): the first split only
        # stripped it, so split once more to get a real (head, tail) pair.
        # `head == path` means there is nothing left to strip (root or '')
        head, tail = split(head)
    # peel off trailing components until nothing is left before the tail
    while head and tail:
        head, tail = split(head)
    if tail:
        return tail
    # path was absolute, head is the fs root
    return head
63
def abspath_listdir(path):
    """return the entries of a directory as absolute paths

    >>> os.listdir('/home')
    ['adim', 'alf', 'arthur', 'auc']
    >>> abspath_listdir('/home')
    ['/home/adim', '/home/alf', '/home/arthur', '/home/auc']

    :type path: str
    :param path: the directory to list

    :rtype: list
    :return:
      one absolute path per entry of the directory, in the order given
      by `listdir`
    """
    base = abspath(path)
    entries = []
    for name in listdir(base):
        entries.append(join(base, name))
    return entries
74
75   
def is_binary(filename):
    """return true if filename may be a binary file, according to its
    extension

    :type filename: str
    :param filename: the name of the file

    :rtype: bool
    :return:
      true if the file is a binary file (actually if its mime type
      doesn't begin with 'text'), or if its mime type can't be guessed
    """
    mime_type = mimetypes.guess_type(filename)[0]
    if mime_type is None:
        # unknown extension: be conservative and consider the file binary
        return True
    return not mime_type.startswith('text')
92
93
def write_open_mode(filename):
    """return the mode to use when opening a file for writing

    :type filename: str
    :param filename: the name of the file

    :rtype: str
    :return:
      'wb' if `is_binary` considers the file to be binary, 'w' otherwise
    """
    mode = 'w'
    if is_binary(filename):
        mode = 'wb'
    return mode
106
107
def ensure_fs_mode(filepath, desired_mode=S_IWRITE):
    """check that the given file has the given mode(s) set, else try to
    set it

    :type filepath: str
    :param filepath: path of the file

    :type desired_mode: int
    :param desired_mode:
      ORed flags describing the desired mode. Use constants from the
      `stat` module for file permission's modes
    """
    mode = stat(filepath)[ST_MODE]
    # every requested flag must be present: testing only for a non null
    # intersection would skip the chmod as soon as one flag is already set
    if (mode & desired_mode) != desired_mode:
        chmod(filepath, mode | desired_mode)
123       
124
class ProtectedFile(file):
    """a special file-object class that automatically does a 'chmod +w'
    when needed

    The file must already exist: its permissions are read before it is
    opened. If the file is opened in a mode allowing writes and isn't
    writable, it is made writable, and its original permissions are
    restored when the object is closed (or if opening it fails).

    XXX: for now, the way it is done allows 'normal file-objects' to be
    created during the ProtectedFile object lifetime.
    One way to circumvent this would be to chmod / unchmod on each
    write operation.

    One other way would be to :

    - catch the IOError in the __init__

    - if IOError, then create a StringIO object

    - each write operation writes in this StringIO object

    - on close()/del(), write/append the StringIO content to the file and
      do the chmod only once
    """
    def __init__(self, filepath, mode):
        """open `filepath` using `mode`, making it writable first if
        needed

        :type filepath: str
        :param filepath: path of an existing file

        :type mode: str
        :param mode: mode given to the `file` constructor
        """
        self.original_mode = stat(filepath)[ST_MODE]
        self.mode_changed = False
        # any mode containing 'w', 'a' or '+' allows writing ('r+', 'w+b'...)
        wants_write = 'w' in mode or 'a' in mode or '+' in mode
        if wants_write and not self.original_mode & S_IWRITE:
            chmod(filepath, self.original_mode | S_IWRITE)
            self.mode_changed = True
        opened = False
        try:
            file.__init__(self, filepath, mode)
            opened = True
        finally:
            if not opened and self.mode_changed:
                # the open failed: don't leave the file writable. Use
                # `filepath` since the file object isn't initialized
                chmod(filepath, self.original_mode)
                self.mode_changed = False

    def _restore_mode(self):
        """restores the original mode if needed"""
        if self.mode_changed:
            chmod(self.name, self.original_mode)
            # Don't re-chmod in case of several restore
            self.mode_changed = False

    def close(self):
        """restore mode before closing"""
        self._restore_mode()
        file.close(self)

    def __del__(self):
        """make sure the original mode is restored when the object is
        collected without having been closed
        """
        if not self.closed:
            self.close()
169
170
class UnresolvableError(Exception):
    """error raised by `relative_path` when no relative path can be
    computed between the two given paths
    """
175
def relative_path(from_file, to_file):
    """compute the path to use in `from_file` to reference `to_file`
    (typically to insert in `from_file` a link pointing to `to_file`)

    Both paths are normalized first. If both are relative, they are
    expected to be relative to the same directory. If only `to_file` is
    absolute, it is returned as is.

    >>> relative_path( from_file='toto/index.html', to_file='index.html')
    '../index.html'
    >>> relative_path( from_file='index.html', to_file='toto/index.html')
    'toto/index.html'
    >>> relative_path( from_file='tutu/index.html', to_file='toto/index.html')
    '../toto/index.html'
    >>> relative_path( from_file='toto/index.html', to_file='/index.html')
    '/index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/index.html')
    '../index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/toto/summary.html')
    'summary.html'
    >>> relative_path( from_file='index.html', to_file='index.html')
    ''
    >>> relative_path( from_file='/index.html', to_file='toto/index.html')
    Traceback (most recent call last):
      File "<string>", line 1, in ?
      File "<stdin>", line 37, in relative_path
    UnresolvableError
    >>> relative_path( from_file='/index.html', to_file='/index.html')
    ''

    :type from_file: str
    :param from_file: source file (where links will be inserted)

    :type to_file: str
    :param to_file: target file (on which links point)

    :raise UnresolvableError:
      if `from_file` is absolute while `to_file` is relative

    :rtype: str
    :return:
      the relative path of `to_file` from `from_file`, an empty string
      if both designate the same path
    """
    from_file = normpath(from_file)
    to_file = normpath(to_file)
    if from_file == to_file:
        return ''
    source_is_abs = isabs(from_file)
    target_is_abs = isabs(to_file)
    if target_is_abs and not source_is_abs:
        return to_file
    if source_is_abs and not target_is_abs:
        raise UnresolvableError()
    # directories containing the source file (its base name is dropped)
    source_dirs = from_file.split(sep)[:-1]
    target_parts = to_file.split(sep)
    # count the leading directories shared by both paths; the last
    # component of the target is never consumed
    shared = 0
    limit = min(len(source_dirs), len(target_parts) - 1)
    while shared < limit and source_dirs[shared] == target_parts[shared]:
        shared += 1
    # go up once for each source directory which isn't shared, then
    # down along what remains of the target
    climb = ['..'] * (len(source_dirs) - shared)
    return sep.join(climb + target_parts[shared:])
241
242
# regular expression used by norm_read / norm_open to replace line endings
# by '\n' when the 'U' open mode can't be used
from logilab.common.textutils import _LINE_RGX
from sys import version_info
# true if the running interpreter is at least Python 2.3: norm_read and
# norm_open then rely on open(path, 'U') instead of _LINE_RGX
_HAS_UNIV_OPEN = version_info[:2] >= (2, 3)
# only needed to compute the flag above, don't keep it in the namespace
del version_info
247
def norm_read(path):
    """return the content of the file with normalized line feeds

    :type path: str
    :param path: path to the file to read

    :rtype: str
    :return: the content of the file with normalized line feeds
    """
    if _HAS_UNIV_OPEN:
        stream = open(path, 'U')
    else:
        stream = open(path)
    # close the file explicitly instead of relying on garbage collection
    try:
        content = stream.read()
    finally:
        stream.close()
    if _HAS_UNIV_OPEN:
        # line feeds have already been normalized by the 'U' mode
        return content
    return _LINE_RGX.sub('\n', content)
260
261
def norm_open(path):
    """return a stream for a file with content with normalized line feeds

    :type path: str
    :param path: path to the file to open

    :rtype: file or StringIO
    :return:
      the opened file with normalized line feeds. The caller is
      responsible for closing it
    """
    if _HAS_UNIV_OPEN:
        return open(path, 'U')
    # no universal newline support: read the whole file, normalize its
    # content and serve it from memory
    raw = open(path)
    try:
        content = raw.read()
    finally:
        # the returned stream doesn't need the file anymore
        raw.close()
    return StringIO(_LINE_RGX.sub('\n', content))
274
275     
def lines(path, comments=None):
    """return a list of non empty lines in the file located at `path`

    :type path: str
    :param path: path to the file

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (ie lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    stream = norm_open(path)
    # make sure the file is closed even if reading its lines fails
    try:
        return stream_lines(stream, comments)
    finally:
        stream.close()
298
299
def stream_lines(stream, comments=None):
    """collect the meaningful lines of the given `stream`

    :type stream: object implementing 'xreadlines' or 'readlines'
    :param stream: file like object

    :type comments: str or None
    :param comments:
      optional string marking commented lines: stripped lines starting
      with this string are skipped

    :rtype: list
    :return:
      the lines of the stream, stripped, without the empty and the
      commented ones

    :warning: at some point this function will probably return an iterator
    """
    # prefer 'xreadlines' when the stream provides it, else use 'readlines'
    missing = []
    reader = getattr(stream, 'xreadlines', missing)
    if reader is missing:
        reader = stream.readlines
    kept = []
    for raw in reader():
        text = raw.strip()
        if not text:
            continue
        if comments is not None and text.startswith(comments):
            continue
        kept.append(text)
    return kept
328
329
def export(from_dir, to_dir,
           blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
           verbose=0):
    """make a mirror of `from_dir` in `to_dir`, omitting directories and
    files listed in the black list or ending with one of the given
    extensions

    :type from_dir: str
    :param from_dir: directory to export

    :type to_dir: str
    :param to_dir: destination directory, created if it doesn't exist

    :type blacklist: list or tuple
    :param blacklist:
      list of files or directories to ignore, default to the content of
      `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext:
      list of extensions to ignore, default to  the content of
      `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose:
      flag indicating whether information about exported files should be
      printed to stderr, default to False

    :raise OSError:
      if `to_dir` can't be created and isn't an existing directory
    """
    def make_mirror(_, directory, fnames):
        """walk handler: mirror the entries of `directory`, removing from
        `fnames` the ones which should not be visited
        """
        for norecurs in blacklist:
            try:
                fnames.remove(norecurs)
            except ValueError:
                continue
        # iterate on a copy since ignored entries are removed from `fnames`
        for filename in fnames[:]:
            for ext in ignore_ext:
                if filename.endswith(ext):
                    # remove it so walk doesn't enter an ignored directory,
                    # which has no counterpart in the mirror
                    fnames.remove(filename)
                    break
            else:
                src = join(directory, filename)
                # path of `src` relative to `from_dir`: strip the leading
                # separator, which is only there when `from_dir` doesn't
                # end with one
                relative = src[len(from_dir):].lstrip(sep)
                dest = join(to_dir, relative)
                if verbose:
                    sys.stderr.write('%s -> %s\n' % (src, dest))
                if isdir(src):
                    if not exists(dest):
                        mkdir(dest)
                else:
                    if exists(dest):
                        remove(dest)
                    shutil.copy2(src, dest)
    try:
        mkdir(to_dir)
    except OSError:
        # an already existing destination is fine, anything else is a
        # real error which should not be hidden
        if not isdir(to_dir):
            raise
    walk(from_dir, make_mirror, None)
387
388
def remove_dead_links(directory, verbose=0):
    """recursively traverse directory and remove all dead links

    :type directory: str
    :param directory: directory to cleanup

    :type verbose: bool
    :param verbose:
      flag indicating whether information about deleted links should be
      printed to stderr, default to False
    """
    def _remove_dead_link(_, directory, fnames):
        """walk handler: remove the dead links found in `directory`"""
        for filename in fnames:
            src = join(directory, filename)
            # a link for which `exists` is false has a missing target
            if islink(src) and not exists(src):
                if verbose:
                    # report on stderr, as documented
                    sys.stderr.write('remove dead link %s\n' % src)
                remove(src)
    walk(directory, _remove_dead_link, None)
409
410
411from warnings import warn
412
def files_by_ext(directory, include_exts=None, exclude_exts=None,
                 exclude_dirs=BASE_BLACKLIST):
    """deprecated, use `shellutils.find` instead

    Return the list of files of a directory selected according to their
    extension: give either the `include_exts` argument (to select files
    ending with one of the listed extensions) or the `exclude_exts`
    argument (to select files not ending with any of the listed
    extensions), but not both. Subdirectories are processed recursively.

    :type directory: str
    :param directory: directory where files should be searched

    :type include_exts: list or tuple or None
    :param include_exts: list of file extensions to consider

    :type exclude_exts: list or tuple or None
    :param exclude_exts: list of file extensions to ignore

    :type exclude_dirs: list or tuple or None
    :param exclude_dirs: list of directory where we should not recurse

    :rtype: list
    :return: the list of files matching input criteria
    """
    assert not include_exts or not exclude_exts
    warn("files_by_ext is deprecated, use shellutils.find instead" ,
         DeprecationWarning, stacklevel=2)
    if not include_exts:
        return find(directory, exclude_exts, exclude=True,
                    blacklist=exclude_dirs)
    return find(directory, include_exts, blacklist=exclude_dirs)
443
def include_files_by_ext(directory, include_exts, exclude_dirs=BASE_BLACKLIST):
    """deprecated, use `shellutils.find` instead

    Return the list of files of a directory ending with one of the given
    extensions.

    :type directory: str
    :param directory: directory where files should be searched

    :type include_exts: list or tuple or None
    :param include_exts: list of file extensions to consider

    :type exclude_dirs: list or tuple or None
    :param exclude_dirs: list of directory where we should not recurse

    :rtype: list
    :return: the list of files matching input criteria
    """
    warn("include_files_by_ext is deprecated, use shellutils.find instead" ,
         DeprecationWarning, stacklevel=2)
    matching = find(directory, include_exts, blacklist=exclude_dirs)
    return matching
462
def exclude_files_by_ext(directory, exclude_exts, exclude_dirs=BASE_BLACKLIST):
    """deprecated, use `shellutils.find` instead

    Return the list of files of a directory not ending with any of the
    given extensions.

    :type directory: str
    :param directory: directory where files should be searched

    :type exclude_exts: list or tuple or None
    :param exclude_exts: list of file extensions to ignore

    :type exclude_dirs: list or tuple or None
    :param exclude_dirs: list of directory where we should not recurse

    :rtype: list
    :return: the list of files matching input criteria
    """
    warn("exclude_files_by_ext is deprecated, use shellutils.find instead" ,
         DeprecationWarning, stacklevel=2)
    remaining = find(directory, exclude_exts, exclude=True,
                     blacklist=exclude_dirs)
    return remaining
Note: See TracBrowser for help on using the browser.