| 1 | # Copyright (c) 2003-2006 LOGILAB S.A. (Paris, FRANCE). |
|---|
| 2 | # http://www.logilab.fr/ -- mailto:contact@logilab.fr |
|---|
| 3 | # |
|---|
| 4 | # This program is free software; you can redistribute it and/or modify it under |
|---|
| 5 | # the terms of the GNU General Public License as published by the Free Software |
|---|
| 6 | # Foundation; either version 2 of the License, or (at your option) any later |
|---|
| 7 | # version. |
|---|
| 8 | # |
|---|
| 9 | # This program is distributed in the hope that it will be useful, but WITHOUT |
|---|
| 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|---|
| 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
|---|
| 12 | # |
|---|
| 13 | # You should have received a copy of the GNU General Public License along with |
|---|
| 14 | # this program; if not, write to the Free Software Foundation, Inc., |
|---|
| 15 | # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|---|
| 16 | """Some file / file path manipulation utilities. |
|---|
| 17 | |
|---|
| 18 | :group path manipulation: first_level_directory, relative_path, is_binary,\ |
|---|
| 19 | get_by_ext, remove_dead_links |
|---|
| 20 | :group file manipulation: norm_read, norm_open, lines, stream_lines,\ |
|---|
| 21 | write_open_mode, ensure_fs_mode, export |
|---|
| 22 | :sort: path manipulation, file manipulation |
|---|
| 23 | """ |
|---|
| 24 | |
|---|
| 25 | __docformat__ = "restructuredtext en" |
|---|
| 26 | |
|---|
| 27 | import sys |
|---|
| 28 | import shutil |
|---|
| 29 | import mimetypes |
|---|
| 30 | from os.path import isabs, isdir, islink, split, exists, walk, normpath, join |
|---|
| 31 | from os.path import abspath |
|---|
| 32 | from os import sep, mkdir, remove, listdir, stat, chmod |
|---|
| 33 | from stat import ST_MODE, S_IWRITE |
|---|
| 34 | from cStringIO import StringIO |
|---|
| 35 | |
|---|
| 36 | from logilab.common import STD_BLACKLIST as BASE_BLACKLIST, IGNORED_EXTENSIONS |
|---|
| 37 | from logilab.common.shellutils import find |
|---|
| 38 | |
|---|
def first_level_directory(path):
    """return the topmost directory component of a path

    >>> first_level_directory('home/syt/work')
    'home'
    >>> first_level_directory('/home/syt/work')
    '/'
    >>> first_level_directory('work')
    'work'
    >>>

    :type path: str
    :param path: the path to inspect

    :rtype: str
    :return:
      the first component of `path`, or the file system root when `path`
      is absolute
    """
    parent, leaf = split(path)
    # peel trailing components off until nothing is left on one side
    while parent and leaf:
        parent, leaf = split(parent)
    # an empty leaf means we hit the fs root of an absolute path (or an
    # empty path): the remaining parent is the answer in that case
    return leaf or parent
|---|
| 63 | |
|---|
def abspath_listdir(path):
    """return the entries of directory `path` as absolute paths

    >>> os.listdir('/home')
    ['adim', 'alf', 'arthur', 'auc']
    >>> abspath_listdir('/home')
    ['/home/adim', '/home/alf', '/home/arthur', '/home/auc']

    :type path: str
    :param path: the directory to list

    :rtype: list
    :return: one absolute path per entry returned by `listdir`
    """
    root = abspath(path)
    entries = []
    for name in listdir(root):
        entries.append(join(root, name))
    return entries
|---|
| 74 | |
|---|
| 75 | |
|---|
def is_binary(filename):
    """return true if filename may be a binary file, according to its
    extension

    :type filename: str
    :param filename: the name of the file

    :rtype: bool
    :return:
      True if the file is considered binary, i.e. if its guessed mime type
      doesn't begin with 'text' or if no mime type could be guessed at all
    """
    mime_type = mimetypes.guess_type(filename)[0]
    if mime_type is None:
        # unknown extension: be conservative and treat the file as binary
        return True
    return not mime_type.startswith('text')
|---|
| 92 | |
|---|
| 93 | |
|---|
def write_open_mode(filename):
    """return the mode that should be used to open a file for writing

    :type filename: str
    :param filename: the name of the file

    :rtype: str
    :return:
      'wb' when `is_binary` considers the file binary, 'w' otherwise
    """
    if not is_binary(filename):
        return 'w'
    return 'wb'
|---|
| 106 | |
|---|
| 107 | |
|---|
def ensure_fs_mode(filepath, desired_mode=S_IWRITE):
    """check that the given file has the given mode(s) set, else try to
    set them

    :type filepath: str
    :param filepath: path of the file

    :type desired_mode: int
    :param desired_mode:
      ORed flags describing the desired mode. Use constants from the
      `stat` module for file permission's modes

    :raise OSError: if the file can't be stat'ed or its mode can't be changed
    """
    mode = stat(filepath)[ST_MODE]
    # every requested bit must be present: testing only for a non-zero
    # intersection would skip the chmod as soon as one of several ORed
    # flags is already set
    if (mode & desired_mode) != desired_mode:
        chmod(filepath, mode | desired_mode)
|---|
| 123 | |
|---|
| 124 | |
|---|
| 125 | class ProtectedFile(file): |
|---|
| 126 | """a special file-object class that automatically that automatically |
|---|
| 127 | does a 'chmod +w' when needed |
|---|
| 128 | |
|---|
| 129 | XXX: for now, the way it is done allows 'normal file-objects' to be |
|---|
| 130 | created during the ProtectedFile object lifetime. |
|---|
| 131 | One way to circumvent this would be to chmod / unchmod on each |
|---|
| 132 | write operation. |
|---|
| 133 | |
|---|
| 134 | One other way would be to : |
|---|
| 135 | |
|---|
| 136 | - catch the IOError in the __init__ |
|---|
| 137 | |
|---|
| 138 | - if IOError, then create a StringIO object |
|---|
| 139 | |
|---|
| 140 | - each write operation writes in this StringIO obejct |
|---|
| 141 | |
|---|
| 142 | - on close()/del(), write/append the StringIO content to the file and |
|---|
| 143 | do the chmod only once |
|---|
| 144 | """ |
|---|
| 145 | def __init__(self, filepath, mode): |
|---|
| 146 | self.original_mode = stat(filepath)[ST_MODE] |
|---|
| 147 | self.mode_changed = False |
|---|
| 148 | if mode in ('w', 'a', 'wb', 'ab'): |
|---|
| 149 | if not self.original_mode & S_IWRITE: |
|---|
| 150 | chmod(filepath, self.original_mode | S_IWRITE) |
|---|
| 151 | self.mode_changed = True |
|---|
| 152 | file.__init__(self, filepath, mode) |
|---|
| 153 | |
|---|
| 154 | def _restore_mode(self): |
|---|
| 155 | """restores the original mode if needed""" |
|---|
| 156 | if self.mode_changed: |
|---|
| 157 | chmod(self.name, self.original_mode) |
|---|
| 158 | # Don't re-chmod in case of several restore |
|---|
| 159 | self.mode_changed = False |
|---|
| 160 | |
|---|
| 161 | def close(self): |
|---|
| 162 | """restore mode before closing""" |
|---|
| 163 | self._restore_mode() |
|---|
| 164 | file.close(self) |
|---|
| 165 | |
|---|
| 166 | def __del__(self): |
|---|
| 167 | if not self.closed: |
|---|
| 168 | self.close() |
|---|
| 169 | |
|---|
| 170 | |
|---|
class UnresolvableError(Exception):
    """raised by `relative_path` when no relative path can be computed
    between the two given paths
    """
|---|
| 175 | |
|---|
def relative_path(from_file, to_file):
    """try to compute the path leading from `from_file` to `to_file`
    (the result is absolute if `to_file` is absolute while `from_file`
    isn't). The typical use case, used in the examples below, is to
    insert in `from_file` a link pointing to `to_file`.

    If both files are relative, they're expected to be relative to the same
    directory.

    >>> relative_path( from_file='toto/index.html', to_file='index.html')
    '../index.html'
    >>> relative_path( from_file='index.html', to_file='toto/index.html')
    'toto/index.html'
    >>> relative_path( from_file='tutu/index.html', to_file='toto/index.html')
    '../toto/index.html'
    >>> relative_path( from_file='toto/index.html', to_file='/index.html')
    '/index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/index.html')
    '../index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/toto/summary.html')
    'summary.html'
    >>> relative_path( from_file='index.html', to_file='index.html')
    ''
    >>> relative_path( from_file='/index.html', to_file='toto/index.html')
    Traceback (most recent call last):
      File "<string>", line 1, in ?
      File "<stdin>", line 37, in relative_path
    UnresolvableError
    >>> relative_path( from_file='/index.html', to_file='/index.html')
    ''
    >>>

    :type from_file: str
    :param from_file: source file (where links will be inserted)

    :type to_file: str
    :param to_file: target file (on which links point)

    :raise UnresolvableError:
      if `from_file` is absolute while `to_file` is relative

    :rtype: str
    :return: the relative path of `to_file` from `from_file`
    """
    from_file = normpath(from_file)
    to_file = normpath(to_file)
    if from_file == to_file:
        return ''
    source_is_abs = isabs(from_file)
    target_is_abs = isabs(to_file)
    if target_is_abs and not source_is_abs:
        # an absolute target is usable as is from a relative source
        return to_file
    if source_is_abs and not target_is_abs:
        raise UnresolvableError()
    source_parts = from_file.split(sep)
    target_parts = to_file.split(sep)
    climbs = []
    in_common_prefix = True
    # walk the directories of the source (its last part is the file name)
    for dirname in source_parts[:-1]:
        if (in_common_prefix and len(target_parts) > 1
            and dirname == target_parts[0]):
            # directory shared by both paths: nothing to emit
            del target_parts[0]
        else:
            # paths diverged: go up one level for each remaining directory
            in_common_prefix = False
            climbs.append('..')
    return sep.join(climbs + target_parts)
|---|
| 241 | |
|---|
| 242 | |
|---|
| 243 | from logilab.common.textutils import _LINE_RGX |
|---|
| 244 | from sys import version_info |
|---|
# true when running on Python >= 2.3: norm_read / norm_open then open files
# with the 'U' mode flag instead of substituting line feeds with _LINE_RGX
_HAS_UNIV_OPEN = version_info[:2] >= (2, 3)
# version_info was only needed for the check above, don't keep it around
del version_info
|---|
| 247 | |
|---|
def norm_read(path):
    """return the content of the file with normalized line feeds

    The file is explicitly closed once read, whatever happens.

    :type path: str
    :param path: path to the file to read

    :rtype: str
    :return: the content of the file with normalized line feeds
    """
    if _HAS_UNIV_OPEN:
        stream = open(path, 'U')
    else:
        stream = open(path)
    try:
        content = stream.read()
    finally:
        # don't rely on garbage collection to release the file handle
        stream.close()
    if _HAS_UNIV_OPEN:
        # line feeds have already been normalized by the 'U' open mode
        return content
    return _LINE_RGX.sub('\n', content)
|---|
| 260 | |
|---|
| 261 | |
|---|
def norm_open(path):
    """return a stream for a file with content with normalized line feeds

    :type path: str
    :param path: path to the file to open

    :rtype: file or StringIO
    :return:
      the opened file with normalized line feeds. It's up to the caller to
      close the returned stream
    """
    if _HAS_UNIV_OPEN:
        return open(path, 'U')
    # no universal newline support: read the whole content, close the file
    # right away and hand back an in-memory stream of the normalized text
    stream = open(path)
    try:
        content = stream.read()
    finally:
        stream.close()
    return StringIO(_LINE_RGX.sub('\n', content))
|---|
| 274 | |
|---|
| 275 | |
|---|
def lines(path, comments=None):
    """return a list of non empty lines in the file located at `path`

    :type path: str
    :param path: path to the file

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (ie lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    stream = norm_open(path)
    try:
        return stream_lines(stream, comments)
    finally:
        # close the stream even if reading it failed
        stream.close()
|---|
| 298 | |
|---|
| 299 | |
|---|
def stream_lines(stream, comments=None):
    """return the list of meaningful lines found in the given `stream`

    :type stream: object implementing 'xreadlines' or 'readlines'
    :param stream: file like object

    :type comments: str or None
    :param comments:
      optional comment marker: stripped lines starting with this string
      are skipped

    :rtype: list
    :return:
      the stripped lines of the stream, empty and commented lines
      excluded

    :warning: at some point this function will probably return an iterator
    """
    # prefer `xreadlines` when the stream provides it
    if hasattr(stream, 'xreadlines'):
        reader = stream.xreadlines
    else:
        reader = stream.readlines
    kept = []
    for raw_line in reader():
        stripped = raw_line.strip()
        if not stripped:
            continue
        if comments is not None and stripped.startswith(comments):
            continue
        kept.append(stripped)
    return kept
|---|
| 328 | |
|---|
| 329 | |
|---|
def export(from_dir, to_dir,
           blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
           verbose=0):
    """make a mirror of `from_dir` in `to_dir`, omitting directories and
    files listed in the black list or ending with one of the given
    extensions

    :type from_dir: str
    :param from_dir: directory to export (a trailing separator is accepted)

    :type to_dir: str
    :param to_dir:
      destination directory, created if needed (its parent must exist)

    :type blacklist: list or tuple
    :param blacklist:
      list of files or directories to ignore, default to the content of
      `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext:
      list of extensions to ignore, default to the content of
      `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose:
      flag indicating whether information about exported files should be
      printed to stderr, default to False
    """
    def make_mirror(_, directory, fnames):
        """walk handler"""
        # removing names in place is what makes walk skip them
        for norecurs in blacklist:
            try:
                fnames.remove(norecurs)
            except ValueError:
                continue
        for filename in fnames:
            # don't include binary files
            for ext in ignore_ext:
                if filename.endswith(ext):
                    break
            else:
                src = join(directory, filename)
                # path of src relative to from_dir; separators are stripped
                # so that it works whether or not from_dir ends with one
                relative = src[len(from_dir):].lstrip(sep)
                dest = join(to_dir, relative)
                if verbose:
                    print >> sys.stderr, src, '->', dest
                if isdir(src):
                    if not exists(dest):
                        mkdir(dest)
                else:
                    # replace a previously exported file
                    if exists(dest):
                        remove(dest)
                    shutil.copy2(src, dest)
    try:
        mkdir(to_dir)
    except OSError:
        # best effort: the destination usually already exists
        pass
    walk(from_dir, make_mirror, None)
|---|
| 387 | |
|---|
| 388 | |
|---|
| 389 | def remove_dead_links(directory, verbose=0): |
|---|
| 390 | """recursivly traverse directory and remove all dead links |
|---|
| 391 | |
|---|
| 392 | :type directory: str |
|---|
| 393 | :param directory: directory to cleanup |
|---|
| 394 | |
|---|
| 395 | :type verbose: bool |
|---|
| 396 | :param verbose: |
|---|
| 397 | flag indicating wether information about deleted links should be |
|---|
| 398 | printed to stderr, default to False |
|---|
| 399 | """ |
|---|
| 400 | def _remove_dead_link(_, directory, fnames): |
|---|
| 401 | """walk handler""" |
|---|
| 402 | for filename in fnames: |
|---|
| 403 | src = join(directory, filename) |
|---|
| 404 | if islink(src) and not exists(src): |
|---|
| 405 | if verbose: |
|---|
| 406 | print 'remove dead link', src |
|---|
| 407 | remove(src) |
|---|
| 408 | walk(directory, _remove_dead_link, None) |
|---|
| 409 | |
|---|
| 410 | |
|---|
| 411 | from warnings import warn |
|---|
| 412 | |
|---|
def files_by_ext(directory, include_exts=None, exclude_exts=None,
                 exclude_dirs=BASE_BLACKLIST):
    """deprecated wrapper around `shellutils.find`, selecting files
    according to their extension: give either the `include_exts`
    argument (to select on the listed extensions) or the `exclude_exts`
    argument (to reject the listed extensions), but not both. The
    search itself is delegated to `find`.

    :type directory: str
    :param directory: directory where files should be searched

    :type include_exts: list or tuple or None
    :param include_exts: list of file extensions to consider

    :type exclude_exts: list or tuple or None
    :param exclude_exts: list of file extensions to ignore

    :type exclude_dirs: list or tuple or None
    :param exclude_dirs:
      list of directories where we should not recurse, given to `find`
      as its blacklist

    :rtype: list
    :return: the list of files matching input criteria
    """
    assert not (include_exts and exclude_exts)
    warn("files_by_ext is deprecated, use shellutils.find instead" ,
         DeprecationWarning, stacklevel=2)
    if not include_exts:
        # no inclusion list: run find in exclusion mode
        return find(directory, exclude_exts, exclude=True,
                    blacklist=exclude_dirs)
    return find(directory, include_exts, blacklist=exclude_dirs)
|---|
| 443 | |
|---|
def include_files_by_ext(directory, include_exts, exclude_dirs=BASE_BLACKLIST):
    """deprecated wrapper around `shellutils.find`, selecting files on
    the given extensions

    :type directory: str
    :param directory: directory where files should be searched

    :type include_exts: list or tuple or None
    :param include_exts: list of file extensions to consider

    :type exclude_dirs: list or tuple or None
    :param exclude_dirs:
      list of directories where we should not recurse, given to `find`
      as its blacklist

    :rtype: list
    :return: the list of files matching input criteria
    """
    warn("include_files_by_ext is deprecated, use shellutils.find instead" ,
         DeprecationWarning, stacklevel=2)
    matching = find(directory, include_exts, blacklist=exclude_dirs)
    return matching
|---|
| 462 | |
|---|
def exclude_files_by_ext(directory, exclude_exts, exclude_dirs=BASE_BLACKLIST):
    """deprecated wrapper around `shellutils.find`, run in exclusion
    mode on the given extensions

    :type directory: str
    :param directory: directory where files should be searched

    :type exclude_exts: list or tuple or None
    :param exclude_exts: list of file extensions to ignore

    :type exclude_dirs: list or tuple or None
    :param exclude_dirs:
      list of directories where we should not recurse, given to `find`
      as its blacklist

    :rtype: list
    :return: the list of files matching input criteria
    """
    warn("exclude_files_by_ext is deprecated, use shellutils.find instead" ,
         DeprecationWarning, stacklevel=2)
    remaining = find(directory, exclude_exts, exclude=True,
                     blacklist=exclude_dirs)
    return remaining
|---|