Package pyffi :: Package object_models
[hide private]
[frames | no frames]

Source Code for Package pyffi.object_models

  1  """ 
  2  :mod:`pyffi.object_models` --- File format description engines 
  3  ============================================================== 
  4   
  5  .. warning:: 
  6   
  7     The documentation of this module is very incomplete. 
  8   
  9  This module bundles all file format object models. An object model 
 10  is a group of classes whose instances can hold the information 
 11  contained in a file whose format is described in a particular way 
 12  (xml, xsd, and possibly others). 
 13   
 14  .. 
 15    There is a strong distinction between types that contain very specific 
 16    simple data (SimpleType) and more complex types that contain groups of 
 17    simple data (ComplexType, with its descendants StructType for named 
 18    lists of objects of different type and ArrayType for indexed lists of 
 19    objects of the same type). 
 20     
 21    The complex types are generic in that they can be instantiated using 
 22    metadata (i.e. data describing the structure of the actual file data) 
 23    from xml, xsd, or any other file format description. 
 24     
 25    For the simple types there are specific classes implementing access to 
 26    these data types. Typical implementations are present for integers, 
 27    floats, strings, and so on. Some simple types may also be derived from 
 28    already implemented simple types, if the metadata description allows 
 29    this. 
 30   
 31  .. autoclass:: MetaFileFormat 
 32     :show-inheritance: 
 33     :members: 
 34   
 35  .. autoclass:: FileFormat 
 36     :show-inheritance: 
 37     :members: 
 38  """ 
 39   
 40  # ***** BEGIN LICENSE BLOCK ***** 
 41  # 
 42  # Copyright (c) 2007-2011, Python File Format Interface 
 43  # All rights reserved. 
 44  # 
 45  # Redistribution and use in source and binary forms, with or without 
 46  # modification, are permitted provided that the following conditions 
 47  # are met: 
 48  # 
 49  #    * Redistributions of source code must retain the above copyright 
 50  #      notice, this list of conditions and the following disclaimer. 
 51  # 
 52  #    * Redistributions in binary form must reproduce the above 
 53  #      copyright notice, this list of conditions and the following 
 54  #      disclaimer in the documentation and/or other materials provided 
 55  #      with the distribution. 
 56  # 
 57  #    * Neither the name of the Python File Format Interface 
 58  #      project nor the names of its contributors may be used to endorse 
 59  #      or promote products derived from this software without specific 
 60  #      prior written permission. 
 61  # 
 62  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 63  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 64  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
 65  # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
 66  # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
 67  # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
 68  # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
 69  # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
 70  # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
 71  # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
 72  # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 73  # POSSIBILITY OF SUCH DAMAGE. 
 74  # 
 75  # ***** END LICENSE BLOCK ***** 
 76   
 77  import logging 
 78  import os.path # os.path.altsep 
 79  import re # compile 
 80  import sys # version_info 
 81   
 82  import pyffi.utils 
 83  import pyffi.utils.graph 
class MetaFileFormat(type):
    """This metaclass is an abstract base class for transforming
    a file format description into classes which can be directly used to
    manipulate files in this format.

    A file format is implemented as a particular class (a subclass of
    :class:`FileFormat`) with class members corresponding to different
    (bit)struct types, enum types, basic types, and aliases.
    """

    @staticmethod
    def openfile(filename, filepaths=None):
        r"""Find *filename* in given *filepaths*, and open it. Raises
        ``IOError`` if file cannot be opened.

        If *filepaths* is empty or ``None``, *filename* is opened as given.
        Otherwise each non-empty entry of *filepaths* is tried in order and
        the first file that can be opened is returned.

        :param filename: The file to open.
        :type filename: ``str``
        :param filepaths: List of paths where to look for the file.
        :type filepaths: ``list`` of ``str``\ s
        :return: The opened file object (opened with the default mode of
            the builtin ``open``).
        :raises IOError: If *filepaths* is given and the file could not be
            opened from any of its directories, or if *filepaths* is not
            given and ``open(filename)`` fails.
        """
        if not filepaths:
            return open(filename)
        for filepath in filepaths:
            if not filepath:
                # skip empty entries (e.g. an unset search path)
                continue
            try:
                return open(os.path.join(filepath, filename))
            except IOError:
                # cannot be opened from this directory; try the next one
                continue
        # every candidate directory was skipped or failed
        raise IOError(
            "'%s' not found in any of the directories %s"
            % (filename, filepaths))
121
class FileFormat(object):
    """This class is the base class for all file formats. It implements
    a number of useful functions such as walking over directory trees
    (:meth:`walkData`) and a default attribute naming function
    (:meth:`name_attribute`).
    It also implements the base class for representing file data
    (:class:`FileFormat.Data`).
    """

    RE_FILENAME = None
    """Override this with a regular expression (the result of a ``re.compile``
    call) for the file extension of the format you are implementing.
    """

    ARCHIVE_CLASSES = []
    """Override this with a list of archive formats that may contain
    files of the format.
    """

    # precompiled regular expressions, used in name_parts

    # raw string: '\W' is not a valid string escape, only a regex escape
    _RE_NAME_SEP = re.compile(r'[_\W]+')
    """Matches separators for splitting names."""

    _RE_NAME_DIGITS = re.compile('([0-9]+)|([a-zA-Z]+)')
    """Matches digits or characters for splitting names."""

    _RE_NAME_CAMEL = re.compile('([A-Z][a-z]*)|([a-z]+)')
    """Finds components of camelCase and CamelCase names."""

    _RE_NAME_LC = re.compile('[a-z]')
    """Matches a lower case character."""

    _RE_NAME_UC = re.compile('[A-Z]')
    """Matches an upper case character."""

    # override this with the data instance for this format
    class Data(pyffi.utils.graph.GlobalNode):
        """Base class for representing data in a particular format.
        Override this class to implement reading and writing.
        """

        _byte_order = '<'
        """Set to '<' for little-endian, and '>' for big-endian."""

        version = None
        """Version of the data."""

        user_version = None
        """User version (additional version field) of the data."""

        def inspect(self, stream):
            """Quickly checks whether the stream appears to contain
            data of a particular format. Resets stream to original position.
            Call this function if you simply wish to check that a file is
            of a particular format without having to parse it completely.

            Override this method.

            :param stream: The file to inspect.
            :type stream: file
            :return: ``True`` if stream is of particular format, ``False``
                otherwise.
            :raises NotImplementedError: Always, in this base class.
            """
            raise NotImplementedError

        def read(self, stream):
            """Read data of particular format from stream.
            Override this method.

            :param stream: The file to read from.
            :type stream: ``file``
            :raises NotImplementedError: Always, in this base class.
            """
            raise NotImplementedError

        def write(self, stream):
            """Write data of particular format to stream.
            Override this method.

            :param stream: The file to write to.
            :type stream: ``file``
            :raises NotImplementedError: Always, in this base class.
            """
            raise NotImplementedError

    @staticmethod
    def version_number(version_str):
        """Converts version string into an integer.
        This default implementation simply returns zero at all times,
        and works for formats that are not versioned.

        Override for versioned formats.

        :param version_str: The version string.
        :type version_str: ``str``
        :return: A version integer.
        """
        return 0

    @classmethod
    def name_parts(cls, name):
        """Intelligently split a name into parts:

        * first, split at non-alphanumeric characters
        * next, separate digits from characters
        * finally, if some part has mixed case, it must be
          camel case so split it further at upper case characters

        :param name: The name to split.
        :type name: ``str``
        :return: The parts of the name, in order.
        :rtype: ``list`` of ``str``

        >>> FileFormat.name_parts("hello_world")
        ['hello', 'world']
        >>> FileFormat.name_parts("HELLO_WORLD")
        ['HELLO', 'WORLD']
        >>> FileFormat.name_parts("HelloWorld")
        ['Hello', 'World']
        >>> FileFormat.name_parts("helloWorld")
        ['hello', 'World']
        >>> FileFormat.name_parts("xs:NMTOKEN")
        ['xs', 'NMTOKEN']
        >>> FileFormat.name_parts("xs:NCName")
        ['xs', 'N', 'C', 'Name']
        >>> FileFormat.name_parts('this IS a sillyNAME')
        ['this', 'IS', 'a', 'silly', 'N', 'A', 'M', 'E']
        >>> FileFormat.name_parts('tHis is A Silly naME')
        ['t', 'His', 'is', 'A', 'Silly', 'na', 'M', 'E']
        """
        # str(name) converts name to string in case it is a py2k
        # unicode string
        name = str(name)
        # separate at symbols; empty strings caused by leading or trailing
        # separators vanish in the next step (nothing matches in them)
        parts = cls._RE_NAME_SEP.split(name)
        # separate digits from letters: every match is exactly one
        # non-empty run of digits or of letters
        parts = [match.group()
                 for part in parts
                 for match in cls._RE_NAME_DIGITS.finditer(part)]
        # separate at upper case characters for CamelCase and camelCase words
        newparts = []
        for part in parts:
            if cls._RE_NAME_LC.search(part) and cls._RE_NAME_UC.search(part):
                # mixed case: find the camel bumps
                newparts.extend(
                    match.group()
                    for match in cls._RE_NAME_CAMEL.finditer(part))
            else:
                # single case (or digits only): keep as is
                newparts.append(part)
        return newparts

    @classmethod
    def name_attribute(cls, name):
        """Converts an attribute name, as in the description file,
        into a name usable by python.

        :param name: The attribute name.
        :type name: ``str``
        :return: Reformatted attribute name, useable by python.

        >>> FileFormat.name_attribute('tHis is A Silly naME')
        't_his_is_a_silly_na_m_e'
        >>> FileFormat.name_attribute('Test:Something')
        'test_something'
        >>> FileFormat.name_attribute('unknown?')
        'unknown'
        """
        return '_'.join(part.lower() for part in cls.name_parts(name))

    @classmethod
    def name_class(cls, name):
        """Converts a class name, as in the xsd file, into a name usable
        by python.

        :param name: The class name.
        :type name: str
        :return: Reformatted class name, useable by python.

        >>> FileFormat.name_class('this IS a sillyNAME')
        'ThisIsASillyNAME'
        """
        return ''.join(part.capitalize()
                       for part in cls.name_parts(name))

    @classmethod
    def walkData(cls, top, topdown=True, mode='rb'):
        """A generator which yields the data of all files in
        directory top whose filename matches the regular expression
        :attr:`RE_FILENAME`. The argument top can also be a file instead of a
        directory. Errors coming from os.walk are ignored.

        Each item is a ``(stream, data)`` pair, where *data* is a fresh,
        empty :class:`Data` instance of this format.

        Note that the caller is not responsible for closing the stream:
        it is closed as soon as the generator is resumed or closed.

        This function is for instance used by :mod:`pyffi.spells` to implement
        modifying a file after reading and parsing.

        :param top: The top folder.
        :type top: ``str``
        :param topdown: Determines whether subdirectories should be iterated
            over first.
        :type topdown: ``bool``
        :param mode: The mode in which to open files.
        :type mode: ``str``
        """
        # now walk over all these files in directory top
        for filename in pyffi.utils.walk(top, topdown, onerror=None,
                                         re_filename=cls.RE_FILENAME):
            with open(filename, mode) as stream:
                # return data for the stream
                # the caller can call data.read(stream),
                # or data.inspect(stream), etc.
                yield stream, cls.Data()

    @classmethod
    def walk(cls, top, topdown=True, mode='rb'):
        """A generator which yields all files in
        directory top whose filename matches the regular expression
        :attr:`RE_FILENAME`. The argument top can also be a file instead of a
        directory. Errors coming from os.walk are ignored.

        Each item is the opened file stream.

        Note that the caller is not responsible for closing the stream:
        it is closed as soon as the generator is resumed or closed.

        This function is for instance used by :mod:`pyffi.spells` to implement
        modifying a file after reading and parsing.

        :param top: The top folder.
        :type top: ``str``
        :param topdown: Determines whether subdirectories should be iterated
            over first.
        :type topdown: ``bool``
        :param mode: The mode in which to open files.
        :type mode: ``str``
        """
        # now walk over all these files in directory top
        for filename in pyffi.utils.walk(top, topdown, onerror=None,
                                         re_filename=cls.RE_FILENAME):
            with open(filename, mode) as stream:
                yield stream
370
class ArchiveFileFormat(FileFormat):
    """This class is the base class for all archive file formats. It
    implements incremental reading and writing of archive files.
    """

    class Data(FileFormat.Data):
        """Base class for representing archive data.
        Override this class to implement incremental reading and writing.
        """

        _stream = None
        """The file stream associated with the archive."""

        def __init__(self, name=None, mode=None, fileobj=None):
            """Sets _stream and _mode.

            Override this method. An implementation should at least do
            the equivalent of
            ``self._stream = fileobj if fileobj else open(name, mode)``.

            :param name: Name of the archive file, used when no *fileobj*
                is given.
            :param mode: The mode in which to open the archive
                (:meth:`read` passes ``'r'``, :meth:`write` passes ``'w'``).
            :param fileobj: An already opened stream for the archive.
            :raises NotImplementedError: Always, in this base class.
            """
            raise NotImplementedError

        def get_members(self):
            """Return an iterable over the members of the archive.

            Override this method. :meth:`write` consumes the result with
            ``list(...)`` and passes it on to :meth:`set_members`.

            :raises NotImplementedError: Always, in this base class.
            """
            raise NotImplementedError

        def set_members(self, members):
            """Store the given members in the archive.

            Override this method.

            :param members: The members to store, as obtained from
                :meth:`get_members`.
            :raises NotImplementedError: Always, in this base class.
            """
            raise NotImplementedError

        def close(self):
            """Close the archive.

            Override this method. An implementation should at least call
            ``self._stream.close()``.

            :raises NotImplementedError: Always, in this base class.
            """
            raise NotImplementedError

        def read(self, stream):
            """Attach this archive to *stream* for reading, by
            re-initializing the instance in ``'r'`` mode.

            :param stream: The file to read from.
            :type stream: ``file``
            """
            # __init__ takes the stream as ``fileobj`` (see also write);
            # passing it as ``stream=`` would raise a TypeError
            self.__init__(mode='r', fileobj=stream)

        def write(self, stream):
            """Copy all members of this archive into a new archive on
            *stream*, and re-attach this instance to *stream* in ``'w'``
            mode.

            :param stream: The file to write to.
            :type stream: ``file``
            :raises ValueError: If *stream* is the stream the archive is
                currently attached to.
            """
            if self._stream == stream:
                raise ValueError("cannot write back to the same stream")
            # get all members from the old stream; this must happen before
            # re-initialization, which replaces the old stream
            members = list(self.get_members())
            self.__init__(mode='w', fileobj=stream)
            # set all members to the new stream
            self.set_members(members)
412
class ArchiveMember(object):
    """Describes one file of an archive: the name under which it is
    recorded, and a stream holding its extracted data.
    """

    #: Temporary file stream which contains the extracted data.
    stream = None

    #: Name of the file as recorded in the archive.
    name = None
419