Trees | Indices | Help |
|
---|
|
1 """ 2 Unstable and experimental parser implementation. 3 """ 4 5 import base64 6 from functools import partial 7 from os import path 8 from os.path import exists 9 from xml.etree.ElementTree import iterparse 10 from xml.parsers.expat import ExpatError 11 12 from advene.model.consts import ADVENE_XML, PARSER_META_PREFIX, PACKAGED_ROOT 13 from advene.model.parsers.base_xml import XmlParserBase 14 from advene.model.parsers.exceptions import ParserError 15 import advene.model.serializers.advene_xml as serializer 16 from advene.util.files import get_path, is_local19 20 NAME = serializer.NAME 21 EXTENSION = serializer.EXTENSION 22 MIMETYPE = serializer.MIMETYPE 23 SERIALIZER = serializer # may be None for some parsers 24 25 @classmethod30027 """Is this parser likely to parse that file-like object? 28 29 `file_` is a readable file-like object. It is the responsibility of the 30 caller to close it. 31 32 Return an int between 00 and 99, indicating the likelihood of this parser 33 to handle correctly the given URL. 70 is used as a standard value when the 34 parser is pretty sure it can handle the URL. 35 """ 36 r = 0 37 if hasattr(file_, "seek"): 38 # try to open it as xml file and get the root element 39 t = file_.tell() 40 file_.seek(0) 41 it = iterparse(file_, events=("start",)) 42 try: 43 ev, el = it.next() 44 except ExpatError, e: 45 return 0 46 else: 47 if el.tag == "{%s}package" % cls._NAMESPACE_URI: 48 return 80 49 else: 50 return 0 51 file_.seek(0) 52 53 info = getattr(file_, "info", lambda: {})() 54 mimetype = info.get("content-type", "") 55 if mimetype.startswith(cls.MIMETYPE): 56 r = 80 57 else: 58 if mimetype.startswith("application/xml") \ 59 or mimetype.startswith("text/xml"): 60 r += 20 61 fpath = get_path(file_) 62 if fpath.endswith(cls.EXTENSION): 63 r += 50 64 elif fpath.endswith(".xml"): 65 r += 20 66 return r67 68 @classmethod70 """Return a parser that will parse `file_` into `package`. 71 72 `file_` is a readable file-like object. 
It is the responsibility of the 73 caller to close it. 74 75 The returned object must implement the interface for which 76 :class:`_Parser` is the reference implementation. 77 """ 78 return cls(file_, package)79 80 @classmethod82 """A shortcut for ``make_parser(file_, package).parse()``. 83 84 See also `make_parser`. 85 """ 86 cls(file_, package).parse()8789 "Do the actual parsing." 90 file_ = self.file 91 fpath = get_path(file_) 92 if is_local(file_) and fpath.endswith("content.xml"): 93 # looks like this is a manually-unzipped package, 94 dirname = path.split(fpath)[0] 95 mfn = path.join(dirname, "mimetype") 96 if exists(mfn): 97 f = open(mfn) 98 mimetype = f.read() 99 f.close() 100 if mimetype == self.MIMETYPE: 101 self.package.set_meta(PACKAGED_ROOT, dirname) 102 XmlParserBase.parse(self)103 104 # end of public interface 105 106 _NAMESPACE_URI = ADVENE_XML 107109 assert self.__class__.claims_for_parse(file_) > 0 110 XmlParserBase.__init__(self, file_, package, self._NAMESPACE_URI, 111 "package") 112 self._postponed = []113115 """ 116 If `id` identifies an imported element, function is invoked with `id` as 117 its argument. 118 119 If `id` is a plain identifier, it is checked whether `self.package` has 120 such an element. If so, function is invoked with that element as its 121 argument; else, its execution is postponed. 122 123 This is useful because some elements in the serialization may refer to 124 other elements that are defined further. 125 126 If function2 is provided and the invocation is postponed, then it will 127 be function2 rather than function that will be invoked. 
128 """ 129 colon = id.find(":") 130 if colon > 0: 131 elt = id 132 do_it_now = self.package.get(id[:colon]) is not None 133 else: 134 elt = self.package.get(id) 135 do_it_now = elt is not None 136 if do_it_now: 137 try_enter_no_event_section(elt, function) 138 try: 139 function(elt) 140 finally: 141 try_exit_no_event_section(elt, function) 142 else: 143 self._postponed.append((function2 or function, id))144146 items_name = kw.pop("items_name", None) 147 if items_name is None: 148 items_name = tag[:-1] # remove terminal 's' 149 stream = self.stream 150 151 stream.forward() 152 elem = stream.elem 153 if stream.event == "start" \ 154 and elem.tag == self.tag_template % tag: 155 self.sequence(items_name, *args, **kw) 156 self._check_end(elem) 157 else: 158 stream.pushback()159161 """ 162 This method may be overridden by application model parsers having a 163 syntax similar to the generic advene format - like the cinelab parser. 164 """ 165 self.optional_sequence("imports") 166 self.optional_sequence("tags") 167 self.optional_sequence("medias") 168 self.optional_sequence("resources") 169 self.optional_sequence("annotations") 170 self.optional_sequence("relations") 171 self.optional_sequence("views") 172 self.optional_sequence("queries", items_name="query") 173 self.optional_sequence("lists") 174 self.optional_sequence("external-tag-associations", 175 items_name="association")176178 """ 179 Subclasses should normally not override this method, but rather 180 `manage_package_subelements`. 
181 """ 182 pa = self.package 183 namespaces = "\n".join([ " ".join(el) 184 for el in self.ns_stack if el[0] ]) 185 if namespaces: 186 pa.set_meta(PARSER_META_PREFIX+"namespaces", namespaces) 187 uri = self.current.get("uri") 188 if uri is not None: 189 pa.uri = uri 190 self.optional("meta", pa) 191 self.manage_package_subelements() 192 for f, id in self._postponed: 193 if id.find(":") > 0: # imported 194 f(id) 195 else: 196 elt = self.package.get(id) 197 try_enter_no_event_section(elt, f) 198 try: 199 f(elt) 200 finally: 201 try_exit_no_event_section(elt, f)202204 id = self.get_attribute("id") 205 url = self.get_attribute("url") 206 uri = self.get_attribute("uri", "") 207 elt = self.package._create_import_in_parser(id, url, uri) 208 elt.enter_no_event_section() 209 try: 210 self.optional_sequence("tags", element=elt) 211 self.optional("meta", elt) 212 finally: 213 elt.exit_no_event_section()214216 if element is None: 217 # tag definition in package 218 id = self.get_attribute("id") 219 elt = self.package.create_tag(id) 220 elt.enter_no_event_section() 221 try: 222 self.optional_sequence( 223 "imported-elements", items_name="element", advene_tag=elt) 224 self.optional_sequence("tags", element=elt) 225 self.optional("meta", elt) 226 finally: 227 elt.exit_no_event_section() 228 else: 229 # tag association in element 230 id = self.get_attribute("id-ref") 231 self.do_or_postpone(id, 232 partial(self.package.associate_tag, element))233235 id = self.get_attribute("id") 236 url = self.get_attribute("url") 237 foref = self.get_attribute("frame-of-reference") 238 elt = self.package.create_media(id, url, foref) 239 elt.enter_no_event_section() 240 try: 241 self.optional_sequence("tags", element=elt) 242 self.optional("meta", elt) 243 finally: 244 elt.exit_no_event_section()245247 id = self.get_attribute("id") 248 elt = self.required("content", self.package.create_resource, id) 249 elt.enter_no_event_section() 250 try: 251 self.optional_sequence("tags", element=elt) 252 
self.optional("meta", elt) 253 finally: 254 elt.exit_no_event_section()255257 id = self.get_attribute("id") 258 media = self.get_attribute("media") 259 if media.find(":") <= 0: # same package 260 media = self.package.get(media) 261 if media is None: 262 raise ParserError("unknown media %s" % self.get_attribute("media")) 263 begin = self.get_attribute("begin") 264 try: 265 begin = int(begin) 266 except ValueError: 267 raise ParserError("wrong begin value for %s" % id) 268 end = self.get_attribute("end") 269 try: 270 end = int(end) 271 except ValueError: 272 raise ParserError("wrong end value for %s" % id) 273 if end < begin: 274 raise ParserError("end is before begin in %s" % id) 275 elt = self.required("content", self.package.create_annotation, 276 id, media, begin, end) 277 elt.enter_no_event_section() 278 try: 279 self.optional_sequence("tags", element=elt) 280 self.optional("meta", elt) 281 finally: 282 elt.exit_no_event_section()283285 id = self.get_attribute("id") 286 elt = self.package.create_relation(id, "x-advene/none") 287 def update_content_info(mimetype, model, url): 288 elt.content_mimetype = mimetype 289 elt.content_model = model 290 elt.content_url = url 291 return elt292 elt.enter_no_event_section() 293 try: 294 self.optional_sequence("members", elt) 295 self.optional("content", update_content_info) 296 self.optional_sequence("tags", element=elt) 297 self.optional("meta", elt) 298 finally: 299 elt.exit_no_event_section()302 id = self.get_attribute("id") 303 elt = self.required("content", self.package.create_view, id) 304 elt.enter_no_event_section() 305 try: 306 self.optional_sequence("tags", element=elt) 307 self.optional("meta", elt) 308 finally: 309 elt.exit_no_event_section()310312 id = self.get_attribute("id") 313 elt = self.required("content", self.package.create_query, id) 314 elt.enter_no_event_section() 315 try: 316 self.optional_sequence("tags", element=elt) 317 self.optional("meta", elt) 318 finally: 319 elt.exit_no_event_section()320322 
id = self.get_attribute("id") 323 elt = self.package.create_list(id) 324 elt.enter_no_event_section() 325 try: 326 self.optional_sequence("items", elt, [0]) 327 self.optional_sequence("tags", element=elt) 328 self.optional("meta", elt) 329 finally: 330 elt.exit_no_event_section()331 332 # utility methods 333335 elem = self.complete_current() 336 for child in elem: 337 key = child.tag 338 if key.startswith("{"): 339 cut = key.find("}") 340 key = key[1:cut] + key[cut+1:] 341 if len(child): 342 raise ParserError("Unexpected sub-element in metadata %s" % 343 key) 344 val = child.get("id-ref") 345 if val is None: 346 text = child.text or "" # because child.text could be None 347 obj.enter_no_event_section() 348 try: 349 obj.set_meta(key, text, False) 350 finally: 351 obj.exit_no_event_section() 352 elif val.find(":") > 0: # imported 353 obj.enter_no_event_section() 354 try: 355 obj.set_meta(key, val, True) 356 finally: 357 obj.exit_no_event_section() 358 else: 359 self.do_or_postpone(val, partial(obj.set_meta, key))360362 mimetype = self.get_attribute("mimetype") 363 url = self.get_attribute("url", "") 364 model = self.get_attribute("model", "") 365 encoding = self.get_attribute("encoding", "") 366 elt = creation_method(*args + (mimetype, "", url)) 367 self.do_or_postpone(model, elt._set_content_model) 368 elem = self.complete_current() 369 if len(elem): 370 raise ParserError("no XML tag allowed in content; use <tag>") 371 data = elem.text 372 if url and data and data.strip(): 373 raise ParserError("content can not have both url (%s) and data" % 374 url) 375 elif data: 376 if encoding: 377 if encoding == "base64": 378 data = base64.decodestring(data) 379 else: 380 raise ParserError("encoding %s is not supported", encoding) 381 elt.enter_no_event_section() 382 try: 383 elt.content_data = data 384 finally: 385 elt.exit_no_event_section() 386 return elt387389 a = self.get_attribute("id-ref") 390 if ":" not in a: 391 a = self.package.get(a) 392 relation.append(a)393395 # c 
is a 1-item list containing the virtual length of the list, 396 # i.e. the length taking into account the postponed elements 397 # it is used to insert postponed elements at the right index 398 id = self.get_attribute("id-ref") 399 self.do_or_postpone(id, lst.append, partial(lst.insert, c[0])) 400 c[0] += 1401403 id = self.get_attribute("id-ref") 404 # should only be imported, so no check 405 self.package.associate_tag(id, advene_tag)406408 elt_id = self.get_attribute("element") 409 tag_id = self.get_attribute("tag") 410 # both tag and element should be imported, so no check 411 self.package.associate_tag(elt_id, tag_id)412 419 426 427 # 428
Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0.1 on Wed Jul 8 16:00:05 2009 | http://epydoc.sourceforge.net |