1 """I provide base classes for XML parsers.
2
3 TODO: document the provided classes.
4 """
5 from xml.etree.ElementTree import iterparse
6
7 from advene.model.consts import _RAISE
8 from advene.model.parsers.exceptions import ParserError
11 """
12 TODO write a better documentation
13
14 The idea is that subclasses define ``handle_X`` methods where X is the
15 unqualified tag.
16
17 Property `package` holds the package to parse into.
18
19 DEPRECATED: Property `backend` and `package_id` are useful to feed the
20 package's backend.
21
22 Property `current` always points to the current element (with the
23 ElementTree API). Note that the element will have its attribute, but not
24 its text nor its sub-elements. To wait for an element to be completely
25 constructed, invoke method `complete_current`. However, to parse
26 subelements, you may prefer to use methods `required`, `optional` and
27 `sequence`, that will check the structure of subelements, then invoke the
28 corresponding `handle_X` methods. Note that `required` and `optional` will
29 return the value returned by `handle_X` (`optional` returns None if the
30 element is not found).
31
Method `get_attribute` is a shortcut for ``current.get(k[, d])`` but will
raise a `ParserError` with the appropriate message if the attribute is
missing and no default value is provided.
35
36 Property `ns_stack` is a list of (prefix, uri) pairs used as a stack for
37 namespaces.
38
39 For advanced use, property `stream` holds the underlying `Stream` instance.
40
41 See `advene.model.parsers.advene_xml` for an example.
42 """
43
def __init__(self, file_, package, namespace_uri, root):
    """Record the parsing parameters and derive the tag helpers.

    :param file_: the input to parse; stored unchanged as ``self.file``.
    :param package: the package to parse into.
    :param namespace_uri: namespace of the handled elements; it is used to
        build ``tag_template`` and ``cut``.
    :param root: stored unchanged as ``self.root`` (presumably the expected
        root tag -- TODO confirm against the callers).
    """
    self.file = file_
    self.package = package
    self.namespace_uri = namespace_uri
    # "{uri}%s": fill in a local name to obtain the qualified tag
    # (Clark notation, as reported by ElementTree).
    self.tag_template = "{%s}" % namespace_uri + "%s"
    self.root = root
    # NOTE(review): consumers of this flag are not visible in this listing.
    self.clear_after_handle = True
    # Number of times `complete_current` has been invoked.
    self._completed = 0
    # Length of the "{uri}" prefix, i.e. where the local name starts.
    self.cut = 2 + len(namespace_uri)
53
@property
def current(self):
    """The element the underlying stream is currently positioned on."""
    # NOTE(review): the ``def`` line is missing from this listing; the name
    # ``current`` comes from the class documentation -- confirm.
    active_stream = self.stream
    return active_stream.elem
57
@property
def ns_stack(self):
    """The list of (prefix, uri) pairs maintained by the underlying stream."""
    # NOTE(review): the ``def`` line is missing from this listing; the name
    # ``ns_stack`` comes from the class documentation -- confirm.
    active_stream = self.stream
    return active_stream.namespaces
61
70
85
96
108
def sequence(self, tag, *args, **kw):
    """Handle zero or more consecutive sub-elements named `tag`.

    NB: this method allows an *empty* sequence. If you want a sequence with
    at least one element, use the following pattern::

        required(mytag)
        sequence(mytag)

    :param tag: unqualified tag name; it is qualified with ``tag_template``.
    :param args: positional arguments forwarded to the handler.
    :param kw: keyword arguments forwarded to the handler.
    """
    # NOTE(review): the ``def`` line is missing from this listing; the
    # signature is reconstructed from the names used below -- confirm.
    source = self.stream
    qualified = self.tag_template % tag
    source.forward()
    while True:
        if source.event != "start":
            break
        opened = source.elem
        if opened.tag != qualified:
            break
        self._handle(args, kw)
        self._check_end(opened)
        source.forward()
    # The item that ended the sequence belongs to the caller: un-read it.
    source.pushback()
127
def complete_current(self):
    """Consume the stream until the current element is completely built.

    The stream is advanced up to the "end" event of the element that was
    current when the method was called; that element is then returned with
    its text and sub-elements populated.

    The closing event is recognised by element *identity* rather than by
    tag name, so that a nested element bearing the same tag as the current
    one does not terminate the wait prematurely.

    :return: the completed element (or the last item read if the stream
        ran out before the element was closed).
    :raise AttributeError: if the stream has no current element.
    """
    # NOTE(review): the ``def`` line is missing from this listing; the name
    # ``complete_current`` comes from the class documentation -- confirm.
    self._completed += 1
    stream = self.stream
    target = stream.elem
    if target is None:
        # Keeps the exception type of the former ``stream.elem.tag`` access.
        raise AttributeError("no current element to complete")
    elem = target
    # Iterating the stream yields the current item first, then moves on.
    for event, elem in stream:
        if event == "end" and elem is target:
            break
    return elem
135
def _handle(self, args, kw):
    """Dispatch the current element to the matching ``handle_X`` method.

    ``X`` is the unqualified tag of the current element with every hyphen
    replaced by an underscore (e.g. ``<foo-bar-baz>`` is dispatched to
    ``handle_foo_bar_baz``).

    :param args: sequence of positional arguments for the handler.
    :param kw: mapping of keyword arguments for the handler.
    :return: whatever the handler returns.
    :raise NotImplementedError: if no handler exists for the tag.
    """
    # NOTE(review): the ``def`` line is missing from this listing; the
    # signature is reconstructed from the ``self._handle(args, kw)`` call
    # site -- confirm.
    elem = self.stream.elem
    assert elem.tag.startswith("{%s}" % self.namespace_uri)
    # Drop the "{namespace}" prefix to keep the local name only.
    local_name = elem.tag[self.cut:]
    # A single replace() call handles any number of hyphens; the previous
    # index-based loop never terminated on tags with two or more of them.
    handler = getattr(self, "handle_%s" % local_name.replace("-", "_"), None)
    if handler is None:
        raise NotImplementedError(
            "don't know what to do with tag %s" % local_name)
    return handler(*args, **kw)
149
169
172 """
173 Wrap the result of iterparse:
174 * start-ns and end-ns are interpreted, (prefix, uri) pairs being pushed
175 and popped accordingly in attribute `namespaces`
* start and end events are accessible through the `event` and `elem`
  properties
177
Unlike iterators, a `Stream` has a notion of "current" item (accessible
through `event` and `elem`). To access the next element, the `forward`
method must be explicitly invoked. If it reaches the end, `event` and
`elem` will be None.
182
183 Note that a `Stream` is also iterable. The first yielded item will be the
184 current item. If the iteration is interrupted, the current item will be
185 the last yielded item.
186 """
def __init__(self, filelike):
    """Start parsing `filelike` and position the stream on its first item.

    :param filelike: the source handed to ``iterparse``; when it has a
        ``seek`` attribute it is rewound to its beginning first.
    """
    # NOTE(review): the ``def`` line is missing from this listing; the
    # signature is reconstructed from the names used below -- confirm.
    can_rewind = hasattr(filelike, "seek")
    if can_rewind:
        filelike.seek(0)
    watched_events = ("start", "end", "start-ns", "end-ns",)
    self._it = iterparse(filelike, events=watched_events)
    # Stack of (prefix, uri) pairs currently in scope.
    self.namespaces = []
    # Current item.
    self._event = self._elem = None
    # Item before the current one / item pushed back, as (event, elem).
    self._prev = self._next = None
    # Load the first start/end event so the stream has a current item.
    self.forward()
199
200 @property
203
204 @property
207
def forward(self):
    """Move to the next start/end item of the stream.

    Namespace events met on the way are not exposed as items: "start-ns"
    pushes its (prefix, uri) pair on `namespaces` and "end-ns" pops the
    last one.

    An item previously pushed back is replayed before anything new is read
    from the parser, even when the parser is already exhausted.

    :return: True if the stream is now positioned on an item, False if the
        end has been reached (the current event and element are then None).
    """
    # NOTE(review): the ``def`` line is missing from this listing; the
    # signature is reconstructed from the ``forward()`` call sites --
    # confirm.
    if self._next is not None:
        # Replay the pushed-back item. This must come before the
        # exhausted-iterator check, otherwise a pushback performed at the
        # very end of the stream could never be undone.
        self._prev = self._event, self._elem
        self._event, self._elem = self._next
        self._next = None
        return self._event is not None
    if self._it is None:
        return False
    namespaces = self.namespaces
    try:
        while True:
            # Builtin next() works on Python 2.6+ and 3; the iterator's
            # .next() method only exists on Python 2.
            ev, el = next(self._it)
            if ev == "start-ns":
                namespaces.append(el)
            elif ev == "end-ns":
                namespaces.pop()
            else:
                break
    except StopIteration:
        ev, el = None, None
        self._it = None
    self._prev = self._event, self._elem
    self._event = ev
    self._elem = el
    return ev is not None
233
def pushback(self):
    """Un-read the current item, making the previous one current again.

    The item that was current is kept aside and will be served again by
    the next call to `forward`.

    Note that no more than one item can be pushed back.

    Limitation: this does not change `namespaces` accordingly!

    :raise ValueError: if there is no previous item to go back to.
    :raise Exception: if an item is already pushed back.
    """
    # NOTE(review): the ``def`` line is missing from this listing; the
    # signature is reconstructed from the ``pushback()`` call site --
    # confirm.
    previous = self._prev
    if previous is None or previous[0] is None:
        raise ValueError("nothing to pushback")
    if self._next is not None:
        raise Exception("can only pushback one step")
    self._next = (self._event, self._elem)
    self._event, self._elem = previous
248
def __iter__(self):
    """Yield (event, elem) pairs, starting with the current item.

    The stream only moves forward when the iteration is resumed, so if the
    iteration is interrupted the current item is the last yielded one.
    """
    # NOTE(review): the ``def`` line is missing from this listing; the name
    # ``__iter__`` is inferred from the class documentation -- confirm.
    while True:
        if self._event is None:
            return
        yield (self._event, self._elem)
        self.forward()
253