1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """
24 Shipyard is a module to process data in a format inspired by
25 email headers (RFC 2822). It's called I{Shipyard} because that word
26 contains "py" and doesn't seem to be taken yet.
27
28 Format:
29 =======
30
31 A *data set* consists of zero or more *records* separated by one
32 or more empty lines.
33
34 Lines starting with the *comment mark* (default: ``#``) are ignored.
35
36 A *record* consists of one or more *fields*.
37
38 A *field* is a line that has the form::
39
40 key: value
41
42 *key* is a string that
43
44 - doesn't contain a colon
45 - doesn't start with the *comment mark* (see above)
46 - doesn't start with the *continuation mark* (see below)
47
48 *value* is an arbitrary string
49
50 If a line starts with the *continuation mark* (default: ``" "``
51 (one blank)) it gets appended to the preceding line, with the
52 *continuation mark* removed (the newline is kept)
53
54
55 Example:
56 ========
57
58
59 The navigation menu on the `Shipyard homepage
60 <http://www.florian-diesch.de/software/shipyard/>`__ is created from a
61 shipyard file ``files.sy`` that contains the following lines::
62
63 ID: shipyard
64 Section: /software/shipyard
65 Path: software/shipyard
66 Short: Shipyard
67 Desc: Shipyard is a Python module to process data in a format
68 inspired by email headers (RFC 2822).
69
70 ID: firkin
71 Section: /software/firkin
72 Path: software/firkin
73 Short: firkin
74 Desc: Firkin is a Python module to convert between units
75
76 #ID: loci
77 #Section: /software/loci
78 #Path: software/loci
79 #Short: Loci
80
81 ``loci`` isn't ready for release yet so its data lines are commented out.
82
83 First we open the file:
84
85 >>> input=open(path, "rb")
86
87 Then we create a parser object:
88
89 >>> reader=shipyard.Parser(keys=['ID', 'Section', 'Path','Short',
90 'Long', 'Desc'])
91
92 For every record the given C{keys} are initialized with C{None}.
93
94 To get some fancy output we create a pretty printer:
95
96 >>> import pprint
97 >>> pp = pprint.PrettyPrinter(indent=4)
98
99
100 Now we can iterate through the records:
101
102 >>> for record in reader.parse(input):
103 ... pp.pprint(record)
104 { 'Desc': 'Shipyard is a Python module to process data in a format'
105 'inspired by email headers (RFC 2822).',
106 'ID': 'shipyard',
107 'Long': None,
108 'Path': 'software/shipyard',
109 'Section': '/software/shipyard',
110 'Short': 'Shipyard'}
111 { 'Desc': 'Firkin is a Python module to convert between units',
112 'ID': 'firkin',
113 'Long': None,
114 'Path': 'software/firkin',
115 'Section': '/software/firkin',
116 'Short': 'firkin'}
117
118 We don't need the file anymore so we close it:
119
120 >>> input.close()
121
122
123 """
124
125 import re
126
127
129 """
130 Something is wrong with a line
131
132 :see: `Parser.parse()`
133 """
134 pass
135
137 """
138 Something is wrong with a key
139
140 :see: `Parser.parse()`
141 """
142 pass
143
144
146 """
147 Reader for Shipyard files
148 """
def __init__(self, keys=None, defaults=None,
             keep_linebreaks=True,
             comment='#', continuation=' ', encode=True):
    """
    Store the parser configuration.

    :Parameters:
      keys : list of strings
        keys this parser accepts; stored as a set, or as an empty
        tuple when ``None`` is given
      defaults : dict
        default values for records
      keep_linebreaks : bool
        if True linebreaks in continuation lines are kept
      comment : string
        mark that starts a comment line
      continuation : string
        mark that starts a continuation line
      encode : bool
        True if coding marks should be evaluated
    """
    self.defaults = defaults
    # An empty tuple is falsy, so "no keys given" can be tested with a
    # plain truth check on self.keys.
    self.keys = () if keys is None else set(keys)
    self.keep_linebreaks = keep_linebreaks
    self.comment = comment
    self.continuation = continuation
    self.encode = encode
178
180 """
181 Add missing keys to a record
182
183 :Parameters:
184 record : dict
185 record to fill
186 """
187 if self.defaults is None:
188 func=lambda key: None
189 else:
190 func=lambda key: self.defaults.get(key, None)
191 if record:
192 for k in self.keys:
193 if not k in record:
194 record[k] = func(k)
195 return record
196
197
def parse(self, inpt, factory=None):
    """
    Generator that yields the records found in ``inpt`` one by one

    A record is finished by a blank line or by the end of the input.
    Before it is yielded it is passed through ``self.fill_record()``;
    records that are still empty after that are not yielded.

    If ``self.encode`` is true and the first line is a comment
    containing a coding mark (``coding: NAME`` or ``coding=NAME``),
    all following byte-string lines are decoded with that coding.

    :Parameters:
      inpt : iterable containing strings (e.g. a file)
        input to parse
      factory : callable to create the return values.
        If factory is not None, for every record the result of
        ``factory(**record)`` is yielded instead of the record dict

    :Exceptions:
      - `InvalidLineError`: if a continuation mark without a
        previous data line is found
      - `InvalidLineError`: if a data line without a ':' is found
      - `InvalidKeyError`: if in `__init__()` ``keys`` is given
        and a key is found that is not in keys

    Line numbers in the error messages count from 0.

    :see: Parser.get_list()
    """
    record = {}
    key = None
    coding = None
    is_first_line = True
    mark_length = len(self.continuation)
    # Text put between a field value and each of its continuation lines.
    glue = '\n' if self.keep_linebreaks else ''

    for num, line in enumerate(inpt):
        # Only byte strings can be decoded; text lines (e.g. from a file
        # opened in text mode) are already decoded and pass through.
        if coding is not None and isinstance(line, bytes):
            line = line.decode(coding)

        if line.strip() == '':
            # A blank line ends the current record.
            record = self.fill_record(record)
            if record:
                yield record if factory is None else factory(**record)
            record = {}
            key = None
        elif line.startswith(self.comment):
            # Comments are dropped; only one on the very first line is
            # searched for a coding mark.
            if self.encode and is_first_line:
                match = re.search(r"coding[:=]\s*([-\w.]+)", line)
                if match:
                    coding = match.group(1)
        elif line.startswith(self.continuation):
            if key is None:
                # Nothing to continue: no field seen yet in this record.
                raise InvalidLineError('Invalid line %s'%num)
            record[key] += glue + line[mark_length:].rstrip('\n')
        else:
            try:
                key, value = line.split(':', 1)
            except ValueError as e:
                # No ':' in the line, so it cannot be unpacked into
                # key and value.
                raise InvalidLineError('Invalid line %s: %s'%(num, e))
            key = key.strip()
            if self.keys and key not in self.keys:
                raise InvalidKeyError('Invalid key "%s" on line %s'%
                                      (key, num))
            record[key] = value.lstrip().rstrip('\n')
        is_first_line = False

    # The input may end without a trailing blank line.
    record = self.fill_record(record)
    if record:
        yield record if factory is None else factory(**record)
269
270
def get_list(self, inpt, factory=None):
    """
    Collect everything `parse()` yields into a list

    :Parameters:
      inpt : iterable containing strings (e.g. a file)
        input to parse
      factory : callable to create the return values.
        If factory is not None, for every record the result of
        ``factory(**record)`` is put into the list

    :Exceptions:
      - `InvalidLineError`: if a continuation mark without a
        previous data line is found
      - `InvalidLineError`: if a data line without a ':' is found
      - `InvalidKeyError`: if in `__init__()` ``keys`` is given
        and a key is found that is not in keys

    :see: Parser.parse()
    """
    # Exhaust the generator in one go; order of records is kept.
    return list(self.parse(inpt, factory))
295
296
def get_dict(self, inpt, key, factory=None):
    """
    Build a dict that maps one field of every record to the record

    Records are read with `parse()`. If several records share the
    same value in field ``key`` the last one wins. A record without
    field ``key`` raises ``KeyError``.

    :Parameters:
      inpt : iterable containing strings (e.g. a file)
        input to parse
      key : string
        name of the field used as key for the result dict
      factory : callable to create the dict values.
        If factory is not None, for every record the result of
        ``factory(**record)`` is stored instead of the record dict

    :Exceptions:
      - `InvalidLineError`: if a continuation mark without a
        previous data line is found
      - `InvalidLineError`: if a data line without a ':' is found
      - `InvalidKeyError`: if in `__init__()` ``keys`` is given
        and a key is found that is not in keys

    :see: Parser.parse()
    """
    index = {}
    # parse() is asked for plain dicts; the factory is applied here
    # because the raw record is still needed to look up ``key``.
    for rec in self.parse(inpt):
        index[rec[key]] = rec if factory is None else factory(**rec)
    return index
328
329
330
331
332
334 """
335 Writer for Shipyard files
336 """
337
def __init__(self, keys=None, comment='#', continuation=' ', coding=None,
             ignore_values=None):
    """
    Store the writer configuration.

    :Parameters:
      keys : list of strings
        list of keys this writer accepts
      comment : string
        mark that starts a comment line
      continuation : string
        mark that starts a continuation line
      coding : string or ``None``
        name of the coding, stored for `write()` and `write_coding()`
      ignore_values : set or ``None``
        fields containing one of these values aren't written;
        ``None`` stands for an empty set
    """
    self.keys = keys
    self.comment = comment
    self.continuation = continuation
    self.coding = coding
    # A fresh set per instance; a caller-supplied container is kept as is.
    self.ignore_values = set() if ignore_values is None else ignore_values
361
def write(self, output, record):
    """
    Write a record, followed by an empty line

    If in `__init__()` ``keys`` is given only those fields are
    written, in that order; otherwise all fields of the record are
    written. Fields whose value is in ``ignore_values`` are skipped.
    Line breaks inside a value become continuation lines.

    A selected key that is missing from ``record`` raises ``KeyError``.

    :Parameters:
      output : needs a method ``write`` that takes a string parameter
        output to write to
      record : ``dict``
        record to write
    """
    keys = self.keys if self.keys else record.keys()
    line_break = '\n' + self.continuation

    for key in keys:
        raw = record[key]
        if raw in self.ignore_values:
            continue

        value = None
        if self.coding is not None:
            try:
                value = raw.encode(self.coding)
            except (AttributeError, TypeError, LookupError, ValueError):
                # Not a string, unknown coding, or not encodable:
                # fall back to the plain string form below.
                value = None
        # Where encode() yields bytes rather than str (Python 3) the
        # result cannot be mixed into the text line; the unencoded text
        # is written and encoding is left to ``output``.
        if not isinstance(value, str):
            value = str(raw)

        value = line_break.join(value.splitlines())
        output.write('%s: %s\n'%(key, value))

    output.write('\n')
392
394 """
395 Write a list of records
396
397 If in `__init__()` ``keys`` is given only those values are written
398
399 :Parameters:
400 output : needs a method ``write`` that takes a string parameter
401 output to write to
402 records : list of dicts
403 records to write
404 """
405 for rec in records:
406 self.write(output, rec)
407
423
def write_coding(self, output, template='#-*- coding: %s -*-\n'):
    """
    Emit the coding line for the coding given in __init__().

    Nothing is written when no coding was given.

    :Parameters:
      output : needs a method ``write`` that takes a string parameter
        output to write to
      template : string
        template string for the coding line; the coding name is
        inserted with the ``%`` operator
    """
    if not self.coding:
        return
    output.write(template % self.coding)
438