1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 import datetime, codecs
24
# Values accepted as the ``changefreq`` of a URL. ``None`` stands for "no
# change frequency given"; `Url.__init__()` raises ``ValueError`` for any
# value that is not a member of this set.
FREQ = set([
    None,
    'always',
    'hourly',
    'daily',
    'weekly',
    'monthly',
    'yearly',
    'never',
])
27
28
# Text type of the running interpreter: ``unicode`` on Python 2, ``str`` on
# Python 3 (which has no ``unicode`` builtin).
_text_type = type(u'')


def _optional_text(value):
    """Return ``value`` converted to text, or ``None`` if it is ``None``."""
    if value is None:
        return None
    return _text_type(value)


class Url(object):
    """
    Class to handle a URL in `Sitemap`

    An instance stores ``loc``, ``lastmod``, ``changefreq`` and ``priority``.
    ``loc`` is kept as given (escaped unless ``escape`` is false); the other
    three are stored as text, or as ``None`` when no value was given.
    """

    # Replacement table used by `escape()`. ``&`` has to be handled first,
    # otherwise the ampersands introduced by the other replacements would be
    # escaped a second time.
    _ENTITIES = (
        ('&', '&amp;'),
        ("'", '&apos;'),
        ('"', '&quot;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
    )

    def __init__(self, loc, lastmod, changefreq, priority, escape=True):
        """
        Constructor

        :Parameters:
          loc : string
            Location (URL). See http://www.sitemaps.org/protocol.php#locdef
          lastmod : ``datetime.date`` or ``string``
            Date of last modification.
            See http://www.sitemaps.org/protocol.php#lastmoddef
            The string ``today`` is replaced by today's date
          changefreq : One of the values in `FREQ`
            Expected frequency for changes.
            See http://www.sitemaps.org/protocol.php#changefreqdef
          priority : ``float`` or ``string``
            Priority of this URL relative to other URLs on your site.
            See http://www.sitemaps.org/protocol.php#prioritydef
          escape
            True if escaping for XML special characters should be done.
            See http://www.sitemaps.org/protocol.php#escaping
        :raise ValueError: if ``changefreq`` is not one of the values in
          `FREQ`
        """
        # The ``escape`` argument only shadows the method name locally;
        # ``self.escape`` still resolves to the method below.
        if escape:
            self.loc = self.escape(loc)
        else:
            self.loc = loc

        if lastmod == 'today':
            lastmod = datetime.date.today().isoformat()
        self.lastmod = _optional_text(lastmod)

        if changefreq not in FREQ:
            # One-element tuple: a tuple passed as ``changefreq`` must be
            # reported, not unpacked as several format arguments.
            raise ValueError("Invalid changefreq value: '%s'" % (changefreq,))
        self.changefreq = _optional_text(changefreq)

        self.priority = _optional_text(priority)

        # Not read by any code in the reviewed text; kept so that the set of
        # instance attributes stays the same for existing users.
        self.urls = []

    def escape(self, s):
        """
        Escaping XML special characters

        The characters ``&``, ``'``, ``"``, ``>`` and ``<`` are replaced by
        the entities ``&amp;``, ``&apos;``, ``&quot;``, ``&gt;`` and ``&lt;``.

        :Parameters:
          s
            String to escape
        :return: Escaped string
        """
        for char, entity in self._ENTITIES:
            s = s.replace(char, entity)
        return s
91
# Types that `Sitemap.write()` treats as a file name: byte and text strings
# (``str``/``unicode`` on Python 2, ``bytes``/``str`` on Python 3).
_PATH_TYPES = (type(b''), type(u''))


def _to_text(value):
    """Return ``value`` unchanged unless it is a byte string, which is
    decoded as UTF-8."""
    if isinstance(value, type(b'')):
        return value.decode('utf-8')
    return value


class Sitemap(object):
    """
    Class to manage a sitemap

    URLs are collected with `add()` in the ``urls`` list and serialised as
    an XML ``<urlset>`` document by `write()`.
    """

    # Opening of the document: XML declaration and ``<urlset>`` start tag.
    _XML_HEADER = (
        "<?xml version='1.0' encoding='UTF-8'?>\n"
        '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n'
        ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n'
        ' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"\n'
        ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
    )

    def __init__(self, lastmod=None, changefreq=None, priority=None):
        """
        Constructor

        :Parameters:
          lastmod
            Default value for `lastmod`. See `Url.__init__()`.
          changefreq
            Default value for `changefreq`. See `Url.__init__()`.
          priority
            Default value for `priority`. See `Url.__init__()`.
        """
        self.lastmod = lastmod
        self.changefreq = changefreq
        self.priority = priority
        self.urls = []

    def add(self, loc, lastmod=None, changefreq=None, priority=None,
            escape=True):
        """
        Add a new URL. Parameters are the same as in `Url.__init__()`.
        If ``lastmod``, ``changefreq`` or ``priority`` is ``None`` the default
        value is used (see `__init__()`). As a consequence ``None`` cannot be
        used to override a default that is not ``None``.
        """
        if lastmod is None:
            lastmod = self.lastmod
        if changefreq is None:
            changefreq = self.changefreq
        if priority is None:
            priority = self.priority
        self.urls.append(Url(loc, lastmod, changefreq, priority, escape))

    # NOTE(review): the ``def`` line of this method was missing from the
    # reviewed text; the name ``write`` is inferred from its docstring.
    # Confirm against the callers before merging.
    def write(self, out):
        """
        Write sitemap to ``out``

        If ``out`` is a file name the file is written UTF-8 encoded and is
        closed again, even when writing fails. If the file cannot be opened
        a message is printed and nothing is written. Any other object is
        written to through its ``write()`` method and is left open.

        :Parameters:
          out
            file name or anything with a ``write()`` method
        """
        if isinstance(out, _PATH_TYPES):
            try:
                output = codecs.open(out, 'w', 'utf-8')
            except (IOError, OSError) as e:
                # Best effort: report the problem and give up.
                print("Can't open file '%s': %s" % (out, e))
                return
        else:
            output = out

        try:
            output.write(self._XML_HEADER)
            for url in self.urls:
                lines = [" <url>\n", " <loc>%s</loc>\n" % _to_text(url.loc)]
                # Optional children, written only when a value is set.
                for tag in ('lastmod', 'changefreq', 'priority'):
                    value = getattr(url, tag)
                    if value is not None:
                        lines.append(
                            ' <%s>%s</%s>\n' % (tag, _to_text(value), tag))
                lines.append(" </url>\n")
                output.write(''.join(lines))
            output.write('</urlset>\n')
        finally:
            # Only close what was opened here, never the caller's object.
            if output is not out:
                output.close()
172