Module apesmit
[hide private]
[frames | no frames]

Source Code for Module apesmit

  1  #!/usr/bin/env python 
  2  #-*- coding: utf-8 -*- 
  3   
  4  ##     ApeSmit - A simple Python module to create XML sitemaps 
  5  ##                     <http://www.florian-diesch.de/software/apesmit/> 
  6  ##     Copyright (C) 2008  Florian Diesch <devel@florian-diesch.de> 
  7   
  8  ##     This program is free software; you can redistribute it and/or modify 
  9  ##     it under the terms of the GNU General Public License as published by 
 10  ##     the Free Software Foundation; either version 2 of the License, or 
 11  ##     (at your option) any later version. 
 12   
 13  ##     This program is distributed in the hope that it will be useful, 
 14  ##     but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  ##     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  ##     GNU General Public License for more details. 
 17   
 18  ##     You should have received a copy of the GNU General Public License along 
 19  ##     with this program; if not, write to the Free Software Foundation, Inc., 
 20  ##     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 21   
 22   
 23  import datetime, codecs 
 24   
 25  FREQ=set((None, 'always', 'hourly', 'daily', 'weekly', 'monthly', 
 26            'yearly', 'never'))  #: values for changefreq 
 27   
 28        
# Text type used to normalise attribute values: ``unicode`` on Python 2,
# ``str`` on Python 3 (where ``unicode`` no longer exists).
try:
    _text_type = unicode
except NameError:
    _text_type = str


class Url(object):
    """
    Class to handle a URL in `Sitemap`
    """

    def __init__(self, loc, lastmod, changefreq, priority, escape=True):
        """
        Constructor

        :Parameters:
          loc : string
            Location (URL). See http://www.sitemaps.org/protocol.php#locdef
          lastmod : ``datetime.date`` or ``string``
            Date of last modification.
            See http://www.sitemaps.org/protocol.php#lastmoddef
            The string ``today`` is replaced by today's date
          changefreq : One of the values in `FREQ`
            Expected frequency for changes.
            See http://www.sitemaps.org/protocol.php#changefreqdef
          priority : ``float`` or ``string``
            Priority of this URL relative to other URLs on your site.
            See http://www.sitemaps.org/protocol.php#prioritydef
          escape
            True if escaping for XML special characters should be done.
            See http://www.sitemaps.org/protocol.php#escaping

        :raise ValueError: if ``changefreq`` is not one of the values in `FREQ`
        """
        if escape:
            self.loc = self.escape(loc)
        else:
            self.loc = loc

        if lastmod == 'today':
            lastmod = datetime.date.today().isoformat()
        if changefreq not in FREQ:
            raise ValueError("Invalid changefreq value: '%s'" % changefreq)

        # Every optional field is stored either as text or as ``None``.
        self.lastmod = self._as_text(lastmod)
        self.changefreq = self._as_text(changefreq)
        self.priority = self._as_text(priority)

        # NOTE(review): not read anywhere in this class; kept so that existing
        # code accessing ``Url.urls`` keeps working.
        self.urls = []

    @staticmethod
    def _as_text(value):
        """Return ``value`` converted to text, or ``None`` if it is ``None``."""
        if value is None:
            return None
        return _text_type(value)

    def escape(self, s):
        """
        Escaping XML special characters

        :Parameters:
          s
            String to escape
        :return: Escaped string
        """
        # '&' must be replaced first, otherwise the ampersands introduced by
        # the other entities would be escaped a second time.
        s = s.replace('&', '&amp;')
        s = s.replace("'", '&apos;')
        s = s.replace('"', '&quot;')
        s = s.replace('>', '&gt;')
        s = s.replace('<', '&lt;')
        return s
91
# Types treated as a file name by `Sitemap.write()`: ``basestring`` on
# Python 2, ``str``/``bytes`` on Python 3 (where ``basestring`` is gone).
try:
    _string_types = basestring
except NameError:
    _string_types = (str, bytes)


def _to_text(value):
    """Return ``value`` as text, decoding it as UTF-8 if it is a byte string."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


class Sitemap(object):
    """
    Class to manage a sitemap
    """

    def __init__(self, lastmod=None, changefreq=None, priority=None):
        """
        Constructor

        :Parameters:
          lastmod
            Default value for `lastmod`. See `Url.__init__()`.
          changefreq
            Default value for `changefreq`. See `Url.__init__()`.
          priority
            Default value for `priority`. See `Url.__init__()`.
        """
        self.lastmod = lastmod
        self.changefreq = changefreq
        self.priority = priority
        self.urls = []

    def add(self, loc, lastmod=None, changefreq=None, priority=None, escape=True):
        """
        Add a new URL. Parameters are the same as in `Url.__init__()`.
        If ``lastmod``, ``changefreq`` or ``priority`` is ``None`` the default
        value is used (see `__init__()`)
        """
        if lastmod is None:
            lastmod = self.lastmod
        if changefreq is None:
            changefreq = self.changefreq
        if priority is None:
            priority = self.priority
        self.urls.append(Url(loc, lastmod, changefreq, priority, escape))

    def write(self, out):
        """
        Write sitemap to ``out``

        If ``out`` is a file name the file is created with UTF-8 encoding and
        closed again afterwards, even if writing fails.  If the file can not
        be opened a message is printed and nothing is written.  Any other
        object is written to as-is and left open.

        :Parameters:
          out
            file name or anything with a ``write()`` method
        """
        if not isinstance(out, _string_types):
            self._write_to(out)
            return

        try:
            output = codecs.open(out, 'w', 'utf-8')
        except Exception as e:
            # Best effort: report the problem and give up without raising.
            print("Can't open file '%s': %s" % (out, str(e)))
            return
        try:
            self._write_to(output)
        finally:
            # We opened the file, so we are responsible for closing it.
            output.close()

    def _write_to(self, output):
        """Write the complete ``<urlset>`` document to the stream ``output``."""
        output.write("<?xml version='1.0' encoding='UTF-8'?>\n"
                     '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n'
                     ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n'
                     ' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"\n'
                     ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')

        for url in self.urls:
            parts = [" <url>\n",
                     " <loc>%s</loc>\n" % _to_text(url.loc)]
            # Optional children are only emitted when a value is set.
            if url.lastmod is not None:
                parts.append(' <lastmod>%s</lastmod>\n' % _to_text(url.lastmod))
            if url.changefreq is not None:
                parts.append(' <changefreq>%s</changefreq>\n'
                             % _to_text(url.changefreq))
            if url.priority is not None:
                parts.append(' <priority>%s</priority>\n' % _to_text(url.priority))
            parts.append(" </url>\n")
            output.write(''.join(parts))

        output.write('</urlset>\n')
172