[Xml-logilab] vcalsax not fully vCal compliant

Sylvain Thénault Sylvain Thénault
Fri, 20 Sep 2002 22:00:50 +0200


--wac7ysb48OaltWcw
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: inline
Content-Transfer-Encoding: 8bit

On Friday 20 September à 18:26, Anders Bruun Olsen wrote:
> Hi,
> 
> I am considering using vcalsax to implement a backend for a
> calendarprogram I am writing in Python but have run into a small problem
> with vcalsax.
> 
> It chokes on multiline DESCRIPTION properties.
> An example:
> 
> DESCRIPTION;QUOTED-PRINTABLE:This is a =0A=
> multiline =0A=
> description!
> 
> This causes the following response:
> 
> >>> test = load_vcal_to_dom("vcalout.vcs")
> Warning : bad format
> Traceback (most recent call last):
>   File "<stdin>", line 1, in ?
>   File "/usr/lib/python2.2/site-packages/logilab/vcalsax/vcalsax.py", line 187, in load_vcal_to_dom
>     return reader.fromUri(uri)
>   File "/usr/lib/python2.2/site-packages/_xmlplus/dom/ext/reader/__init__.py", line 70, in fromUri
>     return self.fromStream(stream, ownerDoc)
>   File "/usr/lib/python2.2/site-packages/_xmlplus/dom/ext/reader/Sax2.py", line 372, in fromStream
>     self.parser.parse(s)
>   File "/usr/lib/python2.2/site-packages/logilab/vcalsax/vcalsax.py", line 97, in parse
>     qnames[(EMPTY_NAMESPACE,paramname)] = paramname
> UnboundLocalError: local variable 'paramname' referenced before assignment
> 
> I have looked at the vcalsax source but I am not much of a regexp
> wizard, so I have not been able to devise a solution.

Actually, you have shown two problems here: vcalsax doesn't currently
support multiline values, and there was a typo which caused the traceback.
I've attached to this mail a new version of vcalsax which fixes the typo.

Could you send your vcalsax file to help us resolve the multiline
problem?

-- 
Sylvain Thénault 

--wac7ysb48OaltWcw
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="vcalsax.py"

# Copyright (c) 2000 LOGILAB S.A. (Paris, FRANCE).
# http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

"""The VCalSax module defines a Sax parser for VCalendar files.
It offers conversions between VCalendar data and a VCalXml DOM
implementation.
"""

__revision__ = "$Id: vcalsax.py,v 1.10 2002/04/09 08:30:46 alf Exp $"

from xml.sax.saxutils import prepare_input_source
from xml.sax import saxlib
from xml.sax.xmlreader import AttributesImpl, AttributesNSImpl
from xml.dom import EMPTY_NAMESPACE
from xml.dom.ext.reader import Sax2
import re

# to_xml_string(str, encoding) re-encodes a byte string from `encoding` to
# UTF-8.  The implementation is chosen at import time: the codecs module is
# used when it can be imported, otherwise the PyXML wstring helper is used.
try:
    import codecs
    def to_xml_string(str,encoding):
        """Return the byte string `str`, given in `encoding`, as UTF-8 bytes.

        If `encoding` does not name a codec known to the codecs module,
        `str` is returned unchanged.
        """
        try:
            # codecs.lookup() returns (encoder, decoder, reader, writer);
            # both coder functions return (converted_data, length_consumed).
            decode = codecs.lookup(encoding)[1]
            encode = codecs.lookup('utf-8')[0]
            return encode(decode(str)[0])[0]
        except LookupError:
            return str
except ImportError:
    # Only reached on interpreters that lack the codecs module; string
    # methods may not exist there either, hence string.upper().
    from string import upper
    from xml.unicode.iso8859 import wstring
    def to_xml_string(str,encoding):
        """Return the byte string `str`, given in `encoding`, as UTF-8 bytes.

        Data already declared as UTF-8 is returned unchanged.
        """
        # Compare the `encoding` argument itself: this is a plain function,
        # so there is no instance state to consult.
        if upper(encoding) == 'UTF-8':
            return str
        else:
            return wstring.decode(encoding,str).utf8()

# Parser for VCalendar data
class VcfParser(saxlib.XMLReader) :
    """SAX reader that reports the lines of a vCalendar stream as SAX events.

    The input is read line by line:

    * ``BEGIN:<name>`` starts an element called <name>;
    * ``END:<name>`` ends it;
    * ``propname;param=value;...:propvalue`` becomes an element called
      propname whose attributes are the parameters and whose text content
      is propvalue.  A property whose name starts with ``X-`` is reported
      as an element called ``extension`` carrying the original name in an
      ``x-name`` attribute.

    Any other line is skipped.  In particular, continuation lines of a
    folded or quoted-printable multi-line value are NOT joined to the
    property they belong to: a continuation line without ':' is dropped.
    """

    # Line patterns, compiled once instead of on every input line.
    _BEGIN_RE = re.compile(r'^[ \t]*BEGIN:[ \t]*(\S*)')
    _END_RE = re.compile(r'^[ \t]*END:[ \t]*(\S*)')
    # VCF property 'propname;paramname=paramvalue:propvalue'.
    # The trailing '\r?$' keeps the CR of a CRLF line ending out of the
    # value (BEGIN/END already ignore it because '\S*' stops at whitespace).
    _PROP_RE = re.compile(r'^[ \t]*([^\s;:]*);?([^:]*):(.*?)\r?$')

    def __init__(self):
        """Create a reader with namespace processing turned off."""
        saxlib.XMLReader.__init__(self)
        self._handle_namespaces = 0

    def parse(self, source):
        """Read vCalendar data from `source` and fire the SAX events.

        `source` is either an object providing getByteStream() or a
        file-like object providing readline() and close().  The stream is
        always closed before returning, even when a handler raises.
        """
        try:
            stream = source.getByteStream()
        except AttributeError:
            # Not an input source: use the object itself as the stream.
            stream = source

        try:
            line = stream.readline()
            while line != '' :
                self._parse_line(line)
                line = stream.readline()
        finally:
            stream.close()

    def _parse_line(self, line):
        """Dispatch one input line to the matching event emitter."""
        # VCALENDAR, VEVENT or VTODO
        m = self._BEGIN_RE.match(line)
        if m :
            self._start_object(m.group(1))
            return
        m = self._END_RE.match(line)
        if m :
            self._end_object(m.group(1))
            return
        m = self._PROP_RE.match(line)
        if m :
            self._emit_property(m.group(1), m.group(2), m.group(3))
        # Lines matching none of the patterns are ignored.

    def _start_object(self, vobjname):
        """Report the start of a BEGIN:<vobjname> object."""
        if self._handle_namespaces:
            self._cont_handler.startElementNS((EMPTY_NAMESPACE,vobjname),vobjname,AttributesNSImpl({},{}))
        else:
            self._cont_handler.startElement(vobjname,AttributesImpl({}))

    def _end_object(self, vobjname):
        """Report the end of an END:<vobjname> object."""
        if self._handle_namespaces:
            self._cont_handler.endElementNS((EMPTY_NAMESPACE,vobjname),vobjname)
        else:
            self._cont_handler.endElement(vobjname)

    def _emit_property(self, propname, rawparams, propvalue):
        """Report one property line as an element with text content.

        `rawparams` is the text between the property name and the ':'
        (possibly empty); `propvalue` is the text after the ':'.
        """
        attrs = {}
        qnames = {}
        if rawparams != '' :
            for param in rawparams.split(';') :
                try:
                    # Split on the first '=' only, so that a parameter
                    # value may itself contain '='.
                    (paramname,paramvalue) = param.split('=', 1)
                except ValueError:
                    # No '=' at all, e.g. the bare 'QUOTED-PRINTABLE' form.
                    print("Warning : bad format")
                    paramname = "Unknown"
                    paramvalue = "Unkown"
                qnames[(EMPTY_NAMESPACE,paramname)] = paramname
                attrs[(EMPTY_NAMESPACE,paramname)] = paramvalue

        # 'X-' prefixed properties
        if propname.startswith('X-') :
            qnames[(EMPTY_NAMESPACE,'x-name')] = 'x-name'
            attrs[(EMPTY_NAMESPACE,'x-name')] = propname
            propname = 'extension'

        # The input bytes are treated as iso-8859-1 text.
        propvalue = unicode(to_xml_string(propvalue,'iso-8859-1'),'UTF-8')
        # NOTE(review): properties are always reported through the *NS
        # callbacks, even when the namespaces feature is off, whereas
        # BEGIN/END honour the flag -- confirm whether this is intended.
        self._cont_handler.startElementNS((EMPTY_NAMESPACE,propname),propname,AttributesNSImpl(attrs,qnames))
        self._cont_handler.characters(propvalue)
        self._cont_handler.endElementNS((EMPTY_NAMESPACE,propname),propname)

    def getFeature(self, name):
        """Return the state of SAX feature `name`.

        Raises SAXNotRecognizedException for any feature other than
        namespaces and namespace-prefixes.
        """
        if name == saxlib.feature_namespaces:
            return self._handle_namespaces
        elif name == saxlib.feature_namespace_prefixes:
            return 0
        raise saxlib.SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, value):
        """Set SAX feature `name`; only the namespaces feature is settable.

        Raises SAXNotRecognizedException for any other feature.
        """
        if name == saxlib.feature_namespaces:
            self._handle_namespaces = value
        else:
            raise saxlib.SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the lexical or declaration handler stored by setProperty().

        Raises SAXNotRecognizedException for any other property.
        """
        if name == saxlib.property_lexical_handler:
            return self._lex_handler
        elif name == saxlib.property_declaration_handler:
            return self._decl_handler
        raise saxlib.SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Store `value` as the lexical or declaration handler.

        Raises SAXNotRecognizedException for any other property.
        """
        if name == saxlib.property_lexical_handler:
            self._lex_handler = value
        elif name == saxlib.property_declaration_handler:
            self._decl_handler = value
        else:
            raise saxlib.SAXNotRecognizedException("Property '%s' not recognized" % name)

# XSLT stylesheet (text output, ISO-8859-1) used to turn a VCalXml DOM back
# into vCalendar text:
#  - VCALENDAR, VEVENT and VTODO elements are wrapped in BEGIN:/END: lines;
#  - every other element that has text content is written as one line
#    'name;attr=value...:text', where name is the element name, or the value
#    of its x-name attribute when the element is called 'extension';
#  - the x-name attribute itself is never written as a parameter.
xslt = '''<?xml version="1.0" encoding="ISO-8859-1"?>
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

<xsl:output method="text" encoding="ISO-8859-1"/>
<xsl:strip-space elements='*'/>

<xsl:template match='/VCALENDAR'>BEGIN:VCALENDAR
<xsl:apply-templates/>END:VCALENDAR
</xsl:template>

<xsl:template match='VEVENT'>BEGIN:VEVENT
<xsl:apply-templates/>END:VEVENT
</xsl:template>

<xsl:template match='VTODO'>BEGIN:VTODO
<xsl:apply-templates/>END:VTODO
</xsl:template>

<xsl:template match='*[text()]'>
<xsl:choose>
<xsl:when test='name()="extension"'>
<xsl:value-of select='@x-name'/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select='name()'/>
</xsl:otherwise>
</xsl:choose>
<xsl:apply-templates select='@*[name()!="x-name"]'/>:<xsl:value-of select='text()'/><xsl:text>
</xsl:text>
</xsl:template>

<xsl:template match='@*'>;<xsl:value-of select='name()'/>=<xsl:value-of select='.'/></xsl:template>

</xsl:transform>'''


#---------------------------------------------------------------------

# VCalendar file to DOM
def load_vcal_to_dom(uri):
    """Read the vCalendar resource at `uri` and return what the DOM reader builds.

    A Sax2 reader is set up with a VcfParser as its SAX parser and
    Sax2.XmlDomGenerator as its handler class; the result of its fromUri()
    call is returned as is.
    """
    dom_reader = Sax2.Reader(0, 0, None, Sax2.XmlDomGenerator, VcfParser())
    return dom_reader.fromUri(uri)

# DOM to VCalendar file
def write_dom_to_vcal(doc, uri):
    """Write the DOM `doc` as vCalendar text to the file named `uri`.

    The module-level `xslt` stylesheet is applied to `doc` with whichever
    XSLT processor can be imported (4Suite's Ft package first, then
    xml.xslt), and the transformation result is written to `uri`, which is
    opened with open(uri, 'w').
    """
    try:
        from Ft.Xml.Xslt.Processor import Processor
    except ImportError:
        from xml.xslt.Processor import Processor
    processor = Processor()
    # Select the API by what the processor actually offers rather than by
    # which imports happened to succeed.
    if hasattr(processor, 'appendStylesheetString'):
        processor.appendStylesheetString(xslt)
        result = processor.runNode(doc,1,{},None)
    else:
        # Input-source based API: the stylesheet has to be wrapped in an
        # input source before it can be appended.
        from Ft.Xml.InputSource import InputSourceFactory,InputSource
        inputsourcefactory = InputSourceFactory()
        processor.appendStylesheet(inputsourcefactory.fromString(
            xslt, 'dummy'))
        # NOTE(review): assumes execute() returns the serialized output,
        # like runNode() above -- confirm against the installed 4Suite.
        result = processor.execute(doc,InputSource(None,"dummy"), 1, {},
                                   None)
    out = open(uri, 'w')
    try:
        out.write(result)
    finally:
        # Close the file even when the write fails.
        out.close()

--wac7ysb48OaltWcw--