Development/Tutorials/Programming Tutorial KDE 4/How to write an XML parser: Difference between revisions

    From KDE TechBase
    No edit summary
     
    No edit summary
    Line 20: Line 20:
       reader.setContentHandler( handler );
       reader.setContentHandler( handler );
       reader.parse( source );
       reader.parse( source );
    }
    </highlightSyntax>
    Next file is parser.h:
    <highlightSyntax language="cpp">
    /*
    parser.h - demonstration of a parser in C++
    */
    #ifndef PARSER_H
    #define PARSER_H
    #include <qstring.h>
    #include <QtXml/QXmlDefaultHandler>
    #include <QtXml/QXmlAttributes>
    class Parser : public QXmlDefaultHandler
    {
    public:
      Parser();
      /** given by the framework from qxml. Called when parsing the xml-document starts.          */
      bool startDocument();
      /** given by the framework from qxml. Called when the reader occurs an open tag (e.g. \<b\> ) */
      bool startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att );
    };
    #endif
    </highlightSyntax>
    Next file is parser.cpp:
    <highlightSyntax language="cpp">
    /*
    parser.cpp - demonstration of a parser in C++
    */
    #include "parser.h"
    #include <kdebug.h>
      Parser::Parser()
      {
      }
     
      bool Parser::startDocument()
      {
        kDebug() << "Searching document for tags" << endl;
        return true;
      }
     
      bool Parser::startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att )
      {
        kDebug() << "Found Element " << qName << endl;
        return true;
    }
    </highlightSyntax>
    <highlightSyntax language="cpp">
    /*
      dom.cpp
      A demonstration how to use the dom parsing framework.
      Prints the first subnode of an html file, i.e. typically "head" or "body".
      compile it like this:
      g++ -I. -I/opt/kde3/include -I/usr/lib/qt3/include dom.cpp -L/opt/kde3/lib -L/usr/lib/qt3/lib -lqt-mt -lkdeui 
    */
    #include <qdom.h>
    #include <qfile.h>
    #include <kdebug.h>
    int main()
    {
      QDomDocument doc( "myDocument" );
      QFile qf("hello.htm");
      doc.setContent( &qf );
      QDomElement docElement = doc.documentElement();
      QDomNode node;
      node = docElement.firstChild();
      kdDebug() << node.nodeName() << endl;
    }
    }
    </highlightSyntax>
    </highlightSyntax>

    Revision as of 20:46, 31 August 2006

    A parser is used to distinguish between formal language and bulk data of a given grammar. See en.wikipedia.org/wiki/Parser if in doubt. There are two ways to write a parser: either split up the content of a file into an object, as known from object-oriented programming, or trigger a function every time the reader encounters a given syntax tag. In the following code, a function is triggered every time an HTML tag is found in a file. The first file is hello.cpp: <highlightSyntax language="cpp"> /* hello.cpp compile it with g++ -I. -I/home/kde-devel/kde/include -I/home/kde-devel/qt-unstable/include/Qt -I/home/kde-devel/qt-unstable/include -I/home/kde-devel/qt-unstable/include/QtXml parser.h parser.cpp hello.cpp -L/home/kde-devel/kde/lib -L/home/kde-devel/qt-unstable/lib -lQtCore_debug -lQtXml_debug -lkdeui

    */


    #include <qstring.h>
    #include <QXmlInputSource>
    #include <qfile.h>
    #include <parser.h>

    int main() {

     Parser* handler=new Parser();
     QXmlInputSource* source=new QXmlInputSource(new QFile("hello.htm"));
     QXmlSimpleReader reader;
     reader.setContentHandler( handler );
     reader.parse( source );
    

    } </highlightSyntax> Next file is parser.h: <highlightSyntax language="cpp"> /*

    parser.h - demonstration of a parser in C++
    
    */
    #ifndef PARSER_H
    #define PARSER_H
    #include <qstring.h>
    #include <QtXml/QXmlDefaultHandler>
    #include <QtXml/QXmlAttributes>

    // Content handler for the tutorial: it is handed to the XML reader via
    // setContentHandler() and receives the reader's callbacks.
    class Parser : public QXmlDefaultHandler
    {
    public:
     Parser();

     /** Callback from the QXml framework; invoked when parsing of the XML document begins. */
     bool startDocument();

     /** Callback from the QXml framework; invoked whenever the reader encounters an opening tag (e.g. \<b\> ). */
     bool startElement( const QString& namespaceURI,
                        const QString& localName,
                        const QString& qName,
                        const QXmlAttributes& att );
    };


    #endif

    </highlightSyntax>

    Next file is parser.cpp: <highlightSyntax language="cpp"> /*

    parser.cpp - demonstration of a parser in C++
    
    */
    #include "parser.h"
    #include <kdebug.h>
     // Nothing to set up: the class declares no data members of its own.
     Parser::Parser()
       : QXmlDefaultHandler()
     {}
     
     // Reader callback for the start of the document: logs a message and returns true.
     bool Parser::startDocument()
     {
       const bool continueParsing = true;
       kDebug() << "Searching document for tags" << endl;
       return continueParsing;
     }
     
     // Reader callback for every opening tag: logs the tag's qualified name
     // (qName) and returns true. The two leading string arguments and the
     // attribute list are not used here, so they are left unnamed to avoid
     // unused-parameter warnings.
     bool Parser::startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& )
     {
       kDebug() << "Found Element " << qName << endl;
       return true;
     }
    

    </highlightSyntax>

    <highlightSyntax language="cpp"> /*

      dom.cpp
      A demonstration how to use the dom parsing framework.
      Prints the first subnode of an html file, i.e. typically "head" or "body".
      compile it like this:
      g++ -I. -I/opt/kde3/include -I/usr/lib/qt3/include dom.cpp -L/opt/kde3/lib -L/usr/lib/qt3/lib -lqt-mt -lkdeui   
    
    */
    #include <qdom.h>
    #include <qfile.h>
    #include <kdebug.h>

    // Loads hello.htm into a DOM tree and prints the name of the first child
    // of the document element. Returns 1 if the document cannot be loaded.
    int main() {

     QDomDocument doc( "myDocument" );
     QFile qf("hello.htm");
     // setContent() reports failure through its return value; without this
     // check a missing or malformed file would silently print an empty name.
     if ( !doc.setContent( &qf ) ) {
       kdDebug() << "Could not load hello.htm" << endl;
       return 1;
     }
     QDomElement docElement = doc.documentElement();
     QDomNode node = docElement.firstChild();
     kdDebug() << node.nodeName() << endl;
     return 0;

    } </highlightSyntax>