Difference between revisions of "Development/Tutorials/Programming Tutorial KDE 4/How to write an XML parser"

(Use Template:Proposed_deletion)
 
(18 intermediate revisions by 10 users not shown)
Line 1: Line 1:
 +
{{Proposed_deletion}}
 
A parser is used to distinguish between formal language and bulk data of a given grammar. See http://en.wikipedia.org/wiki/Parser for more information. There are two ways to write a parser: to split up the content of a file into an object as known from object-oriented programming (the <b>DOM approach</b>) or to trigger a function everytime a reader occurs a given syntax tag (the <b>QXML approach</b>).
 
A parser is used to distinguish between formal language and bulk data of a given grammar. See http://en.wikipedia.org/wiki/Parser for more information. There are two ways to write a parser: to split up the content of a file into an object as known from object-oriented programming (the <b>DOM approach</b>) or to trigger a function everytime a reader occurs a given syntax tag (the <b>QXML approach</b>).
  
Line 4: Line 5:
  
 
<b>parser.h:</b>
 
<b>parser.h:</b>
<highlightSyntax language="cpp">
+
<syntaxhighlight lang="cpp-qt" line>
 
/*
 
/*
 
  parser.h - demonstration of a parser in C++
 
  parser.h - demonstration of a parser in C++
Line 32: Line 33:
  
 
#endif
 
#endif
</highlightSyntax>
+
</syntaxhighlight>
  
 
<b>parser.cpp:</b>
 
<b>parser.cpp:</b>
<highlightSyntax language="cpp">
+
<syntaxhighlight lang="cpp-qt" line>
 
/*
 
/*
 
  parser.cpp - demonstration of a parser in C++
 
  parser.cpp - demonstration of a parser in C++
Line 49: Line 50:
 
   bool Parser::startDocument()
 
   bool Parser::startDocument()
 
   {
 
   {
     kDebug() << "Searching document for tags" << endl;
+
     kDebug() << "Searching document for tags";
 
     return true;
 
     return true;
 
   }
 
   }
Line 55: Line 56:
 
   bool Parser::startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att )
 
   bool Parser::startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att )
 
   {
 
   {
     kDebug() << "Found Element " << qName << endl;
+
     kDebug() << "Found Element" << qName;
 
     return true;
 
     return true;
 
  }
 
  }
</highlightSyntax>
+
</syntaxhighlight>
  
 
<b>hello.cpp:</b>
 
<b>hello.cpp:</b>
<highlightSyntax language="cpp">
+
<syntaxhighlight lang="cpp-qt" line>
 
/*
 
/*
 
hello.cpp
 
hello.cpp
 
compile it with
 
compile it with
g++ -I. -I/home/kde-devel/kde/include -I/home/kde-devel/qt-unstable/include/Qt  
+
g++ -I. -I/home/kde-devel/kde/include -I/home/kde-devel/qt-unstable/include/Qt -I/home/kde-devel/qt-unstable/include /home/kde-devel/qt-unstable/include/QtXml parser.h parser.cpp hello.cpp -L/home/kde-devel/kde/lib -L/home/kde-devel/qt-unstable/lib -lQtCore_debug -lQtXml_debug -lkdeui
-I/home/kde-devel/qt-unstable/include I/home/kde-devel/qt-unstable/include/QtXml  
 
parser.h parser.cpp hello.cpp -L/home/kde-devel/kde/lib  
 
-L/home/kde-devel/qt-unstable/lib -lQtCore_debug -lQtXml_debug -lkdeui
 
 
*/
 
*/
  
Line 85: Line 83:
 
   reader.parse( source );
 
   reader.parse( source );
 
}
 
}
</highlightSyntax>
+
</syntaxhighlight>
  
 
=The DOM approach=
 
=The DOM approach=
<highlightSyntax language="cpp">
+
<syntaxhighlight lang="cpp-qt" line>
 
/*
 
/*
 
   dom.cpp
 
   dom.cpp
 
   A demonstration how to use the dom parsing framework.
 
   A demonstration how to use the dom parsing framework.
   Prints the first subnode of an html file, i.e. typically "head" or "body".
+
   Prints the first subnode of an HTML file, i.e. typically  
 +
  "head" or "body".
 
   compile it like this:
 
   compile it like this:
   g++ -I. -I/opt/kde3/include -I/usr/lib/qt3/include dom.cpp -L/opt/kde3/lib \
+
   g++ -I. -I/opt/kde3/include -I/usr/lib/qt3/include dom.cpp \
-L/usr/lib/qt3/lib -lqt-mt -lkdeui   
+
  -L/opt/kde3/lib -L/usr/lib/qt3/lib -lqt-mt -lkdeui   
 
*/
 
*/
 
#include <qdom.h>
 
#include <qdom.h>
Line 111: Line 110:
 
   kdDebug() << node.nodeName() << endl;
 
   kdDebug() << node.nodeName() << endl;
 
}
 
}
</highlightSyntax>
+
</syntaxhighlight>
 +
 
 +
=Drawbacks=
 +
HTML parsing only works for "legal" html documents.
 +
For example, look at this code:
 +
<syntaxhighlight lang="xml">
 +
<html>
 +
  <body>
 +
      <a href="http://www.kde.org/"></a>
 +
      <a href="/index.php?title=Special:User&returnto=Main_Page">Log in</a>
 +
      <a href="http://www.gmx.de"></a>
 +
  </body>
 +
</html>
 +
</syntaxhighlight>
 +
This code contains a &amp; and will bring your parser to an error.
 +
 
 +
See here:
 +
<syntaxhighlight lang="xml">
 +
<html>
 +
  <body>
 +
      <a href="http://www.kde.org/"></a>
 +
      <a href="/index.php" nowrap>Log in</a>
 +
      <a href="http://www.gmx.de"></a>
 +
  </body>
 +
</html>
 +
</syntaxhighlight>
 +
This code will throw an error because of the '''nowrap''' that is not xml-conform.
 +
 
 +
[[Category:Proposed_deletion]]

Latest revision as of 12:18, 15 May 2019

Flag-red.png
 
Proposed for Deletion
This page has been proposed for deletion.

A parser analyses a document according to a given grammar and separates the formal language (the markup) from the bulk data it carries. See http://en.wikipedia.org/wiki/Parser for more information. There are two ways to write a parser: load the content of a file into a tree of objects, as known from object-oriented programming (the DOM approach), or have a function called every time the reader encounters a given syntax element such as an opening tag (the QXML approach).

The QXML approach

parser.h:

 1 /*
 2  parser.h - demonstration of a parser in C++
 3 */
 4 
 5 #ifndef PARSER_H
 6 #define PARSER_H
 7 
 8 #include <qstring.h>
 9 #include <QtXml/QXmlDefaultHandler>
10 #include <QtXml/QXmlAttributes>
11 /** Handler for the QXML approach: derives from QXmlDefaultHandler and overrides two of its callbacks. */
12 class Parser : public QXmlDefaultHandler
13 {
14 public:
15 
16   Parser();
17 
18   /** Callback inherited from the QXml framework. Called when parsing of the XML document starts. */
19   bool startDocument();
20 
21   /** Callback inherited from the QXml framework. Called when the reader encounters an opening tag (e.g. \<b\> ); qName is presumably the tag's qualified name. */
22   bool startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att );
23 
24 };
25 
26 
27 #endif

parser.cpp:

 1 /*
 2  parser.cpp - demonstration of a parser in C++
 3 */
 4 
 5 #include "parser.h"
 6 #include <kdebug.h>
 7 
 8   Parser::Parser()  // empty: there is nothing to initialise here
 9   {
10   }
11   // Writes a fixed debug message and always returns true.
12   bool Parser::startDocument()
13   {
14     kDebug() << "Searching document for tags";
15     return true;
16   }
17   // Writes the qName argument to the debug output and always returns true; att is not used.
18   bool Parser::startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att )
19   {
20     kDebug() << "Found Element" << qName;
21     return true;
22  }

hello.cpp:

 1 /*
 2 hello.cpp
 3 compile it with
 4 g++ -I. -I/home/kde-devel/kde/include -I/home/kde-devel/qt-unstable/include/Qt -I/home/kde-devel/qt-unstable/include -I/home/kde-devel/qt-unstable/include/QtXml parser.h parser.cpp hello.cpp -L/home/kde-devel/kde/lib -L/home/kde-devel/qt-unstable/lib -lQtCore_debug -lQtXml_debug -lkdeui
 5 */
 6 
 7 
 8 #include <qstring.h>
 9 #include <QXmlInputSource>
10 #include <qfile.h>
11 #include <parser.h>
12 
13 /*
14   Feeds hello.htm through a QXmlSimpleReader that uses Parser as its
15   content handler, so the document start and every opening tag are logged.
16   Returns 0 if the reader parsed the whole file, 1 if it reported an error.
17 */
18 int main()
19 {
20   // Stack objects: handler, file and input source are destroyed on return,
21   // so nothing allocated here is leaked.
22   Parser handler;
23   QFile file("hello.htm");
24   QXmlInputSource source(&file);
25   QXmlSimpleReader reader;
26   reader.setContentHandler( &handler );
27   // parse() returns false when the input is not well-formed XML.
28   return reader.parse( &source ) ? 0 : 1;
29 }

The DOM approach

 1 /*
 2    dom.cpp
 3    A demonstration how to use the dom parsing framework.
 4    Prints the first subnode of an HTML file, i.e. typically 
 5    "head" or "body".
 6    compile it like this:
 7    g++ -I. -I/opt/kde3/include -I/usr/lib/qt3/include dom.cpp \
 8    -L/opt/kde3/lib -L/usr/lib/qt3/lib -lqt-mt -lkdeui   
 9 */
10 #include <qdom.h>
11 #include <qfile.h>
12 #include <kdebug.h>
13 
14 /*
15   Loads hello.htm into a QDomDocument and prints the name of the first
16   child of the document element.
17   Returns 0 on success, 1 if the file could not be parsed.
18 */
19 int main()
20 {
21   QDomDocument doc( "myDocument" );
22   QFile qf("hello.htm");
23   // setContent() returns false if the file could not be parsed; report
24   // the error and its line instead of printing an empty node name.
25   QString errorMsg;
26   int errorLine = 0;
27   if ( !doc.setContent( &qf, &errorMsg, &errorLine ) )
28   {
29     kdDebug() << "hello.htm: line " << errorLine << ": " << errorMsg << endl;
30     return 1;
31   }
32   QDomNode node = doc.documentElement().firstChild();
33   kdDebug() << node.nodeName() << endl;
34   return 0;
35 }

Drawbacks

Parsing HTML this way only works for documents that are also well-formed XML. For example, look at this markup:

<html>
  <body>
      <a href="http://www.kde.org/"></a>
      <a href="/index.php?title=Special:User&returnto=Main_Page">Log in</a>
      <a href="http://www.gmx.de"></a>
  </body>
</html>

This markup contains an unescaped & (in XML it has to be written as &amp;amp;), so the parser stops with an error.

See here:

<html>
  <body>
      <a href="http://www.kde.org/"></a>
      <a href="/index.php" nowrap>Log in</a>
      <a href="http://www.gmx.de"></a>
  </body>
</html>

This markup causes a parse error because the nowrap attribute has no value, which is not well-formed XML.


This page was last edited on 15 May 2019, at 12:18. Content is available under Creative Commons License SA 4.0 unless otherwise noted.