Development/Tutorials/Programming Tutorial KDE 4/How to write an XML parser: Difference between revisions
(Use Template:Proposed_deletion) |
|||
(12 intermediate revisions by 7 users not shown) | |||
Line 1: | Line 1: | ||
{{Proposed_deletion}} | |||
A parser is used to distinguish between formal language and bulk data of a given grammar. See http://en.wikipedia.org/wiki/Parser for more information. There are two ways to write a parser: to split up the content of a file into an object as known from object-oriented programming (the <b>DOM approach</b>) or to trigger a function everytime a reader occurs a given syntax tag (the <b>QXML approach</b>). | A parser is used to distinguish between formal language and bulk data of a given grammar. See http://en.wikipedia.org/wiki/Parser for more information. There are two ways to write a parser: to split up the content of a file into an object as known from object-oriented programming (the <b>DOM approach</b>) or to trigger a function everytime a reader occurs a given syntax tag (the <b>QXML approach</b>). | ||
Line 4: | Line 5: | ||
<b>parser.h:</b> | <b>parser.h:</b> | ||
< | <syntaxhighlight lang="cpp-qt" line> | ||
/* | /* | ||
parser.h - demonstration of a parser in C++ | parser.h - demonstration of a parser in C++ | ||
Line 32: | Line 33: | ||
#endif | #endif | ||
</ | </syntaxhighlight> | ||
<b>parser.cpp:</b> | <b>parser.cpp:</b> | ||
< | <syntaxhighlight lang="cpp-qt" line> | ||
/* | /* | ||
parser.cpp - demonstration of a parser in C++ | parser.cpp - demonstration of a parser in C++ | ||
Line 49: | Line 50: | ||
bool Parser::startDocument() | bool Parser::startDocument() | ||
{ | { | ||
kDebug() << "Searching document for tags" | kDebug() << "Searching document for tags"; | ||
return true; | return true; | ||
} | } | ||
Line 55: | Line 56: | ||
bool Parser::startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att ) | bool Parser::startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att ) | ||
{ | { | ||
kDebug() << "Found Element " << qName | kDebug() << "Found Element" << qName; | ||
return true; | return true; | ||
} | } | ||
</ | </syntaxhighlight> | ||
<b>hello.cpp:</b> | <b>hello.cpp:</b> | ||
< | <syntaxhighlight lang="cpp-qt" line> | ||
/* | /* | ||
hello.cpp | hello.cpp | ||
Line 82: | Line 83: | ||
reader.parse( source ); | reader.parse( source ); | ||
} | } | ||
</ | </syntaxhighlight> | ||
=The DOM approach= | =The DOM approach= | ||
< | <syntaxhighlight lang="cpp-qt" line> | ||
/* | /* | ||
dom.cpp | dom.cpp | ||
Line 109: | Line 110: | ||
kdDebug() << node.nodeName() << endl; | kdDebug() << node.nodeName() << endl; | ||
} | } | ||
</ | </syntaxhighlight> | ||
=Drawbacks= | |||
HTML parsing only works for "legal" html documents. | |||
For example, look at this code: | |||
<syntaxhighlight lang="xml"> | |||
<html> | |||
<body> | |||
<a href="http://www.kde.org/"></a> | |||
<a href="/index.php?title=Special:User&returnto=Main_Page">Log in</a> | |||
<a href="http://www.gmx.de"></a> | |||
</body> | |||
</html> | |||
</syntaxhighlight> | |||
This code contains a & and will bring your parser to an error. | |||
See here: | |||
<syntaxhighlight lang="xml"> | |||
<html> | |||
<body> | |||
<a href="http://www.kde.org/"></a> | |||
<a href="/index.php" nowrap>Log in</a> | |||
<a href="http://www.gmx.de"></a> | |||
</body> | |||
</html> | |||
</syntaxhighlight> | |||
This code will throw an error because of the '''nowrap''' that is not xml-conform. | |||
[[Category:Proposed_deletion]] |
Latest revision as of 12:18, 15 May 2019
Proposed for Deletion |
---|
This page has been proposed for deletion. |
A parser is used to distinguish between the formal language and the bulk data of a given grammar. See http://en.wikipedia.org/wiki/Parser for more information. There are two ways to write a parser: load the content of a file into a tree of objects, as known from object-oriented programming (the DOM approach), or trigger a function every time the reader encounters a given tag (the QXML approach).
The QXML approach
parser.h:
/*
parser.h - demonstration of a parser in C++
*/
#ifndef PARSER_H
#define PARSER_H
#include <qstring.h>
#include <QtXml/QXmlDefaultHandler>
#include <QtXml/QXmlAttributes>
/**
 * Content handler used by the QXML approach of this tutorial.
 *
 * Derives from QXmlDefaultHandler and re-declares the two callbacks this
 * example implements in parser.cpp: one for the start of the document and
 * one for every opening tag.
 */
class Parser : public QXmlDefaultHandler
{
public:
    Parser();

    /** Provided by the QXml framework. Called when parsing of the XML document starts. */
    virtual bool startDocument();

    /**
     * Provided by the QXml framework. Called whenever the reader encounters
     * an opening tag (e.g. \<b\> ).
     *
     * @param qName the name of the element that was found
     * @param att   the attributes attached to the element
     */
    virtual bool startElement( const QString& namespaceURI,
                               const QString& localName,
                               const QString& qName,
                               const QXmlAttributes& att );
};
#endif
parser.cpp:
/*
parser.cpp - demonstration of a parser in C++
*/
#include "parser.h"
#include <kdebug.h>
/** Creates a handler with no state of its own; the base class is default-constructed. */
Parser::Parser()
    : QXmlDefaultHandler()
{
}
/**
 * Callback for the start of the document.
 * Writes a debug message and always returns true.
 */
bool Parser::startDocument()
{
    const bool result = true;
    kDebug() << "Searching document for tags";
    return result;
}
bool Parser::startElement( const QString&, const QString&, const QString& qName, const QXmlAttributes& att )
{
kDebug() << "Found Element" << qName;
return true;
}
hello.cpp:
/*
hello.cpp
compile it with
g++ -I. -I/home/kde-devel/kde/include -I/home/kde-devel/qt-unstable/include/Qt -I/home/kde-devel/qt-unstable/include -I/home/kde-devel/qt-unstable/include/QtXml parser.h parser.cpp hello.cpp -L/home/kde-devel/kde/lib -L/home/kde-devel/qt-unstable/lib -lQtCore_debug -lQtXml_debug -lkdeui
*/
#include <qstring.h>
#include <QXmlInputSource>
#include <qfile.h>
#include <parser.h>
int main()
{
Parser* handler=new Parser();
QXmlInputSource* source=new QXmlInputSource(new QFile("hello.htm"));
QXmlSimpleReader reader;
reader.setContentHandler( handler );
reader.parse( source );
}
The DOM approach
/*
dom.cpp
A demonstration of how to use the DOM parsing framework.
Prints the name of the first child node of an HTML file's root element,
i.e. typically "head" or "body".
compile it like this:
g++ -I. -I/opt/kde3/include -I/usr/lib/qt3/include dom.cpp \
-L/opt/kde3/lib -L/usr/lib/qt3/lib -lqt-mt -lkdeui
*/
#include <qdom.h>
#include <qfile.h>
#include <kdebug.h>
int main()
{
QDomDocument doc( "myDocument" );
QFile qf("hello.htm");
doc.setContent( &qf );
QDomElement docElement = doc.documentElement();
QDomNode node;
node = docElement.firstChild();
kdDebug() << node.nodeName() << endl;
}
Drawbacks
Parsing HTML this way only works for "legal" HTML documents, i.e. documents that are also well-formed XML. For example, look at this code:
<html>
<body>
<a href="http://www.kde.org/"></a>
<a href="/index.php?title=Special:User&returnto=Main_Page">Log in</a>
<a href="http://www.gmx.de"></a>
</body>
</html>
This code contains an unescaped & (in XML it would have to be written as &amp;amp;) and will cause your parser to report an error.
See here:
<html>
<body>
<a href="http://www.kde.org/"></a>
<a href="/index.php" nowrap>Log in</a>
<a href="http://www.gmx.de"></a>
</body>
</html>
This code will cause a parse error because of the nowrap attribute: it has no value, which is not well-formed XML.