Development/Tutorials/Programming Tutorial KDE 3/KHTML: Difference between revisions
No edit summary |
No edit summary |
||
Line 41: | Line 41: | ||
gcc -I/usr/lib/qt3/include -I/opt/kde3/include \ | gcc -I/usr/lib/qt3/include -I/opt/kde3/include \ | ||
-L/opt/kde3/lib -lkdeui -lkhtml -o tags tags.cpp | -L/opt/kde3/lib -lkdeui -lkhtml -o tags tags.cpp | ||
=Second= | |||
<highlightSyntax language="cpp"> | |||
#include <kapplication.h> | |||
#include <kaboutdata.h> | |||
#include <kcmdlineargs.h> | |||
#include <dom/html_document.h> | |||
#include <dom/html_element.h> | |||
#include <dom/dom_node.h> | |||
int main (int argc, char *argv[]) | |||
{ | |||
KAboutData aboutData( "test", "test", | |||
"1.0", "test", KAboutData::License_GPL, | |||
"(c) 2006" ); | |||
KCmdLineArgs::init( argc, argv, &aboutData ); | |||
KApplication khello; | |||
DOM::HTMLDocument doc; | |||
DOM::DOMString tag("*"); | |||
DOM::DOMString uri("<html><body><a href=\"http://www.kde.org/\"><b>fat</b></a><a href=\"/index.php\" nowrap>Log in</a><a href=\"http://www.gmx.de\"></a></body></html>"); | |||
doc.loadXML(uri); | |||
kdDebug() << "Here's a list of the document elements" << endl; | |||
for (int i=0; i<doc.getElementsByTagName(tag).length(); i++) kdDebug() << doc.getElementsByTagName(tag).item(i).nodeName().string() << endl; | |||
DOM::HTMLDocument doc2; | |||
DOM::DOMString uri2("<html><body>this is html<b>fat</b></body></html>"); | |||
doc2.loadXML(uri2); | |||
kdDebug() << "This is the in-memory html:" << endl; | |||
kdDebug() << doc.toString().string() << endl; | |||
doc.body().insertBefore(doc.body().firstChild().firstChild(),doc.body().firstChild()); | |||
kdDebug() << "Moving around nodes" << endl; | |||
kdDebug() << doc.toString().string() << endl; | |||
} | |||
</highlightSyntax> |
Revision as of 10:20, 11 November 2006
For HTML parsing, you have the following possibilities:
- QXML
- QDOM
- perl
- khtml
QXML and QDOM require XML-compliant HTML pages, and very few HTML pages are XML-compliant. Perl is outside the scope of this site. This tutorial therefore chooses the KHTML approach.
First step
As we remember from http://developernew.kde.org/Development/Tutorials/Programming_Tutorial_KDE_4/How_to_write_an_HTML_parser, the most important requirement is to be able to parse syntax that is not XML-conformant. The following program does this.
tags.cpp <highlightSyntax language="cpp">
#include <kapplication.h>
#include <kaboutdata.h>
#include <kcmdlineargs.h>
#include <dom/html_document.h>
int main (int argc, char *argv[]) {
KAboutData aboutData( "test", "test", "1.0", "test", KAboutData::License_GPL, "(c) 2006" ); KCmdLineArgs::init( argc, argv, &aboutData ); KApplication khello;
DOM::HTMLDocument doc; DOM::DOMString tag("*"); DOM::DOMString uri("<html><body><a href=\"http://www.kde.org/\"></a><a href=\"/index.php\" nowrap>Log in</a><a href=\"http://www.gmx.de\"></a></body></html>");
doc.loadXML(uri); kdDebug() << "Does this doc have child elements ? " << doc.hasChildNodes() << endl; for (int i=0; i<doc.getElementsByTagName(tag).length(); i++) kdDebug() << doc.getElementsByTagName(tag).item(i).nodeName().string() << endl; kdDebug() << "Size of your doc " << sizeof(doc.firstChild()) << endl; kdDebug() << doc.isHTMLDocument() << endl; kdDebug() << doc.toString().string() << endl;
} </highlightSyntax>
Compile it like this:
gcc -I/usr/lib/qt3/include -I/opt/kde3/include \
    -L/opt/kde3/lib -lkdeui -lkhtml -o tags tags.cpp
Second
<highlightSyntax language="cpp">
#include <kapplication.h>
#include <kaboutdata.h>
#include <kcmdlineargs.h>
#include <dom/html_document.h>
#include <dom/html_element.h>
#include <dom/dom_node.h>
int main (int argc, char *argv[]) {
KAboutData aboutData( "test", "test", "1.0", "test", KAboutData::License_GPL, "(c) 2006" ); KCmdLineArgs::init( argc, argv, &aboutData ); KApplication khello;
DOM::HTMLDocument doc; DOM::DOMString tag("*"); DOM::DOMString uri("<html><body><a href=\"http://www.kde.org/\">fat</a><a href=\"/index.php\" nowrap>Log in</a><a href=\"http://www.gmx.de\"></a></body></html>");
doc.loadXML(uri); kdDebug() << "Here's a list of the document elements" << endl; for (int i=0; i<doc.getElementsByTagName(tag).length(); i++) kdDebug() << doc.getElementsByTagName(tag).item(i).nodeName().string() << endl; DOM::HTMLDocument doc2; DOM::DOMString uri2("<html><body>this is htmlfat</body></html>"); doc2.loadXML(uri2); kdDebug() << "This is the in-memory html:" << endl; kdDebug() << doc.toString().string() << endl; doc.body().insertBefore(doc.body().firstChild().firstChild(),doc.body().firstChild()); kdDebug() << "Moving around nodes" << endl; kdDebug() << doc.toString().string() << endl;
} </highlightSyntax>