(→Showing tags) |
Neverendingo (Talk | contribs) m (Text replace - "</code>" to "</syntaxhighlight>") |
||
(11 intermediate revisions by 8 users not shown) | |||
Line 2: | Line 2: | ||
* QXML | * QXML | ||
* QDOM | * QDOM | ||
− | * | + | * Perl |
− | * | + | * XHTML |
− | Obviously, QXML and QDOM need | + | Obviously, QXML and QDOM need XML-compliant HTML pages, and the least HTML pages are XML-compliant. Perl is not the scope of this site. This tutorial chooses the XHTML approach. |
+ | |||
=First step= | =First step= | ||
− | + | As we remember from http://developernew.kde.org/Development/Tutorials/Programming_Tutorial_KDE_4/How_to_write_an_HTML_parser, biggest thing is to be able to parse non-XML-conform syntax. It works with the following program. | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | '''tags.cpp''' | |
− | + | <syntaxhighlight lang="cpp-qt" line> | |
− | < | + | |
#include <kapplication.h> | #include <kapplication.h> | ||
#include <kaboutdata.h> | #include <kaboutdata.h> | ||
Line 45: | Line 24: | ||
KApplication khello; | KApplication khello; | ||
− | DOM:: | + | DOM::HTMLDocument doc; |
DOM::DOMString tag("*"); | DOM::DOMString tag("*"); | ||
− | DOM::DOMString uri("<html>< | + | DOM::DOMString uri("<html><body><a href=\"http://www.kde.org/\"></a><a href=\"/index.php\" nowrap>Log in</a><a href=\"http://www.gmx.de\"></a></body></html>"); |
− | + | ||
− | + | doc.loadXML(uri); | |
− | + | kdDebug() << "Does this doc have child elements ? " << doc.hasChildNodes() << endl; | |
− | + | for (int i=0; i<doc.getElementsByTagName(tag).length(); i++) kdDebug() << doc.getElementsByTagName(tag).item(i).nodeName().string() << endl; | |
− | kdDebug() << " | + | kdDebug() << "Size of your doc " << sizeof(doc.firstChild()) << endl; |
− | for (int i=0; i<doc | + | kdDebug() << doc.isHTMLDocument() << endl; |
− | kdDebug() << "Size of your doc " << sizeof(doc) << endl; | + | kdDebug() << doc.toString().string() << endl; |
− | kdDebug() << doc | + | |
} | } | ||
− | </ | + | </syntaxhighlight> |
− | + | Compile it like this: | |
− | + | gcc -I/usr/lib/qt3/include -I/opt/kde3/include \ | |
− | < | + | -L/opt/kde3/lib -lkdeui -lkhtml -o tags tags.cpp |
+ | |||
+ | =Second= | ||
+ | |||
+ | <syntaxhighlight lang="cpp-qt"> | ||
#include <kapplication.h> | #include <kapplication.h> | ||
#include <kaboutdata.h> | #include <kaboutdata.h> | ||
#include <kcmdlineargs.h> | #include <kcmdlineargs.h> | ||
#include <dom/html_document.h> | #include <dom/html_document.h> | ||
+ | #include <dom/html_element.h> | ||
+ | #include <dom/dom_node.h> | ||
int main (int argc, char *argv[]) | int main (int argc, char *argv[]) | ||
Line 78: | Line 62: | ||
DOM::HTMLDocument doc; | DOM::HTMLDocument doc; | ||
DOM::DOMString tag("*"); | DOM::DOMString tag("*"); | ||
− | DOM::DOMString uri("<html><body><a href=\"http://www.kde.org/\"></a><a href=\"/index.php\" nowrap>Log in</a><a href=\"http://www.gmx.de\"></a></body></html>"); | + | DOM::DOMString uri("<html><body><a href=\"http://www.kde.org/\"><b>fat</b></a><a href=\"/index.php\" nowrap>Log in</a><a href=\"http://www.gmx.de\"></a></body></html>"); |
doc.loadXML(uri); | doc.loadXML(uri); | ||
− | kdDebug() << " | + | kdDebug() << "Here's a list of the document elements" << endl; |
for (int i=0; i<doc.getElementsByTagName(tag).length(); i++) kdDebug() << doc.getElementsByTagName(tag).item(i).nodeName().string() << endl; | for (int i=0; i<doc.getElementsByTagName(tag).length(); i++) kdDebug() << doc.getElementsByTagName(tag).item(i).nodeName().string() << endl; | ||
− | kdDebug() << " | + | |
− | + | DOM::HTMLDocument doc2; | |
+ | DOM::DOMString uri2("<html><body>this is html<b>fat</b></body></html>"); | ||
+ | doc2.loadXML(uri2); | ||
+ | kdDebug() << "This is the in-memory html:" << endl; | ||
+ | kdDebug() << doc.toString().string() << endl; | ||
+ | doc.body().insertBefore(doc.body().firstChild().firstChild(),doc.body().firstChild()); | ||
+ | kdDebug() << "Moving around nodes" << endl; | ||
kdDebug() << doc.toString().string() << endl; | kdDebug() << doc.toString().string() << endl; | ||
} | } | ||
− | </ | + | </syntaxhighlight> |
For HTML parsing, you have the following possibilities:
Obviously, QXML and QDOM need XML-compliant HTML pages, and very few HTML pages are XML-compliant. Perl is outside the scope of this site. This tutorial chooses the XHTML approach.
As we remember from http://developernew.kde.org/Development/Tutorials/Programming_Tutorial_KDE_4/How_to_write_an_HTML_parser, the most important thing is to be able to parse syntax that does not conform to XML. The following program does so.
tags.cpp
1 #include <kapplication.h>
2 #include <kaboutdata.h>
3 #include <kcmdlineargs.h>
4 #include <dom/html_document.h>
5
6 int main (int argc, char *argv[])
7 {
8 KAboutData aboutData( "test", "test",
9 "1.0", "test", KAboutData::License_GPL,
10 "(c) 2006" );
11 KCmdLineArgs::init( argc, argv, &aboutData );
12 KApplication khello;
13
14 DOM::HTMLDocument doc;
15 DOM::DOMString tag("*");
16 DOM::DOMString uri("<html><body><a href=\"http://www.kde.org/\"></a><a href=\"/index.php\" nowrap>Log in</a><a href=\"http://www.gmx.de\"></a></body></html>");
17
18 doc.loadXML(uri);
19 kdDebug() << "Does this doc have child elements ? " << doc.hasChildNodes() << endl;
20 for (int i=0; i<doc.getElementsByTagName(tag).length(); i++) kdDebug() << doc.getElementsByTagName(tag).item(i).nodeName().string() << endl;
21 kdDebug() << "Size of your doc " << sizeof(doc.firstChild()) << endl;
22 kdDebug() << doc.isHTMLDocument() << endl;
23 kdDebug() << doc.toString().string() << endl;
24 }
Compile it like this:
gcc -I/usr/lib/qt3/include -I/opt/kde3/include -L/opt/kde3/lib -lkdeui -lkhtml -o tags tags.cpp
#include <kapplication.h>
#include <kaboutdata.h>
#include <kcmdlineargs.h>
#include <dom/html_document.h>
#include <dom/html_element.h>
#include <dom/dom_node.h>
int main (int argc, char *argv[])
{
KAboutData aboutData( "test", "test",
"1.0", "test", KAboutData::License_GPL,
"(c) 2006" );
KCmdLineArgs::init( argc, argv, &aboutData );
KApplication khello;
DOM::HTMLDocument doc;
DOM::DOMString tag("*");
DOM::DOMString uri("<html><body><a href=\"http://www.kde.org/\"><b>fat</b></a><a href=\"/index.php\" nowrap>Log in</a><a href=\"http://www.gmx.de\"></a></body></html>");
doc.loadXML(uri);
kdDebug() << "Here's a list of the document elements" << endl;
for (int i=0; i<doc.getElementsByTagName(tag).length(); i++) kdDebug() << doc.getElementsByTagName(tag).item(i).nodeName().string() << endl;
DOM::HTMLDocument doc2;
DOM::DOMString uri2("<html><body>this is html<b>fat</b></body></html>");
doc2.loadXML(uri2);
kdDebug() << "This is the in-memory html:" << endl;
kdDebug() << doc.toString().string() << endl;
doc.body().insertBefore(doc.body().firstChild().firstChild(),doc.body().firstChild());
kdDebug() << "Moving around nodes" << endl;
kdDebug() << doc.toString().string() << endl;
}