Projects/Acid3

From KDE TechBase

Acid3 Test Failures of KHTML

This is an overview of the remaing failures of KHTML on the Acid3 test. FF3.1 beta2pre scores 91/100, with 5 of the colored boxes filled in correctly.

Test No Area Diagnosis Comment passes in FF3 passes in FF3.1
04 HTML Parser Parser bug: iframe missing text kid discard_until = ID_IFRAME+ID_CLOSE_TAG --- added in http://lists.kde.org/?l=kde-commits&m=99906936412933&w=2. No Yes
13 DOM Range Unhandled mutation Yes Yes
26/27 JS + DOM Memory management Cycle breaking cleaning up too much Yes, but slow Yes, but slow
29 HTML Parser Parser bug: table missing whitespace kid Yes Yes
40 ???? ???? No
43 Checkboxes, value, attributes Yes Yes
44 CSS Parser Misparse of html*.test selector (see below) Yes
48 (red linktest failed) CSS :visited doesn't match relative URL right Performance critical, hot on things like Qt docs, and already slow Yes Yes
51 DOM2 Table Stray row Buggy test. Raised with Ian Hickson. Yes Yes
53 DOM2 Forms Not managing form's element collection when not in document. Yes Yes
65/69 Part loading onload events not emitted for many objects Yes, after several attempts 69 fails due to network timeout
70-80 Can't run due to 65/69 SVG, SMIL + XHTML No 72, 73, and 80 pass; rest fail

Note: An upgrade to PCRE 7.7 is required to make tests 89 and 90 pass.

Random patch storage

These are not meant for commit, but more as a proof of analysis, and starting point for proper fix:

#16, red cat.

Ugly, but roughly correct.

Index: kio/slavebase.cpp
===================================================================
--- kio/slavebase.cpp	(revision 793314)
+++ kio/slavebase.cpp	(working copy)
@@ -527,6 +527,7 @@
 void SlaveBase::errorPage()
 {
     send( INF_ERROR_PAGE );
+    mOutgoingMetaData["__kio_error_page"] = "1";
 }
 
 static bool isSubCommand(int cmd)
@@ -554,6 +555,11 @@
       KIO_DATA << mOutgoingMetaData;
       send( INF_META_DATA, data );
     }
+
+    // re-send the error-page flag as well.
+    if (mOutgoingMetaData.contains("__kio_error_page"))
+	send( INF_ERROR_PAGE );
+    
     KIO_DATA << _type;
     send( INF_MIME_TYPE, data );
     while(true)

#4, iframe kids.

This one may be committable, actually

--- a/html/htmlparser.cpp
+++ b/html/htmlparser.cpp
@@ -874,7 +874,7 @@ NodeImpl *KHTMLParser::getElement(Token* t)
         // a bit a special case, since the frame is inlined...
     case ID_IFRAME:
         n = new HTMLIFrameElementImpl(document);
-        if (!t->flat) discard_until = ID_IFRAME+ID_CLOSE_TAG;
+        //if (!t->flat) discard_until = ID_IFRAME+ID_CLOSE_TAG;
         break;

 // form elements

#44 testcase

<style>
html*.test { border: 2px solid red; margin: 5px; }
</style>
<body class="test">
  <p class="test">There should be no red borders here.
</body>

#48, red linktest failed

It's a bug inside KonqHistoryManager --- it ignores some inserts, which makes the link not :visited. We may still want to consider the diff below, but it'll likely slow things down too much.

Index: css/cssstyleselector.h
===================================================================
--- css/cssstyleselector.h	(revision 873693)
+++ css/cssstyleselector.h	(working copy)
@@ -29,6 +29,7 @@
 #include "css/css_mediaquery.h"
 #include <QtCore/QVarLengthArray>
 #include <QtCore/QList>
+#include <kurl.h>
 
 class KHTMLSettings;
 class KHTMLView;
@@ -187,11 +188,7 @@
 	QVector<int> fixedFontSizes() const { return m_fixedFontSizes; }
 
 	bool strictParsing;
-	struct Encodedurl {
-	    QString host; //also contains protocol
-	    QString path;
-	    QString file;
-	} encodedurl;
+	KUrl encodedurl;
 
         // called from KHTMLView::print()
 	void computeFontSizes(int logicalDpiY, int zoomFactor);
Index: css/cssstyleselector.cpp
===================================================================
--- css/cssstyleselector.cpp	(revision 873693)
+++ css/cssstyleselector.cpp	(working copy)
@@ -287,15 +287,7 @@
 
     u.setQuery( QString() );
     u.setRef( QString() );
-    encodedurl.file = u.url();
-    int pos = encodedurl.file.lastIndexOf('/');
-    encodedurl.path = encodedurl.file;
-    if ( pos > 0 ) {
-	encodedurl.path.truncate( pos );
-	encodedurl.path += '/';
-    }
-    u.setPath( QString() );
-    encodedurl.host = u.url();
+    encodedurl = u;
 
     //kDebug() << "CSSStyleSelector::CSSStyleSelector encoded url " << encodedurl.path;
 }
@@ -919,46 +911,8 @@
     return numProps;
 }
 
-// modified version of the one in kurl.cpp
-static void cleanpath(QString &path)
+static PseudoState checkPseudoState( const KUrl& encodedurl, DOM::ElementImpl *e )
 {
-    int pos;
-    while ( (pos = path.indexOf( "/../" )) != -1 ) {
-        int prev = 0;
-        if ( pos > 0 )
-            prev = path.lastIndexOf( "/", pos -1 );
-        // don't remove the host, i.e. http://foo.org/../foo.html
-        if (prev < 0 || (prev > 3 && path.lastIndexOf("://", prev-1) == prev-2))
-            path.remove( pos, 3);
-        else
-            // matching directory found ?
-            path.remove( prev, pos- prev + 3 );
-    }
-    pos = 0;
-
-    // Don't remove "//" from an anchor identifier. -rjw
-    // Set refPos to -2 to mean "I haven't looked for the anchor yet".
-    // We don't want to waste a function call on the search for the anchor
-    // in the vast majority of cases where there is no "//" in the path.
-    int refPos = -2;
-    while ( (pos = path.indexOf( "//", pos )) != -1) {
-        if (refPos == -2)
-            refPos = path.indexOf("#", 0);
-        if (refPos > 0 && pos >= refPos)
-            break;
-
-        if ( pos == 0 || path[pos-1] != ':' )
-            path.remove( pos, 1 );
-        else
-            pos += 2;
-    }
-    while ( (pos = path.indexOf( "/./" )) != -1)
-        path.remove( pos, 2 );
-    //kDebug() << "checkPseudoState " << path;
-}
-
-static PseudoState checkPseudoState( const CSSStyleSelector::Encodedurl& encodedurl, DOM::ElementImpl *e )
-{
     if( e->id() != ID_A ) {
         return PseudoNone;
     }
@@ -966,20 +920,20 @@
     if( attr.isNull() ) {
         return PseudoNone;
     }
-    QString u = QString::fromRawData(attr.unicode(), attr.length());
-    if ( !u.contains("://") ) {
-        if ( u[0] == '/' )
-            u = encodedurl.host + u;
-        else if ( u[0] == '#' )
-            u = encodedurl.file + u;
-        else
-            u = encodedurl.path + u;
-        cleanpath( u );
-    }
-    //completeURL( attr.string() );
+    QString rel = QString::fromRawData(attr.unicode(), attr.length());
+
+    // Note: we do want to check both fragment and query here.
+    KUrl absolute(encodedurl, rel);
+    absolute.cleanPath();
+
+    QString u = absolute.url();
+    
+    kDebug() << u;
+
     bool contains = KHTMLGlobal::vLinks()->contains( u );
     if ( !contains && u.count('/')==2 )
       contains = KHTMLGlobal::vLinks()->contains( u+'/' );
+    kDebug() << u << "was found?" << contains;
     return contains ? PseudoVisited : PseudoLink;
 }
 

7x tests

A lot of the fail due to XSS checks disabling all access to XML. Below fixes it:

Index: khtml_part.cpp
===================================================================
--- khtml_part.cpp	(revision 873693)
+++ khtml_part.cpp	(working copy)
@@ -5314,7 +5321,7 @@
   if (callingHtmlPart == this)
     return true; // trivial
 
-  if (htmlDocument().isNull()) {
+  if (!xmlDocImpl()) {
 #ifdef DEBUG_FINDFRAME
     kDebug(6050) << "Empty part" << this << "URL = " << url();
 #endif
@@ -5322,9 +5329,9 @@
   }
 
   // now compare the domains
-  if (callingHtmlPart && callingHtmlPart->docImpl() && docImpl())  {
-    DOM::DOMString actDomain = callingHtmlPart->docImpl()->domain();
-    DOM::DOMString destDomain = docImpl()->domain();
+  if (callingHtmlPart && callingHtmlPart->xmlDocImpl() && xmlDocImpl())  {
+    DOM::DOMString actDomain = callingHtmlPart->xmlDocImpl()->domain();
+    DOM::DOMString destDomain = xmlDocImpl()->domain();
 
 #ifdef DEBUG_FINDFRAME
     kDebug(6050) << "actDomain =" << actDomain.string() << "destDomain =" << destDomain.string();
Index: ecma/kjs_window.cpp
===================================================================
--- ecma/kjs_window.cpp	(revision 873693)
+++ ecma/kjs_window.cpp	(working copy)
@@ -1312,17 +1313,13 @@
   if ( !part->xmlDocImpl() )
     return true; // allow to access a window that was just created (e.g. with window.open("about:blank"))
 
-  DOM::HTMLDocumentImpl* thisDocument = part->docImpl();
-  if ( !thisDocument ) {
-    kDebug(6070) << "Window::isSafeScript: trying to access an XML document !?";
-    return false;
-  }
+  DOM::DocumentImpl* thisDocument = part->xmlDocImpl();
 
   KHTMLPart *activeKHTMLPart = qobject_cast<KHTMLPart*>(activePart);
   if (!activeKHTMLPart)
     return true; // not a KHTMLPart
 
-  DOM::HTMLDocumentImpl* actDocument = activeKHTMLPart->docImpl();
+  DOM::DocumentImpl* actDocument = activeKHTMLPart->xmlDocImpl();
   if ( !actDocument ) {
     kDebug(6070) << "Window::isSafeScript: active part has no document!";
     return false;

#71 testcase

With above, we fail at least the following in #71:

<script>
var doc = document;
doc.open();
doc.write("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"><title><\/title><span><\/span><script type=\"text/javascript\"><\/script>");
doc.close();
alert(document.childNodes.length);
alert(document.childNodes[0]);
alert(document.childNodes[1]);
</script>

This is fixed by below, but integrated test still fails:

--- html/htmlparser.cpp (revision 873693)
+++ html/htmlparser.cpp (working copy)
@@ -316,7 +316,8 @@
 void KHTMLParser::parseDoctypeToken(DoctypeToken* t)
 {
     // Ignore any doctype after the first. TODO It should be also ignored when processing DocumentFragment
-    if (current != document || document->doctype())
+    // We do want to accept it even after we have a document, however.
+    if (document->doctype())
         return;

     DocumentTypeImpl* doctype = new DocumentTypeImpl(document->implementation(), document, t->name, t->publicID, t->systemID);