diff --git a/.gitattributes b/.gitattributes index d8664ed..10aee3c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,6 +1,3 @@ /.* export-ignore -/Src/Sunra/PhpSimple/simplehtmldom_1_5/app export-ignore -/Src/Sunra/PhpSimple/simplehtmldom_1_5/example export-ignore -/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual export-ignore -/Src/Sunra/PhpSimple/simplehtmldom_1_5/testcase export-ignore -/Src/Sunra/PhpSimple/simplehtmldom_1_5/change_log.txt export-ignore +/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example export-ignore +/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual export-ignore \ No newline at end of file diff --git a/README.md b/README.md index 945a683..6fec25f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ php-simple-html-dom-parser ========================== -Version 1.5.2 +Version 1.8.1 Adaptation for Composer and PSR-0 of: @@ -20,7 +20,7 @@ Install composer.phar ```json "require": { - "sunra/php-simple-html-dom-parser": "1.5.2" + "sunra/php-simple-html-dom-parser": "1.8.1" } ``` diff --git a/Src/Sunra/PhpSimple/HtmlDomParser.php b/Src/Sunra/PhpSimple/HtmlDomParser.php index 4f3d013..6e4061a 100644 --- a/Src/Sunra/PhpSimple/HtmlDomParser.php +++ b/Src/Sunra/PhpSimple/HtmlDomParser.php @@ -2,22 +2,22 @@ namespace Sunra\PhpSimple; -require 'simplehtmldom_1_5'.DIRECTORY_SEPARATOR.'simple_html_dom.php'; +require 'simplehtmldom_1_8_1'.DIRECTORY_SEPARATOR.'simple_html_dom.php'; class HtmlDomParser { /** - * @return \simplehtmldom_1_5\simple_html_dom + * @return \simplehtmldom_1_8_1\simple_html_dom */ static public function file_get_html() { - return call_user_func_array ( '\simplehtmldom_1_5\file_get_html' , func_get_args() ); + return call_user_func_array ( '\simplehtmldom_1_8_1\file_get_html' , func_get_args() ); } /** * get html dom from string - * @return \simplehtmldom_1_5\simple_html_dom + * @return \simplehtmldom_1_8_1\simple_html_dom */ static public function str_get_html() { - return call_user_func_array ( '\simplehtmldom_1_5\str_get_html' , 
func_get_args() ); + return call_user_func_array ( '\simplehtmldom_1_8_1\str_get_html' , func_get_args() ); } } \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/google.htm b/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/google.htm deleted file mode 100644 index c705a5e..0000000 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/google.htm +++ /dev/null @@ -1,891 +0,0 @@ - - - - - - - FootballScoresLive - Previous Results - - - - - - - - -
- - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
- - FootballScoresLive logo - - - - - -
- - For FREE goals to your mobile - Click Here - - - - - -
- - - -   -
-
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - Add this page to your favourites - -
Season Ticket Goal Alerts - coming soon
Mobile Goal Alerts
- English Leagues
English National Team - coming soon
- - Scottish Leagues
- - Spanish Leagues
- - Italian Leagues
Live Scores
Football Supermarket - coming soon
Previous Results
Future Fixtures
League Tables - coming soon
Sponsored Links - coming soon
 
Personalise
- -
- -
-
- - - -
- - - - - - - - - -
-
- - - - -
- -
-
-
- - - - - - - -
- - English Leagues goal alerts by SMS - - Scottish Leagues goal alerts by SMS - - Spanish Leagues goal alerts by SMS - - Italian Leagues goal alerts by SMS
-
-
-
-
-
-
- - -
- -

Historical Football Archive - Updated Daily!

-
-

Missed the details of your football club's latest performance? - Whether you support Manchester United or Cambridge United, Premiership club or Conference club, - you will find it all here, with our fantastic soccer library of results over the last two weeks. - These pages contain the history & detailed results package for every football game from around the world - from the last 14 match days, including not only every goal scored, but, game results, names of goal scorer, - match results, red cards and plenty more! This service is automatically updated every 24 hours, - simply select a date from the drop down menu :- -

- - -

- - - - - - - - - -
-  SCOTLAND - DIVISION 2
-
- - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 19:45 - Pst - -   - -   - -   - STRANRAER00PETERHEAD -   - -   - -   - -   - -   -
-
- -
- -
- - - - - - - - - - -
-  ENGLAND - FA TROPHY - FIRST ROUND
-
- - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 19:45 - FT - -   - -   - -   - SWINDON SUPERMARINE10EASTBOURNE BOROUGH -   - -   - -   - - - - 74goalEDENBOROUGH -
-
- -
- -
- - - - - - - - - - -
-  EUROPE (UEFA) - UEFA CUP - GROUP STAGE
-
- - - - - - - - - - - - - - - - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 19:45 - FT - -   - -   - -   - AC MILAN22VfL WOLFSBURG -   - -   - -   - -   - - 81goalSAGLIK -
- 19:45 - FT - -   - - 1 - -   - AJAX22SLAVIA PRAHA -   - -   - -   - -   - - 90penaltySUAREZ -
- 19:45 - FT - -   - -   - -   - CLUB BRUGGE01FC COPENHAGEN -   - -   - -   - -   - - 58goalSANTIN -
- 19:45 - FT - -   - -   - -   - DEP. LA CORUNA10NANCY -   - -   - -   - -   - - 74goalBODIPO -
- 19:45 - FT - -   - -   - -   - FEYENOORD01LECH POZNAN -   - -   - -   - -   - - 26goalDJURDJEVIC -
- 19:45 - FT - -   - -   - -   - HAMBURGER SV31ASTON VILLA - 1 - -   - -   - -   - - 84red cardSIDWELL -
- 19:45 - FT - -   - -   - -   - PORTSMOUTH30HEERENVEEN -   - -   - -   - -   - - 90goalHREIDARSSON -
- 19:45 - FT - -   - -   - -   - ST ETIENNE22VALENCIA -   - -   - -   - -   - - 72goalZIGIC -
-
- -
- -
- - - - - - - - - - -
-  INTERNATIONAL TOURNAMENTS - OMAN FOUR NATIONS TOUR
-
- - - - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 14:00 - FT - -   - -   - -   - OMAN31CHINA -   - -   - -   - - - - 84goalSALEH -
- 16:30 - FT - -   - -   - -   - ECUADOR10IRAN - 1 - -   - -   - - - - 90red cardNEJAD -
-
- -
- -
- - - - - - - - - - -
-  ITALY - FA CUP - ROUND 16
-
- - - - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 16:00 - FT - -   - -   - -   - FIORENTINA01TORINO -   - -   - -   - - - - 19goalBIANCHI -
- 20:00 - FT - -   - -   - -   - ROMA20BOLOGNA -   - -   - -   - - - - 86goalVUCINIC -
-
- -
- -
- - - - - - - - - - -
-  ITALY - SERIE C1A
-
- - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 13:30 - FT - -   - -   - -   - PRO SESTO00LECCO -   - -   - -   - -   - -   -
-
- -
- -
- - - - - - - - - - -
-  ITALY - SERIE C2A
-
- - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 13:30 - FT - -   - -   - -   - MEZZOCORONA13SAMBONIFACESE -   - -   - -   - -   - - 74goal  -
-
- -
- -
- - - - - - - - - - -
-  SCOTLAND - FA CUP - THIRD ROUND
-
- - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 13:30 - FT - -   - -   - -   - LOCHEE UTD11AYR UTD -   - -   - -   - - - - 86goalHAGAN -
-
- -
- -
- - - - - - - - - - -
-  WORLD (FIFA) - FIFA CLUB WORLD CHAMPIONSHIP - SEMI
-
- - - - - - -
StartStatMinRPOHome AwayRPOLatest
- 10:30 - FT - -   - -   - -   - PACHUCA (MEX)02LIGA D.U. QUITO(ECU) -   - -   - -   - - - - 26goalBOLANOS -
-
- -
- -
- - - - -
- - - - -
- View complete archive of FootballScoresLive.com Match Results -
- -
- - - - - - - -
- - - -
-  |  - Disclaimer |  - Privacy |  - About Us |  - News Archive |  -
- -
- - - - - -
- - \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/index.php b/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/index.php deleted file mode 100644 index 189aa5a..0000000 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/index.php +++ /dev/null @@ -1,144 +0,0 @@ -find('html', 0); -if ($l!==null) - $lang = $l->lang; -if ($lang!='') - $lang = 'lang="'.$lang.'"'; - -$charset = $html->find('meta[http-equiv*=content-type]', 0); -$target = array(); -$query = ''; - -if (isset($_REQUEST['query'])) { - $query = $_REQUEST['query']; - $target = $html->find($query); -} - -function stat_dom($dom) { - $count_text = 0; - $count_comm = 0; - $count_elem = 0; - $count_tag_end = 0; - $count_unknown = 0; - - foreach($dom->nodes as $n) { - if ($n->nodetype==HDOM_TYPE_TEXT) - ++$count_text; - if ($n->nodetype==HDOM_TYPE_COMMENT) - ++$count_comm; - if ($n->nodetype==HDOM_TYPE_ELEMENT) - ++$count_elem; - if ($n->nodetype==HDOM_TYPE_ENDTAG) - ++$count_tag_end; - if ($n->nodetype==HDOM_TYPE_UNKNOWN) - ++$count_unknown; - } - - echo 'Total: '. count($dom->nodes). - ', Text: '.$count_text. - ', Commnet: '.$count_comm. - ', Tag: '.$count_elem. - ', End Tag: '.$count_tag_end. - ', Unknown: '.$count_unknown; -} - -function dump_my_html_tree($node, $show_attr=true, $deep=0, $last=true) { - $count = count($node->nodes); - if ($count>0) { - if($last) - echo '\n"; -} -?> - - -> - - '; - else if ($charset) - echo $charset; - else - echo ''; - ?> - Simple HTML DOM Query Test - - - - - - - - -
-

Simple HTML DOM Test

-
- find: - -
-
- HTML STAT ()
-
-
Collapse All | Expand All

- -
- - \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/images/treeview-default-line.gif b/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/images/treeview-default-line.gif deleted file mode 100644 index 37114d3..0000000 Binary files a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/images/treeview-default-line.gif and /dev/null differ diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/images/treeview-default.gif b/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/images/treeview-default.gif deleted file mode 100644 index a12ac52..0000000 Binary files a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/images/treeview-default.gif and /dev/null differ diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/jquery.js b/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/jquery.js deleted file mode 100644 index b660baa..0000000 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/app/js/jquery.js +++ /dev/null @@ -1,3363 +0,0 @@ -(function(){ -/* - * jQuery 1.2.2b2 - New Wave Javascript - * - * Copyright (c) 2007 John Resig (jquery.com) - * Dual licensed under the MIT (MIT-LICENSE.txt) - * and GPL (GPL-LICENSE.txt) licenses. 
- * - * $Date: 2007-12-20 14:36:56 +0100 (Don, 20 Dez 2007) $ - * $Rev: 4251 $ - */ - -// Map over jQuery in case of overwrite -if ( window.jQuery ) - var _jQuery = window.jQuery; - -var jQuery = window.jQuery = function( selector, context ) { - // The jQuery object is actually just the init constructor 'enhanced' - return new jQuery.prototype.init( selector, context ); -}; - -// Map over the $ in case of overwrite -if ( window.$ ) - var _$ = window.$; - -// Map the jQuery namespace to the '$' one -window.$ = jQuery; - -// A simple way to check for HTML strings or ID strings -// (both of which we optimize for) -var quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#(\w+)$/; - -// Is it a simple selector -var isSimple = /^.[^:#\[\.]*$/; - -jQuery.fn = jQuery.prototype = { - init: function( selector, context ) { - // Make sure that a selection was provided - selector = selector || document; - - // Handle $(DOMElement) - if ( selector.nodeType ) { - this[0] = selector; - this.length = 1; - return this; - - // Handle HTML strings - } else if ( typeof selector == "string" ) { - // Are we dealing with HTML string or an ID? 
- var match = quickExpr.exec( selector ); - - // Verify a match, and that no context was specified for #id - if ( match && (match[1] || !context) ) { - - // HANDLE: $(html) -> $(array) - if ( match[1] ) - selector = jQuery.clean( [ match[1] ], context ); - - // HANDLE: $("#id") - else { - var elem = document.getElementById( match[3] ); - - // Make sure an element was located - if ( elem ) - // Handle the case where IE and Opera return items - // by name instead of ID - if ( elem.id != match[3] ) - return jQuery().find( selector ); - - // Otherwise, we inject the element directly into the jQuery object - else { - this[0] = elem; - this.length = 1; - return this; - } - - else - selector = []; - } - - // HANDLE: $(expr, [context]) - // (which is just equivalent to: $(content).find(expr) - } else - return new jQuery( context ).find( selector ); - - // HANDLE: $(function) - // Shortcut for document ready - } else if ( jQuery.isFunction( selector ) ) - return new jQuery( document )[ jQuery.fn.ready ? "ready" : "load" ]( selector ); - - return this.setArray( - // HANDLE: $(array) - selector.constructor == Array && selector || - - // HANDLE: $(arraylike) - // Watch for when an array-like object, contains DOM nodes, is passed in as the selector - (selector.jquery || selector.length && selector != window && !selector.nodeType && selector[0] != undefined && selector[0].nodeType) && jQuery.makeArray( selector ) || - - // HANDLE: $(*) - [ selector ] ); - }, - - // The current version of jQuery being used - jquery: "@VERSION", - - // The number of elements contained in the matched element set - size: function() { - return this.length; - }, - - // The number of elements contained in the matched element set - length: 0, - - // Get the Nth element in the matched element set OR - // Get the whole matched element set as a clean array - get: function( num ) { - return num == undefined ? 
- - // Return a 'clean' array - jQuery.makeArray( this ) : - - // Return just the object - this[ num ]; - }, - - // Take an array of elements and push it onto the stack - // (returning the new matched element set) - pushStack: function( elems ) { - // Build a new jQuery matched element set - var ret = jQuery( elems ); - - // Add the old object onto the stack (as a reference) - ret.prevObject = this; - - // Return the newly-formed element set - return ret; - }, - - // Force the current matched set of elements to become - // the specified array of elements (destroying the stack in the process) - // You should use pushStack() in order to do this, but maintain the stack - setArray: function( elems ) { - // Resetting the length to 0, then using the native Array push - // is a super-fast way to populate an object with array-like properties - this.length = 0; - Array.prototype.push.apply( this, elems ); - - return this; - }, - - // Execute a callback for every element in the matched set. - // (You can seed the arguments with an array of args, but this is - // only used internally.) - each: function( callback, args ) { - return jQuery.each( this, callback, args ); - }, - - // Determine the position of an element within - // the matched set of elements - index: function( elem ) { - var ret = -1; - - // Locate the position of the desired element - this.each(function(i){ - if ( this == elem ) - ret = i; - }); - - return ret; - }, - - attr: function( name, value, type ) { - var options = name; - - // Look for the case where we're accessing a style value - if ( name.constructor == String ) - if ( value == undefined ) - return this.length && jQuery[ type || "attr" ]( this[0], name ) || undefined; - - else { - options = {}; - options[ name ] = value; - } - - // Check to see if we're setting style values - return this.each(function(i){ - // Set all the styles - for ( name in options ) - jQuery.attr( - type ? 
- this.style : - this, - name, jQuery.prop( this, options[ name ], type, i, name ) - ); - }); - }, - - css: function( key, value ) { - // ignore negative width and height values - if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) - value = undefined; - return this.attr( key, value, "curCSS" ); - }, - - text: function( text ) { - if ( typeof text != "object" && text != null ) - return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); - - var ret = ""; - - jQuery.each( text || this, function(){ - jQuery.each( this.childNodes, function(){ - if ( this.nodeType != 8 ) - ret += this.nodeType != 1 ? - this.nodeValue : - jQuery.fn.text( [ this ] ); - }); - }); - - return ret; - }, - - wrapAll: function( html ) { - if ( this[0] ) - // The elements to wrap the target around - jQuery( html, this[0].ownerDocument ) - .clone() - .insertBefore( this[0] ) - .map(function(){ - var elem = this; - - while ( elem.firstChild ) - elem = elem.firstChild; - - return elem; - }) - .append(this); - - return this; - }, - - wrapInner: function( html ) { - return this.each(function(){ - jQuery( this ).contents().wrapAll( html ); - }); - }, - - wrap: function( html ) { - return this.each(function(){ - jQuery( this ).wrapAll( html ); - }); - }, - - append: function() { - return this.domManip(arguments, true, false, function(elem){ - if (this.nodeType == 1) - this.appendChild( elem ); - }); - }, - - prepend: function() { - return this.domManip(arguments, true, true, function(elem){ - if (this.nodeType == 1) - this.insertBefore( elem, this.firstChild ); - }); - }, - - before: function() { - return this.domManip(arguments, false, false, function(elem){ - this.parentNode.insertBefore( elem, this ); - }); - }, - - after: function() { - return this.domManip(arguments, false, true, function(elem){ - this.parentNode.insertBefore( elem, this.nextSibling ); - }); - }, - - end: function() { - return this.prevObject || jQuery( [] ); - }, - - 
find: function( selector ) { - var elems = jQuery.map(this, function(elem){ - return jQuery.find( selector, elem ); - }); - - return this.pushStack( /[^+>] [^+>]/.test( selector ) || selector.indexOf("..") > -1 ? - jQuery.unique( elems ) : - elems ); - }, - - clone: function( events ) { - // Do the clone - var ret = this.map(function(){ - if ( jQuery.browser.msie && !jQuery.isXMLDoc(this) ) { - // IE copies events bound via attachEvent when - // using cloneNode. Calling detachEvent on the - // clone will also remove the events from the orignal - // In order to get around this, we use innerHTML. - // Unfortunately, this means some modifications to - // attributes in IE that are actually only stored - // as properties will not be copied (such as the - // the name attribute on an input). - var clone = this.cloneNode(true), - container = document.createElement("div"), - container2 = document.createElement("div"); - container.appendChild(clone); - container2.innerHTML = container.innerHTML; - return container2.firstChild; - } else - return this.cloneNode(true); - }); - - // Need to set the expando to null on the cloned set if it exists - // removeData doesn't work here, IE removes it from the original as well - // this is primarily for IE but the data expando shouldn't be copied over in any browser - var clone = ret.find("*").andSelf().each(function(){ - if ( this[ expando ] != undefined ) - this[ expando ] = null; - }); - - // Copy the events from the original to the clone - if ( events === true ) - this.find("*").andSelf().each(function(i){ - var events = jQuery.data( this, "events" ); - - for ( var type in events ) - for ( var handler in events[ type ] ) - jQuery.event.add( clone[ i ], type, events[ type ][ handler ], events[ type ][ handler ].data ); - }); - - // Return the cloned set - return ret; - }, - - filter: function( selector ) { - return this.pushStack( - jQuery.isFunction( selector ) && - jQuery.grep(this, function(elem, i){ - return selector.call( elem, i 
); - }) || - - jQuery.multiFilter( selector, this ) ); - }, - - not: function( selector ) { - if ( selector.constructor == String ) - // test special case where just one selector is passed in - if ( isSimple.test( selector ) ) - return this.pushStack( jQuery.multiFilter( selector, this, true ) ); - else - selector = jQuery.multiFilter( selector, this ); - - var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; - return this.filter(function() { - return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; - }); - }, - - add: function( selector ) { - return !selector ? this : this.pushStack( jQuery.merge( - this.get(), - selector.constructor == String ? - jQuery( selector ).get() : - selector.length != undefined && (!selector.nodeName || jQuery.nodeName(selector, "form")) ? - selector : [selector] ) ); - }, - - is: function( selector ) { - return selector ? - jQuery.multiFilter( selector, this ).length > 0 : - false; - }, - - hasClass: function( selector ) { - return this.is( "." + selector ); - }, - - val: function( value ) { - if ( value == undefined ) { - - if ( this.length ) { - var elem = this[0]; - - // We need to handle select boxes special - if ( jQuery.nodeName( elem, "select" ) ) { - var index = elem.selectedIndex, - values = [], - options = elem.options, - one = elem.type == "select-one"; - - // Nothing was selected - if ( index < 0 ) - return null; - - // Loop through all the selected options - for ( var i = one ? index : 0, max = one ? index + 1 : options.length; i < max; i++ ) { - var option = options[ i ]; - - if ( option.selected ) { - // Get the specifc value for the option - value = jQuery.browser.msie && !option.attributes.value.specified ? 
option.text : option.value; - - // We don't need an array for one selects - if ( one ) - return value; - - // Multi-Selects return an array - values.push( value ); - } - } - - return values; - - // Everything else, we just grab the value - } else - return (this[0].value || "").replace(/\r/g, ""); - - } - - return undefined; - } - - return this.each(function(){ - if ( this.nodeType != 1 ) - return; - - if ( value.constructor == Array && /radio|checkbox/.test( this.type ) ) - this.checked = (jQuery.inArray(this.value, value) >= 0 || - jQuery.inArray(this.name, value) >= 0); - - else if ( jQuery.nodeName( this, "select" ) ) { - var values = value.constructor == Array ? - value : - [ value ]; - - jQuery( "option", this ).each(function(){ - this.selected = (jQuery.inArray( this.value, values ) >= 0 || - jQuery.inArray( this.text, values ) >= 0); - }); - - if ( !values.length ) - this.selectedIndex = -1; - - } else - this.value = value; - }); - }, - - html: function( value ) { - return value == undefined ? - (this.length ? 
- this[0].innerHTML : - null) : - this.empty().append( value ); - }, - - replaceWith: function( value ) { - return this.after( value ).remove(); - }, - - eq: function( i ) { - return this.slice( i, i + 1 ); - }, - - slice: function() { - return this.pushStack( Array.prototype.slice.apply( this, arguments ) ); - }, - - map: function( callback ) { - return this.pushStack( jQuery.map(this, function(elem, i){ - return callback.call( elem, i, elem ); - })); - }, - - andSelf: function() { - return this.add( this.prevObject ); - }, - - domManip: function( args, table, reverse, callback ) { - var clone = this.length > 1, elems; - - return this.each(function(){ - if ( !elems ) { - elems = jQuery.clean( args, this.ownerDocument ); - - if ( reverse ) - elems.reverse(); - } - - var obj = this; - - if ( table && jQuery.nodeName( this, "table" ) && jQuery.nodeName( elems[0], "tr" ) ) - obj = this.getElementsByTagName("tbody")[0] || this.appendChild( this.ownerDocument.createElement("tbody") ); - - var scripts = jQuery( [] ); - - jQuery.each(elems, function(){ - var elem = clone ? 
- jQuery( this ).clone( true )[0] : - this; - - // execute all scripts after the elements have been injected - if ( jQuery.nodeName( elem, "script" ) ) { - scripts = scripts.add( elem ); - } else { - // Remove any inner scripts for later evaluation - if ( elem.nodeType == 1 ) - scripts = scripts.add( jQuery( "script", elem ).remove() ); - - // Inject the elements into the document - callback.call( obj, elem ); - } - }); - - scripts.each( evalScript ); - }); - } -}; - -// Give the init function the jQuery prototype for later instantiation -jQuery.prototype.init.prototype = jQuery.prototype; - -function evalScript( i, elem ) { - if ( elem.src ) - jQuery.ajax({ - url: elem.src, - async: false, - dataType: "script" - }); - - else - jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); - - if ( elem.parentNode ) - elem.parentNode.removeChild( elem ); -} - -jQuery.extend = jQuery.fn.extend = function() { - // copy reference to target object - var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; - - // Handle a deep copy situation - if ( target.constructor == Boolean ) { - deep = target; - target = arguments[1] || {}; - // skip the boolean and the target - i = 2; - } - - // Handle case when target is a string or something (possible in deep copy) - if ( typeof target != "object" && typeof target != "function" ) - target = {}; - - // extend jQuery itself if only one argument is passed - if ( length == 1 ) { - target = this; - i = 0; - } - - for ( ; i < length; i++ ) - // Only deal with non-null/undefined values - if ( (options = arguments[ i ]) != null ) - // Extend the base object - for ( var name in options ) { - // Prevent never-ending loop - if ( target === options[ name ] ) - continue; - - // Recurse if we're merging object values - if ( deep && options[ name ] && typeof options[ name ] == "object" && target[ name ] && !options[ name ].nodeType ) - target[ name ] = jQuery.extend( target[ name ], options[ name ] 
); - - // Don't bring in undefined values - else if ( options[ name ] != undefined ) - target[ name ] = options[ name ]; - - } - - // Return the modified object - return target; -}; - -var expando = "jQuery" + (new Date()).getTime(), uuid = 0, windowData = {}; - -// exclude the following css properties to add px -var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i; - -jQuery.extend({ - noConflict: function( deep ) { - window.$ = _$; - - if ( deep ) - window.jQuery = _jQuery; - - return jQuery; - }, - - // This may seem like some crazy code, but trust me when I say that this - // is the only cross-browser way to do this. --John - isFunction: function( fn ) { - return !!fn && typeof fn != "string" && !fn.nodeName && - fn.constructor != Array && /function/i.test( fn + "" ); - }, - - // check if an element is in a (or is an) XML document - isXMLDoc: function( elem ) { - return elem.documentElement && !elem.body || - elem.tagName && elem.ownerDocument && !elem.ownerDocument.body; - }, - - // Evalulates a script in a global context - globalEval: function( data ) { - data = jQuery.trim( data ); - - if ( data ) { - // Inspired by code by Andrea Giammarchi - // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html - var head = document.getElementsByTagName("head")[0] || document.documentElement, - script = document.createElement("script"); - - script.type = "text/javascript"; - if ( jQuery.browser.msie ) - script.text = data; - else - script.appendChild( document.createTextNode( data ) ); - - head.appendChild( script ); - head.removeChild( script ); - } - }, - - nodeName: function( elem, name ) { - return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); - }, - - cache: {}, - - data: function( elem, name, data ) { - elem = elem == window ? 
- windowData : - elem; - - var id = elem[ expando ]; - - // Compute a unique ID for the element - if ( !id ) - id = elem[ expando ] = ++uuid; - - // Only generate the data cache if we're - // trying to access or manipulate it - if ( name && !jQuery.cache[ id ] ) - jQuery.cache[ id ] = {}; - - // Prevent overriding the named cache with undefined values - if ( data != undefined ) - jQuery.cache[ id ][ name ] = data; - - // Return the named cache data, or the ID for the element - return name ? - jQuery.cache[ id ][ name ] : - id; - }, - - removeData: function( elem, name ) { - elem = elem == window ? - windowData : - elem; - - var id = elem[ expando ]; - - // If we want to remove a specific section of the element's data - if ( name ) { - if ( jQuery.cache[ id ] ) { - // Remove the section of cache data - delete jQuery.cache[ id ][ name ]; - - // If we've removed all the data, remove the element's cache - name = ""; - - for ( name in jQuery.cache[ id ] ) - break; - - if ( !name ) - jQuery.removeData( elem ); - } - - // Otherwise, we want to remove all of the element's data - } else { - // Clean up the element expando - try { - delete elem[ expando ]; - } catch(e){ - // IE has trouble directly removing the expando - // but it's ok with using removeAttribute - if ( elem.removeAttribute ) - elem.removeAttribute( expando ); - } - - // Completely remove the data cache - delete jQuery.cache[ id ]; - } - }, - - // args is for internal usage only - each: function( object, callback, args ) { - if ( args ) { - if ( object.length == undefined ) - for ( var name in object ) - callback.apply( object[ name ], args ); - else - for ( var i = 0, length = object.length; i < length; i++ ) - if ( callback.apply( object[ i ], args ) === false ) - break; - - // A special, fast, case for the most common use of each - } else { - if ( object.length == undefined ) - for ( var name in object ) - callback.call( object[ name ], name, object[ name ] ); - else - for ( var i = 0, length = 
object.length, value = object[0]; - i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} - } - - return object; - }, - - prop: function( elem, value, type, i, name ) { - // Handle executable functions - if ( jQuery.isFunction( value ) ) - value = value.call( elem, i ); - - // Handle passing in a number to a CSS property - return value && value.constructor == Number && type == "curCSS" && !exclude.test( name ) ? - value + "px" : - value; - }, - - className: { - // internal only, use addClass("class") - add: function( elem, classNames ) { - jQuery.each((classNames || "").split(/\s+/), function(i, className){ - if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) - elem.className += (elem.className ? " " : "") + className; - }); - }, - - // internal only, use removeClass("class") - remove: function( elem, classNames ) { - if (elem.nodeType == 1) - elem.className = classNames != undefined ? - jQuery.grep(elem.className.split(/\s+/), function(className){ - return !jQuery.className.has( classNames, className ); - }).join(" ") : - ""; - }, - - // internal only, use is(".class") - has: function( elem, className ) { - return jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; - } - }, - - // A method for quickly swapping in/out CSS properties to get correct calculations - swap: function( elem, options, callback ) { - var old = {}; - // Remember the old values, and insert the new ones - for ( var name in options ) { - old[ name ] = elem.style[ name ]; - elem.style[ name ] = options[ name ]; - } - - callback.call( elem ); - - // Revert the old values - for ( var name in options ) - elem.style[ name ] = old[ name ]; - }, - - css: function( elem, name, force ) { - if ( name == "width" || name == "height" ) { - var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? 
[ "Left", "Right" ] : [ "Top", "Bottom" ]; - - function getWH() { - val = name == "width" ? elem.offsetWidth : elem.offsetHeight; - var padding = 0, border = 0; - jQuery.each( which, function() { - padding += parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; - border += parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; - }); - val -= Math.round(padding + border); - } - - if ( jQuery(elem).is(":visible") ) - getWH(); - else - jQuery.swap( elem, props, getWH ); - - return Math.max(0, val); - } - - return jQuery.curCSS( elem, name, force ); - }, - - curCSS: function( elem, name, force ) { - var ret; - - // A helper method for determining if an element's values are broken - function color( elem ) { - if ( !jQuery.browser.safari ) - return false; - - var ret = document.defaultView.getComputedStyle( elem, null ); - return !ret || ret.getPropertyValue("color") == ""; - } - - // We need to handle opacity special in IE - if ( name == "opacity" && jQuery.browser.msie ) { - ret = jQuery.attr( elem.style, "opacity" ); - - return ret == "" ? 
- "1" : - ret; - } - // Opera sometimes will give the wrong display answer, this fixes it, see #2037 - if ( jQuery.browser.opera && name == "display" ) { - var save = elem.style.display; - elem.style.display = "block"; - elem.style.display = save; - } - - // Make sure we're using the right name for getting the float value - if ( name.match( /float/i ) ) - name = styleFloat; - - if ( !force && elem.style[ name ] ) - ret = elem.style[ name ]; - - else if ( document.defaultView && document.defaultView.getComputedStyle ) { - - // Only "float" is needed here - if ( name.match( /float/i ) ) - name = "float"; - - name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); - - var getComputedStyle = document.defaultView.getComputedStyle( elem, null ); - - if ( getComputedStyle && !color( elem ) ) - ret = getComputedStyle.getPropertyValue( name ); - - // If the element isn't reporting its values properly in Safari - // then some display: none elements are involved - else { - var swap = [], stack = []; - - // Locate all of the parent display: none elements - for ( var a = elem; a && color(a); a = a.parentNode ) - stack.unshift(a); - - // Go through and make them visible, but in reverse - // (It would be better if we knew the exact display type that they had) - for ( var i = 0; i < stack.length; i++ ) - if ( color( stack[ i ] ) ) { - swap[ i ] = stack[ i ].style.display; - stack[ i ].style.display = "block"; - } - - // Since we flip the display style, we have to handle that - // one special, otherwise get the value - ret = name == "display" && swap[ stack.length - 1 ] != null ? 
- "none" : - ( getComputedStyle && getComputedStyle.getPropertyValue( name ) ) || ""; - - // Finally, revert the display styles back - for ( var i = 0; i < swap.length; i++ ) - if ( swap[ i ] != null ) - stack[ i ].style.display = swap[ i ]; - } - - // We should always get a number back from opacity - if ( name == "opacity" && ret == "" ) - ret = "1"; - - } else if ( elem.currentStyle ) { - var camelCase = name.replace(/\-(\w)/g, function(all, letter){ - return letter.toUpperCase(); - }); - - ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; - - // From the awesome hack by Dean Edwards - // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 - - // If we're not dealing with a regular pixel number - // but a number that has a weird ending, we need to convert it to pixels - if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { - // Remember the original values - var style = elem.style.left, runtimeStyle = elem.runtimeStyle.left; - - // Put in the new values to get a computed value out - elem.runtimeStyle.left = elem.currentStyle.left; - elem.style.left = ret || 0; - ret = elem.style.pixelLeft + "px"; - - // Revert the changed values - elem.style.left = style; - elem.runtimeStyle.left = runtimeStyle; - } - } - - return ret; - }, - - clean: function( elems, context ) { - var ret = []; - context = context || document; - // !context.createElement fails in IE with an error but returns typeof 'object' - if (typeof context.createElement == 'undefined') - context = context.ownerDocument || context[0] && context[0].ownerDocument || document; - - jQuery.each(elems, function(i, elem){ - if ( !elem ) - return; - - if ( elem.constructor == Number ) - elem = elem.toString(); - - // Convert html string into DOM nodes - if ( typeof elem == "string" ) { - // Fix "XHTML"-style tags in all browsers - elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ - return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area)$/i) ? 
- all : - front + ">"; - }); - - // Trim whitespace, otherwise indexOf won't work as expected - var tags = jQuery.trim( elem ).toLowerCase(), div = context.createElement("div"); - - var wrap = - // option or optgroup - !tags.indexOf("", "" ] || - - !tags.indexOf("", "" ] || - - tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && - [ 1, "", "
" ] || - - !tags.indexOf("", "" ] || - - // matched above - (!tags.indexOf("", "" ] || - - !tags.indexOf("", "" ] || - - // IE can't serialize and - - - -
-

W3C - -

Selectors

- -

W3C Working Draft 15 December 2005

- -
- -
This version: - -
- http://www.w3.org/TR/2005/WD-css3-selectors-20051215 - -
Latest version: - -
- http://www.w3.org/TR/css3-selectors - -
Previous version: - -
- http://www.w3.org/TR/2001/CR-css3-selectors-20011113 - -
Editors: - -
Daniel Glazman (Invited Expert)
- -
Tantek Çelik (Invited Expert) - -
Ian Hickson (Google) - -
Peter Linss (former editor, Netscape/AOL) - -
John Williams (former editor, Quark, Inc.) - -
- -
- -
- -

Abstract

- -

Selectors are patterns that match against elements in a - tree. Selectors have been optimized for use with HTML and XML, and - are designed to be usable in performance-critical code.

- -

CSS (Cascading - Style Sheets) is a language for describing the rendering of HTML and XML documents on - screen, on paper, in speech, etc. CSS uses Selectors for binding - style properties to elements in the document. This document - describes extensions to the selectors defined in CSS level 2. These - extended selectors will be used by CSS level 3. - -

Selectors define the following function:

- -
expression ∗ element → boolean
- -

That is, given an element and a selector, this specification - defines whether that element matches the selector.

- -

These expressions can also be used, for instance, to select a set - of elements, or a single element from a set of elements, by - evaluating the expression across all the elements in a - subtree. STTS (Simple Tree Transformation Sheets), a - language for transforming XML trees, uses this mechanism. [STTS]

- -

Status of this document

- -

This section describes the status of this document at the - time of its publication. Other documents may supersede this - document. A list of current W3C publications and the latest revision - of this technical report can be found in the W3C technical reports index at - http://www.w3.org/TR/.

- -

This document describes the selectors that already exist in CSS1 and CSS2, and - also proposes new selectors for CSS3 and other languages that may need them.

- -

The CSS Working Group doesn't expect that all implementations of - CSS3 will have to implement all selectors. Instead, there will - probably be a small number of variants of CSS3, called profiles. For - example, it may be that only a profile for interactive user agents - will include all of the selectors.

- -

This specification is a last call working draft for the CSS Working Group - (Style Activity). This - document is a revision of the Candidate - Recommendation dated 2001 November 13, and has incorporated - implementation feedback received in the past few years. It is - expected that this last call will proceed straight to Proposed - Recommendation stage since it is believed that interoperability will - be demonstrable.

- -

All persons are encouraged to review and implement this - specification and return comments to the (archived) - public mailing list www-style - (see instructions). W3C - Members can also send comments directly to the CSS Working - Group. - The deadline for comments is 14 January 2006.

- -

This is still a draft document and may be updated, replaced, or - obsoleted by other documents at any time. It is inappropriate to - cite a W3C Working Draft as other than "work in progress". - -

This document may be available in translation. - The English version of this specification is the only normative - version. - -

- -

Table of contents

- - - -
- -

1. Introduction

- -

1.1. Dependencies

- -

Some features of this specification are specific to CSS, or have - particular limitations or rules specific to CSS. In this - specification, these have been described in terms of CSS2.1. [CSS21]

- -

1.2. Terminology

- -

All of the text of this specification is normative except - examples, notes, and sections explicitly marked as - non-normative.

- -

1.3. Changes from CSS2

- -

This section is non-normative.

- -

The main differences between the selectors in CSS2 and those in - Selectors are: - -

    - -
  • the list of basic definitions (selector, group of selectors, - simple selector, etc.) has been changed; in particular, what was - referred to in CSS2 as a simple selector is now called a sequence - of simple selectors, and the term "simple selector" is now used for - the components of this sequence
  • - -
  • an optional namespace component is now allowed in type element - selectors, the universal selector and attribute selectors
  • - -
  • a new combinator has been introduced
  • - -
  • new simple selectors including substring matching attribute - selectors, and new pseudo-classes
  • - -
  • new pseudo-elements, and introduction of the "::" convention - for pseudo-elements
  • - -
  • the grammar has been rewritten
  • - -
  • profiles to be added to specifications integrating Selectors - and defining the set of selectors which is actually supported by - each specification
  • - -
  • Selectors are now a CSS3 Module and an independent - specification; other specifications can now refer to this document - independently of CSS
  • - -
  • the specification now has its own test suite
  • - -
- -

2. Selectors

- -

This section is non-normative, as it merely summarizes the -following sections.

- -

A Selector represents a structure. This structure can be used as a -condition (e.g. in a CSS rule) that determines which elements a -selector matches in the document tree, or as a flat description of the -HTML or XML fragment corresponding to that structure.

- -

Selectors may range from simple element names to rich contextual -representations.

- -

The following table summarizes the Selector syntax:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PatternMeaningDescribed in sectionFirst defined in CSS level
*any elementUniversal - selector2
Ean element of type EType selector1
E[foo]an E element with a "foo" attributeAttribute - selectors2
E[foo="bar"]an E element whose "foo" attribute value is exactly - equal to "bar"Attribute - selectors2
E[foo~="bar"]an E element whose "foo" attribute value is a list of - space-separated values, one of which is exactly equal to "bar"Attribute - selectors2
E[foo^="bar"]an E element whose "foo" attribute value begins exactly - with the string "bar"Attribute - selectors3
E[foo$="bar"]an E element whose "foo" attribute value ends exactly - with the string "bar"Attribute - selectors3
E[foo*="bar"]an E element whose "foo" attribute value contains the - substring "bar"Attribute - selectors3
E[hreflang|="en"]an E element whose "hreflang" attribute has a hyphen-separated - list of values beginning (from the left) with "en"Attribute - selectors2
E:rootan E element, root of the documentStructural - pseudo-classes3
E:nth-child(n)an E element, the n-th child of its parentStructural - pseudo-classes3
E:nth-last-child(n)an E element, the n-th child of its parent, counting - from the last oneStructural - pseudo-classes3
E:nth-of-type(n)an E element, the n-th sibling of its typeStructural - pseudo-classes3
E:nth-last-of-type(n)an E element, the n-th sibling of its type, counting - from the last oneStructural - pseudo-classes3
E:first-childan E element, first child of its parentStructural - pseudo-classes2
E:last-childan E element, last child of its parentStructural - pseudo-classes3
E:first-of-typean E element, first sibling of its typeStructural - pseudo-classes3
E:last-of-typean E element, last sibling of its typeStructural - pseudo-classes3
E:only-childan E element, only child of its parentStructural - pseudo-classes3
E:only-of-typean E element, only sibling of its typeStructural - pseudo-classes3
E:emptyan E element that has no children (including text - nodes)Structural - pseudo-classes3
E:link
E:visited
an E element being the source anchor of a hyperlink of - which the target is not yet visited (:link) or already visited - (:visited)The link - pseudo-classes1
E:active
E:hover
E:focus
an E element during certain user actionsThe user - action pseudo-classes1 and 2
E:targetan E element being the target of the referring URIThe target - pseudo-class3
E:lang(fr)an element of type E in language "fr" (the document - language specifies how language is determined)The :lang() - pseudo-class2
E:enabled
E:disabled
a user interface element E which is enabled or - disabledThe UI element states - pseudo-classes3
E:checkeda user interface element E which is checked (for instance a radio-button or checkbox)The UI element states - pseudo-classes3
E::first-linethe first formatted line of an E elementThe ::first-line - pseudo-element1
E::first-letterthe first formatted letter of an E elementThe ::first-letter - pseudo-element1
E::selectionthe portion of an E element that is currently - selected/highlighted by the userThe UI element - fragments pseudo-elements3
E::beforegenerated content before an E elementThe ::before - pseudo-element2
E::aftergenerated content after an E elementThe ::after - pseudo-element2
E.warningan E element whose class is -"warning" (the document language specifies how class is determined).Class - selectors1
E#myidan E element with ID equal to "myid".ID - selectors1
E:not(s)an E element that does not match simple selector sNegation - pseudo-class3
E Fan F element descendant of an E elementDescendant - combinator1
E > Fan F element child of an E elementChild - combinator2
E + Fan F element immediately preceded by an E elementAdjacent sibling combinator2
E ~ Fan F element preceded by an E elementGeneral sibling combinator3
- -

The meaning of each selector is derived from the table above by -prepending "matches" to the contents of each cell in the "Meaning" -column.

- -

3. Case sensitivity

- -

The case sensitivity of document language element names, attribute -names, and attribute values in selectors depends on the document -language. For example, in HTML, element names are case-insensitive, -but in XML, they are case-sensitive.

- -

4. Selector syntax

- -

A selector is a chain of one -or more sequences of simple selectors -separated by combinators.

- -

A sequence of simple selectors -is a chain of simple selectors -that are not separated by a combinator. It -always begins with a type selector or a -universal selector. No other type -selector or universal selector is allowed in the sequence.

- -

A simple selector is either a type selector, universal selector, attribute selector, class selector, ID selector, content selector, or pseudo-class. One pseudo-element may be appended to the last -sequence of simple selectors.

- -

Combinators are: white space, "greater-than -sign" (U+003E, >), "plus sign" (U+002B, -+) and "tilde" (U+007E, ~). White -space may appear between a combinator and the simple selectors around -it. Only the characters "space" (U+0020), "tab" -(U+0009), "line feed" (U+000A), "carriage return" (U+000D), and "form -feed" (U+000C) can occur in white space. Other space-like characters, -such as "em-space" (U+2003) and "ideographic space" (U+3000), are -never part of white space.

- -

The elements of a document tree that are represented by a selector -are the subjects of the selector. A -selector consisting of a single sequence of simple selectors -represents any element satisfying its requirements. Prepending another -sequence of simple selectors and a combinator to a sequence imposes -additional matching constraints, so the subjects of a selector are -always a subset of the elements represented by the last sequence of -simple selectors.

- -

An empty selector, containing no sequence of simple selectors and -no pseudo-element, is an invalid -selector.

- -

5. Groups of selectors

- -

When several selectors share the same declarations, they may be -grouped into a comma-separated list. (A comma is U+002C.)

- -
-

CSS examples:

-

In this example, we condense three rules with identical -declarations into one. Thus,

-
h1 { font-family: sans-serif }
-h2 { font-family: sans-serif }
-h3 { font-family: sans-serif }
-

is equivalent to:

-
h1, h2, h3 { font-family: sans-serif }
-
- -

Warning: the equivalence is true in this example -because all the selectors are valid selectors. If just one of these -selectors were invalid, the entire group of selectors would be -invalid. This would invalidate the rule for all three heading -elements, whereas in the former case only one of the three individual -heading rules would be invalidated.

- - -

6. Simple selectors

- -

6.1. Type selector

- -

A type selector is the name of a document language -element type. A type selector represents an instance of the element -type in the document tree.

- -
-

Example:

-

The following selector represents an h1 element in the document tree:

-
h1
-
- - -

6.1.1. Type selectors and namespaces

- -

Type selectors allow an optional namespace ([XMLNAMES]) component. A namespace prefix -that has been previously declared may be prepended to the element name -separated by the namespace separator "vertical bar" -(U+007C, |).

- -

The namespace component may be left empty to indicate that the -selector is only to represent elements with no declared namespace.

- -

An asterisk may be used for the namespace prefix, indicating that -the selector represents elements in any namespace (including elements -with no namespace).

- -

Element type selectors that have no namespace component (no -namespace separator), represent elements without regard to the -element's namespace (equivalent to "*|") unless a default -namespace has been declared. If a default namespace has been declared, -the selector will represent only elements in the default -namespace.

- -

A type selector containing a namespace prefix that has not been -previously declared is an invalid selector. -The mechanism for declaring a namespace prefix is left up to the -language implementing Selectors. In CSS, such a mechanism is defined -in the General Syntax module.

- -

In a namespace-aware client, element type selectors will only match -against the local part -of the element's qualified -name. See below for notes about matching -behaviors in down-level clients.

- -

In summary:

- -
-
ns|E
-
elements with name E in namespace ns
-
*|E
-
elements with name E in any namespace, including those without any - declared namespace
-
|E
-
elements with name E without any declared namespace
-
E
-
if no default namespace has been specified, this is equivalent to *|E. - Otherwise it is equivalent to ns|E where ns is the default namespace.
-
- -
-

CSS examples:

- -
@namespace foo url(http://www.example.com);
- foo|h1 { color: blue }
- foo|* { color: yellow }
- |h1 { color: red }
- *|h1 { color: green }
- h1 { color: green }
- -

The first rule will match only h1 elements in the - "http://www.example.com" namespace.

- -

The second rule will match all elements in the - "http://www.example.com" namespace.

- -

The third rule will match only h1 elements without - any declared namespace.

- -

The fourth rule will match h1 elements in any - namespace (including those without any declared namespace).

- -

The last rule is equivalent to the fourth rule because no default - namespace has been defined.

- -
- -

6.2. Universal selector

- -

The universal selector, written "asterisk" -(*), represents the qualified name of any element -type. It represents any single element in the document tree in any -namespace (including those without any declared namespace) if no -default namespace has been specified. If a default namespace has been -specified, see Universal selector and -Namespaces below.

- -

If the universal selector is not the only component of a sequence -of simple selectors, the * may be omitted.

- -
-

Examples:

-
    -
  • *[hreflang|=en] and [hreflang|=en] are equivalent,
  • -
  • *.warning and .warning are equivalent,
  • -
  • *#myid and #myid are equivalent.
  • -
-
- -

Note: it is recommended that the -*, representing the universal selector, not be -omitted.

- -

6.2.1. Universal selector and namespaces

- -

The universal selector allows an optional namespace component. It -is used as follows:

- -
-
ns|*
-
all elements in namespace ns
-
*|*
-
all elements
-
|*
-
all elements without any declared namespace
-
*
-
if no default namespace has been specified, this is equivalent to *|*. - Otherwise it is equivalent to ns|* where ns is the default namespace.
-
- -

A universal selector containing a namespace prefix that has not -been previously declared is an invalid -selector. The mechanism for declaring a namespace prefix is left up -to the language implementing Selectors. In CSS, such a mechanism is -defined in the General Syntax module.

- - -

6.3. Attribute selectors

- -

Selectors allow the representation of an element's attributes. When -a selector is used as an expression to match against an element, -attribute selectors must be considered to match an element if that -element has an attribute that matches the attribute represented by the -attribute selector.

- -

6.3.1. Attribute presence and values -selectors

- -

CSS2 introduced four attribute selectors:

- -
-
[att] -
Represents an element with the att attribute, whatever the value of - the attribute.
-
[att=val]
-
Represents an element with the att attribute whose value is exactly - "val".
-
[att~=val]
-
Represents an element with the att attribute whose value is a whitespace-separated list of words, one of - which is exactly "val". If "val" contains whitespace, it will never - represent anything (since the words are separated by - spaces).
-
[att|=val] -
Represents an element with the att attribute, its value either - being exactly "val" or beginning with "val" immediately followed by - "-" (U+002D). This is primarily intended to allow language subcode - matches (e.g., the hreflang attribute on the - link element in HTML) as described in RFC 3066 ([RFC3066]). For lang (or - xml:lang) language subcode matching, please see the :lang pseudo-class.
-
- -

Attribute values must be identifiers or strings. The -case-sensitivity of attribute names and values in selectors depends on -the document language.

- -
- -

Examples:

- -

The following attribute selector represents an h1 - element that carries the title attribute, whatever its - value:

- -
h1[title]
- -

In the following example, the selector represents a - span element whose class attribute has - exactly the value "example":

- -
span[class="example"]
- -

Multiple attribute selectors can be used to represent several - attributes of an element, or several conditions on the same - attribute. Here, the selector represents a span element - whose hello attribute has exactly the value "Cleveland" - and whose goodbye attribute has exactly the value - "Columbus":

- -
span[hello="Cleveland"][goodbye="Columbus"]
- -

The following selectors illustrate the differences between "=" - and "~=". The first selector will represent, for example, the value - "copyright copyleft copyeditor" on a rel attribute. The - second selector will only represent an a element with - an href attribute having the exact value - "http://www.w3.org/".

- -
a[rel~="copyright"]
-a[href="http://www.w3.org/"]
- -

The following selector represents a link element - whose hreflang attribute is exactly "fr".

- -
link[hreflang=fr]
- -

The following selector represents a link element for - which the value of the hreflang attribute begins with - "en", including "en", "en-US", and "en-cockney":

- -
link[hreflang|="en"]
- -

Similarly, the following selectors represent a - DIALOGUE element whenever it has one of two different - values for an attribute character:

- -
DIALOGUE[character=romeo]
-DIALOGUE[character=juliet]
- -
- -

6.3.2. Substring matching attribute -selectors

- -

Three additional attribute selectors are provided for matching -substrings in the value of an attribute:

- -
-
[att^=val]
-
Represents an element with the att attribute whose value begins - with the prefix "val".
-
[att$=val] -
Represents an element with the att attribute whose value ends with - the suffix "val".
-
[att*=val] -
Represents an element with the att attribute whose value contains - at least one instance of the substring "val".
-
- -

Attribute values must be identifiers or strings. The -case-sensitivity of attribute names in selectors depends on the -document language.

- -
-

Examples:

-

The following selector represents an HTML object, referencing an - image:

-
object[type^="image/"]
-

The following selector represents an HTML anchor a with an - href attribute whose value ends with ".html".

-
a[href$=".html"]
-

The following selector represents an HTML paragraph with a title - attribute whose value contains the substring "hello".

-
p[title*="hello"]
-
- -

6.3.3. Attribute selectors and namespaces

- -

Attribute selectors allow an optional namespace component to the -attribute name. A namespace prefix that has been previously declared -may be prepended to the attribute name separated by the namespace -separator "vertical bar" (|). In keeping with -the Namespaces in the XML recommendation, default namespaces do not -apply to attributes, therefore attribute selectors without a namespace -component apply only to attributes that have no declared namespace -(equivalent to "|attr"). An asterisk may be used for the -namespace prefix indicating that the selector is to match all -attribute names without regard to the attribute's namespace. - -

An attribute selector with an attribute name containing a namespace -prefix that has not been previously declared is an invalid selector. The mechanism for declaring -a namespace prefix is left up to the language implementing Selectors. -In CSS, such a mechanism is defined in the General Syntax module. - -

-

CSS examples:

-
@namespace foo "http://www.example.com";
-[foo|att=val] { color: blue }
-[*|att] { color: yellow }
-[|att] { color: green }
-[att] { color: green }
- -

The first rule will match only elements with the attribute - att in the "http://www.example.com" namespace with the - value "val".

- -

The second rule will match only elements with the attribute - att regardless of the namespace of the attribute - (including no declared namespace).

- -

The last two rules are equivalent and will match only elements - with the attribute att where the attribute is not - declared to be in a namespace.

- -
- -

6.3.4. Default attribute values in DTDs

- -

Attribute selectors represent explicitly set attribute values in -the document tree. Default attribute values may be defined in a DTD or -elsewhere, but cannot always be selected by attribute -selectors. Selectors should be designed so that they work even if the -default values are not included in the document tree.

- -

More precisely, a UA is not required to read an "external -subset" of the DTD but is required to look for default -attribute values in the document's "internal subset." (See [XML10] for definitions of these subsets.)

- -

A UA that recognizes an XML namespace [XMLNAMES] is not required to use its -knowledge of that namespace to treat default attribute values as if -they were present in the document. (For example, an XHTML UA is not -required to use its built-in knowledge of the XHTML DTD.)

- -

Note: Typically, implementations -choose to ignore external subsets.

- -
-

Example:

- -

Consider an element EXAMPLE with an attribute "notation" that has a -default value of "decimal". The DTD fragment might be

- -
<!ATTLIST EXAMPLE notation (decimal,octal) "decimal">
- -

If the style sheet contains the rules

- -
EXAMPLE[notation=decimal] { /*... default property settings ...*/ }
-EXAMPLE[notation=octal]   { /*... other settings...*/ }
- -

the first rule will not match elements whose "notation" attribute -is set by default, i.e. not set explicitly. To catch all cases, the -attribute selector for the default value must be dropped:

- -
EXAMPLE                   { /*... default property settings ...*/ }
-EXAMPLE[notation=octal]   { /*... other settings...*/ }
- -

Here, because the selector EXAMPLE[notation=octal] is -more specific than the tag -selector alone, the style declarations in the second rule will override -those in the first for elements that have a "notation" attribute value -of "octal". Care has to be taken that all property declarations that -are to apply only to the default case are overridden in the non-default -cases' style rules.

- -
- -

6.4. Class selectors

- -

Working with HTML, authors may use the period (U+002E, -.) notation as an alternative to the ~= -notation when representing the class attribute. Thus, for -HTML, div.value and div[class~=value] have -the same meaning. The attribute value must immediately follow the -"period" (.).

- -

UAs may apply selectors using the period (.) notation in XML -documents if the UA has namespace-specific knowledge that allows it to -determine which attribute is the "class" attribute for the -respective namespace. One such example of namespace-specific knowledge -is the prose in the specification for a particular namespace (e.g. SVG -1.0 [SVG] describes the SVG -"class" attribute and how a UA should interpret it, and -similarly MathML 1.01 [MATH] describes the MathML -"class" attribute.)

- -
-

CSS examples:

- -

We can assign style information to all elements with - class~="pastoral" as follows:

- -
*.pastoral { color: green }  /* all elements with class~=pastoral */
- -

or just

- -
.pastoral { color: green }  /* all elements with class~=pastoral */
- -

The following assigns style only to H1 elements with - class~="pastoral":

- -
H1.pastoral { color: green }  /* H1 elements with class~=pastoral */
- -

Given these rules, the first H1 instance below would not have - green text, while the second would:

- -
<H1>Not green</H1>
-<H1 class="pastoral">Very green</H1>
- -
- -

To represent a subset of "class" values, each value must be preceded -by a ".", in any order.

- -
- -

CSS example:

- -

The following rule matches any P element whose "class" attribute - has been assigned a list of whitespace-separated values that includes - "pastoral" and "marine":

- -
p.pastoral.marine { color: green }
- -

This rule matches when class="pastoral blue aqua - marine" but does not match for class="pastoral - blue".

- -
- -

Note: Because CSS gives considerable -power to the "class" attribute, authors could conceivably design their -own "document language" based on elements with almost no associated -presentation (such as DIV and SPAN in HTML) and assigning style -information through the "class" attribute. Authors should avoid this -practice since the structural elements of a document language often -have recognized and accepted meanings and author-defined classes may -not.

- -

Note: If an element has multiple -class attributes, their values must be concatenated with spaces -between the values before searching for the class. As of this time the -working group is not aware of any manner in which this situation can -be reached, however, so this behavior is explicitly non-normative in -this specification.

- -

6.5. ID selectors

- -

Document languages may contain attributes that are declared to be -of type ID. What makes attributes of type ID special is that no two -such attributes can have the same value in a document, regardless of -the type of the elements that carry them; whatever the document -language, an ID typed attribute can be used to uniquely identify its -element. In HTML all ID attributes are named "id"; XML applications -may name ID attributes differently, but the same restriction -applies.

- -

An ID-typed attribute of a document language allows authors to -assign an identifier to one element instance in the document tree. W3C -ID selectors represent an element instance based on its identifier. An -ID selector contains a "number sign" (U+0023, -#) immediately followed by the ID value, which must be an -identifier.

- -

Selectors does not specify how a UA knows the ID-typed attribute of -an element. The UA may, e.g., read a document's DTD, have the -information hard-coded or ask the user. - -

-

Examples:

-

The following ID selector represents an h1 element - whose ID-typed attribute has the value "chapter1":

-
h1#chapter1
-

The following ID selector represents any element whose ID-typed - attribute has the value "chapter1":

-
#chapter1
-

The following selector represents any element whose ID-typed - attribute has the value "z98y".

-
*#z98y
-
- -

Note. In XML 1.0 [XML10], the information about which attribute -contains an element's IDs is contained in a DTD or a schema. When -parsing XML, UAs do not always read the DTD, and thus may not know -what the ID of an element is (though a UA may have namespace-specific -knowledge that allows it to determine which attribute is the ID -attribute for that namespace). If a style sheet designer knows or -suspects that a UA may not know what the ID of an element is, he -should use normal attribute selectors instead: -[name=p371] instead of #p371. Elements in -XML 1.0 documents without a DTD do not have IDs at all.

- -

If an element has multiple ID attributes, all of them must be -treated as IDs for that element for the purposes of the ID -selector. Such a situation could be reached using mixtures of xml:id, -DOM3 Core, XML DTDs, and namespace-specific knowledge.

- -

6.6. Pseudo-classes

- -

The pseudo-class concept is introduced to permit selection based on -information that lies outside of the document tree or that cannot be -expressed using the other simple selectors.

- -

A pseudo-class always consists of a "colon" -(:) followed by the name of the pseudo-class and -optionally by a value between parentheses.

- -

Pseudo-classes are allowed in all sequences of simple selectors -contained in a selector. Pseudo-classes are allowed anywhere in -sequences of simple selectors, after the leading type selector or -universal selector (possibly omitted). Pseudo-class names are -case-insensitive. Some pseudo-classes are mutually exclusive, while -others can be applied simultaneously to the same -element. Pseudo-classes may be dynamic, in the sense that an element -may acquire or lose a pseudo-class while a user interacts with the -document.

- - -

6.6.1. Dynamic pseudo-classes

- -

Dynamic pseudo-classes classify elements on characteristics other -than their name, attributes, or content, in principle characteristics -that cannot be deduced from the document tree.

- -

Dynamic pseudo-classes do not appear in the document source or -document tree.

- - -
The link pseudo-classes: :link and :visited
- -

User agents commonly display unvisited links differently from -previously visited ones. Selectors -provides the pseudo-classes :link and -:visited to distinguish them:

- -
    -
  • The :link pseudo-class applies to links that have - not yet been visited.
  • -
  • The :visited pseudo-class applies once the link has - been visited by the user.
  • -
- -

After some amount of time, user agents may choose to return a -visited link to the (unvisited) ':link' state.

- -

The two states are mutually exclusive.

- -
- -

Example:

- -

The following selector represents links carrying class - external and already visited:

- -
a.external:visited
- -
- -

Note: It is possible for style sheet -authors to abuse the :link and :visited pseudo-classes to determine -which sites a user has visited without the user's consent. - -

UAs may therefore treat all links as unvisited links, or implement -other measures to preserve the user's privacy while rendering visited -and unvisited links differently.

- -
The user action pseudo-classes -:hover, :active, and :focus
- -

Interactive user agents sometimes change the rendering in response -to user actions. Selectors provides -three pseudo-classes for the selection of an element the user is -acting on.

- -
    - -
  • The :hover pseudo-class applies while the user - designates an element with a pointing device, but does not activate - it. For example, a visual user agent could apply this pseudo-class - when the cursor (mouse pointer) hovers over a box generated by the - element. User agents that do not support interactive - media do not have to support this pseudo-class. Some conforming - user agents that support interactive - media may not be able to support this pseudo-class (e.g., a pen - device that does not detect hovering).
  • - -
  • The :active pseudo-class applies while an element - is being activated by the user. For example, between the times the - user presses the mouse button and releases it.
  • - -
  • The :focus pseudo-class applies while an element - has the focus (accepts keyboard or mouse events, or other forms of - input).
  • - -
- -

There may be document language or implementation specific limits on -which elements can become :active or acquire -:focus.

- -

These pseudo-classes are not mutually exclusive. An element may -match several pseudo-classes at the same time.

- -

Selectors doesn't define if the parent of an element that is -':active' or ':hover' is also in that state.

- -
-

Examples:

-
a:link    /* unvisited links */
-a:visited /* visited links */
-a:hover   /* user hovers */
-a:active  /* active links */
-

An example of combining dynamic pseudo-classes:

-
a:focus
-a:focus:hover
-

The last selector matches a elements that are in - the pseudo-class :focus and in the pseudo-class :hover.

-
- -

Note: An element can be both ':visited' -and ':active' (or ':link' and ':active').

- -

6.6.2. The target pseudo-class :target

- -

Some URIs refer to a location within a resource. This kind of URI -ends with a "number sign" (#) followed by an anchor -identifier (called the fragment identifier).

- -

URIs with fragment identifiers link to a certain element within the -document, known as the target element. For instance, here is a URI -pointing to an anchor named section_2 in an HTML -document:

- -
http://example.com/html/top.html#section_2
- -

A target element can be represented by the :target -pseudo-class. If the document's URI has no fragment identifier, then -the document has no target element.

- -
-

Example:

-
p.note:target
-

This selector represents a p element of class - note that is the target element of the referring - URI.

-
- -
-

CSS example:

-

Here, the :target pseudo-class is used to make the - target element red and place an image before it, if there is one:

-
*:target { color : red }
-*:target::before { content : url(target.png) }
-
- -

6.6.3. The language pseudo-class :lang

- -

If the document language specifies how the human language of an -element is determined, it is possible to write selectors that -represent an element based on its language. For example, in HTML [HTML4], the language is determined by a -combination of the lang attribute, the meta -element, and possibly by information from the protocol (such as HTTP -headers). XML uses an attribute called xml:lang, and -there may be other document language-specific methods for determining -the language.

- -

The pseudo-class :lang(C) represents an element that -is in language C. Whether an element is represented by a -:lang() selector is based solely on the identifier C -being either equal to, or a hyphen-separated substring of, the -element's language value, in the same way as if performed by the '|=' operator in attribute -selectors. The identifier C does not have to be a valid language -name.

- -

C must not be empty. (If it is, the selector is invalid.)

- -

Note: It is recommended that -documents and protocols indicate language using codes from RFC 3066 [RFC3066] or its successor, and by means of -"xml:lang" attributes in the case of XML-based documents [XML10]. See -"FAQ: Two-letter or three-letter language codes."

- -
-

Examples:

-

The two following selectors represent an HTML document that is in - Belgian French or German. The two next selectors represent - q quotations in an arbitrary element in Belgian French - or German.

-
html:lang(fr-be)
-html:lang(de)
-:lang(fr-be) > q
-:lang(de) > q
-
- -

6.6.4. The UI element states pseudo-classes

- -
The :enabled and :disabled pseudo-classes
- -

The :enabled pseudo-class allows authors to customize -the look of user interface elements that are enabled — which the -user can select or activate in some fashion (e.g. clicking on a button -with a mouse). There is a need for such a pseudo-class because there -is no way to programmatically specify the default appearance of say, -an enabled input element without also specifying what it -would look like when it was disabled.

- -

Similar to :enabled, :disabled allows the -author to specify precisely how a disabled or inactive user interface -element should look.

- -

Most elements will be neither enabled nor disabled. An element is -enabled if the user can either activate it or transfer the focus to -it. An element is disabled if it could be enabled, but the user cannot -presently activate it or transfer focus to it.

- - -
The :checked pseudo-class
- -

Radio and checkbox elements can be toggled by the user. Some menu -items are "checked" when the user selects them. When such elements are -toggled "on" the :checked pseudo-class applies. The -:checked pseudo-class initially applies to such elements -that have the HTML4 selected and checked -attributes as described in Section -17.2.1 of HTML4, but of course the user can toggle "off" such -elements in which case the :checked pseudo-class would no -longer apply. While the :checked pseudo-class is dynamic -in nature, and is altered by user action, since it can also be based -on the presence of the semantic HTML4 selected and -checked attributes, it applies to all media. - - -

The :indeterminate pseudo-class
- -
- -

Radio and checkbox elements can be toggled by the user, but are -sometimes in an indeterminate state, neither checked nor unchecked. -This can be due to an element attribute, or DOM manipulation.

- -

A future version of this specification may introduce an -:indeterminate pseudo-class that applies to such elements. -

- -
- - -

6.6.5. Structural pseudo-classes

- -

Selectors introduces the concept of structural -pseudo-classes to permit selection based on extra information that lies in -the document tree but cannot be represented by other simple selectors or -combinators. - -

Note that standalone pieces of PCDATA (text nodes in the DOM) are -not counted when calculating the position of an element in the list of -children of its parent. When calculating the position of an element in -the list of children of its parent, the index numbering starts at 1. - - -

:root pseudo-class
- -

The :root pseudo-class represents an element that is -the root of the document. In HTML 4, this is always the -HTML element. - - -

:nth-child() pseudo-class
- -

The -:nth-child(an+b) -pseudo-class notation represents an element that has -an+b-1 siblings -before it in the document tree, for a given positive -integer or zero value of n, and has a parent element. In -other words, this matches the bth child of an element after -all the children have been split into groups of a elements -each. For example, this allows the selectors to address every other -row in a table, and could be used to alternate the color -of paragraph text in a cycle of four. The a and -b values must be zero, negative integers or positive -integers. The index of the first child of an element is 1. - -

In addition to this, :nth-child() can take -'odd' and 'even' as arguments instead. -'odd' has the same signification as 2n+1, -and 'even' has the same signification as 2n. - - -

-

Examples:

-
tr:nth-child(2n+1) /* represents every odd row of an HTML table */
-tr:nth-child(odd)  /* same */
-tr:nth-child(2n)   /* represents every even row of an HTML table */
-tr:nth-child(even) /* same */
-
-/* Alternate paragraph colours in CSS */
-p:nth-child(4n+1) { color: navy; }
-p:nth-child(4n+2) { color: green; }
-p:nth-child(4n+3) { color: maroon; }
-p:nth-child(4n+4) { color: purple; }
-
- -

When a=0, no repeating is used, so for example -:nth-child(0n+5) matches only the fifth child. When -a=0, the an part need not be -included, so the syntax simplifies to -:nth-child(b) and the last example simplifies -to :nth-child(5). - -

-

Examples:

-
foo:nth-child(0n+1)   /* represents an element foo, first child of its parent element */
-foo:nth-child(1)      /* same */
-
- -

When a=1, the number may be omitted from the rule. - -

-

Examples:

-

The following selectors are therefore equivalent:

-
bar:nth-child(1n+0)   /* represents all bar elements, specificity (0,1,1) */
-bar:nth-child(n+0)    /* same */
-bar:nth-child(n)      /* same */
-bar                   /* same but lower specificity (0,0,1) */
-
- -

If b=0, then every ath element is picked. In -such a case, the b part may be omitted. - -

-

Examples:

-
tr:nth-child(2n+0) /* represents every even row of an HTML table */
-tr:nth-child(2n) /* same */
-
- -

If both a and b are equal to zero, the -pseudo-class represents no element in the document tree.

- -

The value a can be negative, but only the positive -values of an+b, for -n≥0, may represent an element in the document -tree.

- -
-

Example:

-
html|tr:nth-child(-n+6)  /* represents the 6 first rows of XHTML tables */
-
- -

When the value b is negative, the "+" character in the -expression must be removed (it is effectively replaced by the "-" -character indicating the negative value of b).

- -
-

Examples:

-
:nth-child(10n-1)  /* represents the 9th, 19th, 29th, etc, element */
-:nth-child(10n+9)  /* Same */
-:nth-child(10n+-1) /* Syntactically invalid, and would be ignored */
-
- - -
:nth-last-child() pseudo-class
- -

The :nth-last-child(an+b) -pseudo-class notation represents an element that has -an+b-1 siblings -after it in the document tree, for a given positive -integer or zero value of n, and has a parent element. See -:nth-child() pseudo-class for the syntax of its argument. -It also accepts the 'even' and 'odd' values -as arguments. - - -

-

Examples:

-
tr:nth-last-child(-n+2)    /* represents the two last rows of an HTML table */
-
-foo:nth-last-child(odd)    /* represents all odd foo elements in their parent element,
-                              counting from the last one */
-
- - -
:nth-of-type() pseudo-class
- -

The :nth-of-type(an+b) -pseudo-class notation represents an element that has -an+b-1 siblings with the same -element name before it in the document tree, for a -given zero or positive integer value of n, and has a -parent element. In other words, this matches the bth child -of that type after all the children of that type have been split into -groups of a elements each. See :nth-child() pseudo-class -for the syntax of its argument. It also accepts the -'even' and 'odd' values. - - -

-

CSS example:

-

This allows an author to alternate the position of floated images:

-
img:nth-of-type(2n+1) { float: right; }
-img:nth-of-type(2n) { float: left; }
-
- - -
:nth-last-of-type() pseudo-class
- -

The :nth-last-of-type(an+b) -pseudo-class notation represents an element that has -an+b-1 siblings with the same -element name after it in the document tree, for a -given zero or positive integer value of n, and has a -parent element. See :nth-child() pseudo-class for the -syntax of its argument. It also accepts the 'even' and 'odd' values. - - -

-

Example:

-

To represent all h2 children of an XHTML - body except the first and last, one could use the - following selector:

-
body > h2:nth-of-type(n+2):nth-last-of-type(n+2)
-

In this case, one could also use :not(), although the - selector ends up being just as long:

-
body > h2:not(:first-of-type):not(:last-of-type)
-
- - -
:first-child pseudo-class
- -

Same as :nth-child(1). The :first-child pseudo-class -represents an element that is the first child of some other element. - - -

-

Examples:

-

The following selector represents a p element that is - the first child of a div element:

-
div > p:first-child
-

This selector can represent the p inside the - div of the following fragment:

-
<p> The last P before the note.</p>
-<div class="note">
-   <p> The first P inside the note.</p>
-</div>
but cannot represent the second p in the following -fragment: -
<p> The last P before the note.</p>
-<div class="note">
-   <h2> Note </h2>
-   <p> The first P inside the note.</p>
-</div>
-

The following two selectors are usually equivalent:

-
* > a:first-child /* a is first child of any element */
-a:first-child /* Same (assuming a is not the root element) */
-
- -
:last-child pseudo-class
- -

Same as :nth-last-child(1). The :last-child pseudo-class -represents an element that is the last child of some other element. - -

-

Example:

-

The following selector represents a list item li that - is the last child of an ordered list ol. -

ol > li:last-child
-
- -
:first-of-type pseudo-class
- -

Same as :nth-of-type(1). The :first-of-type pseudo-class -represents an element that is the first sibling of its type in the list of -children of its parent element. - -

-

Example:

-

The following selector represents a definition title -dt inside a definition list dl, this -dt being the first of its type in the list of children of -its parent element.

-
dl dt:first-of-type
-

It is a valid description for the first two dt -elements in the following example but not for the third one:

-
<dl>
- <dt>gigogne</dt>
- <dd>
-  <dl>
-   <dt>fusée</dt>
-   <dd>multistage rocket</dd>
-   <dt>table</dt>
-   <dd>nest of tables</dd>
-  </dl>
- </dd>
-</dl>
-
- -
:last-of-type pseudo-class
- -

Same as :nth-last-of-type(1). The -:last-of-type pseudo-class represents an element that is -the last sibling of its type in the list of children of its parent -element.

- -
-

Example:

-

The following selector represents the last data cell - td of a table row.

-
tr > td:last-of-type
-
- -
:only-child pseudo-class
- -

Represents an element that has a parent element and whose parent -element has no other element children. Same as -:first-child:last-child or -:nth-child(1):nth-last-child(1), but with a lower -specificity.

- -
:only-of-type pseudo-class
- -

Represents an element that has a parent element and whose parent -element has no other element children with the same element name. Same -as :first-of-type:last-of-type or -:nth-of-type(1):nth-last-of-type(1), but with a lower -specificity.

- - -
:empty pseudo-class
- -

The :empty pseudo-class represents an element that has -no children at all. In terms of the DOM, only element nodes and text -nodes (including CDATA nodes and entity references) whose data has a -non-zero length must be considered as affecting emptiness; comments, -PIs, and other nodes must not affect whether an element is considered -empty or not.

- -
-

Examples:

-

p:empty is a valid representation of the following fragment:

-
<p></p>
-

foo:empty is not a valid representation for the - following fragments:

-
<foo>bar</foo>
-
<foo><bar>bla</bar></foo>
-
<foo>this is not <bar>:empty</bar></foo>
-
- -

6.6.6. Blank

- -

This section intentionally left blank.

- - -

6.6.7. The negation pseudo-class

- -

The negation pseudo-class, :not(X), is a -functional notation taking a simple -selector (excluding the negation pseudo-class itself and -pseudo-elements) as an argument. It represents an element that is not -represented by the argument. - - - -

-

Examples:

-

The following CSS selector matches all button - elements in an HTML document that are not disabled.

-
button:not([DISABLED])
-

The following selector represents all but FOO - elements.

-
*:not(FOO)
-

The following group of selectors represents all HTML elements - except links.

-
html|*:not(:link):not(:visited)
-
- -

Default namespace declarations do not affect the argument of the -negation pseudo-class unless the argument is a universal selector or a -type selector.

- -
-

Examples:

-

Assuming that the default namespace is bound to - "http://example.com/", the following selector represents all - elements that are not in that namespace:

-
*|*:not(*)
-

The following CSS selector matches any element that is not being hovered, - regardless of its namespace. In particular, it is not limited to - only matching elements in the default namespace that are not being - hovered, and elements not in the default namespace don't match the - rule when they are being hovered.

-
*|*:not(:hover)
-
- -

Note: the :not() pseudo allows -useless selectors to be written. For instance :not(*|*), -which represents no element at all, or foo:not(bar), -which is equivalent to foo but with a higher -specificity.

- -

7. Pseudo-elements

- -

Pseudo-elements create abstractions about the document tree beyond -those specified by the document language. For instance, document -languages do not offer mechanisms to access the first letter or first -line of an element's content. Pseudo-elements allow designers to refer -to this otherwise inaccessible information. Pseudo-elements may also -provide designers a way to refer to content that does not exist in the -source document (e.g., the ::before and -::after pseudo-elements give access to generated -content).

- -

A pseudo-element is made of two colons (::) followed -by the name of the pseudo-element.

- -

This :: notation is introduced by the current document -in order to establish a discrimination between pseudo-classes and -pseudo-elements. For compatibility with existing style sheets, user -agents must also accept the previous one-colon notation for -pseudo-elements introduced in CSS levels 1 and 2 (namely, -:first-line, :first-letter, -:before and :after). This compatibility is -not allowed for the new pseudo-elements introduced in CSS level 3.

- -

Only one pseudo-element may appear per selector, and if present it -must appear after the sequence of simple selectors that represents the -subjects of the selector. A -future version of this specification may allow multiple -pseudo-elements per selector.

- -

7.1. The ::first-line pseudo-element

- -

The ::first-line pseudo-element describes the contents -of the first formatted line of an element. - -

-

CSS example:

-
p::first-line { text-transform: uppercase }
-

The above rule means "change the letters of the first line of every -paragraph to uppercase".

-
- -

The selector p::first-line does not match any real -HTML element. It does match a pseudo-element that conforming user -agents will insert at the beginning of every paragraph.

- -

Note that the length of the first line depends on a number of -factors, including the width of the page, the font size, etc. Thus, -an ordinary HTML paragraph such as:

- -
-<P>This is a somewhat long HTML 
-paragraph that will be broken into several 
-lines. The first line will be identified
-by a fictional tag sequence. The other lines 
-will be treated as ordinary lines in the 
-paragraph.</P>
-
- -

the lines of which happen to be broken as follows: - -

-THIS IS A SOMEWHAT LONG HTML PARAGRAPH THAT
-will be broken into several lines. The first
-line will be identified by a fictional tag 
-sequence. The other lines will be treated as 
-ordinary lines in the paragraph.
-
- -

This paragraph might be "rewritten" by user agents to include the -fictional tag sequence for ::first-line. This -fictional tag sequence helps to show how properties are inherited.

- -
-<P><P::first-line> This is a somewhat long HTML 
-paragraph that </P::first-line> will be broken into several
-lines. The first line will be identified 
-by a fictional tag sequence. The other lines 
-will be treated as ordinary lines in the 
-paragraph.</P>
-
- -

If a pseudo-element breaks up a real element, the desired effect -can often be described by a fictional tag sequence that closes and -then re-opens the element. Thus, if we mark up the previous paragraph -with a span element:

- -
-<P><SPAN class="test"> This is a somewhat long HTML
-paragraph that will be broken into several
-lines.</SPAN> The first line will be identified
-by a fictional tag sequence. The other lines 
-will be treated as ordinary lines in the 
-paragraph.</P>
-
- -

the user agent could simulate start and end tags for -span when inserting the fictional tag sequence for -::first-line. - -

-<P><P::first-line><SPAN class="test"> This is a
-somewhat long HTML
-paragraph that will </SPAN></P::first-line><SPAN class="test"> be
-broken into several
-lines.</SPAN> The first line will be identified
-by a fictional tag sequence. The other lines
-will be treated as ordinary lines in the 
-paragraph.</P>
-
- -

In CSS, the ::first-line pseudo-element can only be -attached to a block-level element, an inline-block, a table-caption, -or a table-cell.

- -

The "first formatted line" of an -element may occur inside a -block-level descendant in the same flow (i.e., a block-level -descendant that is not positioned and not a float). E.g., the first -line of the div in <DIV><P>This -line...</P></DIV> is the first line of the p (assuming -that both p and div are block-level). - -

The first line of a table-cell or inline-block cannot be the first -formatted line of an ancestor element. Thus, in <DIV><P -STYLE="display: inline-block">Hello<BR>Goodbye</P> -etcetera</DIV> the first formatted line of the -div is not the line "Hello". - -

Note that the first line of the p in this -fragment: <p><br>First... doesn't contain any -letters (assuming the default style for br in HTML -4). The word "First" is not on the first formatted line. - -

A UA should act as if the fictional start tags of the -::first-line pseudo-elements were nested just inside the -innermost enclosing block-level element. (Since CSS1 and CSS2 were -silent on this case, authors should not rely on this behavior.) Here -is an example. The fictional tag sequence for

- -
-<DIV>
-  <P>First paragraph</P>
-  <P>Second paragraph</P>
-</DIV>
-
- -

is

- -
-<DIV>
-  <P><DIV::first-line><P::first-line>First paragraph</P::first-line></DIV::first-line></P>
-  <P><P::first-line>Second paragraph</P::first-line></P>
-</DIV>
-
- -

The ::first-line pseudo-element is similar to an -inline-level element, but with certain restrictions. In CSS, the -following properties apply to a ::first-line -pseudo-element: font properties, color property, background -properties, 'word-spacing', 'letter-spacing', 'text-decoration', -'vertical-align', 'text-transform', 'line-height'. UAs may apply other -properties as well.

- - -

7.2. The ::first-letter pseudo-element

- -

The ::first-letter pseudo-element represents the first -letter of the first line of a block, if it is not preceded by any -other content (such as images or inline tables) on its line. The -::first-letter pseudo-element may be used for "initial caps" and "drop -caps", which are common typographical effects. This type of initial -letter is similar to an inline-level element if its 'float' property -is 'none'; otherwise, it is similar to a floated element.

- -

In CSS, these are the properties that apply to ::first-letter -pseudo-elements: font properties, 'text-decoration', 'text-transform', -'letter-spacing', 'word-spacing' (when appropriate), 'line-height', -'float', 'vertical-align' (only if 'float' is 'none'), margin -properties, padding properties, border properties, color property, -background properties. UAs may apply other properties as well. To -allow UAs to render a typographically correct drop cap or initial cap, -the UA may choose a line-height, width and height based on the shape -of the letter, unlike for normal elements.

- -
-

Example:

-

This example shows a possible rendering of an initial cap. Note -that the 'line-height' that is inherited by the ::first-letter -pseudo-element is 1.1, but the UA in this example has computed the -height of the first letter differently, so that it doesn't cause any -unnecessary space between the first two lines. Also note that the -fictional start tag of the first letter is inside the span, and thus -the font weight of the first letter is normal, not bold as the span: -

-p { line-height: 1.1 }
-p::first-letter { font-size: 3em; font-weight: normal }
-span { font-weight: bold }
-...
-<p><span>Het hemelsche</span> gerecht heeft zich ten lange lesten<br>
-Erbarremt over my en mijn benaeuwde vesten<br>
-En arme burgery, en op mijn volcx gebed<br>
-En dagelix geschrey de bange stad ontzet.
-
-
-

Image illustrating the ::first-letter pseudo-element -

-
- -
-

The following CSS will make a drop cap initial letter span about two lines:

- -
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">
-<HTML>
- <HEAD>
-  <TITLE>Drop cap initial letter</TITLE>
-  <STYLE type="text/css">
-   P               { font-size: 12pt; line-height: 1.2 }
-   P::first-letter { font-size: 200%; font-weight: bold; float: left }
-   SPAN            { text-transform: uppercase }
-  </STYLE>
- </HEAD>
- <BODY>
-  <P><SPAN>The first</SPAN> few words of an article
-    in The Economist.</P>
- </BODY>
-</HTML>
-
- -

This example might be formatted as follows:

- -
-

Image illustrating the combined effect of the ::first-letter and ::first-line pseudo-elements

-
- -

The fictional tag sequence is:

- -
-<P>
-<SPAN>
-<P::first-letter>
-T
-</P::first-letter>he first
-</SPAN> 
-few words of an article in the Economist.
-</P>
-
- -

Note that the ::first-letter pseudo-element tags abut -the content (i.e., the initial character), while the ::first-line -pseudo-element start tag is inserted right after the start tag of the -block element.

- -

In order to achieve traditional drop caps formatting, user agents -may approximate font sizes, for example to align baselines. Also, the -glyph outline may be taken into account when formatting.

- -

Punctuation (i.e., characters defined in Unicode in the "open" (Ps), -"close" (Pe), "initial" (Pi), "final" (Pf) and "other" (Po) -punctuation classes), that precedes or follows the first letter should -be included. [UNICODE]

- -
-

Quotes that precede the
-first letter should be included.

-
- -

The ::first-letter also applies if the first letter is -in fact a digit, e.g., the "6" in "67 million dollars is a lot of -money."

- -

In CSS, the ::first-letter pseudo-element applies to -block, list-item, table-cell, table-caption, and inline-block -elements. A future version of this specification -may allow this pseudo-element to apply to more element -types.

- -

The ::first-letter pseudo-element can be used with all -such elements that contain text, or that have a descendant in the same -flow that contains text. A UA should act as if the fictional start tag -of the ::first-letter pseudo-element is just before the first text of -the element, even if that first text is in a descendant.

- -
-

Example:

-

The fictional tag sequence for this HTML fragment: -

<div>
-<p>The first text.
-

is: -

<div>
-<p><div::first-letter><p::first-letter>T</...></...>he first text.
-
- -

The first letter of a table-cell or inline-block cannot be the -first letter of an ancestor element. Thus, in <DIV><P -STYLE="display: inline-block">Hello<BR>Goodbye</P> -etcetera</DIV> the first letter of the div is not the -letter "H". In fact, the div doesn't have a first letter. - -

The first letter must occur on the first formatted line. For example, in -this fragment: <p><br>First... the first line -doesn't contain any letters and ::first-letter doesn't -match anything (assuming the default style for br in HTML -4). In particular, it does not match the "F" of "First." - -

In CSS, if an element is a list item ('display: list-item'), the -::first-letter applies to the first letter in the -principal box after the marker. UAs may ignore -::first-letter on list items with 'list-style-position: -inside'. If an element has ::before or -::after content, the ::first-letter applies -to the first letter of the element including that content. - -

-

Example:

-

After the rule 'p::before {content: "Note: "}', the selector -'p::first-letter' matches the "N" of "Note".

-
- -

Some languages may have specific rules about how to treat certain -letter combinations. In Dutch, for example, if the letter combination -"ij" appears at the beginning of a word, both letters should be -considered within the ::first-letter pseudo-element. - -

If the letters that would form the ::first-letter are not in the -same element, such as "'T" in <p>'<em>T..., the UA -may create a ::first-letter pseudo-element from one of the elements, -both elements, or simply not create a pseudo-element.

- -

Similarly, if the first letter(s) of the block are not at the start -of the line (for example due to bidirectional reordering), then the UA -need not create the pseudo-element(s). - -

-

Example:

-

The following example illustrates -how overlapping pseudo-elements may interact. The first letter of -each P element will be green with a font size of '24pt'. The rest of -the first formatted line will be 'blue' while the rest of the -paragraph will be 'red'.

- -
p { color: red; font-size: 12pt }
-p::first-letter { color: green; font-size: 200% }
-p::first-line { color: blue }
-
-<P>Some text that ends up on two lines</P>
- -

Assuming that a line break will occur before the word "ends", the -fictional tag -sequence for this fragment might be:

- -
<P>
-<P::first-line>
-<P::first-letter> 
-S 
-</P::first-letter>ome text that 
-</P::first-line> 
-ends up on two lines 
-</P>
- -

Note that the ::first-letter element is inside the ::first-line -element. Properties set on ::first-line are inherited by -::first-letter, but are overridden if the same property is set on -::first-letter.

-
- - -

7.3. The ::selection pseudo-element

- -

The ::selection pseudo-element applies to the portion -of a document that has been highlighted by the user. This also -applies, for example, to selected text within an editable text -field. This pseudo-element should not be confused with the :checked pseudo-class (which used to be -named :selected). - -

Although the ::selection pseudo-element is dynamic in -nature, and is altered by user action, it is reasonable to expect that -when a UA re-renders to a static medium (such as a printed page, see -[CSS21]) which was originally rendered to a -dynamic medium (like screen), the UA may wish to transfer the current -::selection state to that other medium, and have all the -appropriate formatting and rendering take effect as well. This is not -required — UAs may omit the ::selection -pseudo-element for static media. - -

These are the CSS properties that apply to ::selection -pseudo-elements: color, background, cursor (optional), outline -(optional). The computed value of the 'background-image' property on -::selection may be ignored. - - -

7.4. The ::before and ::after pseudo-elements

- -

The ::before and ::after pseudo-elements -can be used to describe generated content before or after an element's -content. They are explained in CSS 2.1 [CSS21].

- -

When the ::first-letter and ::first-line -pseudo-elements are combined with ::before and -::after, they apply to the first letter or line of the -element including the inserted text.

- -

8. Combinators

- -

8.1. Descendant combinator

- -

At times, authors may want selectors to describe an element that is -the descendant of another element in the document tree (e.g., "an -EM element that is contained within an H1 -element"). Descendant combinators express such a relationship. A -descendant combinator is white space that -separates two sequences of simple selectors. A selector of the form -"A B" represents an element B that is an -arbitrary descendant of some ancestor element A. - -

-

Examples:

-

For example, consider the following selector:

-
h1 em
-

It represents an em element being the descendant of - an h1 element. It is a correct and valid, but partial, - description of the following fragment:

-
<h1>This <span class="myclass">headline
-is <em>very</em> important</span></h1>
-

The following selector:

-
div * p
-

represents a p element that is a grandchild or later - descendant of a div element. Note the whitespace on - either side of the "*" is not part of the universal selector; the - whitespace is a combinator indicating that the DIV must be the - ancestor of some element, and that that element must be an ancestor - of the P.

-

The following selector, which combines descendant combinators and - attribute selectors, represents an - element that (1) has the href attribute set and (2) is - inside a p that is itself inside a div:

-
div p *[href]
-
- -

8.2. Child combinators

- -

A child combinator describes a childhood relationship -between two elements. A child combinator is made of the -"greater-than sign" (>) character and -separates two sequences of simple selectors. - - -

-

Examples:

-

The following selector represents a p element that is a - child of body:

-
body > p
-

The following example combines descendant combinators and child - combinators.

-
div ol>li p
-

It represents a p element that is a descendant of an - li element; the li element must be the - child of an ol element; the ol element must - be a descendant of a div. Notice that the optional white - space around the ">" combinator has been left out.

-
- -

For information on selecting the first child of an element, please -see the section on the :first-child pseudo-class -above.

- -

8.3. Sibling combinators

- -

There are two different sibling combinators: the adjacent sibling -combinator and the general sibling combinator. In both cases, -non-element nodes (e.g. text between elements) are ignored when -considering adjacency of elements.

- -

8.3.1. Adjacent sibling combinator

- -

The adjacent sibling combinator is made of the "plus -sign" (U+002B, +) character that separates two -sequences of simple selectors. The elements represented by the two -sequences share the same parent in the document tree and the element -represented by the first sequence immediately precedes the element -represented by the second one.

- -
-

Examples:

-

The following selector represents a p element - immediately following a math element:

-
math + p
-

The following selector is conceptually similar to the one in the - previous example, except that it adds an attribute selector — it - adds a constraint to the h1 element, that it must have - class="opener":

-
h1.opener + h2
-
- - -

8.3.2. General sibling combinator

- -

The general sibling combinator is made of the "tilde" -(U+007E, ~) character that separates two sequences of -simple selectors. The elements represented by the two sequences share -the same parent in the document tree and the element represented by -the first sequence precedes (not necessarily immediately) the element -represented by the second one.

- -
-

Example:

-
h1 ~ pre
-

represents a pre element following an h1. It - is a correct and valid, but partial, description of:

-
<h1>Definition of the function a</h1>
-<p>Function a(x) has to be applied to all figures in the table.</p>
-<pre>function a(x) = 12x/13.5</pre>
-
- -

9. Calculating a selector's specificity

- -

A selector's specificity is calculated as follows:

- -
    -
  • count the number of ID selectors in the selector (= a)
  • -
  • count the number of class selectors, attribute selectors, and pseudo-classes in the selector (= b)
  • -
  • count the number of element names in the selector (= c)
  • -
  • ignore pseudo-elements
  • -
- -

Selectors inside the negation pseudo-class -are counted like any other, but the negation itself does not count as -a pseudo-class.

- -

Concatenating the three numbers a-b-c (in a number system with a -large base) gives the specificity.

- -
-

Examples:

-
*               /* a=0 b=0 c=0 -> specificity =   0 */
-LI              /* a=0 b=0 c=1 -> specificity =   1 */
-UL LI           /* a=0 b=0 c=2 -> specificity =   2 */
-UL OL+LI        /* a=0 b=0 c=3 -> specificity =   3 */
-H1 + *[REL=up]  /* a=0 b=1 c=1 -> specificity =  11 */
-UL OL LI.red    /* a=0 b=1 c=3 -> specificity =  13 */
-LI.red.level    /* a=0 b=2 c=1 -> specificity =  21 */
-#x34y           /* a=1 b=0 c=0 -> specificity = 100 */
-#s12:not(FOO)   /* a=1 b=0 c=1 -> specificity = 101 */
-
-
- -

Note: the specificity of the styles -specified in an HTML style attribute is described in CSS -2.1. [CSS21].

- -

10. The grammar of Selectors

- -

10.1. Grammar

- -

The grammar below defines the syntax of Selectors. It is globally -LL(1) and can be locally LL(2) (but note that most UA's should not use -it directly, since it doesn't express the parsing conventions). The -format of the productions is optimized for human consumption and some -shorthand notations beyond Yacc (see [YACC]) -are used:

- -
    -
  • *: 0 or more -
  • +: 1 or more -
  • ?: 0 or 1 -
  • |: separates alternatives -
  • [ ]: grouping
  • -
- -

The productions are:

- -
selectors_group
-  : selector [ COMMA S* selector ]*
-  ;
-
-selector
-  : simple_selector_sequence [ combinator simple_selector_sequence ]*
-  ;
-
-combinator
-  /* combinators can be surrounded by white space */
-  : PLUS S* | GREATER S* | TILDE S* | S+
-  ;
-
-simple_selector_sequence
-  : [ type_selector | universal ]
-    [ HASH | class | attrib | pseudo | negation ]*
-  | [ HASH | class | attrib | pseudo | negation ]+
-  ;
-
-type_selector
-  : [ namespace_prefix ]? element_name
-  ;
-
-namespace_prefix
-  : [ IDENT | '*' ]? '|'
-  ;
-
-element_name
-  : IDENT
-  ;
-
-universal
-  : [ namespace_prefix ]? '*'
-  ;
-
-class
-  : '.' IDENT
-  ;
-
-attrib
-  : '[' S* [ namespace_prefix ]? IDENT S*
-        [ [ PREFIXMATCH |
-            SUFFIXMATCH |
-            SUBSTRINGMATCH |
-            '=' |
-            INCLUDES |
-            DASHMATCH ] S* [ IDENT | STRING ] S*
-        ]? ']'
-  ;
-
-pseudo
-  /* '::' starts a pseudo-element, ':' a pseudo-class */
-  /* Exceptions: :first-line, :first-letter, :before and :after. */
-  /* Note that pseudo-elements are restricted to one per selector and */
-  /* occur only in the last simple_selector_sequence. */
-  : ':' ':'? [ IDENT | functional_pseudo ]
-  ;
-
-functional_pseudo
-  : FUNCTION S* expression ')'
-  ;
-
-expression
-  /* In CSS3, the expressions are identifiers, strings, */
-  /* or of the form "an+b" */
-  : [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
-  ;
-
-negation
-  : NOT S* negation_arg S* ')'
-  ;
-
-negation_arg
-  : type_selector | universal | HASH | class | attrib | pseudo
-  ;
- - -

10.2. Lexical scanner

- -

The following is the tokenizer, written in Flex (see -[FLEX]) notation. The tokenizer is -case-insensitive.

- -

The two occurrences of "\377" represent the highest character -number that current versions of Flex can deal with (decimal 255). They -should be read as "\4177777" (decimal 1114111), which is the highest -possible code point in Unicode/ISO-10646. [UNICODE]

- -
%option case-insensitive
-
-ident     [-]?{nmstart}{nmchar}*
-name      {nmchar}+
-nmstart   [_a-z]|{nonascii}|{escape}
-nonascii  [^\0-\177]
-unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
-escape    {unicode}|\\[^\n\r\f0-9a-f]
-nmchar    [_a-z0-9-]|{nonascii}|{escape}
-num       [0-9]+|[0-9]*\.[0-9]+
-string    {string1}|{string2}
-string1   \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*\"
-string2   \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*\'
-invalid   {invalid1}|{invalid2}
-invalid1  \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
-invalid2  \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
-nl        \n|\r\n|\r|\f
-w         [ \t\r\n\f]*
-
-%%
-
-[ \t\r\n\f]+     return S;
-
-"~="             return INCLUDES;
-"|="             return DASHMATCH;
-"^="             return PREFIXMATCH;
-"$="             return SUFFIXMATCH;
-"*="             return SUBSTRINGMATCH;
-{ident}          return IDENT;
-{string}         return STRING;
-{ident}"("       return FUNCTION;
-{num}            return NUMBER;
-"#"{name}        return HASH;
-{w}"+"           return PLUS;
-{w}">"           return GREATER;
-{w}","           return COMMA;
-{w}"~"           return TILDE;
-":not("          return NOT;
-@{ident}         return ATKEYWORD;
-{invalid}        return INVALID;
-{num}%           return PERCENTAGE;
-{num}{ident}     return DIMENSION;
-"<!--"           return CDO;
-"-->"            return CDC;
-
-"url("{w}{string}{w}")"                           return URI;
-"url("{w}([!#$%&*-~]|{nonascii}|{escape})*{w}")"  return URI;
-U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?                return UNICODE_RANGE;
-
-\/\*[^*]*\*+([^/*][^*]*\*+)*\/                    /* ignore comments */
-
-.                return *yytext;
- - - -

11. Namespaces and down-level clients

- -

An important issue is the interaction of CSS selectors with XML -documents in web clients that were produced prior to this -document. Unfortunately, due to the fact that namespaces must be -matched based on the URI which identifies the namespace, not the -namespace prefix, some mechanism is required to identify namespaces in -CSS by their URI as well. Without such a mechanism, it is impossible -to construct a CSS style sheet which will properly match selectors in -all cases against a random set of XML documents. However, given -complete knowledge of the XML document to which a style sheet is to be -applied, and a limited use of namespaces within the XML document, it -is possible to construct a style sheet in which selectors would match -elements and attributes correctly.

- -

It should be noted that a down-level CSS client will (if it -properly conforms to CSS forward compatible parsing rules) ignore all -@namespace at-rules, as well as all style rules that make -use of namespace qualified element type or attribute selectors. The -syntax of delimiting namespace prefixes in CSS was deliberately chosen -so that down-level CSS clients would ignore the style rules rather -than possibly match them incorrectly.

- -

The use of default namespaces in CSS makes it possible to write -element type selectors that will function in both namespace aware CSS -clients as well as down-level clients. It should be noted that -down-level clients may incorrectly match selectors against XML -elements in other namespaces.

- -

The following are scenarios and examples in which it is possible to -construct style sheets which would function properly in web clients -that do not implement this proposal.

- -
    -
  1. - -

    The XML document does not use namespaces.

    - -
      - -
    • In this case, it is obviously not necessary to declare or use - namespaces in the style sheet. Standard CSS element type and - attribute selectors will function adequately in a down-level - client.
    • - -
    • In a CSS namespace aware client, the default behavior of - element selectors matching without regard to namespace will - function properly against all elements, since no namespaces are - present. However, the use of specific element type selectors that - match only elements that have no namespace ("|name") - will guarantee that selectors will match only XML elements that do - not have a declared namespace.
    • - -
    - -
  2. - -
  3. - -

    The XML document defines a single, default namespace used - throughout the document. No namespace prefixes are used in element - names.

    - -
      - -
    • In this case, a down-level client will function as if - namespaces were not used in the XML document at all. Standard CSS - element type and attribute selectors will match against all - elements.
    • - -
    - -
  4. - -
  5. - -

    The XML document does not use a default namespace, all - namespace prefixes used are known to the style sheet author, and - there is a direct mapping between namespace prefixes and namespace - URIs. (A given prefix may only be mapped to one namespace URI - throughout the XML document; there may be multiple prefixes mapped - to the same URI).

    - -
      - -
    • In this case, the down-level client will view and match - element type and attribute selectors based on their fully - qualified name, not the local part as outlined in the Type selectors and Namespaces section. CSS - selectors may be declared using an escaped colon "\:" - to describe the fully qualified names, e.g. - "html\:h1" will match - <html:h1>. Selectors using the qualified name - will only match XML elements that use the same prefix. Other - namespace prefixes used in the XML that are mapped to the same URI - will not match as expected unless additional CSS style rules are - declared for them.
    • - -
    • Note that selectors declared in this fashion will - only match in down-level clients. A CSS namespace aware - client will match element type and attribute selectors based on - the name's local part. Selectors declared with the fully - qualified name will not match (unless there is no namespace prefix - in the fully qualified name).
    • - -
    - -
  6. - -
- -

In other scenarios: when the namespace prefixes used in the XML are -not known in advance by the style sheet author; or a combination of -elements with no namespace are used in conjunction with elements using -a default namespace; or the same namespace prefix is mapped to -different namespace URIs within the same document, or in -different documents; it is impossible to construct a CSS style sheet -that will function properly against all elements in those documents, -unless, the style sheet is written using a namespace URI syntax (as -outlined in this document or similar) and the document is processed by -a CSS and XML namespace aware client.

- -

12. Profiles

- -

Each specification using Selectors must define the subset of W3C -Selectors it allows and excludes, and describe the local meaning of -all the components of that subset.

- -

Non-normative examples: - -

- - - - - - - - - - - - - - - -
Selectors profile
SpecificationCSS level 1
Acceptstype selectors
class selectors
ID selectors
:link, - :visited and :active pseudo-classes
descendant combinator -
::first-line and ::first-letter pseudo-elements
Excludes - -

universal selector
attribute selectors
:hover and :focus - pseudo-classes
:target pseudo-class
:lang() pseudo-class
all UI - element states pseudo-classes
all structural - pseudo-classes
negation pseudo-class
all - UI element fragments pseudo-elements
::before and ::after - pseudo-elements
child combinators
sibling combinators - -

namespaces

Extra constraintsonly one class selector allowed per sequence of simple - selectors


- - - - - - - - - - - - - - - -
Selectors profile
SpecificationCSS level 2
Acceptstype selectors
universal selector
attribute presence and - values selectors
class selectors
ID selectors
:link, :visited, - :active, :hover, :focus, :lang() and :first-child pseudo-classes -
descendant combinator
child combinator
adjacent sibling - combinator
::first-line and ::first-letter pseudo-elements
::before - and ::after pseudo-elements
Excludes - -

content selectors
substring matching attribute - selectors
:target pseudo-classes
all UI element - states pseudo-classes
all structural pseudo-classes other - than :first-child
negation pseudo-class
all UI element - fragments pseudo-elements
general sibling combinators - -

namespaces

Extra constraintsmore than one class selector per sequence of simple selectors (CSS1 - constraint) allowed
- -

In CSS, selectors express pattern matching rules that determine which style -rules apply to elements in the document tree. - -

The following selector (CSS level 2) will match all anchors a -with attribute name set inside a section 1 header h1: -

h1 a[name]
- -

All CSS declarations attached to such a selector are applied to elements -matching it.

- -
- - - - - - - - - - - - - - - - -
Selectors profile
SpecificationSTTS 3
Accepts - -

type selectors
universal selectors
attribute selectors
class - selectors
ID selectors
all structural pseudo-classes
- all combinators - -

namespaces

Excludesnon-accepted pseudo-classes
pseudo-elements
Extra constraintssome selectors and combinators are not allowed in fragment - descriptions on the right side of STTS declarations.
- -

Selectors can be used in STTS 3 in two different - manners: -

    -
  1. a selection mechanism equivalent to CSS selection mechanism: declarations - attached to a given selector are applied to elements matching that selector, -
  2. fragment descriptions that appear on the right side of declarations. -
- -

13. Conformance and requirements

- -

This section defines conformance with the present specification only. - -

The inability of a user agent to implement part of this specification due to -the limitations of a particular device (e.g., non-interactive user agents will -probably not implement dynamic pseudo-classes because they make no sense without -interactivity) does not imply non-conformance. - -

All specifications reusing Selectors must contain a Profile listing the -subset of Selectors it accepts or excludes, and describing the constraints -it adds to the current specification. - -

Invalidity is caused by a parsing error, e.g. an unrecognized token or a token -which is not allowed at the current parsing point. - -

User agents must observe the rules for handling parsing errors: -

    -
  • a simple selector containing an undeclared namespace prefix is invalid
  • -
  • a selector containing an invalid simple selector, an invalid combinator - or an invalid token is invalid.
  • -
  • a group of selectors containing an invalid selector is invalid.
  • -
- -

Specifications reusing Selectors must define how to handle parsing -errors. (In the case of CSS, the entire rule in which the selector is -used is dropped.)

- - - -

14. Tests

- -

This specification has a test -suite allowing user agents to verify their basic conformance to -the specification. This test suite does not pretend to be exhaustive -and does not cover all possible combined cases of Selectors.

- -

15. Acknowledgements

- -

The CSS working group would like to thank everyone who has sent -comments on this specification over the years.

- -

The working group would like to extend special thanks to Donna -McManus, Justin Baker, Joel Sklar, and Molly Ives Brower who performed -the final editorial review.

- -

16. References

- -
- -
[CSS1] -
Bert Bos, Håkon Wium Lie; "Cascading Style Sheets, level 1", W3C Recommendation, 17 Dec 1996, revised 11 Jan 1999 -
(http://www.w3.org/TR/REC-CSS1) - -
[CSS21] -
Bert Bos, Tantek Çelik, Ian Hickson, Håkon Wium Lie, editors; "Cascading Style Sheets, level 2 revision 1", W3C Working Draft, 13 June 2005 -
(http://www.w3.org/TR/CSS21) - -
[CWWW] -
Martin J. Dürst, François Yergeau, Misha Wolf, Asmus Freytag, Tex Texin, editors; "Character Model for the World Wide Web", W3C Recommendation, 15 February 2005 -
(http://www.w3.org/TR/charmod/) - -
[FLEX] -
"Flex: The Lexical Scanner Generator", Version 2.3.7, ISBN 1882114213 - -
[HTML4] -
Dave Raggett, Arnaud Le Hors, Ian Jacobs, editors; "HTML 4.01 Specification", W3C Recommendation, 24 December 1999 -
(http://www.w3.org/TR/html4/) - -
[MATH] -
Patrick Ion, Robert Miner, editors; "Mathematical Markup Language (MathML) 1.01", W3C Recommendation, revision of 7 July 1999 -
(http://www.w3.org/TR/REC-MathML/) - -
[RFC3066] -
H. Alvestrand; "Tags for the Identification of Languages", Request for Comments 3066, January 2001 -
(http://www.ietf.org/rfc/rfc3066.txt) - -
[STTS] -
Daniel Glazman; "Simple Tree Transformation Sheets 3", Electricité de France, submission to the W3C, 11 November 1998 -
(http://www.w3.org/TR/NOTE-STTS3) - -
[SVG] -
Jon Ferraiolo, 藤沢 淳, Dean Jackson, editors; "Scalable Vector Graphics (SVG) 1.1 Specification", W3C Recommendation, 14 January 2003 -
(http://www.w3.org/TR/SVG/) - -
[UNICODE]
-
The Unicode Standard, Version 4.1, The Unicode Consortium. Boston, MA, Addison-Wesley, March 2005. ISBN 0-321-18578-1, as amended by Unicode 4.0.1 and Unicode 4.1.0. -
(http://www.unicode.org/versions/)
- -
[XML10] -
Tim Bray, Jean Paoli, C. M. Sperberg-McQueen, Eve Maler, François Yergeau, editors; "Extensible Markup Language (XML) 1.0 (Third Edition)", W3C Recommendation, 4 February 2004 -
(http://www.w3.org/TR/REC-xml/) - -
[XMLNAMES] -
Tim Bray, Dave Hollander, Andrew Layman, editors; "Namespaces in XML", W3C Recommendation, 14 January 1999 -
(http://www.w3.org/TR/REC-xml-names/) - -
[YACC] -
S. C. Johnson; "YACC — Yet another compiler compiler", Technical Report, Murray Hill, 1975 - -
- - diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/testcase/std_testcase.php b/Src/Sunra/PhpSimple/simplehtmldom_1_5/testcase/std_testcase.php deleted file mode 100644 index 57d9f10..0000000 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/testcase/std_testcase.php +++ /dev/null @@ -1,243 +0,0 @@ -load($str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = null; -$dom->load($str); -assert($dom->save()==$str); - -// ----------------------------------------------------------------------------- -// text test -$str = << - -HTML; -$dom->load($str); -assert(count($dom->find('unknown'))==1); -assert(count($dom->find('text'))==1); - -// ----------------------------------------------------------------------------- -// string quote test -$str = << - okok
- -
-
- -
-
- -HTML; -$dom->load($str); -$es = $dom->find('input'); -assert(count($es)==4); -assert($es[0]->onclick=='goto("url0")'); -assert($es[1]->onclick=="goto('url1'+'\'')"); -assert($es[2]->onclick=="goto('url2')"); -assert($es[3]->onclick=='goto("url4"+"\"")'); - -// ----------------------------------------------------------------------------- -// clone test -$str = << - okok
- -
-
- -
-
- -HTML; -$dom->load($str); -$es = $dom->find('input'); -assert(count($es)==4); -assert($es[0]->onclick=='goto("url0")'); -assert($es[1]->onclick=="goto('url1'+'\'')"); -assert($es[2]->onclick=="goto('url2')"); -assert($es[3]->onclick=='goto("url4"+"\"")'); - -unset($es); -$dom2 = clone($dom); -$es = $dom2->find('input'); -assert(count($es)==4); -assert($es[0]->onclick=='goto("url0")'); -assert($es[1]->onclick=="goto('url1'+'\'')"); -assert($es[2]->onclick=="goto('url2')"); -assert($es[3]->onclick=='goto("url4"+"\"")'); - -// ----------------------------------------------- -$str = << -HTML; -$dom->load($str); -assert($dom==$str); -assert($dom->save()==$str); - -// ----------------------------------------------------------------------------- -// monkey test -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<<<>ab -HTML; -$dom->load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); -// ----------------------------------------------- -$str = << -HTML; -$dom->load($str); -assert($dom==$str); -assert($dom->save()==$str); -// 
----------------------------------------------- -// $str = <<load($str); -// echo $dom; -// assert($dom==$str); -// assert($dom->save()==$str); -// ----------------------------------------------- -$str = <<load($str); -assert($dom==$str); -assert($dom->save()==$str); - -// ----------------------------------------------------------------------------- -// rnadom string test -function str_random($length) -{ - $str = ""; - srand((double)microtime()*1000000); - $char_list = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - $char_list .= "abcdefghijklmnopqrstuvwxyz"; - $char_list .= "1234567890"; - $char_list .= "<>!?[]%^&*()"; - for($i=0; $i<$length; ++$i) - $str .= substr($char_list,(rand()%(strlen($char_list))), 1); - return $str; -} - -for($i=0; $i<60; ++$i) { - $str = str_random($i); - //echo $str."\n
"; - $dom->load($str, false); - //echo $dom->save()."\n
"; - assert($dom==$str); -} - -// ----------------------------------------------------------------------------- -// lowercase test -$str = << -HTML; -$dom->load($str); -assert(count($dom->find('img'))==1); -assert(count($dom->find('IMG'))==1); -assert(isset($dom->find('img', 0)->class)); -assert(!isset($dom->find('img', 0)->CLASS)); -assert($dom->find('img', 0)->class=='class0'); -assert($dom==$str); -// ----------------------------------------------- -$str = << -HTML; -$dom->load($str); -assert(count($dom->find('img'))==1); -assert(count($dom->find('IMG'))==1); -assert(isset($dom->find('img', 0)->class)); -assert(!isset($dom->find('img', 0)->CLASS)); -assert($dom->find('img', 0)->class=='class0'); -assert($dom==strtolower($str)); -// ----------------------------------------------- -$str = << -HTML; -$dom->load($str, false); -assert(count($dom->find('img'))==0); -assert(count($dom->find('IMG'))==1); -assert(isset($dom->find('IMG', 0)->CLASS)); -assert(!isset($dom->find('IMG', 0)->class)); -assert($dom->find('IMG', 0)->CLASS=='class0'); -assert($dom==$str); - -// ----------------------------------------------------------------------------- -// tear down -$dom->clear(); -unset($dom); -?> \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/testcase/strip_testcase.php b/Src/Sunra/PhpSimple/simplehtmldom_1_5/testcase/strip_testcase.php deleted file mode 100644 index 66a45f6..0000000 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/testcase/strip_testcase.php +++ /dev/null @@ -1,137 +0,0 @@ - - - -HTML; -$dom->load($str); -assert(count($dom->find('input'))==0); - -// ----------------------------------------------------------------------------- -// test -$str = << - - - - -HTML; -$dom->load($str); -assert(count($dom->find('code'))==1); -assert(count($dom->find('input'))==0); - -// ----------------------------------------------------------------------------- -//
 &  test
-$str = <<
-    
-
-HTML; -$dom->load($str); -assert(count($dom->find('pre'))==1); -assert(count($dom->find('input'))==0); - -// ----------------------------------------------------------------------------- -// - -HTML; -$dom->load($str); -assert(count($dom->find('style'))==1); -assert(count($dom->find('script'))==3); - -// ----------------------------------------------------------------------------- -// php short tag test -$str = <<hello - -HTML; -$dom->load($str); -assert($dom->find('a', 0)->href===""); -assert($dom->find('input', 0)->value===""); - -// ----------------------------------------------------------------------------- -// noise stripping test -$str = <<--> - - -HTML; -$dom->load($str); -assert(count($dom->find('img'))==1); -assert($dom==$str); -// ----------------------------------------------- -$str = <<ss - - - - - - - - - - - - - -HTML; -$dom->load($str); -assert(count($dom->find('script'))==8); -assert(count($dom->find('style'))==3); -//echo "\n\n\n\n".$dom->save(); -assert($dom==$str); - -// ----------------------------------------------------------------------------- -// tear down -$dom->clear(); -unset($dom); -?> \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/CHANGELOG.md b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/CHANGELOG.md new file mode 100755 index 0000000..6aef854 --- /dev/null +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/CHANGELOG.md @@ -0,0 +1,233 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
+ +## [1.8.1] - 2019-01-13 +### Fixed +- Fixed various bugs related to parsing classes and ids + +## [1.8] - 2019-01-13 +### Added +- Added documentation for `simple_html_dom_node::find` +- Added documentation for `simple_html_dom_node::parse_selector` +- Added documentation for `simple_html_dom_node::seek` +- Added documentation for `simple_html_dom_node::match` +- Added unit tests for bug reports + - Added test for bug [#62](https://sourceforge.net/p/simplehtmldom/bugs/62/) + - Added test for bug [#79](https://sourceforge.net/p/simplehtmldom/bugs/79/) + - Added test for bug [#144](https://sourceforge.net/p/simplehtmldom/bugs/144/) +- Added unit tests for CSS selectors +- Added ability to define constants before simple_html_dom does + - 'DEFAULT_TARGET_CHARSET' + - 'DEFAULT_BR_TEXT' + - 'DEFAULT_SPAN_TEXT' + - 'MAX_FILE_SIZE' +- Added support for CSS combinators + - Added support for Child Combinator (`>`) + - Added support for Next Sibling Combinator (`+`) + - Added support for Subsequent Sibling Combinator (`~`) +- Added support for multiclass selectors (`.class.class.class`) +- Added support for multiattribute selectors (`[attr1][attr2][attribute3]`) +- Added support for attribute selectors + - Added support for pipe selectors (`|=`) + - Added support for tilde selectors (`~=`) + - Added support for case sensitivity selectors (`i` and `s`) +- Added unit tests for PHP compatibility to PHP 5.6+ +- Added coding standard using PHP_CodeSniffer +### Changed +- Removed automatic filtering of 'tbody' selectors (#79) + > Remove 'tbody' from all selectors to maintain the previous state! 
+- Coding standard using PHP_CodeSniffer +### Fixed +- Fixed broken CSS selector attributes with value "0" (#62) +- Fixed broken simple_html_dom::load_file +- Fixed forward slashes in CSS selector breaks value matching using '*=' (#144) +- Fixed Universal Selectors + +## [1.7] - 2018-12-10 +### Added +- Added code documentation to improve readability +- Added unit tests for `simple_html_dom::$self_closing_tags` +- Added unit tests for `simple_html_dom::$optional_closing_tags` +- Added unit tests for bug reports + - Added test for bug [#56](https://sourceforge.net/p/simplehtmldom/bugs/56/) + - Added test for bug [#97](https://sourceforge.net/p/simplehtmldom/bugs/97/) + - Added test for bug [#116](https://sourceforge.net/p/simplehtmldom/bugs/116/) + - Added test for bug [#121](https://sourceforge.net/p/simplehtmldom/bugs/127/) + - Added test for bug [#127](https://sourceforge.net/p/simplehtmldom/bugs/127/) + - Added test for bug [#154](https://sourceforge.net/p/simplehtmldom/bugs/154/) + - Added test for bug [#160](https://sourceforge.net/p/simplehtmldom/bugs/160/) +- Added unit tests for memory management of the parser +- Added bit flags to `simple_html_dom::load()` + - Added bit flag `HDOM_SMARTY_AS_TEXT` to optionally filter Smarty scripts (#154)\ + **Note**: Smarty scripts are no longer filtered by default!\ +- Added build script to automate releases +- Added support for attributes without whitespace to separate them +### Changed +- Improved documentation and readability for `$self_closing_tags` +- Improved documentation and readability for `$block_tags` +- Improved documentation and readability for `$optional_closing_tags` +- Updated list of `simple_html_dom::$self_closing_tags` + - Removed 'spacer' (obsolete) + - Added 'area' + - Added 'col' + - Added 'meta' + - Added 'param' + - Added 'source' + - Added 'track' + - Added 'wbr' +- Updated list of `simple_html_dom::$optional_closing_tags` + - Removed "nobr" (obsolete) + - Added 'th' as closable element to 'td' + 
- Added 'td' as closable element to 'th' + - Added 'optgroup' with 'optgroup' and 'option' as closable elements + - Added 'optgroup' as closable element to 'option' + - Added 'rp' with 'rp' and 'rt' as closable elements + - Added 'rt' with 'rt' and 'rp' as closable elements +- Clarified meaning of `simple_html_dom->parent` +- Changed default `$offset` for `file_get_html()` from -1 to 0 (#161) +- Changed `simple_html_dom::load()` to remove script tags before replacing newline characters +- `simple_html_dom_node::text()` no longer adds whitespace to top level span elements (only to sub-elements) +- `simple_html_dom_node::text()` adds blank lines between paragraphs +- Normalized line endings in the repository to LF via `.gitattributes` +- Improved performance of `simple_html_dom::parse_charset()` by approximately 25% +- Improved performance of `simple_html_dom::parse()` by approximately 10% +### Deprecated +- `str_get_html()` is deprecated and should be replaced by `new simple_html_dom()` +### Removed +- Removed protected function `simple_html_dom::copy_until_char_escaped()` +### Fixed +- Fixed compatibility issues with PHP 7.3 +- Fixed typo (#147) +- Fixed handling of incorrectly escaped text (#160) +- Restore functionality of `$maxLen` in `file_get_html()` +- Fixed load_file breaks if an error ocurred in another script + +## [1.6] - 2014-05-28 +### Added +- Added some ability to insert and create nodes +- Add ability to search the "noise" array + +## [1.5] - 2012-09-10 +### Added +- Added flag: LOCK_EX while calling "file_put_contents()" +- Added support for detecting the source html character set. This is used to convert characters when plaintext is requested. 
+- Other little fixes and features, too numerous to categorize +### Changed +- Error of "file_get_contents()" will be thrown as an exception +### Fixed +- Fixed the typo of "token_blank_t" +- Memory leak fixed + +## [1.11] - 2008-12-14 +### Added +- Supports xpath generated from Firebug +- New method "dump" of "simple_html_dom_node" +- New attribute "xmltext" of "simple_html_dom_node" +### Changed +- Remove preg_quote on selector match function: `[attribute*=value]` +- Element "Comment" will treat as children +### Fixed +- Fixed the problem with `
`
+- Fixed bug #2207477 (does not load some pages properly)
+- Fixed bug #2315853 (Error with character after < sign)
+
+## [1.10] - 2008-10-25
+### Changed
+- Negative indexes are now supported by the "find" method, thanks to Vadim Voituk
+- Constructor now automatically loads contents from either text or file/url, thanks to Antcs
+- Fully supports wildcard in selectors
+### Fixed
+- Fixed bug where the parser was confused by the < symbol inside the text
+- Fixed bug of dash in selectors
+- Fixed bug of ``
+- Fixed bug #2155883 (Nested List Parses Incorrectly)
+- Fixed bug #2155113 (error with unclosed html tags)
+
+## [1.00] - 2008-09-05
+### Added
+- New method "getAllAttributes" of "simple_html_dom_node"
+- Supports full javascript string in selector: `$e->find("a[onclick=alert('hello')]")`
+### Changed
+- Changed selector "*=" to case-insensitive
+### Fixed
+- Fixed the bug of selector in some critical conditions
+- Fixed the bug of stripping php tags
+- Fixed the bug of remove_noise()
+- Fixed the bug of noise in attributes
+
+## [0.99] - 2008-08-03
+### Changed
+- Performance tuning (boost 10%)
+- Memory requirement reduced by 25%
+- Changed function name from "file_get_dom()" to "file_get_html()"
+- Changed function name from "str_get_dom()" to "str_get_html()"
+### Fixed
+- Fixed bug #2011286 (Error with unclosed html tags)
+- Fixed bug #2012551 (Error parsing divs)
+- Fixed bug #2020924 (Error for missed tag)
+- Fixed bug (problem with `` tag's innertext)
+
+## [0.98] - 2008-06-24
+### Added
+- Supports "multiple class" selector feature: `
` +- New "callback function" feature +- New "multiple selectors" feature: $dom->find('p,a,b') +- New examples +- Supports extract contents from HTML features: $dom->plaintext +### Changed +- Performance tuning (boost 20%) +- Changed simple_html_dom_node method name from "text()" to "makeup()" +### Fixed +- Fixed the bug of $dom->clear() +- Fixed the bug of text nodes' innertext +- Fixed the bug of comment nodes' innertext +- Fixed the bug of decendent selector with optional tags + +## [0.97] - 2008-05-09 +### Added +- New node type "comment" (eg. $dom->find('comment')) +- Add self-closing tags: 'base', 'spacer' +- New example "simple_html_dom_utility.php" +### Changed +- File and class name changed (html_dom_parser->simple_html_dom) +### Removed +- ($dom->save_file) will not support anymore +- Remove example "example_customize_parser.php" +### Fixed +- Fixed the bug of outertext (th) +- Fixed the bug of regular expression escaping chars ($dom->find) +- Fixed the bug while line-breaker and "\t" in tags + +## [0.96] - 2008-04-27 +### Added +- Reference section in manual +- Added traverse section in manual +- Added the solution while server behind proxy in FAQ (Thanks to Yousuke Shaggy) +- New method to remove attribute. 
+- New DOM operations(first_child, last_child, next_sibling, previous_sibling) (Request #1936000) +### Changed +- Now file_get_dom supports full file_get_contents parameters +### Fixed +- Fixed the bug of self-closing tags in the end of file +- Fixed the bug of blanks in the end of tag +- Fixed some typo of testcase + +## [0.95] - 2008-04-13 +### Added +- Supports tag name with namespace +### Changed +- New attribute filters (Thanks to Yousuke Kumakura) +- Refine structure of testcase +### Fixed +- Fix the bug of optional-closing tags +- Fix the bug of parsing the line break next to the tag's name + +## [0.94] - 2008-04-06 +### Added +- Add FAQ section in manual +### Fixed +- Fixed infinity loop while the source content is BAD HTML +- Fixed the bug of adding new attributes to self closing tags +- Fixed the bug of customize parser without $dom->remove_noise() \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_advanced_selector.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_advanced_selector.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_advanced_selector.php rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_advanced_selector.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_basic_selector.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_basic_selector.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_basic_selector.php rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_basic_selector.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_callback.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_callback.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_callback.php rename to 
Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_callback.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_extract_html.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_extract_html.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_extract_html.php rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_extract_html.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_modify_contents.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_modify_contents.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/example_modify_contents.php rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/example_modify_contents.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/scraping/example_scraping_digg.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_digg.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/scraping/example_scraping_digg.php rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_digg.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_general.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_general.php new file mode 100755 index 0000000..701bf55 --- /dev/null +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_general.php @@ -0,0 +1,59 @@ +"; + // create HTML DOM + $html = file_get_html($url); + echo "url has been read.
"; + + // get article block + foreach($html->find($search) as $found) { + // Found at least one. + $return - true; + echo "found a: " . $search . "
";
+		$found->dump();
+		echo "

"; + } + + // clean up memory + $html->clear(); + unset($html); + + return $return; +} + + +// ------------------------------------------ +error_log ("post:" . print_r($_POST, true)); +$url = ""; +if (isset($_POST['url'])) +{ + $url = $_POST['url']; +} +$search = ""; +if (isset($_POST['search'])) +{ + $search = $_POST['search']; +} +?> +
+ URL:
+ Search: + +
+"; + } +} +?> \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/scraping/example_scraping_imdb.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_imdb.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/scraping/example_scraping_imdb.php rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_imdb.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/scraping/example_scraping_slashdot.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_slashdot.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/scraping/example_scraping_slashdot.php rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/scraping/example_scraping_slashdot.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/example/simple_html_dom_utility.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/simple_html_dom_utility.php old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/example/simple_html_dom_utility.php rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/example/simple_html_dom_utility.php diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/css/default.css b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/css/default.css old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/css/default.css rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/css/default.css diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/css/ui.tabs.css b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/css/ui.tabs.css old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/css/ui.tabs.css rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/css/ui.tabs.css diff --git 
a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/img/tab.png b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/img/tab.png old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/img/tab.png rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/img/tab.png diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/index.htm b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/index.htm new file mode 100755 index 0000000..9ca181e --- /dev/null +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/index.htm @@ -0,0 +1,117 @@ + + + + + + +PHP Simple HTML DOM Parser + + + + + + + +

PHP Simple HTML DOM Parser

+
+

Description, Requirement & Features

+
    +
  • A HTML DOM parser written in PHP5+ let you manipulate HTML in a very easy way!
  • +
  • Require PHP 5+.
  • +
  • Supports invalid HTML.
  • +
  • Find tags on an HTML page with selectors just like jQuery.
  • +
  • Extract contents from HTML in a single line.
  • +
+

Download & Documents

+ +

Quick Start

+
+ +
+
+ // Create DOM from URL or file
+ $html = file_get_html('http://www.google.com/');
+
+ // Find all images
+ foreach($html->find('img') as $element)
+       echo $element->src . '<br>';
+
+// Find all links
+foreach($html->find('a') as $element)
+       echo $element->href . '<br>';
+
+
+
+ // Create DOM from string
+ $html = str_get_html('<div id="hello">Hello</div><div id="world">World</div>');
+
+
+ +$html->find('div', 1)->class = 'bar';
+
+$html->find('div[id=hello]', 0)->innertext = 'foo';
+
+ echo $html; // Output: <div id="hello">foo</div><div id="world" class="bar">World</div>
+
+
+

+ // Dump contents (without tags) from HTML
+ echo file_get_html('http://www.google.com/')->plaintext; +
+
+
+
+
+
+ // Create DOM from URL
+ $html = file_get_html('http://slashdot.org/');
+
+ // Find all article blocks
+ foreach($html->find('div.article') as $article) {
+    $item['title']     = $article->find('div.title', 0)->plaintext;
+    $item['intro']    = $article->find('div.intro', 0)->plaintext;
+    $item['details'] = $article->find('div.details', 0)->plaintext;
+    $articles[] = $item;
+ }
+
+ print_r($articles); +
+
+
+

Feedback

+ +

+ Author: S.C. Chen (me578022@gmail.com)
+ Original idea is from Jose Solorzano's HTML Parser for PHP 4.
+ Contributions by: Yousuke Kumakura (Attribute Filters)
+

SourceForge.net Logo

+
+ + + + + diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/js/jquery-1.2.3.pack.js b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/js/jquery-1.2.3.pack.js old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/js/jquery-1.2.3.pack.js rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/js/jquery-1.2.3.pack.js diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/js/ui.tabs.pack.js b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/js/ui.tabs.pack.js old mode 100644 new mode 100755 similarity index 100% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/js/ui.tabs.pack.js rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/js/ui.tabs.pack.js diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual.htm b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual.htm old mode 100644 new mode 100755 similarity index 96% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual.htm rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual.htm index ed796e3..9a466e8 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual.htm +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual.htm @@ -1,23 +1,23 @@ - -PHP Simple HTML DOM Parser: Manual - - - - - + + PHP Simple HTML DOM Parser: Manual + + + + +

PHP Simple HTML DOM Parser Manual

@@ -445,4 +445,4 @@

How to customize the parsing behavior?

- \ No newline at end of file + \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual_api.htm b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual_api.htm old mode 100644 new mode 100755 similarity index 97% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual_api.htm rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual_api.htm index 365870f..f8af542 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual_api.htm +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual_api.htm @@ -2,11 +2,13 @@ + + PHP Simple HTML DOM Parser: API Reference -

PHP Simple HTML DOM Parser Manual

+

PHP Simple HTML DOM Parser

Index

    @@ -317,4 +319,4 @@

    Camel naming convertions

- \ No newline at end of file + \ No newline at end of file diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual_faq.htm b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual_faq.htm old mode 100644 new mode 100755 similarity index 93% rename from Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual_faq.htm rename to Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual_faq.htm index 3763eaa..cca05d9 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/manual/manual_faq.htm +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/manual/manual_faq.htm @@ -2,11 +2,14 @@ + + + PHP Simple HTML DOM Parser: FAQ -

PHP Simple HTML DOM Parser Manual

+

PHP Simple HTML DOM Parser

FAQ

    @@ -88,4 +91,4 @@

    Memory leak!

- + diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/phpcompatibility.xml b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/phpcompatibility.xml new file mode 100755 index 0000000..3fa7f92 --- /dev/null +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/phpcompatibility.xml @@ -0,0 +1,11 @@ + + + Defines rules for PHPCompatibility + ./app + ./example + ./manual + ./testcase + ./tests + + + diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/phpcs.xml b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/phpcs.xml new file mode 100755 index 0000000..1f4c506 --- /dev/null +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/phpcs.xml @@ -0,0 +1,48 @@ + + + Created with the PHP Coding Standard Generator. http://edorian.github.com/php-coding-standard-generator/ + ./app + ./example + ./manual + ./testcase + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/simple_html_dom.php b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/simple_html_dom.php new file mode 100755 index 0000000..44a958f --- /dev/null +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_8_1/simple_html_dom.php @@ -0,0 +1,2811 @@ +size is the "real" + * number of bytes the dom was created from. But for most purposes, it's a + * really good estimation. + * + * Paperg - Added the forceTagsClosed to the dom constructor. Forcing tags + * closed is great for malformed html, but it CAN lead to parsing errors. + * + * Allow the user to tell us how much they trust the html. + * + * Paperg add the text and plaintext to the selectors for the find syntax. + * plaintext implies text in the innertext of a node. text implies that the + * tag is a text node. This allows for us to find tags based on the text they + * contain. + * + * Create find_ancestor_tag to see if a tag is - at any level - inside of + * another specific tag. + * + * Paperg: added parse_charset so that we know about the character set of + * the source document. 
NOTE: If the user's system has a routine called + * get_last_retrieve_url_contents_content_type availalbe, we will assume it's + * returning the content-type header from the last transfer or curl_exec, and + * we will parse that and use it in preference to any other method of charset + * detection. + * + * Found infinite loop in the case of broken html in restore_noise. Rewrote to + * protect from that. + * + * PaperG (John Schlick) Added get_display_size for "IMG" tags. + * + * Licensed under The MIT License + * Redistributions of files must retain the above copyright notice. + * + * @author S.C. Chen + * @author John Schlick + * @author Rus Carroll + * @version Rev. 1.8.1 (247) + * @package PlaceLocalInclude + * @subpackage simple_html_dom + */ + +/** + * All of the Defines for the classes below. + * @author S.C. Chen + */ +define('HDOM_TYPE_ELEMENT', 1); +define('HDOM_TYPE_COMMENT', 2); +define('HDOM_TYPE_TEXT', 3); +define('HDOM_TYPE_ENDTAG', 4); +define('HDOM_TYPE_ROOT', 5); +define('HDOM_TYPE_UNKNOWN', 6); +define('HDOM_QUOTE_DOUBLE', 0); +define('HDOM_QUOTE_SINGLE', 1); +define('HDOM_QUOTE_NO', 3); +define('HDOM_INFO_BEGIN', 0); +define('HDOM_INFO_END', 1); +define('HDOM_INFO_QUOTE', 2); +define('HDOM_INFO_SPACE', 3); +define('HDOM_INFO_TEXT', 4); +define('HDOM_INFO_INNER', 5); +define('HDOM_INFO_OUTER', 6); +define('HDOM_INFO_ENDSPACE', 7); + +/** The default target charset */ +defined('DEFAULT_TARGET_CHARSET') || define('DEFAULT_TARGET_CHARSET', 'UTF-8'); + +/** The default
text used instead of
tags when returning text */ +defined('DEFAULT_BR_TEXT') || define('DEFAULT_BR_TEXT', "\r\n"); + +/** The default text used instead of tags when returning text */ +defined('DEFAULT_SPAN_TEXT') || define('DEFAULT_SPAN_TEXT', ' '); + +/** The maximum file size the parser should load */ +defined('MAX_FILE_SIZE') || define('MAX_FILE_SIZE', 600000); + +/** Contents between curly braces "{" and "}" are interpreted as text */ +define('HDOM_SMARTY_AS_TEXT', 1); + +// helper functions +// ----------------------------------------------------------------------------- +// get html dom from file +// $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1. +function file_get_html( + $url, + $use_include_path = false, + $context = null, + $offset = 0, + $maxLen = -1, + $lowercase = true, + $forceTagsClosed = true, + $target_charset = DEFAULT_TARGET_CHARSET, + $stripRN = true, + $defaultBRText = DEFAULT_BR_TEXT, + $defaultSpanText = DEFAULT_SPAN_TEXT) +{ + // Ensure maximum length is greater than zero + if($maxLen <= 0) { $maxLen = MAX_FILE_SIZE; } + + // We DO force the tags to be terminated. + $dom = new simple_html_dom( + null, + $lowercase, + $forceTagsClosed, + $target_charset, + $stripRN, + $defaultBRText, + $defaultSpanText); + + /** + * For sourceforge users: uncomment the next line and comment the + * retrieve_url_contents line 2 lines down if it is not already done. + */ + $contents = file_get_contents( + $url, + $use_include_path, + $context, + $offset, + $maxLen); + + // Paperg - use our own mechanism for getting the contents as we want to + // control the timeout. + // $contents = retrieve_url_contents($url); + if (empty($contents) || strlen($contents) > $maxLen) { return false; } + + // The second parameter can force the selectors to all be lowercase. 
+ $dom->load($contents, $lowercase, $stripRN); + return $dom; +} + +// get html dom from string +function str_get_html( + $str, + $lowercase = true, + $forceTagsClosed = true, + $target_charset = DEFAULT_TARGET_CHARSET, + $stripRN = true, + $defaultBRText = DEFAULT_BR_TEXT, + $defaultSpanText = DEFAULT_SPAN_TEXT) +{ + $dom = new simple_html_dom( + null, + $lowercase, + $forceTagsClosed, + $target_charset, + $stripRN, + $defaultBRText, + $defaultSpanText); + + if (empty($str) || strlen($str) > MAX_FILE_SIZE) { + $dom->clear(); + return false; + } + + $dom->load($str, $lowercase, $stripRN); + return $dom; +} + +// dump html dom tree +function dump_html_tree($node, $show_attr = true, $deep = 0) +{ + $node->dump($node); +} + +/** + * simple html dom node + * PaperG - added ability for "find" routine to lowercase the value of the + * selector. + * + * PaperG - added $tag_start to track the start position of the tag in the total + * byte index + * + * @package PlaceLocalInclude + */ +class simple_html_dom_node +{ + /** + * Node type + * + * Default is {@see HDOM_TYPE_TEXT} + * + * @var int + */ + public $nodetype = HDOM_TYPE_TEXT; + + /** + * Tag name + * + * Default is 'text' + * + * @var string + */ + public $tag = 'text'; + + /** + * List of attributes + * + * @var array + */ + public $attr = array(); + + /** + * List of child node objects + * + * @var array + */ + public $children = array(); + public $nodes = array(); + + /** + * The parent node object + * + * @var object|null + */ + public $parent = null; + + // The "info" array - see HDOM_INFO_... for what each element contains. 
+ public $_ = array(); + + /** + * Start position of the tag in the document + * + * @var int + */ + public $tag_start = 0; + + /** + * The DOM object + * + * @var object|null + */ + private $dom = null; + + /** + * Construct new node object + * + * Adds itself to the list of DOM Nodes {@see simple_html_dom::$nodes} + */ + function __construct($dom) + { + $this->dom = $dom; + $dom->nodes[] = $this; + } + + function __destruct() + { + $this->clear(); + } + + function __toString() + { + return $this->outertext(); + } + + // clean up memory due to php5 circular references memory leak... + function clear() + { + $this->dom = null; + $this->nodes = null; + $this->parent = null; + $this->children = null; + } + + // dump node's tree + function dump($show_attr = true, $deep = 0) + { + $lead = str_repeat(' ', $deep); + + echo $lead . $this->tag; + + if ($show_attr && count($this->attr) > 0) { + echo '('; + foreach ($this->attr as $k => $v) { + echo "[$k]=>\"" . $this->$k . '", '; + } + echo ')'; + } + + echo "\n"; + + if ($this->nodes) { + foreach ($this->nodes as $c) { + $c->dump($show_attr, $deep + 1); + } + } + } + + + // Debugging function to dump a single dom node with a bunch of information about it. + function dump_node($echo = true) + { + $string = $this->tag; + + if (count($this->attr) > 0) { + $string .= '('; + foreach ($this->attr as $k => $v) { + $string .= "[$k]=>\"" . $this->$k . '", '; + } + $string .= ')'; + } + + if (count($this->_) > 0) { + $string .= ' $_ ('; + foreach ($this->_ as $k => $v) { + if (is_array($v)) { + $string .= "[$k]=>("; + foreach ($v as $k2 => $v2) { + $string .= "[$k2]=>\"" . $v2 . '", '; + } + $string .= ')'; + } else { + $string .= "[$k]=>\"" . $v . '", '; + } + } + $string .= ')'; + } + + if (isset($this->text)) { + $string .= ' text: (' . $this->text . ')'; + } + + $string .= " HDOM_INNER_INFO: '"; + + if (isset($node->_[HDOM_INFO_INNER])) { + $string .= $node->_[HDOM_INFO_INNER] . 
"'"; + } else { + $string .= ' NULL '; + } + + $string .= ' children: ' . count($this->children); + $string .= ' nodes: ' . count($this->nodes); + $string .= ' tag_start: ' . $this->tag_start; + $string .= "\n"; + + if ($echo) { + echo $string; + return; + } else { + return $string; + } + } + + /** + * Return or set parent node + * + * @param object|null $parent (optional) The parent node, `null` to return + * the current parent node. + * @return object|null The parent node + */ + function parent($parent = null) + { + // I am SURE that this doesn't work properly. + // It fails to unset the current node from it's current parents nodes or + // children list first. + if ($parent !== null) { + $this->parent = $parent; + $this->parent->nodes[] = $this; + $this->parent->children[] = $this; + } + + return $this->parent; + } + + /** + * @return bool True if the node has at least one child node + */ + function has_child() + { + return !empty($this->children); + } + + /** + * Get child node at specified index + * + * @param int $idx The index of the child node to return, `-1` to return all + * child nodes. + * @return object|array|null The child node at the specified index, all child + * nodes or null if the index is invalid. + */ + function children($idx = -1) + { + if ($idx === -1) { + return $this->children; + } + + if (isset($this->children[$idx])) { + return $this->children[$idx]; + } + + return null; + } + + /** + * Get first child node + * + * @return object|null The first child node or null if the current node has + * no child nodes. + * + * @todo Use `empty()` instead of `count()` to improve performance on large + * arrays. + */ + function first_child() + { + if (count($this->children) > 0) { + return $this->children[0]; + } + return null; + } + + /** + * Get last child node + * + * @return object|null The last child node or null if the current node has + * no child nodes. + * + * @todo Use `end()` to slightly improve performance on large arrays. 
+ */ + function last_child() + { + if (($count = count($this->children)) > 0) { + return $this->children[$count - 1]; + } + return null; + } + + /** + * Get next sibling node + * + * @return object|null The sibling node or null if the current node has no + * sibling nodes. + */ + function next_sibling() + { + if ($this->parent === null) { + return null; + } + + $idx = 0; + $count = count($this->parent->children); + + while ($idx < $count && $this !== $this->parent->children[$idx]) { + ++$idx; + } + + if (++$idx >= $count) { + return null; + } + + return $this->parent->children[$idx]; + } + + /** + * Get previous sibling node + * + * @return object|null The sibling node or null if the current node has no + * sibling nodes. + */ + function prev_sibling() + { + if ($this->parent === null) { return null; } + + $idx = 0; + $count = count($this->parent->children); + + while ($idx < $count && $this !== $this->parent->children[$idx]) { + ++$idx; + } + + if (--$idx < 0) { return null; } + + return $this->parent->children[$idx]; + } + + /** + * Traverse ancestors to the first matching tag. + * + * @param string $tag Tag to find + * @return object|null First matching node in the DOM tree or null if no + * match was found. + * + * @todo Null is returned implicitly by calling ->parent on the root node. + * This behaviour could change at any time, rendering this function invalid. + */ + function find_ancestor_tag($tag) + { + global $debug_object; + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } + + // Start by including ourselves in the comparison. + $returnDom = $this; + + while (!is_null($returnDom)) { + if (is_object($debug_object)) { + $debug_object->debug_log(2, 'Current tag is: ' . 
$returnDom->tag); + } + + if ($returnDom->tag == $tag) { + break; + } + + $returnDom = $returnDom->parent; + } + + return $returnDom; + } + + /** + * Get node's inner text (everything inside the opening and closing tags) + * + * @return string + */ + function innertext() + { + if (isset($this->_[HDOM_INFO_INNER])) { + return $this->_[HDOM_INFO_INNER]; + } + + if (isset($this->_[HDOM_INFO_TEXT])) { + return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); + } + + $ret = ''; + + foreach ($this->nodes as $n) { + $ret .= $n->outertext(); + } + + return $ret; + } + + /** + * Get node's outer text (everything including the opening and closing tags) + * + * @return string + */ + function outertext() + { + global $debug_object; + + if (is_object($debug_object)) { + $text = ''; + + if ($this->tag === 'text') { + if (!empty($this->text)) { + $text = ' with text: ' . $this->text; + } + } + + $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text); + } + + if ($this->tag === 'root') return $this->innertext(); + + // trigger callback + if ($this->dom && $this->dom->callback !== null) { + call_user_func_array($this->dom->callback, array($this)); + } + + if (isset($this->_[HDOM_INFO_OUTER])) { + return $this->_[HDOM_INFO_OUTER]; + } + + if (isset($this->_[HDOM_INFO_TEXT])) { + return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); + } + + // render begin tag + if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) { + $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup(); + } else { + $ret = ''; + } + + // render inner text + if (isset($this->_[HDOM_INFO_INNER])) { + // If it's a br tag... don't return the HDOM_INNER_INFO that we + // may or may not have added. 
+ if ($this->tag !== 'br') { + $ret .= $this->_[HDOM_INFO_INNER]; + } + } else { + if ($this->nodes) { + foreach ($this->nodes as $n) { + $ret .= $this->convert_text($n->outertext()); + } + } + } + + // render end tag + if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) { + $ret .= 'tag . '>'; + } + + return $ret; + } + + /** + * Get node's plain text (everything excluding all tags) + * + * @return string + */ + function text() + { + if (isset($this->_[HDOM_INFO_INNER])) { + return $this->_[HDOM_INFO_INNER]; + } + + switch ($this->nodetype) { + case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); + case HDOM_TYPE_COMMENT: return ''; + case HDOM_TYPE_UNKNOWN: return ''; + } + + if (strcasecmp($this->tag, 'script') === 0) { return ''; } + if (strcasecmp($this->tag, 'style') === 0) { return ''; } + + $ret = ''; + + // In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed + // for some span tags, and some p tags) $this->nodes is set to NULL. + // NOTE: This indicates that there is a problem where it's set to NULL + // without a clear happening. + // WHY is this happening? + if (!is_null($this->nodes)) { + foreach ($this->nodes as $n) { + // Start paragraph after a blank line + if ($n->tag === 'p') { + $ret .= "\n\n"; + } + + $ret .= $this->convert_text($n->text()); + + // If this node is a span... add a space at the end of it so + // multiple spans don't run into each other. This is plaintext + // after all. 
+ if ($n->tag === 'span') { + $ret .= $this->dom->default_span_text; + } + } + } + return trim($ret); + } + + /** + * Get node's xml text (inner text as a CDATA section) + * + * @return string + */ + function xmltext() + { + $ret = $this->innertext(); + $ret = str_ireplace('', '', $ret); + return $ret; + } + + // build node's text with tag + function makeup() + { + // text, comment, unknown + if (isset($this->_[HDOM_INFO_TEXT])) { + return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); + } + + $ret = '<' . $this->tag; + $i = -1; + + foreach ($this->attr as $key => $val) { + ++$i; + + // skip removed attribute + if ($val === null || $val === false) { continue; } + + $ret .= $this->_[HDOM_INFO_SPACE][$i][0]; + + //no value attr: nowrap, checked selected... + if ($val === true) { + $ret .= $key; + } else { + switch ($this->_[HDOM_INFO_QUOTE][$i]) + { + case HDOM_QUOTE_DOUBLE: $quote = '"'; break; + case HDOM_QUOTE_SINGLE: $quote = '\''; break; + default: $quote = ''; + } + + $ret .= $key + . $this->_[HDOM_INFO_SPACE][$i][1] + . '=' + . $this->_[HDOM_INFO_SPACE][$i][2] + . $quote + . $val + . $quote; + } + } + + $ret = $this->dom->restore_noise($ret); + return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>'; + } + + /** + * Find elements by CSS selector + * + * @param string $selector The CSS selector + * @param int|null $idx Index of element to return form the list of matching + * elements (default: `null` = disabled). + * @param bool $lowercase Matches tag names case insensitive (lowercase) if + * enabled (default: `false`) + * @return array|object|null A list of elements matching the specified CSS + * selector or a single element if $idx is specified or null if no element + * was found. 
+ */ + function find($selector, $idx = null, $lowercase = false) + { + $selectors = $this->parse_selector($selector); + if (($count = count($selectors)) === 0) { return array(); } + $found_keys = array(); + + // find each selector + for ($c = 0; $c < $count; ++$c) { + // The change on the below line was documented on the sourceforge + // code tracker id 2788009 + // used to be: if (($levle=count($selectors[0]))===0) return array(); + if (($levle = count($selectors[$c])) === 0) { return array(); } + if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); } + + $head = array($this->_[HDOM_INFO_BEGIN] => 1); + $cmd = ' '; // Combinator + + // handle descendant selectors, no recursive! + for ($l = 0; $l < $levle; ++$l) { + $ret = array(); + + foreach ($head as $k => $v) { + $n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k]; + //PaperG - Pass this optional parameter on to the seek function. + $n->seek($selectors[$c][$l], $ret, $cmd, $lowercase); + } + + $head = $ret; + $cmd = $selectors[$c][$l][4]; // Next Combinator + } + + foreach ($head as $k => $v) { + if (!isset($found_keys[$k])) { + $found_keys[$k] = 1; + } + } + } + + // sort keys + ksort($found_keys); + + $found = array(); + foreach ($found_keys as $k => $v) { + $found[] = $this->dom->nodes[$k]; + } + + // return nth-element or array + if (is_null($idx)) { return $found; } + elseif ($idx < 0) { $idx = count($found) + $idx; } + return (isset($found[$idx])) ? 
$found[$idx] : null; + } + + /** + * Seek DOM elements by selector + * + * **Note** + * The selector element must be compatible to a selector from + * {@see simple_html_dom_node::parse_selector()} + * + * @param array $selector A selector element + * @param array $ret An array of matches + * @param bool $lowercase Matches tag names case insensitive (lowercase) if + * enabled (default: `false`) + * @return void + */ + protected function seek($selector, &$ret, $parent_cmd, $lowercase = false) + { + global $debug_object; + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } + + list($tag, $id, $class, $attributes, $cmb) = $selector; + $nodes = array(); + + if ($parent_cmd === ' ') { // Descendant Combinator + // Find parent closing tag if the current element doesn't have a closing + // tag (i.e. void element) + $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0; + if ($end == 0) { + $parent = $this->parent; + while (!isset($parent->_[HDOM_INFO_END]) && $parent !== null) { + $end -= 1; + $parent = $parent->parent; + } + $end += $parent->_[HDOM_INFO_END]; + } + + // Get list of target nodes + $nodes_start = $this->_[HDOM_INFO_BEGIN] + 1; + $nodes_count = $end - $nodes_start; + $nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true); + } elseif ($parent_cmd === '>') { // Child Combinator + $nodes = $this->children; + } elseif ($parent_cmd === '+' + && $this->parent + && in_array($this, $this->parent->children)) { // Next-Sibling Combinator + $index = array_search($this, $this->parent->children, true) + 1; + $nodes[] = $this->parent->children[$index]; + } elseif ($parent_cmd === '~' + && $this->parent + && in_array($this, $this->parent->children)) { // Subsequent Sibling Combinator + $index = array_search($this, $this->parent->children, true); + $nodes = array_slice($this->parent->children, $index); + } + + // Go throgh each element starting at this element until the end tag + // Note: If this element is a void tag, 
any previous void element is + // skipped. + foreach($nodes as $node) { + $pass = true; + + // Skip root nodes + if(!$node->parent) { + $pass = false; + } + + // Skip if node isn't a child node (i.e. text nodes) + if($pass && !in_array($node, $node->parent->children, true)) { + $pass = false; + } + + // Skip if tag doesn't match + if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') { + $pass = false; + } + + // Skip if ID doesn't exist + if ($pass && $id !== '' && !isset($node->attr['id'])) { + $pass = false; + } + + // Check if ID matches + if ($pass && $id !== '' && isset($node->attr['id'])) { + // Note: Only consider the first ID (as browsers do) + $node_id = explode(' ', trim($node->attr['id']))[0]; + + if($id !== $node_id) { $pass = false; } + } + + // Check if all class(es) exist + if ($pass && $class !== '' && is_array($class) && !empty($class)) { + if (isset($node->attr['class'])) { + $node_classes = explode(' ', $node->attr['class']); + + if ($lowercase) { + $node_classes = array_map('strtolower', $node_classes); + } + + foreach($class as $c) { + if(!in_array($c, $node_classes)) { + $pass = false; + break; + } + } + } else { + $pass = false; + } + } + + // Check attributes + if ($pass + && $attributes !== '' + && is_array($attributes) + && !empty($attributes)) { + foreach($attributes as $a) { + list ( + $att_name, + $att_expr, + $att_val, + $att_inv, + $att_case_sensitivity + ) = $a; + + // Handle indexing attributes (i.e. "[2]") + /** + * Note: This is not supported by the CSS Standard but adds + * the ability to select items compatible to XPath (i.e. + * the 3rd element within it's parent). + * + * Note: This doesn't conflict with the CSS Standard which + * doesn't work on numeric attributes anyway. 
+ */ + if (is_numeric($att_name) + && $att_expr === '' + && $att_val === '') { + $count = 0; + + // Find index of current element in parent + foreach ($node->parent->children as $c) { + if ($c->tag === $node->tag) ++$count; + if ($c === $node) break; + } + + // If this is the correct node, continue with next + // attribute + if ($count === (int)$att_name) continue; + } + + // Check attribute availability + if ($att_inv) { // Attribute should NOT be set + if (isset($node->attr[$att_name])) { + $pass = false; + break; + } + } else { // Attribute should be set + // todo: "plaintext" is not a valid CSS selector! + if ($att_name !== 'plaintext' + && !isset($node->attr[$att_name])) { + $pass = false; + break; + } + } + + // Continue with next attribute if expression isn't defined + if ($att_expr === '') continue; + + // If they have told us that this is a "plaintext" + // search then we want the plaintext of the node - right? + // todo "plaintext" is not a valid CSS selector! + if ($att_name === 'plaintext') { + $nodeKeyValue = $node->text(); + } else { + $nodeKeyValue = $node->attr[$att_name]; + } + + if (is_object($debug_object)) { + $debug_object->debug_log(2, + 'testing node: ' + . $node->tag + . ' for attribute: ' + . $att_name + . $att_expr + . $att_val + . ' where nodes value is: ' + . $nodeKeyValue + ); + } + + // If lowercase is set, do a case insensitive test of + // the value of the selector. + if ($lowercase) { + $check = $this->match( + $att_expr, + strtolower($att_val), + strtolower($nodeKeyValue), + $att_case_sensitivity + ); + } else { + $check = $this->match( + $att_expr, + $att_val, + $nodeKeyValue, + $att_case_sensitivity + ); + } + + if (is_object($debug_object)) { + $debug_object->debug_log(2, + 'after match: ' + . ($check ? 'true' : 'false') + ); + } + + if (!$check) { + $pass = false; + break; + } + } + } + + // Found a match. 
Add to list and clear node + if ($pass) $ret[$node->_[HDOM_INFO_BEGIN]] = 1; + unset($node); + } + // It's passed by reference so this is actually what this function returns. + if (is_object($debug_object)) { + $debug_object->debug_log(1, 'EXIT - ret: ', $ret); + } + } + + /** + * Match value and pattern for a given CSS expression + * + * **Supported Expressions** + * + * | Expression | Description + * | ---------- | ----------- + * | `=` | $value and $pattern must be equal + * | `!=` | $value and $pattern must not be equal + * | `^=` | $value must start with $pattern + * | `$=` | $value must end with $pattern + * | `*=` | $value must contain $pattern + * + * @param string $exp The expression. + * @param string $pattern The pattern + * @param string $value The value + * @value bool True if $value matches $pattern + */ + protected function match($exp, $pattern, $value, $case_sensitivity) + { + global $debug_object; + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} + + if ($case_sensitivity === 'i') { + $pattern = strtolower($pattern); + $value = strtolower($value); + } + + switch ($exp) { + case '=': + return ($value === $pattern); + case '!=': + return ($value !== $pattern); + case '^=': + return preg_match('/^' . preg_quote($pattern, '/') . '/', $value); + case '$=': + return preg_match('/' . preg_quote($pattern, '/') . '$/', $value); + case '*=': + return preg_match('/' . preg_quote($pattern, '/') . '/', $value); + case '|=': + /** + * [att|=val] + * + * Represents an element with the att attribute, its value + * either being exactly "val" or beginning with "val" + * immediately followed by "-" (U+002D). + */ + return strpos($value, $pattern) === 0; + case '~=': + /** + * [att~=val] + * + * Represents an element with the att attribute whose value is a + * whitespace-separated list of words, one of which is exactly + * "val". If "val" contains whitespace, it will never represent + * anything (since the words are separated by spaces). 
Also if + * "val" is the empty string, it will never represent anything. + */ + return in_array($pattern, explode(' ', trim($value)), true); + } + return false; + } + + /** + * Parse CSS selector + * + * @param string $selector_string CSS selector string + * @return array List of CSS selectors. The format depends on the type of + * selector: + * + * ```php + * + * array( // list of selectors (each separated by a comma), i.e. 'img, p, div' + * array( // list of combinator selectors, i.e. 'img > p > div' + * array( // selector element + * [0], // (string) The element tag + * [1], // (string) The element id + * [2], // (array) The element classes + * [3], // (array>) The list of attributes, each + * // with four elements: name, expression, value, inverted + * [4] // (string) The selector combinator (' ' | '>' | '+' | '~') + * ) + * ) + * ) + * ``` + * + * @link https://www.w3.org/TR/selectors/#compound Compound selector + */ + protected function parse_selector($selector_string) + { + global $debug_object; + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } + + /** + * Pattern of CSS selectors, modified from mootools (https://mootools.net/) + * + * Paperg: Add the colon to the attribute, so that it properly finds + * like google does. + * + * Note: if you try to look at this attribute, you MUST use getAttribute + * since $dom->x:y will fail the php syntax check. + * + * Notice the \[ starting the attribute? and the @? following? This + * implies that an attribute can begin with an @ sign that is not + * captured. This implies that an html attribute specifier may start + * with an @ sign that is NOT captured by the expression. Farther study + * is required to determine of this should be documented or removed. + * + * Matches selectors in this order: + * + * [0] - full match + * + * [1] - tag name + * ([\w:\*-]*) + * Matches the tag name consisting of zero or more words, colons, + * asterisks and hyphens. 
+ * + * [2] - id name + * (?:\#([\w-]+)) + * Optionally matches a id name, consisting of an "#" followed by + * the id name (one or more words and hyphens). + * + * [3] - class names (including dots) + * (?:\.([\w\.-]+))? + * Optionally matches a list of classs, consisting of an "." + * followed by the class name (one or more words and hyphens) + * where multiple classes can be chained (i.e. ".foo.bar.baz") + * + * [4] - attributes + * ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)? + * Optionally matches the attributes list + * + * [5] - separator + * ([\/, >+~]+) + * Matches the selector list separator + */ + // phpcs:ignore Generic.Files.LineLength + $pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is"; + + preg_match_all( + $pattern, + trim($selector_string) . ' ', // Add final ' ' as pseudo separator + $matches, + PREG_SET_ORDER + ); + + if (is_object($debug_object)) { + $debug_object->debug_log(2, 'Matches Array: ', $matches); + } + + $selectors = array(); + $result = array(); + + foreach ($matches as $m) { + $m[0] = trim($m[0]); + + // Skip NoOps + if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; } + + // Convert to lowercase + if ($this->dom->lowercase) { + $m[1] = strtolower($m[1]); + } + + // Extract classes + if ($m[3] !== '') { $m[3] = explode('.', $m[3]); } + + /* Extract attributes (pattern based on the pattern above!) + + * [0] - full match + * [1] - attribute name + * [2] - attribute expression + * [3] - attribute value + * [4] - case sensitivity + * + * Note: Attributes can be negated with a "!" 
prefix to their name
             */
            if($m[4] !== '') {
                preg_match_all(
                    "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s*?([iIsS])?)?\]/is",
                    trim($m[4]),
                    $attributes,
                    PREG_SET_ORDER
                );

                // Replace element by array
                $m[4] = array();

                foreach($attributes as $att) {
                    // Skip empty matches
                    if(trim($att[0]) === '') { continue; }

                    $inverted = (isset($att[1][0]) && $att[1][0] === '!');
                    $m[4][] = array(
                        $inverted ? substr($att[1], 1) : $att[1], // Name
                        (isset($att[2])) ? $att[2] : '', // Expression
                        (isset($att[3])) ? $att[3] : '', // Value
                        $inverted, // Inverted Flag
                        (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
                    );
                }
            }

            // Sanitize Separator
            if ($m[5] !== '' && trim($m[5]) === '') { // Descendant Separator
                $m[5] = ' ';
            } else { // Other Separator
                $m[5] = trim($m[5]);
            }

            // Clear Separator if it's a Selector List
            if ($is_list = ($m[5] === ',')) { $m[5] = ''; }

            // Remove full match before adding to results
            array_shift($m);
            $result[] = $m;

            if ($is_list) { // Selector List
                $selectors[] = $result;
                $result = array();
            }
        }

        if (count($result) > 0) { $selectors[] = $result; }
        return $selectors;
    }

    /**
     * Magic getter for attributes and the virtual text properties.
     *
     * @param string $name Attribute name, or one of 'outertext', 'innertext',
     * 'plaintext', 'xmltext'.
     * @return mixed The charset-converted attribute value when the attribute
     * is set to a non-null value; the matching text representation for the
     * virtual properties; otherwise a boolean telling whether the attribute
     * key exists at all (true for an attribute stored with a null value,
     * false for a missing one).
     */
    function __get($name)
    {
        if (isset($this->attr[$name])) {
            return $this->convert_text($this->attr[$name]);
        }
        switch ($name) {
            case 'outertext': return $this->outertext();
            case 'innertext': return $this->innertext();
            case 'plaintext': return $this->text();
            case 'xmltext': return $this->xmltext();
            default: return array_key_exists($name, $this->attr);
        }
    }

    /**
     * Magic setter for attributes and the 'outertext' / 'innertext' virtual
     * properties.
     *
     * @param string $name Attribute name, 'outertext' or 'innertext'.
     * @param mixed $value New value.
     * @return mixed The assigned value for the two virtual properties, nothing
     * for regular attributes.
     */
    function __set($name, $value)
    {
        global $debug_object;
        if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

        switch ($name) {
            case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
            case 'innertext':
                // Nodes that already carry HDOM_INFO_TEXT keep their content
                // there; all other nodes store it as HDOM_INFO_INNER.
                if (isset($this->_[HDOM_INFO_TEXT])) {
                    return $this->_[HDOM_INFO_TEXT] = $value;
                }
                return $this->_[HDOM_INFO_INNER] = $value;
        }

        // An attribute that is not set yet also needs spacing and quoting
        // information registered alongside it.
        if (!isset($this->attr[$name])) {
            $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
            $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
        }

        $this->attr[$name] = $value;
    }

    /**
     * Magic isset: the virtual text properties always exist; an attribute
     * exists as soon as its key is present, even with a null value.
     *
     * @param string $name Attribute or virtual property name.
     * @return bool
     */
    function __isset($name)
    {
        switch ($name) {
            case 'outertext': return true;
            case 'innertext': return true;
            case 'plaintext': return true;
        }
        // no value attr: nowrap, checked selected...
        // array_key_exists() (unlike isset()) also reports keys whose value is
        // null, so no additional isset() fallback is required.
        return array_key_exists($name, $this->attr);
    }

    /**
     * Magic unset: removes an attribute.
     *
     * The key is removed unconditionally. The previous isset() guard skipped
     * attributes whose value is null (which is what removeAttribute() stores),
     * so those could never be dropped. unset() on a missing key is a no-op.
     *
     * @param string $name Attribute name.
     * @return void
     */
    function __unset($name)
    {
        unset($this->attr[$name]);
    }

    /**
     * PaperG - Function to convert the text from one character set to another
     * if the two sets are not the same.
     *
     * Source and target charsets are read from the owning DOM object
     * ($this->dom); without one the text is returned unconverted.
     *
     * @param string $text Text to convert.
     * @return string Converted text. For a UTF-8 target a leading and a
     * trailing byte order mark are removed.
     */
    function convert_text($text)
    {
        global $debug_object;
        if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

        $converted_text = $text;

        $sourceCharset = '';
        $targetCharset = '';

        if ($this->dom) {
            $sourceCharset = strtoupper($this->dom->_charset);
            $targetCharset = strtoupper($this->dom->_target_charset);
        }

        if (is_object($debug_object)) {
            $debug_object->debug_log(3,
                'source charset: '
                . $sourceCharset
                . ' target charaset: '
                . $targetCharset
            );
        }

        if (!empty($sourceCharset)
            && !empty($targetCharset)
            && (strcasecmp($sourceCharset, $targetCharset) != 0)) {
            // Check if the reported encoding could have been incorrect and the
            // text is actually already UTF-8
            if ((strcasecmp($targetCharset, 'UTF-8') == 0)
                && ($this->is_utf8($text))) {
                $converted_text = $text;
            } else {
                $iconv_result = iconv($sourceCharset, $targetCharset, $text);
                // iconv() returns false on failure; keep the unconverted text
                // in that case instead of replacing it with false.
                if ($iconv_result !== false) {
                    $converted_text = $iconv_result;
                }
            }
        }

        // Lets make sure that we don't have that silly BOM issue with any of
        // the utf-8 text we output.
        if ($targetCharset === 'UTF-8') {
            if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
                $converted_text = substr($converted_text, 3);
            }

            if (substr($converted_text, -3) === "\xef\xbb\xbf") {
                $converted_text = substr($converted_text, 0, -3);
            }
        }

        return $converted_text;
    }

    /**
     * Returns true if $string is valid UTF-8 and false otherwise.
     *
     * The check is structural: every byte >= 0x80 must be a lead byte
     * (192-253, announcing a 2 to 6 byte sequence) followed by the matching
     * number of continuation bytes (128-191).
     *
     * @param mixed $str String to be tested
     * @return boolean
     */
    static function is_utf8($str)
    {
        $c = 0; $b = 0;
        $bits = 0;
        $len = strlen($str);
        for($i = 0; $i < $len; $i++) {
            $c = ord($str[$i]);
            // >= 128 (not > 128): 0x80 is a continuation byte and must not be
            // accepted as a stand-alone character.
            if($c >= 128) {
                if(($c >= 254)) { return false; }
                elseif($c >= 252) { $bits = 6; }
                elseif($c >= 248) { $bits = 5; }
                elseif($c >= 240) { $bits = 4; }
                elseif($c >= 224) { $bits = 3; }
                elseif($c >= 192) { $bits = 2; }
                else { return false; }
                // Sequence would run past the end of the string
                if(($i + $bits) > $len) { return false; }
                while($bits > 1) {
                    $i++;
                    $b = ord($str[$i]);
                    if($b < 128 || $b > 191) { return false; }
                    $bits--;
                }
            }
        }
        return true;
    }

    /**
     * Function to try a few tricks to determine the displayed size of an img on
     * the page. NOTE: This will ONLY work on an IMG tag. Returns FALSE on all
     * other tag types.
     *
     * The 'width' / 'height' attributes win; a dimension that is still unknown
     * is then taken from an inline style declaration given in whole pixels
     * (e.g. "width: 120px").
     *
     * @author John Schlick
     * @version April 19 2012
     * @return array|false an array containing the 'height' and 'width' of the
     * image on the page or -1 if we can't figure it out.
     */
    function get_display_size()
    {
        if ($this->tag !== 'img') {
            return false;
        }

        $size = array('width' => -1, 'height' => -1);

        // See if there is a height or width attribute in the tag itself.
        foreach (array_keys($size) as $dimension) {
            if (isset($this->attr[$dimension])) {
                $size[$dimension] = $this->attr[$dimension];
            }
        }

        // Now look for an inline style.
        if (isset($this->attr['style'])) {
            // Thanks to user gnarf from stackoverflow for this regular expression.
            $attributes = array();

            preg_match_all(
                '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
                $this->attr['style'],
                $matches,
                PREG_SET_ORDER
            );

            foreach ($matches as $match) {
                // [^;]+ is greedy and also swallows whitespace in front of the
                // semicolon, so trim the value before inspecting its unit.
                $attributes[$match[1]] = trim($match[2]);
            }

            foreach (array_keys($size) as $dimension) {
                // Only fall back to the style when the attribute gave nothing.
                if (!isset($attributes[$dimension]) || $size[$dimension] != -1) {
                    continue;
                }

                // check that the last two characters are px (pixels)
                if (strtolower(substr($attributes[$dimension], -2)) !== 'px') {
                    continue;
                }

                $proposed = substr($attributes[$dimension], 0, -2);
                // Now make sure that it's an integer and not something stupid.
                // Compare against false explicitly so that "0px" is accepted.
                if (filter_var($proposed, FILTER_VALIDATE_INT) !== false) {
                    $size[$dimension] = $proposed;
                }
            }
        }

        // Future enhancement:
        // Look in the tag to see if there is a class or id specified that has
        // a height or width attribute to it.

        // Far future enhancement
        // Look at all the parent tags of this image to see if they specify a
        // class or id that has an img selector that specifies a height or width
        // Note that in this case, the class or id will have the img subselector
        // for it to apply to the image.

        // ridiculously far future development
        // If the class or id is specified in a SEPARATE css file thats not on
        // the page, go get it and do what we were just doing for the ones on
        // the page.

        return array(
            'height' => $size['height'],
            'width' => $size['width']
        );
    }

    // camel naming conventions: thin DOM-style aliases for the methods above

    /** @return array All attributes of this node (name => value). */
    function getAllAttributes()
    {
        return $this->attr;
    }

    /** Alias of __get(). */
    function getAttribute($name)
    {
        return $this->__get($name);
    }

    /** Alias of __set(). */
    function setAttribute($name, $value)
    {
        $this->__set($name, $value);
    }

    /** Alias of __isset(). */
    function hasAttribute($name)
    {
        return $this->__isset($name);
    }

    /** Sets the attribute to null via __set(); the key itself stays in place. */
    function removeAttribute($name)
    {
        $this->__set($name, null);
    }

    /** Finds the first element matching the selector "#$id". */
    function getElementById($id)
    {
        return $this->find("#$id", 0);
    }

    /** Finds elements matching the selector "#$id"; $idx is passed to find(). */
    function getElementsById($id, $idx = null)
    {
        return $this->find("#$id", $idx);
    }

    /** Finds the first element matching the selector $name. */
    function getElementByTagName($name)
    {
        return $this->find($name, 0);
    }

    /** Finds elements matching the selector $name; $idx is passed to find(). */
    function getElementsByTagName($name, $idx = null)
    {
        return $this->find($name, $idx);
    }

    /** Alias of parent(). */
    function parentNode()
    {
        return $this->parent();
    }

    /** Alias of children(). */
    function childNodes($idx = -1)
    {
        return $this->children($idx);
    }

    /** Alias of first_child(). */
    function firstChild()
    {
        return $this->first_child();
    }

    /** Alias of last_child(). */
    function lastChild()
    {
        return $this->last_child();
    }

    /** Alias of next_sibling(). */
    function nextSibling()
    {
        return $this->next_sibling();
    }

    /** Alias of prev_sibling(). */
    function previousSibling()
    {
        return $this->prev_sibling();
    }

    /** Alias of has_child(). */
    function hasChildNodes()
    {
        return $this->has_child();
    }

    /** @return string The tag name of this node. */
    function nodeName()
    {
        return $this->tag;
    }

    /**
     * Makes $node a child of this node by calling $node->parent($this).
     *
     * @param object $node Node to attach.
     * @return object The node that was passed in.
     */
    function appendChild($node)
    {
        $node->parent($this);
        return $node;
    }

}

/**
 * simple html dom parser
 *
 * Paperg - in the find routine: allow us to specify that we want case
 * insensitive testing of the value of the selector.
 *
 * Paperg - change $size from protected to public so we can easily access it
 *
 * Paperg - added ForceTagsClosed in the constructor which tells us whether we
 * trust the html or not. Default is to NOT trust it.
*
 * @package PlaceLocalInclude
 */
class simple_html_dom
{
    /**
     * The root node of the document
     *
     * @var object
     */
    public $root = null;

    /**
     * List of nodes in the current DOM
     *
     * @var array
     */
    public $nodes = array();

    /**
     * Callback function to run for each element in the DOM.
     *
     * @var callable|null
     */
    public $callback = null;

    /**
     * Indicates how tags and attributes are matched
     *
     * @var bool When set to **true** tags and attributes will be converted to
     * lowercase before matching.
     */
    public $lowercase = false;

    /**
     * Original document size
     *
     * Holds the original document size.
     *
     * @var int
     */
    public $original_size;

    /**
     * Current document size
     *
     * Holds the current document size. The document size is determined by the
     * string length of ({@see simple_html_dom::$doc}).
     *
     * _Note_: Using this variable is more efficient than calling `strlen($doc)`
     *
     * @var int
     */
    public $size;

    /**
     * Current position in the document
     *
     * @var int
     */
    protected $pos;

    /**
     * The document
     *
     * @var string
     */
    protected $doc;

    /**
     * Current character
     *
     * Holds the current character at position {@see simple_html_dom::$pos} in
     * the document {@see simple_html_dom::$doc}
     *
     * _Note_: Using this variable is more efficient than calling
     * `substr($doc, $pos, 1)`
     *
     * @var string
     */
    protected $char;

    // NOTE(review): no use of $cursor is visible in this part of the file;
    // presumably a running counter/index used while parsing -- confirm.
    protected $cursor;

    /**
     * Parent node of the next node detected by the parser
     *
     * @var object
     */
    protected $parent;

    // NOTE(review): presumably holds the fragments (scripts, comments, ...)
    // that load() strips from the document before parsing -- confirm against
    // remove_noise()/restore_noise().
    protected $noise = array();

    /**
     * Tokens considered blank in HTML
     *
     * @var string
     */
    protected $token_blank = " \t\r\n";

    /**
     * Tokens to identify the equal sign for attributes, stopping either at the
     * closing tag ("/" i.e. "<html />") or the end of an opening tag (">" i.e.
     * "<html>")
     *
     * @var string
     */
    protected $token_equal = ' =/>';

    /**
     * Tokens to identify the end of a tag name. A tag name either ends on the
     * ending slash ("/" i.e. "<html/>") or whitespace ("\s\r\n\t")
     *
     * @var string
     */
    protected $token_slash = " />\r\n\t";

    /**
     * Tokens to identify the end of an attribute
     *
     * @var string
     */
    protected $token_attr = ' >';

    // Note that this is referenced by a child node, and so it needs to be
    // public for that node to see this information.
    // (simple_html_dom_node::convert_text() reads both values to decide
    // whether and how to convert text.)
    public $_charset = '';
    public $_target_charset = '';

    /**
     * Innertext for <br> elements
     *
     * @var string
     */
    protected $default_br_text = '';

    /**
     * Suffix for <span> elements
     *
     * @var string
     */
    public $default_span_text = '';

    /**
     * Defines a list of self-closing tags (Void elements) according to the HTML
     * Specification
     *
     * _Remarks_:
     * - Use `isset()` instead of `in_array()` on array elements to boost
     * performance about 30%
     * - Sort elements by name for better readability!
     *
     * @link https://www.w3.org/TR/html HTML Specification
     * @link https://www.w3.org/TR/html/syntax.html#void-elements Void elements
     *
     * @var array Tag name => 1, so membership can be tested with `isset()`.
     */
    protected $self_closing_tags = array(
        'area' => 1,
        'base' => 1,
        'br' => 1,
        'col' => 1,
        'embed' => 1,
        'hr' => 1,
        'img' => 1,
        'input' => 1,
        'link' => 1,
        'meta' => 1,
        'param' => 1,
        'source' => 1,
        'track' => 1,
        'wbr' => 1
    );

    /**
     * Defines a list of tags which - if closed - close all optional closing
     * elements within if they haven't been closed yet. (So, an element where
     * neither opening nor closing tag is omissible consistently closes every
     * optional closing element within)
     *
     * _Remarks_:
     * - Use `isset()` instead of `in_array()` on array elements to boost
     * performance about 30%
     * - Sort elements by name for better readability!
     *
     * @var array Tag name => 1, so membership can be tested with `isset()`.
     */
    protected $block_tags = array(
        'body' => 1,
        'div' => 1,
        'form' => 1,
        'root' => 1,
        'span' => 1,
        'table' => 1
    );

    /**
     * Defines elements whose end tag is omissible.
     *
     * * key = Name of an element whose end tag is omissible.
     * * value = Names of elements whose end tag is omissible, that are closed
     * by the current element.
     *
     * _Remarks_:
     * - Use `isset()` instead of `in_array()` on array elements to boost
     * performance about 30%
     * - Sort elements by name for better readability!
     *
     * **Example**
     *
     * An `li` element’s end tag may be omitted if the `li` element is immediately
     * followed by another `li` element.
To do that, add the following element to the
     * array:
     *
     * ```php
     * 'li' => array('li' => 1),
     * ```
     *
     * With this, the following two examples are considered equal. Note that the
     * second example is missing the closing tags on `li` elements.
     *
     * ```html
     * <ul>
     *   <li>First Item</li>
     *   <li>Second Item</li>
     * </ul>
     * ```
     *
     * <ul>
     *   <li>First Item</li>
     *   <li>Second Item</li>
     * </ul>
     *
     * ```html
     * <ul>
     *   <li>First Item
     *   <li>Second Item
     * </ul>
     * ```
     *
     * <ul>
     *   <li>First Item
     *   <li>Second Item
     * </ul>
*
     * @var array A two-dimensional array where the key is the name of an
     * element whose end tag is omissible and the value is an array of elements
     * whose end tag is omissible, that are closed by the current element.
     *
     * @link https://www.w3.org/TR/html/syntax.html#optional-tags Optional tags
     *
     * @todo The implementation of optional closing tags doesn't work in all cases
     * because it only considers elements who close other optional closing
     * tags, not taking into account that some (non-blocking) tags should close
     * these optional closing tags. For example, the end tag for "p" is omissible
     * and can be closed by an "address" element, whose end tag is NOT omissible.
     * Currently a "p" element without closing tag stops at the next "p" element
     * or blocking tag, even if it contains other elements.
     *
     * @todo Known sourceforge issue #2977341
     * B tags that are not closed cause us to return everything to the end of
     * the document.
     */
    protected $optional_closing_tags = array(
        // Not optional, see
        // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
        'b' => array('b' => 1),
        'dd' => array('dd' => 1, 'dt' => 1),
        // Not optional, see
        // https://www.w3.org/TR/html/grouping-content.html#the-dl-element
        'dl' => array('dd' => 1, 'dt' => 1),
        'dt' => array('dd' => 1, 'dt' => 1),
        'li' => array('li' => 1),
        'optgroup' => array('optgroup' => 1, 'option' => 1),
        'option' => array('optgroup' => 1, 'option' => 1),
        'p' => array('p' => 1),
        'rp' => array('rp' => 1, 'rt' => 1),
        'rt' => array('rp' => 1, 'rt' => 1),
        'td' => array('td' => 1, 'th' => 1),
        'th' => array('td' => 1, 'th' => 1),
        'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
    );

    /**
     * Creates the parser and optionally loads a document right away.
     *
     * @param string|null $str HTML markup, or an "http://" URL / path of an
     * existing file, in which case the document is read via load_file().
     * Nothing is loaded when the value is empty.
     * @param bool $lowercase Passed on to load().
     * @param bool $forceTagsClosed When false, the list of optional closing
     * tags is emptied, i.e. the markup is trusted to close its tags properly.
     * @param string $target_charset Stored in $_target_charset.
     * @param bool $stripRN Passed on to load().
     * @param string $defaultBRText Passed on to load().
     * @param string $defaultSpanText Passed on to load().
     * @param int $options Passed on to load().
     */
    function __construct(
        $str = null,
        $lowercase = true,
        $forceTagsClosed = true,
        $target_charset = DEFAULT_TARGET_CHARSET,
        $stripRN = true,
        $defaultBRText = DEFAULT_BR_TEXT,
        $defaultSpanText = DEFAULT_SPAN_TEXT,
        $options = 0)
    {
        // Forcing tags to be closed implies that we don't trust the html, but
        // it can lead to parsing errors if we SHOULD trust the html.
        //
        // This used to assign to a non-existent "optional_closing_array"
        // property, which left $optional_closing_tags untouched. It also ran
        // after the document had been loaded. The flag is now applied to the
        // real property and before loading, so it affects documents passed to
        // the constructor too.
        if (!$forceTagsClosed) {
            $this->optional_closing_tags = array();
        }

        if ($str) {
            if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
                $this->load_file($str);
            } else {
                $this->load(
                    $str,
                    $lowercase,
                    $stripRN,
                    $defaultBRText,
                    $defaultSpanText,
                    $options
                );
            }
        }

        $this->_target_charset = $target_charset;
    }

    /**
     * Calls clear() when the object is destroyed.
     */
    function __destruct()
    {
        $this->clear();
    }

    // load html from string
    function load(
        $str,
        $lowercase = true,
        $stripRN = true,
        $defaultBRText = DEFAULT_BR_TEXT,
        $defaultSpanText = DEFAULT_SPAN_TEXT,
        $options = 0)
    {
        global $debug_object;

        // prepare
        $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);

        // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
        // Script tags removal now preceeds style tag removal.
        // strip out