hcxselect
1.1
A CSS selector engine for htmlcxx
|
00001 /* 00002 * hcxselect - A CSS selector engine for htmlcxx 00003 * Copyright (C) 2011 Jonas Gehring 00004 * All rights reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions are met: 00008 * * Redistributions of source code must retain the above copyright 00009 * notice, this list of conditions and the following disclaimer. 00010 * * Redistributions in binary form must reproduce the above copyright 00011 * notice, this list of conditions and the following disclaimer in the 00012 * documentation and/or other materials provided with the distribution. 00013 * * Neither the name of the copyright holders nor the 00014 * names of its contributors may be used to endorse or promote products 00015 * derived from this software without specific prior written permission. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00018 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00020 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 00021 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00022 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00023 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00024 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00025 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00026 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00027 * POSSIBILITY OF SUCH DAMAGE. 00028 */ 00029 00030 00031 #include <stack> 00032 #include <sstream> 00033 #include <vector> 00034 00035 #include "hcxselect.h" 00036 00037 extern "C" { 00038 #include "lexer.h" 00039 } 00040 00041 //#define TRACE printf("%s: ", __FUNCTION__); printf 00042 inline void TRACE(...) { } 00043 00044 #define ENSURE(c, s) \ 00045 if (!(c)) { throw ParseException(l->pos, s); } 00046 00047 00048 namespace hcxselect 00049 { 00050 00051 // Anonymous namespace for local helpers 00052 namespace 00053 { 00054 00055 typedef htmlcxx::HTML::Node HTMLNode; 00056 00057 // Trims whitespace from the beginning and end of a string 00058 std::string trim(const std::string &str) 00059 { 00060 int start = 0; 00061 int end = str.length()-1; 00062 const char *data = str.c_str(); 00063 00064 while (start <= end && isspace(data[start])) ++start; 00065 while (end >= start && isspace(data[end])) --end; 00066 00067 if (start > 0 || end < (int)str.length()) { 00068 return str.substr(start, (end - start + 1)); 00069 } 00070 return str; 00071 } 00072 00073 // Wrapper for strcasecmp 00074 inline int strcasecmp(const std::string &s1, const std::string &s2) 00075 { 00076 return ::strcasecmp(s1.c_str(), s2.c_str()); 00077 } 00078 00079 // Checks for string prefix 00080 inline bool starts_with(const std::string &str, const std::string &start) 00081 { 00082 return !::strncasecmp(str.c_str(), start.c_str(), start.length()); 00083 } 00084 00085 // Checks for string suffix 00086 inline bool ends_with(const std::string &str, const std::string &end) 00087 { 00088 if (str.length() >= end.length()) { 00089 return !::strcasecmp(str.c_str() + str.length() - end.length(), end.c_str()); 00090 } 00091 return false; 00092 } 00093 00094 // Delete all elements of a container 00095 template<typename T> 00096 inline void delete_all(const T &v) 00097 { 00098 for (typename T::const_iterator it = v.begin(); it != v.end(); ++it) { 00099 delete *it; 00100 } 00101 } 00102 00103 // String to number 00104 template <class T> inline bool stoi(T *t, const std::string& s) 00105 { 00106 std::istringstream iss(s); 00107 return !(iss >> *t).fail(); 00108 } 00109 00110 // Lexer for CSS selector grammar (wrapper for reentrant FLEX parser) 00111 class Lexer 00112 { 00113 public: 00114 Lexer(const std::string &str) 00115 : pos(0), spos(0) 00116 { 00117 yylex_init(&yy); 00118 yy_scan_string(str.c_str(), yy); 00119 } 00120 00121 ~Lexer() 00122 { 00123 yylex_destroy(yy); 00124 } 00125 00126 void unescape(std::string *str) 00127 { 00128 size_t pos = 0; 00129 while ((pos = str->find('\\', pos)) != std::string::npos) { 00130 if ((*str)[pos+1] == '\\') { 00131 str->replace(pos, 2, ""); 00132 } else if (isdigit((*str)[pos+1])) { 00133 // Un-escape UTF-8 codes, sometimes followed by space 00134 size_t len = 1; 00135 while (isdigit((*str)[pos+len])) ++len; 00136 while (isspace((*str)[pos+len])) ++len; 00137 if (len > 1) { 00138 unsigned int x; 00139 std::stringstream ss; 00140 ss << std::hex << str->substr(pos+1, len-1); 00141 ss >> x; 00142 str->replace(pos, len, 1, x); 00143 } 00144 } else { 00145 str->replace(pos, 1, ""); 00146 } 00147 } 00148 } 00149 00150 inline int lex(std::string *text) 00151 { 00152 int token = yylex(yy); 00153 spos += yyget_leng(yy); 00154 if (token > 0) { 00155 *text = yyget_text(yy); 00156 pos = spos + 1 - text->length(); 00157 } else { 00158 pos = spos; 00159 } 00160 if (token == IDENT) { 00161 unescape(text); 00162 } 00163 return token; 00164 } 00165 00166 yyscan_t yy; 00167 int pos, spos; 00168 }; 00169 00170 namespace Selectors 00171 { 00172 00173 // Abstract base class for selector functions 00174 struct SelectorFn 00175 { 00176 typedef tree<HTMLNode>::iterator NodeIt; 00177 00178 virtual ~SelectorFn() { } 00179 virtual bool match(const NodeIt &it) const = 0; 00180 00181 protected: 00182 inline static bool hasParent(const NodeIt &it) { 00183 return (it.node->parent == NULL || strcasecmp(it->tagName(), "html")); 00184 } 00185 }; 00186 00187 // Universal selector (*) 00188 struct Universal : SelectorFn 00189 { 00190 bool match(const NodeIt &it) const 00191 { 00192 return it->isTag(); 00193 } 00194 }; 00195 00196 // Type selector (E) 00197 struct Type : SelectorFn 00198 { 00199 Type(const std::string &type) : type(type) { } 00200 00201 bool match(const NodeIt &it) const 00202 { 00203 return (it->isTag() && !strcasecmp(it->tagName(), type)); 00204 } 00205 00206 std::string type; 00207 }; 00208 00209 // Attribute selector (E[foo]) 00210 struct Attribute : SelectorFn 00211 { 00212 Attribute(const std::string &attr) : attr(attr) { } 00213 00214 bool match(const NodeIt &it) const 00215 { 00216 if (it->attributes().empty()) it->parseAttributes(); 00217 return it->attribute(attr).first; 00218 } 00219 00220 std::string attr; 00221 }; 00222 00223 // Attribute value, with optional comparison operator (E[foo=bar]) 00224 struct AttributeValue : SelectorFn 00225 { 00226 AttributeValue(const std::string &attr, const std::string &value, char c = '=') 00227 : attr(attr), value(value), c(c) { } 00228 00229 bool match(const NodeIt &it) const 00230 { 00231 if (value.empty() && c != '=') return false; 00232 00233 if (it->attributes().empty()) it->parseAttributes(); 00234 const std::string &str = it->attribute(attr).second; 00235 switch (c) { 00236 case '=': return !strcasecmp(str, value); 00237 case '^': return starts_with(str, value); 00238 case '$': return ends_with(str, value); 00239 case '*': return (str.find(value) != std::string::npos); 00240 case '|': return !(strcasecmp(str, value) && !starts_with(str, value + "-")); 00241 case '~': { 00242 // Split string by space and compare every part 00243 const char *ptr = str.c_str(), *last = str.c_str(); 00244 const char *end = str.c_str() + str.length(); 00245 int l = value.length(); 00246 while (ptr < end) { 00247 while (*ptr && !isspace(*ptr)) ++ptr; 00248 if ((ptr - last) == l && !::strncasecmp(value.c_str(), last, l)) { 00249 return true; 00250 } 00251 while (*ptr && isspace(*ptr)) ++ptr; 00252 last = ptr; 00253 } 00254 return false; 00255 } 00256 default: break; 00257 } 00258 return true; 00259 } 00260 00261 std::string attr; 00262 std::string value; 00263 char c; 00264 }; 00265 00266 // Pseudo class or element 00267 struct Pseudo : SelectorFn 00268 { 00269 Pseudo(const std::string &type, int an = 0, int b = 0) 00270 : type(type), an(an), b(b) { } 00271 00272 bool checkNum(int i) const 00273 { 00274 if (an == 0) { 00275 return (i == b); 00276 } 00277 return (((i - b) % an) == 0); 00278 } 00279 00280 bool matchs(const NodeIt &it, const std::string &type) const 00281 { 00282 if (type == "root") { 00283 return !hasParent(it); 00284 } else if (type == "first-child") { 00285 if (!hasParent(it)) return false; 00286 NodeIt jt(it.node->parent->first_child); 00287 while (jt.node && !jt->isTag()) { 00288 jt = jt.node->next_sibling; 00289 } 00290 return (jt.node == it.node); 00291 } else if (type == "last-child") { 00292 if (!hasParent(it)) return false; 00293 NodeIt jt(it.node->parent->last_child); 00294 while (jt.node && !jt->isTag()) { 00295 jt = jt.node->prev_sibling; 00296 } 00297 return (jt.node == it.node); 00298 } else if (type == "first-of-type") { 00299 if (!hasParent(it)) return false; 00300 NodeIt jt(it.node->parent->first_child); 00301 while (jt.node && (!jt->isTag() || strcasecmp(jt->tagName(), it->tagName()))) { 00302 jt = jt.node->next_sibling; 00303 } 00304 return (jt.node == it.node); 00305 } else if (type == "last-of-type") { 00306 if (!hasParent(it)) return false; 00307 NodeIt jt(it.node->parent->last_child); 00308 while (jt.node && (!jt->isTag() || strcasecmp(jt->tagName(), it->tagName()))) { 00309 jt = jt.node->prev_sibling; 00310 } 00311 return (jt.node == it.node); 00312 } else if (type == "empty") { 00313 if (it->isTag()) { 00314 return (it.node->first_child == NULL || 00315 (it.node->first_child->data.isComment() && it.node->first_child == it.node->last_child)); 00316 } 00317 return (it->isComment() || it->length() == 0); 00318 } else if (type == "nth-child") { 00319 if (!hasParent(it)) return false; 00320 int i = 1; 00321 NodeIt jt(it.node->parent->first_child); 00322 while (jt.node && it.node != jt.node) { 00323 if (jt->isTag()) ++i; 00324 jt = jt.node->next_sibling; 00325 } 00326 return checkNum(i); 00327 } else if (type == "nth-last-child") { 00328 if (!hasParent(it)) return false; 00329 int i = 1; 00330 NodeIt jt(it.node->parent->last_child); 00331 while (jt.node && it.node != jt.node) { 00332 if (jt->isTag()) ++i; 00333 jt = jt.node->prev_sibling; 00334 } 00335 return checkNum(i); 00336 } else if (type == "nth-of-type") { 00337 if (!hasParent(it)) return false; 00338 int i = 1; 00339 NodeIt jt(it.node->parent->first_child); 00340 while (jt.node && it.node != jt.node) { 00341 if (jt->isTag() && !strcasecmp(jt->tagName(), it->tagName())) ++i; 00342 jt = jt.node->next_sibling; 00343 } 00344 return checkNum(i); 00345 } else if (type == "nth-last-of-type") { 00346 if (!hasParent(it)) return false; 00347 int i = 1; 00348 NodeIt jt(it.node->parent->last_child); 00349 while (jt.node && it.node != jt.node) { 00350 if (jt->isTag() && !strcasecmp(jt->tagName(), it->tagName())) ++i; 00351 jt = jt.node->prev_sibling; 00352 } 00353 return checkNum(i); 00354 } else if (type == "text") { 00355 return (!it->isTag() && !it->isComment()); 00356 } else if (type == "comment") { 00357 return it->isComment(); 00358 } 00359 return false; 00360 } 00361 00362 bool match(const NodeIt &it) const 00363 { 00364 if (type == "only-child") { 00365 return matchs(it, "first-child") && matchs(it, "last-child"); 00366 } else if (type == "only-of-type") { 00367 return matchs(it, "first-of-type") && matchs(it, "last-of-type"); 00368 } 00369 return matchs(it, type); 00370 } 00371 00372 std::string type; 00373 int an, b; 00374 }; 00375 00376 // Negation (:not) 00377 struct Negation : SelectorFn 00378 { 00379 Negation(SelectorFn *fn) : fn(fn) { } 00380 ~Negation() { delete fn; } 00381 00382 bool match(const NodeIt &it) const 00383 { 00384 return !fn->match(it); 00385 } 00386 00387 SelectorFn *fn; 00388 }; 00389 00390 // A simple selector sequence 00391 struct SimpleSequence : SelectorFn 00392 { 00393 SimpleSequence(const std::vector<SelectorFn *> &fns) : fns(fns) { } 00394 ~SimpleSequence() { delete_all(fns); } 00395 00396 bool match(const NodeIt &it) const 00397 { 00398 std::vector<SelectorFn *>::const_iterator ft(fns.begin()); 00399 std::vector<SelectorFn *>::const_iterator end(fns.end()); 00400 while (ft != end && (*ft)->match(it)) { 00401 ++ft; 00402 } 00403 return (ft == end); 00404 } 00405 00406 std::vector<SelectorFn *> fns; 00407 }; 00408 00409 // Combinator ( , >, ~, +) 00410 struct Combinator : SelectorFn 00411 { 00412 Combinator(SelectorFn *left, SelectorFn *right, char c) : left(left), right(right), c(c) { } 00413 ~Combinator() { delete left; delete right; } 00414 00415 bool match(const NodeIt &it) const 00416 { 00417 // First, check if the node matches the right side of the combinator 00418 if (!right->match(it)) { 00419 return false; 00420 } 00421 00422 // Check all suitable neighbor nodes using the left selector 00423 NodeIt jt; 00424 switch (c) { 00425 case ' ': // Descendant 00426 case '*': // Greatchild or further descendant 00427 if (!hasParent(it)) return false; 00428 jt = it.node->parent; 00429 if (c == '*' && jt.node) { 00430 jt = jt.node->parent; 00431 } 00432 while (jt.node) { 00433 if (left->match(jt)) { 00434 return true; 00435 } 00436 jt = jt.node->parent; 00437 } 00438 return false; 00439 00440 case '>': // Child 00441 if (!hasParent(it)) return false; 00442 jt = it.node->parent; 00443 return jt.node && left->match(jt); 00444 00445 case '+': // Adjacent sibling 00446 if (!hasParent(it)) return false; 00447 jt = it.node->prev_sibling; 00448 while (jt.node && !jt->isTag()) { 00449 jt = jt.node->prev_sibling; 00450 } 00451 return jt.node && left->match(jt); 00452 00453 case '~': // General sibling 00454 if (!hasParent(it)) return false; 00455 jt = it.node->prev_sibling; 00456 while (jt.node) { 00457 if (jt->isTag() && left->match(jt)) { 00458 return true; 00459 } 00460 jt = jt.node->prev_sibling; 00461 } 00462 return false; 00463 00464 default: break; 00465 } 00466 00467 return false; 00468 } 00469 00470 SelectorFn *left, *right; 00471 char c; 00472 }; 00473 00474 } // namespace Selectors 00475 00476 using Selectors::SelectorFn; 00477 00478 SelectorFn *parseSelector(Lexer *l, int &token, std::string &s); 00479 00480 // Tries to parse a simple selector 00481 SelectorFn *parseSimpleSequence(Lexer *l, int &token, std::string &s) 00482 { 00483 std::vector<SelectorFn *> fns; 00484 00485 // [ type_selector | universal ] 00486 TRACE("%d: %s\n", token, s.c_str()); 00487 if (token == IDENT) { 00488 fns.push_back(new Selectors::Type(s)); 00489 token = l->lex(&s); 00490 } else if (token == '*') { 00491 fns.push_back(new Selectors::Universal()); 00492 token = l->lex(&s); 00493 } 00494 00495 // [ HASH | class | attrib | pseudo | negation ]* 00496 bool lex = true; 00497 while (token) { 00498 switch (token) { 00499 case HASH: 00500 fns.push_back(new Selectors::AttributeValue("id", s.substr(1))); 00501 break; 00502 case '.': 00503 token = l->lex(&s); 00504 ENSURE(token == IDENT, "Identifier expected"); 00505 TRACE("%d: %s\n", token, s.c_str()); 00506 fns.push_back(new Selectors::AttributeValue("class", s, '~')); 00507 break; 00508 case '[': { 00509 token = l->lex(&s); 00510 if (token == S) token = l->lex(&s); 00511 ENSURE(token == IDENT, "Identifier expected"); 00512 std::string a = s; 00513 00514 token = l->lex(&s); 00515 if (token == S) token = l->lex(&s); 00516 if (token == ']') { 00517 fns.push_back(new Selectors::Attribute(a)); 00518 break; 00519 } 00520 00521 int c = 0; 00522 switch (token) { 00523 case INCLUDES: c = '~'; break; 00524 case DASHMATCH: c = '|'; break; 00525 case PREFIXMATCH: c = '^'; break; 00526 case SUFFIXMATCH: c = '$'; break; 00527 case SUBSTRINGMATCH: c = '*'; break; 00528 case '=': c = '='; break; 00529 default: throw ParseException(l->pos, "Invalid character"); 00530 } 00531 TRACE("got %d, %c\n", token, token); 00532 00533 token = l->lex(&s); 00534 if (token == S) token = l->lex(&s); 00535 ENSURE(token == STRING || token == IDENT, "Token is neither string nor identifier"); 00536 std::string v = (token == STRING ? s.substr(1, s.length()-2) : s); 00537 00538 fns.push_back(new Selectors::AttributeValue(a, v, c)); 00539 token = l->lex(&s); 00540 if (token == S) token = l->lex(&s); 00541 ENSURE(token == ']', "']' expected"); 00542 break; 00543 } 00544 case ':': { 00545 token = l->lex(&s); 00546 if (token == ':') { token = l->lex(&s); s.insert(0, ":"); } 00547 if (token == IDENT) { 00548 fns.push_back(new Selectors::Pseudo(s)); 00549 } else if (token == FUNCTION) { 00550 std::string f(s.substr(0, s.length()-1)); 00551 int an = 0, b = 0; 00552 token = l->lex(&s); 00553 if (token == S) token = l->lex(&s); 00554 if (token == IDENT) { 00555 if (s == "odd" || s == "even") { 00556 an = 2; b = (s == "odd" ? 1 : 0); 00557 token = l->lex(&s); 00558 ENSURE(token == ')', "')' expected"); 00559 } else if (s == "n" || s == "-n") { 00560 an = (s == "n" ? 1 : -1); 00561 token = l->lex(&s); 00562 if (token == PLUS) { 00563 token = l->lex(&s); 00564 if (token == S) token = l->lex(&s); 00565 ENSURE(token == NUMBER && stoi(&b, s), "Number expected"); 00566 token = l->lex(&s); 00567 ENSURE(token == ')', "')' expected"); 00568 } else if (token != ')') { 00569 throw ParseException(l->pos, "')' expected"); 00570 } 00571 } else { 00572 throw ParseException(l->pos, "odd/even/n expected"); 00573 } 00574 } else if (token == DIMENSION) { 00575 std::istringstream ss(s); 00576 ss >> an >> s; 00577 ENSURE(!ss.fail() && s == "n", "Expression of form 'an' expected"); 00578 token = l->lex(&s); 00579 if (token == PLUS) { 00580 token = l->lex(&s); 00581 if (token == S) token = l->lex(&s); 00582 ENSURE(token == NUMBER && stoi(&b, s), "Number expected"); 00583 token = l->lex(&s); 00584 ENSURE(token == ')', "')' expected"); 00585 } 00586 } else if (token == NUMBER) { 00587 ENSURE(stoi(&b, s), "Number expected"); 00588 token = l->lex(&s); 00589 ENSURE(token == ')', "')' expected"); 00590 } else { 00591 throw ParseException(l->pos, "Invalid expression"); 00592 } 00593 TRACE("pseudo %s,with %dn + %d\n", f.c_str(), an, b); 00594 fns.push_back(new Selectors::Pseudo(f, an, b)); 00595 } else { 00596 throw ParseException(l->pos, "Identifier or funtion expected"); 00597 } 00598 break; 00599 } 00600 case NOT: { 00601 token = l->lex(&s); 00602 fns.push_back(new Selectors::Negation(parseSelector(l, token, s))); 00603 ENSURE(token == ')', "')' expected"); 00604 break; 00605 } 00606 case ')': // For negations 00607 default: goto finish; 00608 } 00609 00610 if (lex) { 00611 token = l->lex(&s); 00612 } 00613 lex = true; 00614 } 00615 00616 finish: 00617 return new Selectors::SimpleSequence(fns); 00618 } 00619 00620 // Recursive parsing function 00621 SelectorFn *parseSelector(Lexer *l, int &token, std::string &s) 00622 { 00623 if (token == S) token = l->lex(&s); 00624 SelectorFn *fn = parseSimpleSequence(l, token, s); 00625 00626 while (token) { 00627 TRACE("%d: %s\n", token, s.c_str()); 00628 bool space = false; 00629 if (token == S) { 00630 space = true; 00631 token = l->lex(&s); 00632 } 00633 TRACE("%d: %s\n", token, s.c_str()); 00634 00635 char c = -1; 00636 switch (token) { 00637 case S: c = ' '; break; 00638 case PLUS: c = '+'; break; 00639 case GREATER: c = '>'; break; 00640 case TILDE: c = '~'; break; 00641 case '*': c = '*'; break; 00642 00643 case 0: return fn; 00644 default: 00645 if (space) { 00646 c = ' '; 00647 } else { 00648 return fn; 00649 } 00650 break; 00651 } 00652 00653 if (c != ' ') { 00654 token = l->lex(&s); 00655 TRACE("%d: %s\n", token, s.c_str()); 00656 if (token == S) token = l->lex(&s); 00657 } 00658 TRACE("%d: %s\n", token, s.c_str()); 00659 SelectorFn *fn2 = parseSimpleSequence(l, token, s); 00660 fn = new Selectors::Combinator(fn, fn2, c); 00661 } 00662 00663 return fn; 00664 } 00665 00666 // Parses a CSS selector expression and returns a set of functions 00667 std::vector<SelectorFn *> parse(const std::string &expr) 00668 { 00669 std::vector<SelectorFn *> fns; 00670 int token; 00671 std::string s; 00672 00673 Lexer l(trim(expr)); 00674 while ((token = l.lex(&s))) { 00675 fns.push_back(parseSelector(&l, token, s)); 00676 if (token != COMMA && token != 0) { 00677 throw ParseException(l.pos, "Comma expected"); 00678 } 00679 } 00680 00681 return fns; 00682 } 00683 00684 // Matches a set of nodes against a selector 00685 NodeSet match(const NodeSet &nodes, const SelectorFn *fn) 00686 { 00687 std::stack<tree<HTMLNode>::iterator> stack; 00688 for (NodeSet::const_iterator it(nodes.begin()); it != nodes.end(); ++it) { 00689 stack.push(tree<HTMLNode>::iterator(*it)); 00690 } 00691 00692 // Depth-first traversal using a stack 00693 NodeSet result; 00694 while (!stack.empty()) { 00695 tree<HTMLNode>::iterator it = stack.top(); 00696 stack.pop(); 00697 00698 // Check if selector matches 00699 if (fn->match(it)) { 00700 result.insert(it.node); 00701 } 00702 00703 // Inspect all child nodes of non-matching elements 00704 tree<HTMLNode>::sibling_iterator jt; 00705 for (jt = it.begin(); jt != it.end(); ++jt) { 00706 stack.push(jt); 00707 } 00708 } 00709 00710 return result; 00711 } 00712 00713 } // Anonymous namespace 00714 00715 00719 bool NodeComp::operator()(Node *a, Node *b) const { 00720 if (a == NULL || b == NULL) { 00721 return a < b; 00722 } 00723 return a->data.offset() < b->data.offset(); 00724 } 00725 00726 // Applies a CSS selector expression to a document tree. 00727 NodeSet select(const tree<HTMLNode> &tree, const std::string &expr) 00728 { 00729 // Select the <html> node from the tree and use it as the root node 00730 NodeSet v; 00731 ::tree<HTMLNode>::iterator it; 00732 for (it = tree.begin(); it != tree.end(); ++it) { 00733 if (!strcasecmp(it->tagName(), "html")) { 00734 v.insert(it.node); 00735 break; 00736 } 00737 } 00738 00739 if (expr.empty()) { 00740 return v; 00741 } 00742 return select(v, expr); 00743 } 00744 00745 // Applies a CSS selector expression to a set of nodes. 00746 NodeSet select(const NodeSet &nodes, const std::string &expr) 00747 { 00748 // Parse expression 00749 std::vector<SelectorFn *> fns = parse(expr); 00750 00751 NodeSet result; 00752 std::vector<SelectorFn *>::const_iterator it; 00753 for (it = fns.begin(); it != fns.end(); ++it) { 00754 NodeSet v = match(nodes, *it); 00755 result.insert(v.begin(), v.end()); 00756 } 00757 00758 delete_all(fns); 00759 return result; 00760 } 00761 00762 00766 Selection::Selection() 00767 { 00768 } 00769 00774 Selection::Selection(const tree<htmlcxx::HTML::Node> &tree, const std::string &expr) 00775 { 00776 NodeSet v = hcxselect::select(tree, expr); 00777 insert(v.begin(), v.end()); 00778 } 00779 00784 Selection::Selection(const NodeSet &nodes, const std::string &expr) 00785 { 00786 if (!expr.empty()) { 00787 NodeSet v = hcxselect::select(nodes, expr); 00788 insert(v.begin(), v.end()); 00789 } else { 00790 insert(nodes.begin(), nodes.end()); 00791 } 00792 } 00793 00798 Selection Selection::select(const std::string &expr) 00799 { 00800 return hcxselect::select(*this, expr); 00801 } 00802 00803 } // namespace hcxselect