package org.jsoup.select;

import org.jsoup.helper.Validate;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.DocumentType;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.PseudoTextElement;
import org.jsoup.nodes.TextNode;
import org.jsoup.nodes.XmlDeclaration;
import org.jsoup.parser.ParseSettings;

import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.Normalizer.normalize;
import static org.jsoup.internal.StringUtil.normaliseWhitespace;


/**
 * Evaluates that an element matches the selector.
 */
public abstract class Evaluator {
    protected Evaluator() {
    }

    /**
     Provides a Predicate for this Evaluator, matching the test Element.
     * @param root the root Element, for match evaluation
     * @return a predicate that accepts an Element to test for matches with this Evaluator
     * @since 1.17.1
     */
    public Predicate<Element> asPredicate(Element root) {
        return element -> matches(root, element);
    }

    /**
     * Test if the element meets the evaluator's requirements.
     *
     * @param root    Root of the matching subtree
     * @param element tested element
     * @return Returns {@code true} if the requirements are met or
     * {@code false} otherwise
     */
    public abstract boolean matches(Element root, Element element);

    /**
     Reset any internal state in this Evaluator before executing a new Collector evaluation.
     */
    protected void reset() {
    }

    /**
     A relative evaluator cost function. During evaluation, Evaluators are sorted by ascending cost as an optimization.
     * @return the relative cost of this Evaluator
     */
    protected int cost() {
        return 5; // a nominal default cost
    }

    /**
     * Evaluator for tag name
     */
    public static final class Tag extends Evaluator {
        private final String tagName;

        public Tag(String tagName) {
            this.tagName = tagName;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return (element.nameIs(tagName));
        }

        @Override protected int cost() {
            return 1;
        }

        @Override
        public String toString() {
            return String.format("%s", tagName);
        }
    }


    /**
     * Evaluator for tag name that ends with
     */
    public static final class TagEndsWith extends Evaluator {
        private final String tagName;

        public TagEndsWith(String tagName) {
            this.tagName = tagName;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return (element.normalName().endsWith(tagName));
        }

        @Override
        public String toString() {
            return String.format("%s", tagName);
        }
    }

    /**
     * Evaluator for element id
     */
    public static final class Id extends Evaluator {
        private final String id;

        public Id(String id) {
            this.id = id;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return (id.equals(element.id()));
        }

        @Override protected int cost() {
            return 2;
        }
        @Override
        public String toString() {
            return String.format("#%s", id);
        }
    }

    /**
     * Evaluator for element class
     */
    public static final class Class extends Evaluator {
        private final String className;

        public Class(String className) {
            this.className = className;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return (element.hasClass(className));
        }

        @Override protected int cost() {
            return 6; // does whitespace scanning
        }

        @Override
        public String toString() {
            return String.format(".%s", className);
        }

    }

    /**
     * Evaluator for attribute name matching
     */
    public static final class Attribute extends Evaluator {
        private final String key;

        public Attribute(String key) {
            this.key = key;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key);
        }

        @Override protected int cost() {
            return 2;
        }

        @Override
        public String toString() {
            return String.format("[%s]", key);
        }
    }

    /**
     * Evaluator for attribute name prefix matching
     */
    public static final class AttributeStarting extends Evaluator {
        private final String keyPrefix;

        public AttributeStarting(String keyPrefix) {
            Validate.notNull(keyPrefix); // OK to be empty - will find elements with any attributes
            this.keyPrefix = lowerCase(keyPrefix);
        }

        @Override
        public boolean matches(Element root, Element element) {
            List<org.jsoup.nodes.Attribute> values = element.attributes().asList();
            for (org.jsoup.nodes.Attribute attribute : values) {
                if (lowerCase(attribute.getKey()).startsWith(keyPrefix))
                    return true;
            }
            return false;
        }

        @Override protected int cost() {
            return 6;
        }

        @Override
        public String toString() {
            return String.format("[^%s]", keyPrefix);
        }

    }

    /**
     * Evaluator for attribute name/value matching
     */
    public static final class AttributeWithValue extends AttributeKeyPair {
        public AttributeWithValue(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && value.equalsIgnoreCase(element.attr(key).trim());
        }

        @Override protected int cost() {
            return 3;
        }

        @Override
        public String toString() {
            return String.format("[%s=%s]", key, value);
        }

    }

    /**
     * Evaluator for attribute name != value matching
     */
    public static final class AttributeWithValueNot extends AttributeKeyPair {
        public AttributeWithValueNot(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return !value.equalsIgnoreCase(element.attr(key));
        }

        @Override protected int cost() {
            return 3;
        }

        @Override
        public String toString() {
            return String.format("[%s!=%s]", key, value);
        }

    }

    /**
     * Evaluator for attribute name/value matching (value prefix)
     */
    public static final class AttributeWithValueStarting extends AttributeKeyPair {
        public AttributeWithValueStarting(String key, String value) {
            super(key, value, false);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && lowerCase(element.attr(key)).startsWith(value); // value is lower case already
        }

        @Override protected int cost() {
            return 4;
        }

        @Override
        public String toString() {
            return String.format("[%s^=%s]", key, value);
        }
    }

    /**
     * Evaluator for attribute name/value matching (value ending)
     */
    public static final class AttributeWithValueEnding extends AttributeKeyPair {
        public AttributeWithValueEnding(String key, String value) {
            super(key, value, false);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && lowerCase(element.attr(key)).endsWith(value); // value is lower case
        }

        @Override protected int cost() {
            return 4;
        }

        @Override
        public String toString() {
            return String.format("[%s$=%s]", key, value);
        }
    }

    /**
     * Evaluator for attribute name/value matching (value containing)
     */
    public static final class AttributeWithValueContaining extends AttributeKeyPair {
        public AttributeWithValueContaining(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && lowerCase(element.attr(key)).contains(value); // value is lower case
        }

        @Override protected int cost() {
            return 6;
        }

        @Override
        public String toString() {
            return String.format("[%s*=%s]", key, value);
        }

    }

    /**
     * Evaluator for attribute name/value matching (value regex matching)
     */
    public static final class AttributeWithValueMatching extends Evaluator {
        final String key;
        final Pattern pattern;

        public AttributeWithValueMatching(String key, Pattern pattern) {
            this.key = normalize(key);
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && pattern.matcher(element.attr(key)).find();
        }

        @Override protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return String.format("[%s~=%s]", key, pattern.toString());
        }

    }

    /**
     * Abstract evaluator for attribute name/value matching
     */
    public abstract static class AttributeKeyPair extends Evaluator {
        final String key;
        final String value;

        /**
         * Creates a key/value evaluator; equivalent to calling
         * {@link #AttributeKeyPair(String, String, boolean)} with {@code trimValue = true}.
         *
         * @param key attribute key; must not be empty
         * @param value attribute value to match; must not be empty
         */
        public AttributeKeyPair(String key, String value) {
            this(key, value, true);
        }

        /**
         * Creates a key/value evaluator. The key is normalized. If the value is wrapped in a matching pair of
         * single or double quotes, the quotes are removed before the value is normalized.
         *
         * @param key attribute key; must not be empty
         * @param value attribute value to match, optionally quoted; must not be empty
         * @param trimValue if {@code true} the value is normalized with {@code normalize(value)}; otherwise with
         * {@code normalize(value, isStringLiteral)}, where {@code isStringLiteral} records whether the value was quoted
         */
        public AttributeKeyPair(String key, String value, boolean trimValue) {
            Validate.notEmpty(key);
            Validate.notEmpty(value);

            this.key = normalize(key);
            // A lone quote character both starts and ends with a quote, but is not a quoted literal; the length
            // guard stops substring(1, 0) from throwing StringIndexOutOfBoundsException on such input.
            boolean isStringLiteral = value.length() > 1
                && (value.startsWith("'") && value.endsWith("'")
                    || value.startsWith("\"") && value.endsWith("\""));
            if (isStringLiteral) {
                value = value.substring(1, value.length()-1);
            }

            this.value = trimValue ? normalize(value) : normalize(value, isStringLiteral);
        }
    }

    /**
     * Evaluator for any / all element matching
     */
    public static final class AllElements extends Evaluator {

        @Override
        public boolean matches(Element root, Element element) {
            return true;
        }

        @Override protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return "*";
        }
    }

    /**
     * Evaluator for matching by sibling index number (e {@literal <} idx)
     */
    public static final class IndexLessThan extends IndexEvaluator {
        public IndexLessThan(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return root != element && element.elementSiblingIndex() < index;
        }

        @Override
        public String toString() {
            return String.format(":lt(%d)", index);
        }

    }

    /**
     * Evaluator for matching by sibling index number (e {@literal >} idx)
     */
    public static final class IndexGreaterThan extends IndexEvaluator {
        public IndexGreaterThan(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.elementSiblingIndex() > index;
        }

        @Override
        public String toString() {
            return String.format(":gt(%d)", index);
        }

    }

    /**
     * Evaluator for matching by sibling index number (e = idx)
     */
    public static final class IndexEquals extends IndexEvaluator {
        public IndexEquals(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.elementSiblingIndex() == index;
        }

        @Override
        public String toString() {
            return String.format(":eq(%d)", index);
        }

    }

    /**
     * Evaluator for matching the last sibling (css :last-child)
     */
    public static final class IsLastChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            return p != null && !(p instanceof Document) && element == p.lastElementChild();
        }

        @Override
        public String toString() {
            return ":last-child";
        }
    }

    /**
     * css pseudo class :first-of-type; an {@link IsNthOfType} fixed at {@code a = 0, b = 1}.
     */
    public static final class IsFirstOfType extends IsNthOfType {
        public IsFirstOfType() {
            super(0,1);
        }
        @Override
        public String toString() {
            return ":first-of-type";
        }
    }

    /**
     * css pseudo class :last-of-type; an {@link IsNthLastOfType} fixed at {@code a = 0, b = 1}.
     */
    public static final class IsLastOfType extends IsNthLastOfType {
        public IsLastOfType() {
            super(0,1);
        }
        @Override
        public String toString() {
            return ":last-of-type";
        }
    }


    /**
     * Abstract evaluator for the {@code an+b} family of css pseudo classes. Subclasses supply the position of the
     * element via {@link #calculatePosition(Element, Element)}; the element matches when that position equals
     * {@code b} (if {@code a} is 0), or otherwise when {@code position - b} is a multiple of {@code a} that is zero
     * or has the same sign as {@code a}. Elements with no parent, or whose parent is a {@link Document}, never match.
     */
    public static abstract class CssNthEvaluator extends Evaluator {
        protected final int a, b;

        public CssNthEvaluator(int a, int b) {
            this.a = a;
            this.b = b;
        }
        public CssNthEvaluator(int b) {
            this(0,b);
        }

        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            if (p == null || (p instanceof Document)) return false;

            final int pos = calculatePosition(root, element);
            if (a == 0) return pos == b;

            // (pos-b) must be a whole number of steps of size a, taken in the direction of a
            return (pos-b)*a >= 0 && (pos-b)%a==0;
        }

        @Override
        public String toString() {
            if (a == 0)
                return String.format(":%s(%d)",getPseudoClass(), b);
            if (b == 0)
                return String.format(":%s(%dn)",getPseudoClass(), a);
            return String.format(":%s(%dn%+d)", getPseudoClass(),a, b);
        }

        /**
         * @return the pseudo class name (without the leading colon) used by {@link #toString()}
         */
        protected abstract String getPseudoClass();

        /**
         * @param root Root of the matching subtree
         * @param element tested element
         * @return the position of the element that is tested against {@code a} and {@code b}
         */
        protected abstract int calculatePosition(Element root, Element element);
    }


    /**
     * css-compatible Evaluator for :eq (css :nth-child)
     *
     * @see IndexEquals
     */
    public static final class IsNthChild extends CssNthEvaluator {

        public IsNthChild(int a, int b) {
            super(a,b);
        }

        @Override protected int calculatePosition(Element root, Element element) {
            return element.elementSiblingIndex()+1;
        }


        @Override protected String getPseudoClass() {
            return "nth-child";
        }
    }

    /**
     * css pseudo class :nth-last-child
     *
     * @see IndexEquals
     */
    public static final class IsNthLastChild extends CssNthEvaluator {
        public IsNthLastChild(int a, int b) {
            super(a,b);
        }

        @Override
        protected int calculatePosition(Element root, Element element) {
            if (element.parent() == null)
                return 0;
            return element.parent().childrenSize()- element.elementSiblingIndex();
        }

        @Override
        protected String getPseudoClass() {
            return "nth-last-child";
        }
    }

    /**
     * css pseudo class nth-of-type
     *
     */
    public static class IsNthOfType extends CssNthEvaluator {
        public IsNthOfType(int a, int b) {
            super(a, b);
        }

        @Override protected int calculatePosition(Element root, Element element) {
            Element parent = element.parent();
            if (parent == null)
                return 0;

            // count the parent's child nodes sharing the element's normal name, up to and including the element
            int pos = 0;
            final int size = parent.childNodeSize();
            for (int i = 0; i < size; i++) {
                Node node = parent.childNode(i);
                if (node.normalName().equals(element.normalName())) pos++;
                if (node == element) break;
            }
            return pos;
        }

        @Override
        protected String getPseudoClass() {
            return "nth-of-type";
        }
    }

    /**
     * css pseudo class nth-last-of-type
     */
    public static class IsNthLastOfType extends CssNthEvaluator {

        public IsNthLastOfType(int a, int b) {
            super(a, b);
        }

        @Override
        protected int calculatePosition(Element root, Element element) {
            Element parent = element.parent();
            if (parent == null)
                return 0;

            // count the element itself plus each following element sibling sharing its normal name
            int pos = 0;
            Element next = element;
            while (next != null) {
                if (next.normalName().equals(element.normalName()))
                    pos++;
                next = next.nextElementSibling();
            }
            return pos;
        }

        @Override
        protected String getPseudoClass() {
            return "nth-last-of-type";
        }
    }

    /**
     * Evaluator for matching the first sibling (css :first-child)
     */
    public static final class IsFirstChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            return p != null && !(p instanceof Document) && element == p.firstElementChild();
        }

        @Override
        public String toString() {
            return ":first-child";
        }
    }

    /**
     * css3 pseudo-class :root
     * @see <a href="http://www.w3.org/TR/selectors/#root-pseudo">:root selector</a>
     *
     */
    public static final class IsRoot extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            // when evaluating against a Document, the candidate root is the document's first element child
            final Element r = root instanceof Document ? root.firstElementChild() : root;
            return element == r;
        }

        @Override protected int cost() {
            return 1;
        }

        @Override
        public String toString() {
            return ":root";
        }
    }

    /**
     * css pseudo class :only-child; matches an element that has a non-Document parent and no sibling elements.
     */
    public static final class IsOnlyChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            return p!=null && !(p instanceof Document) && element.siblingElements().isEmpty();
        }
        @Override
        public String toString() {
            return ":only-child";
        }
    }

    /**
     * css pseudo class :only-of-type; matches an element that has a non-Document parent, and is the only element
     * child of that parent with its normal name.
     */
    public static final class IsOnlyOfType extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            if (p==null || p instanceof Document) return false;

            int pos = 0;
            Element next = p.firstElementChild();
            while (next != null) {
                if (next.normalName().equals(element.normalName()))
                    pos++;
                if (pos > 1)
                    break; // a second element of the same name was found; no need to scan further
                next = next.nextElementSibling();
            }
            return pos == 1;
        }
        @Override
        public String toString() {
            return ":only-of-type";
        }
    }

    /**
     * css pseudo class :empty; matches an element whose child nodes are all blank text nodes, comments, XML
     * declarations, or document types (or that has no child nodes at all).
     */
    public static final class IsEmpty extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            List<Node> family = element.childNodes();
            for (Node n : family) {
                if (n instanceof TextNode) {
                    // A blank text node does not decide the result: keep scanning the remaining children, so that
                    // leading whitespace followed by an element or real text is not reported as empty.
                    if (!((TextNode) n).isBlank())
                        return false;
                } else if (!(n instanceof Comment || n instanceof XmlDeclaration || n instanceof DocumentType)) {
                    return false;
                }
            }
            return true;
        }
        @Override
        public String toString() {
            return ":empty";
        }
    }

    /**
     * Abstract evaluator for sibling index matching
     *
     * @author ant
     */
    public abstract static class IndexEvaluator extends Evaluator {
        final int index;

        public IndexEvaluator(int index) {
            this.index = index;
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) text
     */
    public static final class ContainsText extends Evaluator {
        private final String searchText;

        public ContainsText(String searchText) {
            this.searchText = lowerCase(normaliseWhitespace(searchText));
        }

        @Override
        public boolean matches(Element root, Element element) {
            return lowerCase(element.text()).contains(searchText);
        }

        @Override protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return String.format(":contains(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) wholeText. Neither the input nor the element text is
     * normalized. <code>:containsWholeText()</code>
     * @since 1.15.1.
     */
    public static final class ContainsWholeText extends Evaluator {
        private final String searchText;

        public ContainsWholeText(String searchText) {
            this.searchText = searchText;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.wholeText().contains(searchText);
        }

        @Override protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return String.format(":containsWholeText(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element (but <b>not</b> its descendants) wholeText. Neither the input nor the element text is
     * normalized. <code>:containsWholeOwnText()</code>
     * @since 1.15.1.
     */
    public static final class ContainsWholeOwnText extends Evaluator {
        private final String searchText;

        public ContainsWholeOwnText(String searchText) {
            this.searchText = searchText;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.wholeOwnText().contains(searchText);
        }

        @Override
        public String toString() {
            return String.format(":containsWholeOwnText(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) data
     */
    public static final class ContainsData extends Evaluator {
        private final String searchText;

        public ContainsData(String searchText) {
            this.searchText = lowerCase(searchText);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return lowerCase(element.data()).contains(searchText); // not whitespace normalized
        }

        @Override
        public String toString() {
            return String.format(":containsData(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element's own text
     */
    public static final class ContainsOwnText extends Evaluator {
        private final String searchText;

        public ContainsOwnText(String searchText) {
            this.searchText = lowerCase(normaliseWhitespace(searchText));
        }

        @Override
        public boolean matches(Element root, Element element) {
            return lowerCase(element.ownText()).contains(searchText);
        }

        @Override
        public String toString() {
            return String.format(":containsOwn(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) text with regex
     */
    public static final class Matches extends Evaluator {
        private final Pattern pattern;

        public Matches(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            Matcher m = pattern.matcher(element.text());
            return m.find();
        }

        @Override protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return String.format(":matches(%s)", pattern);
        }
    }

    /**
     * Evaluator for matching Element's own text with regex
     */
    public static final class MatchesOwn extends Evaluator {
        private final Pattern pattern;

        public MatchesOwn(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            Matcher m = pattern.matcher(element.ownText());
            return m.find();
        }

        @Override protected int cost() {
            return 7;
        }

        @Override
        public String toString() {
            return String.format(":matchesOwn(%s)", pattern);
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) whole text with regex.
     * @since 1.15.1.
     */
    public static final class MatchesWholeText extends Evaluator {
        private final Pattern pattern;

        public MatchesWholeText(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            Matcher m = pattern.matcher(element.wholeText());
            return m.find();
        }

        @Override protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return String.format(":matchesWholeText(%s)", pattern);
        }
    }

    /**
     * Evaluator for matching Element's own whole text with regex.
     * @since 1.15.1.
     */
    public static final class MatchesWholeOwnText extends Evaluator {
        private final Pattern pattern;

        public MatchesWholeOwnText(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            Matcher m = pattern.matcher(element.wholeOwnText());
            return m.find();
        }

        @Override protected int cost() {
            return 7;
        }

        @Override
        public String toString() {
            return String.format(":matchesWholeOwnText(%s)", pattern);
        }
    }

    /**
     * Evaluator for <code>:matchText</code>. Matches {@link PseudoTextElement}s. As a side effect, when tested
     * against any other element, each of that element's text nodes is replaced in the DOM by a new
     * {@link PseudoTextElement} (created with the element's tag name, namespace, base URI and attributes) that
     * wraps the text node; the tested element itself does not match.
     */
    public static final class MatchText extends Evaluator {

        @Override
        public boolean matches(Element root, Element element) {
            if (element instanceof PseudoTextElement)
                return true;

            List<TextNode> textNodes = element.textNodes();
            for (TextNode textNode : textNodes) {
                PseudoTextElement pel = new PseudoTextElement(
                    org.jsoup.parser.Tag.valueOf(element.tagName(), element.tag().namespace(), ParseSettings.preserveCase),
                    element.baseUri(),
                    element.attributes());
                textNode.replaceWith(pel);
                pel.appendChild(textNode);
            }
            return false;
        }

        @Override protected int cost() {
            return -1; // forces first evaluation, which prepares the DOM for later evaluator matches
        }

        @Override
        public String toString() {
            return ":matchText";
        }
    }
}