001package org.jsoup.select;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.nodes.Comment;
005import org.jsoup.nodes.Document;
006import org.jsoup.nodes.DocumentType;
007import org.jsoup.nodes.Element;
008import org.jsoup.nodes.Node;
009import org.jsoup.nodes.PseudoTextElement;
010import org.jsoup.nodes.TextNode;
011import org.jsoup.nodes.XmlDeclaration;
012import org.jsoup.parser.ParseSettings;
013
014import java.util.List;
015import java.util.function.Predicate;
016import java.util.regex.Matcher;
017import java.util.regex.Pattern;
018
019import static org.jsoup.internal.Normalizer.lowerCase;
020import static org.jsoup.internal.Normalizer.normalize;
021import static org.jsoup.internal.StringUtil.normaliseWhitespace;
022
023
024/**
025 * Evaluates that an element matches the selector.
026 */
027public abstract class Evaluator {
028    protected Evaluator() {
029    }
030
031    /**
032     Provides a Predicate for this Evaluator, matching the test Element.
033     * @param root the root Element, for match evaluation
034     * @return a predicate that accepts an Element to test for matches with this Evaluator
035     * @since 1.17.1
036     */
037    public Predicate<Element> asPredicate(Element root) {
038        return element -> matches(root, element);
039    }
040
    /**
     * Test if the element meets the evaluator's requirements.
     *
     * @param root    Root of the matching subtree
     * @param element tested element
     * @return {@code true} if the requirements are met, or {@code false} otherwise
     */
    public abstract boolean matches(Element root, Element element);
050
051    /**
052     Reset any internal state in this Evaluator before executing a new Collector evaluation.
053     */
054    protected void reset() {
055    }
056
057    /**
058     A relative evaluator cost function. During evaluation, Evaluators are sorted by ascending cost as an optimization.
059     * @return the relative cost of this Evaluator
060     */
061    protected int cost() {
062        return 5; // a nominal default cost
063    }
064
065    /**
066     * Evaluator for tag name
067     */
068    public static final class Tag extends Evaluator {
069        private final String tagName;
070
071        public Tag(String tagName) {
072            this.tagName = tagName;
073        }
074
075        @Override
076        public boolean matches(Element root, Element element) {
077            return (element.nameIs(tagName));
078        }
079
080        @Override protected int cost() {
081            return 1;
082        }
083
084        @Override
085        public String toString() {
086            return String.format("%s", tagName);
087        }
088    }
089
090
091    /**
092     * Evaluator for tag name that ends with
093     */
094    public static final class TagEndsWith extends Evaluator {
095        private final String tagName;
096
097        public TagEndsWith(String tagName) {
098            this.tagName = tagName;
099        }
100
101        @Override
102        public boolean matches(Element root, Element element) {
103            return (element.normalName().endsWith(tagName));
104        }
105
106        @Override
107        public String toString() {
108            return String.format("%s", tagName);
109        }
110    }
111
112    /**
113     * Evaluator for element id
114     */
115    public static final class Id extends Evaluator {
116        private final String id;
117
118        public Id(String id) {
119            this.id = id;
120        }
121
122        @Override
123        public boolean matches(Element root, Element element) {
124            return (id.equals(element.id()));
125        }
126
127        @Override protected int cost() {
128            return 2;
129        }
130        @Override
131        public String toString() {
132            return String.format("#%s", id);
133        }
134    }
135
136    /**
137     * Evaluator for element class
138     */
139    public static final class Class extends Evaluator {
140        private final String className;
141
142        public Class(String className) {
143            this.className = className;
144        }
145
146        @Override
147        public boolean matches(Element root, Element element) {
148            return (element.hasClass(className));
149        }
150
151        @Override protected int cost() {
152            return 6; // does whitespace scanning
153        }
154
155        @Override
156        public String toString() {
157            return String.format(".%s", className);
158        }
159
160    }
161
162    /**
163     * Evaluator for attribute name matching
164     */
165    public static final class Attribute extends Evaluator {
166        private final String key;
167
168        public Attribute(String key) {
169            this.key = key;
170        }
171
172        @Override
173        public boolean matches(Element root, Element element) {
174            return element.hasAttr(key);
175        }
176
177        @Override protected int cost() {
178            return 2;
179        }
180
181        @Override
182        public String toString() {
183            return String.format("[%s]", key);
184        }
185    }
186
187    /**
188     * Evaluator for attribute name prefix matching
189     */
190    public static final class AttributeStarting extends Evaluator {
191        private final String keyPrefix;
192
193        public AttributeStarting(String keyPrefix) {
194            Validate.notNull(keyPrefix); // OK to be empty - will find elements with any attributes
195            this.keyPrefix = lowerCase(keyPrefix);
196        }
197
198        @Override
199        public boolean matches(Element root, Element element) {
200            List<org.jsoup.nodes.Attribute> values = element.attributes().asList();
201            for (org.jsoup.nodes.Attribute attribute : values) {
202                if (lowerCase(attribute.getKey()).startsWith(keyPrefix))
203                    return true;
204            }
205            return false;
206        }
207
208        @Override protected int cost() {
209            return 6;
210        }
211
212        @Override
213        public String toString() {
214            return String.format("[^%s]", keyPrefix);
215        }
216
217    }
218
219    /**
220     * Evaluator for attribute name/value matching
221     */
222    public static final class AttributeWithValue extends AttributeKeyPair {
223        public AttributeWithValue(String key, String value) {
224            super(key, value);
225        }
226
227        @Override
228        public boolean matches(Element root, Element element) {
229            return element.hasAttr(key) && value.equalsIgnoreCase(element.attr(key).trim());
230        }
231
232        @Override protected int cost() {
233            return 3;
234        }
235
236        @Override
237        public String toString() {
238            return String.format("[%s=%s]", key, value);
239        }
240
241    }
242
243    /**
244     * Evaluator for attribute name != value matching
245     */
246    public static final class AttributeWithValueNot extends AttributeKeyPair {
247        public AttributeWithValueNot(String key, String value) {
248            super(key, value);
249        }
250
251        @Override
252        public boolean matches(Element root, Element element) {
253            return !value.equalsIgnoreCase(element.attr(key));
254        }
255
256        @Override protected int cost() {
257            return 3;
258        }
259
260        @Override
261        public String toString() {
262            return String.format("[%s!=%s]", key, value);
263        }
264
265    }
266
267    /**
268     * Evaluator for attribute name/value matching (value prefix)
269     */
270    public static final class AttributeWithValueStarting extends AttributeKeyPair {
271        public AttributeWithValueStarting(String key, String value) {
272            super(key, value, false);
273        }
274
275        @Override
276        public boolean matches(Element root, Element element) {
277            return element.hasAttr(key) && lowerCase(element.attr(key)).startsWith(value); // value is lower case already
278        }
279
280        @Override protected int cost() {
281            return 4;
282        }
283
284        @Override
285        public String toString() {
286            return String.format("[%s^=%s]", key, value);
287        }
288    }
289
290    /**
291     * Evaluator for attribute name/value matching (value ending)
292     */
293    public static final class AttributeWithValueEnding extends AttributeKeyPair {
294        public AttributeWithValueEnding(String key, String value) {
295            super(key, value, false);
296        }
297
298        @Override
299        public boolean matches(Element root, Element element) {
300            return element.hasAttr(key) && lowerCase(element.attr(key)).endsWith(value); // value is lower case
301        }
302
303        @Override protected int cost() {
304            return 4;
305        }
306
307        @Override
308        public String toString() {
309            return String.format("[%s$=%s]", key, value);
310        }
311    }
312
313    /**
314     * Evaluator for attribute name/value matching (value containing)
315     */
316    public static final class AttributeWithValueContaining extends AttributeKeyPair {
317        public AttributeWithValueContaining(String key, String value) {
318            super(key, value);
319        }
320
321        @Override
322        public boolean matches(Element root, Element element) {
323            return element.hasAttr(key) && lowerCase(element.attr(key)).contains(value); // value is lower case
324        }
325
326        @Override protected int cost() {
327            return 6;
328        }
329
330        @Override
331        public String toString() {
332            return String.format("[%s*=%s]", key, value);
333        }
334
335    }
336
337    /**
338     * Evaluator for attribute name/value matching (value regex matching)
339     */
340    public static final class AttributeWithValueMatching extends Evaluator {
341        final String key;
342        final Pattern pattern;
343
344        public AttributeWithValueMatching(String key, Pattern pattern) {
345            this.key = normalize(key);
346            this.pattern = pattern;
347        }
348
349        @Override
350        public boolean matches(Element root, Element element) {
351            return element.hasAttr(key) && pattern.matcher(element.attr(key)).find();
352        }
353
354        @Override protected int cost() {
355            return 8;
356        }
357
358        @Override
359        public String toString() {
360            return String.format("[%s~=%s]", key, pattern.toString());
361        }
362
363    }
364
365    /**
366     * Abstract evaluator for attribute name/value matching
367     */
368    public abstract static class AttributeKeyPair extends Evaluator {
369        final String key;
370        final String value;
371
372        public AttributeKeyPair(String key, String value) {
373            this(key, value, true);
374        }
375
376        public AttributeKeyPair(String key, String value, boolean trimValue) {
377            Validate.notEmpty(key);
378            Validate.notEmpty(value);
379
380            this.key = normalize(key);
381            boolean isStringLiteral = value.startsWith("'") && value.endsWith("'")
382                                        || value.startsWith("\"") && value.endsWith("\"");
383            if (isStringLiteral) {
384                value = value.substring(1, value.length()-1);
385            }
386
387            this.value = trimValue ? normalize(value) : normalize(value, isStringLiteral);
388        }
389    }
390
391    /**
392     * Evaluator for any / all element matching
393     */
394    public static final class AllElements extends Evaluator {
395
396        @Override
397        public boolean matches(Element root, Element element) {
398            return true;
399        }
400
401        @Override protected int cost() {
402            return 10;
403        }
404
405        @Override
406        public String toString() {
407            return "*";
408        }
409    }
410
411    /**
412     * Evaluator for matching by sibling index number (e {@literal <} idx)
413     */
414    public static final class IndexLessThan extends IndexEvaluator {
415        public IndexLessThan(int index) {
416            super(index);
417        }
418
419        @Override
420        public boolean matches(Element root, Element element) {
421            return root != element && element.elementSiblingIndex() < index;
422        }
423
424        @Override
425        public String toString() {
426            return String.format(":lt(%d)", index);
427        }
428
429    }
430
431    /**
432     * Evaluator for matching by sibling index number (e {@literal >} idx)
433     */
434    public static final class IndexGreaterThan extends IndexEvaluator {
435        public IndexGreaterThan(int index) {
436            super(index);
437        }
438
439        @Override
440        public boolean matches(Element root, Element element) {
441            return element.elementSiblingIndex() > index;
442        }
443
444        @Override
445        public String toString() {
446            return String.format(":gt(%d)", index);
447        }
448
449    }
450
451    /**
452     * Evaluator for matching by sibling index number (e = idx)
453     */
454    public static final class IndexEquals extends IndexEvaluator {
455        public IndexEquals(int index) {
456            super(index);
457        }
458
459        @Override
460        public boolean matches(Element root, Element element) {
461            return element.elementSiblingIndex() == index;
462        }
463
464        @Override
465        public String toString() {
466            return String.format(":eq(%d)", index);
467        }
468
469    }
470
471    /**
472     * Evaluator for matching the last sibling (css :last-child)
473     */
474    public static final class IsLastChild extends Evaluator {
475                @Override
476                public boolean matches(Element root, Element element) {
477                        final Element p = element.parent();
478                        return p != null && !(p instanceof Document) && element == p.lastElementChild();
479                }
480
481                @Override
482                public String toString() {
483                        return ":last-child";
484                }
485    }
486
487    public static final class IsFirstOfType extends IsNthOfType {
488                public IsFirstOfType() {
489                        super(0,1);
490                }
491                @Override
492                public String toString() {
493                        return ":first-of-type";
494                }
495    }
496
497    public static final class IsLastOfType extends IsNthLastOfType {
498                public IsLastOfType() {
499                        super(0,1);
500                }
501                @Override
502                public String toString() {
503                        return ":last-of-type";
504                }
505    }
506
507
508    public static abstract class CssNthEvaluator extends Evaluator {
509        protected final int a, b;
510
511        public CssNthEvaluator(int a, int b) {
512                this.a = a;
513                this.b = b;
514        }
515        public CssNthEvaluator(int b) {
516                this(0,b);
517        }
518
519        @Override
520        public boolean matches(Element root, Element element) {
521                final Element p = element.parent();
522                if (p == null || (p instanceof Document)) return false;
523
524                final int pos = calculatePosition(root, element);
525                if (a == 0) return pos == b;
526
527                return (pos-b)*a >= 0 && (pos-b)%a==0;
528        }
529
530                @Override
531                public String toString() {
532                        if (a == 0)
533                                return String.format(":%s(%d)",getPseudoClass(), b);
534                        if (b == 0)
535                                return String.format(":%s(%dn)",getPseudoClass(), a);
536                        return String.format(":%s(%dn%+d)", getPseudoClass(),a, b);
537                }
538
539                protected abstract String getPseudoClass();
540                protected abstract int calculatePosition(Element root, Element element);
541    }
542
543
544    /**
545     * css-compatible Evaluator for :eq (css :nth-child)
546     *
547     * @see IndexEquals
548     */
549    public static final class IsNthChild extends CssNthEvaluator {
550
551        public IsNthChild(int a, int b) {
552                super(a,b);
553                }
554
555                @Override protected int calculatePosition(Element root, Element element) {
556                        return element.elementSiblingIndex()+1;
557                }
558
559
560                @Override protected String getPseudoClass() {
561                        return "nth-child";
562                }
563    }
564
565    /**
566     * css pseudo class :nth-last-child)
567     *
568     * @see IndexEquals
569     */
570    public static final class IsNthLastChild extends CssNthEvaluator {
571        public IsNthLastChild(int a, int b) {
572                super(a,b);
573        }
574
575        @Override
576        protected int calculatePosition(Element root, Element element) {
577            if (element.parent() == null)
578                return 0;
579                return element.parent().childrenSize()- element.elementSiblingIndex();
580        }
581
582                @Override
583                protected String getPseudoClass() {
584                        return "nth-last-child";
585                }
586    }
587
588    /**
589     * css pseudo class nth-of-type
590     *
591     */
592    public static class IsNthOfType extends CssNthEvaluator {
593        public IsNthOfType(int a, int b) {
594            super(a, b);
595        }
596
597        @Override protected int calculatePosition(Element root, Element element) {
598            Element parent = element.parent();
599            if (parent == null)
600                return 0;
601
602            int pos = 0;
603            final int size = parent.childNodeSize();
604            for (int i = 0; i < size; i++) {
605                Node node = parent.childNode(i);
606                if (node.normalName().equals(element.normalName())) pos++;
607                if (node == element) break;
608            }
609            return pos;
610        }
611
612        @Override
613        protected String getPseudoClass() {
614            return "nth-of-type";
615        }
616    }
617
618    public static class IsNthLastOfType extends CssNthEvaluator {
619
620        public IsNthLastOfType(int a, int b) {
621            super(a, b);
622        }
623
624        @Override
625        protected int calculatePosition(Element root, Element element) {
626            Element parent = element.parent();
627            if (parent == null)
628                return 0;
629
630            int pos = 0;
631            Element next = element;
632            while (next != null) {
633                if (next.normalName().equals(element.normalName()))
634                    pos++;
635                next = next.nextElementSibling();
636            }
637            return pos;
638        }
639
640        @Override
641        protected String getPseudoClass() {
642            return "nth-last-of-type";
643        }
644    }
645
646    /**
647     * Evaluator for matching the first sibling (css :first-child)
648     */
649    public static final class IsFirstChild extends Evaluator {
650        @Override
651        public boolean matches(Element root, Element element) {
652                final Element p = element.parent();
653                return p != null && !(p instanceof Document) && element == p.firstElementChild();
654        }
655
656        @Override
657        public String toString() {
658                return ":first-child";
659        }
660    }
661
662    /**
663     * css3 pseudo-class :root
664     * @see <a href="http://www.w3.org/TR/selectors/#root-pseudo">:root selector</a>
665     *
666     */
667    public static final class IsRoot extends Evaluator {
668        @Override
669        public boolean matches(Element root, Element element) {
670                final Element r = root instanceof Document ? root.firstElementChild() : root;
671                return element == r;
672        }
673
674        @Override protected int cost() {
675            return 1;
676        }
677
678        @Override
679        public String toString() {
680                return ":root";
681        }
682    }
683
684    public static final class IsOnlyChild extends Evaluator {
685                @Override
686                public boolean matches(Element root, Element element) {
687                        final Element p = element.parent();
688                        return p!=null && !(p instanceof Document) && element.siblingElements().isEmpty();
689                }
690        @Override
691        public String toString() {
692                return ":only-child";
693        }
694    }
695
696    public static final class IsOnlyOfType extends Evaluator {
697                @Override
698                public boolean matches(Element root, Element element) {
699                        final Element p = element.parent();
700                        if (p==null || p instanceof Document) return false;
701
702                        int pos = 0;
703            Element next = p.firstElementChild();
704            while (next != null) {
705                if (next.normalName().equals(element.normalName()))
706                    pos++;
707                if (pos > 1)
708                    break;
709                next = next.nextElementSibling();
710            }
711                return pos == 1;
712                }
713        @Override
714        public String toString() {
715                return ":only-of-type";
716        }
717    }
718
719    public static final class IsEmpty extends Evaluator {
720                @Override
721                public boolean matches(Element root, Element element) {
722                List<Node> family = element.childNodes();
723            for (Node n : family) {
724                if (n instanceof TextNode)
725                    return ((TextNode)n).isBlank();
726                if (!(n instanceof Comment || n instanceof XmlDeclaration || n instanceof DocumentType))
727                    return false;
728            }
729                return true;
730                }
731        @Override
732        public String toString() {
733                return ":empty";
734        }
735    }
736
737    /**
738     * Abstract evaluator for sibling index matching
739     *
740     * @author ant
741     */
742    public abstract static class IndexEvaluator extends Evaluator {
743        final int index;
744
745        public IndexEvaluator(int index) {
746            this.index = index;
747        }
748    }
749
750    /**
751     * Evaluator for matching Element (and its descendants) text
752     */
753    public static final class ContainsText extends Evaluator {
754        private final String searchText;
755
756        public ContainsText(String searchText) {
757            this.searchText = lowerCase(normaliseWhitespace(searchText));
758        }
759
760        @Override
761        public boolean matches(Element root, Element element) {
762            return lowerCase(element.text()).contains(searchText);
763        }
764
765        @Override protected int cost() {
766            return 10;
767        }
768
769        @Override
770        public String toString() {
771            return String.format(":contains(%s)", searchText);
772        }
773    }
774
775    /**
776     * Evaluator for matching Element (and its descendants) wholeText. Neither the input nor the element text is
777     * normalized. <code>:containsWholeText()</code>
778     * @since 1.15.1.
779     */
780    public static final class ContainsWholeText extends Evaluator {
781        private final String searchText;
782
783        public ContainsWholeText(String searchText) {
784            this.searchText = searchText;
785        }
786
787        @Override
788        public boolean matches(Element root, Element element) {
789            return element.wholeText().contains(searchText);
790        }
791
792        @Override protected int cost() {
793            return 10;
794        }
795
796        @Override
797        public String toString() {
798            return String.format(":containsWholeText(%s)", searchText);
799        }
800    }
801
802    /**
803     * Evaluator for matching Element (but <b>not</b> its descendants) wholeText. Neither the input nor the element text is
804     * normalized. <code>:containsWholeOwnText()</code>
805     * @since 1.15.1.
806     */
807    public static final class ContainsWholeOwnText extends Evaluator {
808        private final String searchText;
809
810        public ContainsWholeOwnText(String searchText) {
811            this.searchText = searchText;
812        }
813
814        @Override
815        public boolean matches(Element root, Element element) {
816            return element.wholeOwnText().contains(searchText);
817        }
818
819        @Override
820        public String toString() {
821            return String.format(":containsWholeOwnText(%s)", searchText);
822        }
823    }
824
825    /**
826     * Evaluator for matching Element (and its descendants) data
827     */
828    public static final class ContainsData extends Evaluator {
829        private final String searchText;
830
831        public ContainsData(String searchText) {
832            this.searchText = lowerCase(searchText);
833        }
834
835        @Override
836        public boolean matches(Element root, Element element) {
837            return lowerCase(element.data()).contains(searchText); // not whitespace normalized
838        }
839
840        @Override
841        public String toString() {
842            return String.format(":containsData(%s)", searchText);
843        }
844    }
845
846    /**
847     * Evaluator for matching Element's own text
848     */
849    public static final class ContainsOwnText extends Evaluator {
850        private final String searchText;
851
852        public ContainsOwnText(String searchText) {
853            this.searchText = lowerCase(normaliseWhitespace(searchText));
854        }
855
856        @Override
857        public boolean matches(Element root, Element element) {
858            return lowerCase(element.ownText()).contains(searchText);
859        }
860
861        @Override
862        public String toString() {
863            return String.format(":containsOwn(%s)", searchText);
864        }
865    }
866
867    /**
868     * Evaluator for matching Element (and its descendants) text with regex
869     */
870    public static final class Matches extends Evaluator {
871        private final Pattern pattern;
872
873        public Matches(Pattern pattern) {
874            this.pattern = pattern;
875        }
876
877        @Override
878        public boolean matches(Element root, Element element) {
879            Matcher m = pattern.matcher(element.text());
880            return m.find();
881        }
882
883        @Override protected int cost() {
884            return 8;
885        }
886
887        @Override
888        public String toString() {
889            return String.format(":matches(%s)", pattern);
890        }
891    }
892
893    /**
894     * Evaluator for matching Element's own text with regex
895     */
896    public static final class MatchesOwn extends Evaluator {
897        private final Pattern pattern;
898
899        public MatchesOwn(Pattern pattern) {
900            this.pattern = pattern;
901        }
902
903        @Override
904        public boolean matches(Element root, Element element) {
905            Matcher m = pattern.matcher(element.ownText());
906            return m.find();
907        }
908
909        @Override protected int cost() {
910            return 7;
911        }
912
913        @Override
914        public String toString() {
915            return String.format(":matchesOwn(%s)", pattern);
916        }
917    }
918
919    /**
920     * Evaluator for matching Element (and its descendants) whole text with regex.
921     * @since 1.15.1.
922     */
923    public static final class MatchesWholeText extends Evaluator {
924        private final Pattern pattern;
925
926        public MatchesWholeText(Pattern pattern) {
927            this.pattern = pattern;
928        }
929
930        @Override
931        public boolean matches(Element root, Element element) {
932            Matcher m = pattern.matcher(element.wholeText());
933            return m.find();
934        }
935
936        @Override protected int cost() {
937            return 8;
938        }
939
940        @Override
941        public String toString() {
942            return String.format(":matchesWholeText(%s)", pattern);
943        }
944    }
945
946    /**
947     * Evaluator for matching Element's own whole text with regex.
948     * @since 1.15.1.
949     */
950    public static final class MatchesWholeOwnText extends Evaluator {
951        private final Pattern pattern;
952
953        public MatchesWholeOwnText(Pattern pattern) {
954            this.pattern = pattern;
955        }
956
957        @Override
958        public boolean matches(Element root, Element element) {
959            Matcher m = pattern.matcher(element.wholeOwnText());
960            return m.find();
961        }
962
963        @Override protected int cost() {
964            return 7;
965        }
966
967        @Override
968        public String toString() {
969            return String.format(":matchesWholeOwnText(%s)", pattern);
970        }
971    }
972
973    public static final class MatchText extends Evaluator {
974
975        @Override
976        public boolean matches(Element root, Element element) {
977            if (element instanceof PseudoTextElement)
978                return true;
979
980            List<TextNode> textNodes = element.textNodes();
981            for (TextNode textNode : textNodes) {
982                PseudoTextElement pel = new PseudoTextElement(
983                    org.jsoup.parser.Tag.valueOf(element.tagName(), element.tag().namespace(), ParseSettings.preserveCase), element.baseUri(), element.attributes());
984                textNode.replaceWith(pel);
985                pel.appendChild(textNode);
986            }
987            return false;
988        }
989
990        @Override protected int cost() {
991            return -1; // forces first evaluation, which prepares the DOM for later evaluator matches
992        }
993
994        @Override
995        public String toString() {
996            return ":matchText";
997        }
998    }
999}