001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.StringUtil;
006import org.jsoup.nodes.Attributes;
007import org.jsoup.nodes.CDataNode;
008import org.jsoup.nodes.Comment;
009import org.jsoup.nodes.DataNode;
010import org.jsoup.nodes.Document;
011import org.jsoup.nodes.Element;
012import org.jsoup.nodes.FormElement;
013import org.jsoup.nodes.Node;
014import org.jsoup.nodes.TextNode;
015import org.jspecify.annotations.Nullable;
016
017import java.io.Reader;
018import java.io.StringReader;
019import java.util.ArrayList;
020import java.util.List;
021
022import static org.jsoup.internal.StringUtil.inSorted;
023import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InTableFoster;
024import static org.jsoup.parser.HtmlTreeBuilderState.ForeignContent;
025import static org.jsoup.parser.Parser.NamespaceHtml;
026
027/**
028 * HTML Tree Builder; creates a DOM from Tokens.
029 */
030public class HtmlTreeBuilder extends TreeBuilder {
031    // tag searches. must be sorted, used in inSorted. HtmlTreeBuilderTest validates they're sorted.
032    static final String[] TagsSearchInScope = new String[]{"applet", "caption", "html", "marquee", "object", "table", "td", "th"};
033    static final String[] TagSearchList = new String[]{"ol", "ul"};
034    static final String[] TagSearchButton = new String[]{"button"};
035    static final String[] TagSearchTableScope = new String[]{"html", "table"};
036    static final String[] TagSearchSelectScope = new String[]{"optgroup", "option"};
037    static final String[] TagSearchEndTags = new String[]{"dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc"};
038    static final String[] TagThoroughSearchEndTags = new String[]{"caption", "colgroup", "dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc", "tbody", "td", "tfoot", "th", "thead", "tr"};
039    static final String[] TagSearchSpecial = new String[]{"address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
040        "blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
041        "details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
042        "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
043        "iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
044        "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
045        "section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
046        "title", "tr", "ul", "wbr", "xmp"};
047    static final String[] TagMathMlTextIntegration = new String[]{"mi", "mn", "mo", "ms", "mtext"};
048    static final String[] TagSvgHtmlIntegration = new String[]{"desc", "foreignObject", "title"};
049
050    public static final int MaxScopeSearchDepth = 100; // prevents the parser bogging down in exceptionally broken pages
051
052    private HtmlTreeBuilderState state; // the current state
053    private HtmlTreeBuilderState originalState; // original / marked state
054
055    private boolean baseUriSetFromDoc;
056    private @Nullable Element headElement; // the current head element
057    private @Nullable FormElement formElement; // the current form element
058    private @Nullable Element contextElement; // fragment parse context -- could be null even if fragment parsing
059    private ArrayList<Element> formattingElements; // active (open) formatting elements
060    private ArrayList<HtmlTreeBuilderState> tmplInsertMode; // stack of Template Insertion modes
061    private List<Token.Character> pendingTableCharacters; // chars in table to be shifted out
062    private Token.EndTag emptyEnd; // reused empty end tag
063
064    private boolean framesetOk; // if ok to go into frameset
065    private boolean fosterInserts; // if next inserts should be fostered
066    private boolean fragmentParsing; // if parsing a fragment of html
067
068    @Override ParseSettings defaultSettings() {
069        return ParseSettings.htmlDefault;
070    }
071
072    @Override
073    HtmlTreeBuilder newInstance() {
074        return new HtmlTreeBuilder();
075    }
076
077    @Override
078    protected void initialiseParse(Reader input, String baseUri, Parser parser) {
079        super.initialiseParse(input, baseUri, parser);
080
081        // this is a bit mucky. todo - probably just create new parser objects to ensure all reset.
082        state = HtmlTreeBuilderState.Initial;
083        originalState = null;
084        baseUriSetFromDoc = false;
085        headElement = null;
086        formElement = null;
087        contextElement = null;
088        formattingElements = new ArrayList<>();
089        tmplInsertMode = new ArrayList<>();
090        pendingTableCharacters = new ArrayList<>();
091        emptyEnd = new Token.EndTag(this);
092        framesetOk = true;
093        fosterInserts = false;
094        fragmentParsing = false;
095    }
096
097    @Override List<Node> parseFragment(String inputFragment, @Nullable Element context, String baseUri, Parser parser) {
098        // context may be null
099        state = HtmlTreeBuilderState.Initial;
100        initialiseParse(new StringReader(inputFragment), baseUri, parser);
101        contextElement = context;
102        fragmentParsing = true;
103        Element root = null;
104
105        if (context != null) {
106            if (context.ownerDocument() != null) // quirks setup:
107                doc.quirksMode(context.ownerDocument().quirksMode());
108
109            // initialise the tokeniser state:
110            String contextTag = context.normalName();
111            switch (contextTag) {
112                case "title":
113                case "textarea":
114                    tokeniser.transition(TokeniserState.Rcdata);
115                    break;
116                case "iframe":
117                case "noembed":
118                case "noframes":
119                case "style":
120                case "xmp":
121                    tokeniser.transition(TokeniserState.Rawtext);
122                    break;
123                case "script":
124                    tokeniser.transition(TokeniserState.ScriptData);
125                    break;
126                case "plaintext":
127                    tokeniser.transition(TokeniserState.PLAINTEXT);
128                    break;
129                case "template":
130                    tokeniser.transition(TokeniserState.Data);
131                    pushTemplateMode(HtmlTreeBuilderState.InTemplate);
132                    break;
133                default:
134                    tokeniser.transition(TokeniserState.Data);
135            }
136            root = new Element(tagFor(contextTag, settings), baseUri);
137            doc.appendChild(root);
138            push(root);
139            resetInsertionMode();
140
141            // setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
142            // with form correctly
143            Element formSearch = context;
144            while (formSearch != null) {
145                if (formSearch instanceof FormElement) {
146                    formElement = (FormElement) formSearch;
147                    break;
148                }
149                formSearch = formSearch.parent();
150            }
151        }
152
153        runParser();
154        if (context != null) {
155            // depending on context and the input html, content may have been added outside of the root el
156            // e.g. context=p, input=div, the div will have been pushed out.
157            List<Node> nodes = root.siblingNodes();
158            if (!nodes.isEmpty())
159                root.insertChildren(-1, nodes);
160            return root.childNodes();
161        }
162        else
163            return doc.childNodes();
164    }
165
166    @Override
167    protected boolean process(Token token) {
168        HtmlTreeBuilderState dispatch = useCurrentOrForeignInsert(token) ? this.state : ForeignContent;
169        return dispatch.process(token, this);
170    }
171
172    boolean useCurrentOrForeignInsert(Token token) {
173        // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
174        // If the stack of open elements is empty
175        if (stack.isEmpty())
176            return true;
177        final Element el = currentElement();
178        final String ns = el.tag().namespace();
179
180        // If the adjusted current node is an element in the HTML namespace
181        if (NamespaceHtml.equals(ns))
182            return true;
183
184        // If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
185        // If the adjusted current node is a MathML text integration point and the token is a character token
186        if (isMathmlTextIntegration(el)) {
187            if (token.isStartTag()
188                    && !"mglyph".equals(token.asStartTag().normalName)
189                    && !"malignmark".equals(token.asStartTag().normalName))
190                    return true;
191            if (token.isCharacter())
192                    return true;
193        }
194        // If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
195        if (Parser.NamespaceMathml.equals(ns)
196            && el.nameIs("annotation-xml")
197            && token.isStartTag()
198            && "svg".equals(token.asStartTag().normalName))
199            return true;
200
201        // If the adjusted current node is an HTML integration point and the token is a start tag
202        // If the adjusted current node is an HTML integration point and the token is a character token
203        if (isHtmlIntegration(el)
204            && (token.isStartTag() || token.isCharacter()))
205            return true;
206
207        // If the token is an end-of-file token
208        return token.isEOF();
209    }
210
211    static boolean isMathmlTextIntegration(Element el) {
212        /*
213        A node is a MathML text integration point if it is one of the following elements:
214        A MathML mi element
215        A MathML mo element
216        A MathML mn element
217        A MathML ms element
218        A MathML mtext element
219         */
220        return (Parser.NamespaceMathml.equals(el.tag().namespace())
221            && StringUtil.inSorted(el.normalName(), TagMathMlTextIntegration));
222    }
223
224    static boolean isHtmlIntegration(Element el) {
225        /*
226        A node is an HTML integration point if it is one of the following elements:
227        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
228        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
229        An SVG foreignObject element
230        An SVG desc element
231        An SVG title element
232         */
233        if (Parser.NamespaceMathml.equals(el.tag().namespace())
234            && el.nameIs("annotation-xml")) {
235            String encoding = Normalizer.normalize(el.attr("encoding"));
236            if (encoding.equals("text/html") || encoding.equals("application/xhtml+xml"))
237                return true;
238        }
239        if (Parser.NamespaceSvg.equals(el.tag().namespace())
240            && StringUtil.in(el.tagName(), TagSvgHtmlIntegration)) // note using .tagName for case-sensitive hit here of foreignObject
241            return true;
242
243        return false;
244    }
245
246    boolean process(Token token, HtmlTreeBuilderState state) {
247        return state.process(token, this);
248    }
249
250    void transition(HtmlTreeBuilderState state) {
251        this.state = state;
252    }
253
254    HtmlTreeBuilderState state() {
255        return state;
256    }
257
258    void markInsertionMode() {
259        originalState = state;
260    }
261
262    HtmlTreeBuilderState originalState() {
263        return originalState;
264    }
265
266    void framesetOk(boolean framesetOk) {
267        this.framesetOk = framesetOk;
268    }
269
270    boolean framesetOk() {
271        return framesetOk;
272    }
273
274    Document getDocument() {
275        return doc;
276    }
277
278    String getBaseUri() {
279        return baseUri;
280    }
281
282    void maybeSetBaseUri(Element base) {
283        if (baseUriSetFromDoc) // only listen to the first <base href> in parse
284            return;
285
286        String href = base.absUrl("href");
287        if (href.length() != 0) { // ignore <base target> etc
288            baseUri = href;
289            baseUriSetFromDoc = true;
290            doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
291        }
292    }
293
294    boolean isFragmentParsing() {
295        return fragmentParsing;
296    }
297
298    void error(HtmlTreeBuilderState state) {
299        if (parser.getErrors().canAddError())
300            parser.getErrors().add(new ParseError(reader, "Unexpected %s token [%s] when in state [%s]",
301                currentToken.tokenType(), currentToken, state));
302    }
303
304    Element createElementFor(Token.StartTag startTag, String namespace, boolean forcePreserveCase) {
305        // dedupe and normalize the attributes:
306        Attributes attributes = startTag.attributes;
307        if (!forcePreserveCase)
308            attributes = settings.normalizeAttributes(attributes);
309        if (attributes != null && !attributes.isEmpty()) {
310            int dupes = attributes.deduplicate(settings);
311            if (dupes > 0) {
312                error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
313            }
314        }
315
316        Tag tag = tagFor(startTag.tagName, namespace,
317            forcePreserveCase ? ParseSettings.preserveCase : settings);
318
319        return (tag.normalName().equals("form")) ?
320            new FormElement(tag, null, attributes) :
321            new Element(tag, null, attributes);
322    }
323
324    /** Inserts an HTML element for the given tag) */
325    Element insertElementFor(final Token.StartTag startTag) {
326        Element el = createElementFor(startTag, NamespaceHtml, false);
327        doInsertElement(el, startTag);
328
329        // handle self-closing tags. when the spec expects an empty tag, will directly hit insertEmpty, so won't generate this fake end tag.
330        if (startTag.isSelfClosing()) {
331            Tag tag = el.tag();
332            if (tag.isKnownTag()) {
333                if (!tag.isEmpty())
334                    tokeniser.error("Tag [%s] cannot be self closing; not a void tag", tag.normalName());
335                // else: ok
336            }
337            else { // unknown tag: remember this is self-closing, for output
338                tag.setSelfClosing();
339            }
340
341            // effectively a pop, but fiddles with the state. handles empty style, title etc which would otherwise leave us in data state
342            tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data
343            tokeniser.emit(emptyEnd.reset().name(el.tagName()));  // ensure we get out of whatever state we are in. emitted for yielded processing
344        }
345
346        return el;
347    }
348
349    /**
350     Inserts a foreign element. Preserves the case of the tag name and of the attributes.
351     */
352    Element insertForeignElementFor(final Token.StartTag startTag, String namespace) {
353        Element el = createElementFor(startTag, namespace, true);
354        doInsertElement(el, startTag);
355
356        if (startTag.isSelfClosing()) {
357            el.tag().setSelfClosing(); // remember this is self-closing for output
358            pop();
359        }
360
361        return el;
362    }
363
364    Element insertEmptyElementFor(Token.StartTag startTag) {
365        Element el = createElementFor(startTag, NamespaceHtml, false);
366        doInsertElement(el, startTag);
367        pop();
368        return el;
369    }
370
371    FormElement insertFormElement(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack) {
372        FormElement el = (FormElement) createElementFor(startTag, NamespaceHtml, false);
373
374        if (checkTemplateStack) {
375            if(!onStack("template"))
376                setFormElement(el);
377        } else
378            setFormElement(el);
379
380        doInsertElement(el, startTag);
381        if (!onStack) pop();
382        return el;
383    }
384
385    /** Inserts the Element onto the stack. All element inserts must run through this method. Performs any general
386     tests on the Element before insertion.
387     * @param el the Element to insert and make the current element
388     * @param token the token this element was parsed from. If null, uses a zero-width current token as intrinsic insert
389     */
390    private void doInsertElement(Element el, @Nullable Token token) {
391        if (el.tag().isFormListed() && formElement != null)
392            formElement.addElement(el); // connect form controls to their form element
393
394        // in HTML, the xmlns attribute if set must match what the parser set the tag's namespace to
395        if (el.hasAttr("xmlns") && !el.attr("xmlns").equals(el.tag().namespace()))
396            error("Invalid xmlns attribute [%s] on tag [%s]", el.attr("xmlns"), el.tagName());
397
398        if (isFosterInserts() && StringUtil.inSorted(currentElement().normalName(), InTableFoster))
399            insertInFosterParent(el);
400        else
401            currentElement().appendChild(el);
402
403        push(el);
404    }
405
406    void insertCommentNode(Token.Comment token) {
407        Comment node = new Comment(token.getData());
408        currentElement().appendChild(node);
409        onNodeInserted(node);
410    }
411
412    /** Inserts the provided character token into the current element. */
413    void insertCharacterNode(Token.Character characterToken) {
414        Element el = currentElement(); // will be doc if no current element; allows for whitespace to be inserted into the doc root object (not on the stack)
415        insertCharacterToElement(characterToken, el);
416    }
417
418    /** Inserts the provided character token into the provided element. */
419    void insertCharacterToElement(Token.Character characterToken, Element el) {
420        final Node node;
421        final String tagName = el.normalName();
422        final String data = characterToken.getData();
423
424        if (characterToken.isCData())
425            node = new CDataNode(data);
426        else if (isContentForTagData(tagName))
427            node = new DataNode(data);
428        else
429            node = new TextNode(data);
430        el.appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
431        onNodeInserted(node);
432    }
433
434    ArrayList<Element> getStack() {
435        return stack;
436    }
437
438    boolean onStack(Element el) {
439        return onStack(stack, el);
440    }
441
442    /** Checks if there is an HTML element with the given name on the stack. */
443    boolean onStack(String elName) {
444        return getFromStack(elName) != null;
445    }
446
447    private static final int maxQueueDepth = 256; // an arbitrary tension point between real HTML and crafted pain
448    private static boolean onStack(ArrayList<Element> queue, Element element) {
449        final int bottom = queue.size() - 1;
450        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
451        for (int pos = bottom; pos >= upper; pos--) {
452            Element next = queue.get(pos);
453            if (next == element) {
454                return true;
455            }
456        }
457        return false;
458    }
459
460    /** Gets the nearest (lowest) HTML element with the given name from the stack. */
461    @Nullable
462    Element getFromStack(String elName) {
463        final int bottom = stack.size() - 1;
464        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
465        for (int pos = bottom; pos >= upper; pos--) {
466            Element next = stack.get(pos);
467            if (next.elementIs(elName, NamespaceHtml)) {
468                return next;
469            }
470        }
471        return null;
472    }
473
474    boolean removeFromStack(Element el) {
475        for (int pos = stack.size() -1; pos >= 0; pos--) {
476            Element next = stack.get(pos);
477            if (next == el) {
478                stack.remove(pos);
479                onNodeClosed(el);
480                return true;
481            }
482        }
483        return false;
484    }
485
486    /** Pops the stack until the given HTML element is removed. */
487    @Nullable
488    Element popStackToClose(String elName) {
489        for (int pos = stack.size() -1; pos >= 0; pos--) {
490            Element el = pop();
491            if (el.elementIs(elName, NamespaceHtml)) {
492                return el;
493            }
494        }
495        return null;
496    }
497
498    /** Pops the stack until an element with the supplied name is removed, irrespective of namespace. */
499    @Nullable
500    Element popStackToCloseAnyNamespace(String elName) {
501        for (int pos = stack.size() -1; pos >= 0; pos--) {
502            Element el = pop();
503            if (el.nameIs(elName)) {
504                return el;
505            }
506        }
507        return null;
508    }
509
510    /** Pops the stack until one of the given HTML elements is removed. */
511    void popStackToClose(String... elNames) { // elnames is sorted, comes from Constants
512        for (int pos = stack.size() -1; pos >= 0; pos--) {
513            Element el = pop();
514            if (inSorted(el.normalName(), elNames) && NamespaceHtml.equals(el.tag().namespace())) {
515                break;
516            }
517        }
518    }
519
520    void clearStackToTableContext() {
521        clearStackToContext("table", "template");
522    }
523
524    void clearStackToTableBodyContext() {
525        clearStackToContext("tbody", "tfoot", "thead", "template");
526    }
527
528    void clearStackToTableRowContext() {
529        clearStackToContext("tr", "template");
530    }
531
532    /** Removes elements from the stack until one of the supplied HTML elements is removed. */
533    private void clearStackToContext(String... nodeNames) {
534        for (int pos = stack.size() -1; pos >= 0; pos--) {
535            Element next = stack.get(pos);
536            if (NamespaceHtml.equals(next.tag().namespace()) &&
537                (StringUtil.in(next.normalName(), nodeNames) || next.nameIs("html")))
538                break;
539            else
540                pop();
541        }
542    }
543
544    @Nullable Element aboveOnStack(Element el) {
545        assert onStack(el);
546        for (int pos = stack.size() -1; pos >= 0; pos--) {
547            Element next = stack.get(pos);
548            if (next == el) {
549                return stack.get(pos-1);
550            }
551        }
552        return null;
553    }
554
555    void insertOnStackAfter(Element after, Element in) {
556        int i = stack.lastIndexOf(after);
557        Validate.isTrue(i != -1);
558        stack.add(i+1, in);
559    }
560
561    void replaceOnStack(Element out, Element in) {
562        replaceInQueue(stack, out, in);
563    }
564
565    private static void replaceInQueue(ArrayList<Element> queue, Element out, Element in) {
566        int i = queue.lastIndexOf(out);
567        Validate.isTrue(i != -1);
568        queue.set(i, in);
569    }
570
571    /**
572     * Reset the insertion mode, by searching up the stack for an appropriate insertion mode. The stack search depth
573     * is limited to {@link #maxQueueDepth}.
574     * @return true if the insertion mode was actually changed.
575     */
576    boolean resetInsertionMode() {
577        // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
578        boolean last = false;
579        final int bottom = stack.size() - 1;
580        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
581        final HtmlTreeBuilderState origState = this.state;
582
583        if (stack.size() == 0) { // nothing left of stack, just get to body
584            transition(HtmlTreeBuilderState.InBody);
585        }
586
587        LOOP: for (int pos = bottom; pos >= upper; pos--) {
588            Element node = stack.get(pos);
589            if (pos == upper) {
590                last = true;
591                if (fragmentParsing)
592                    node = contextElement;
593            }
594            String name = node != null ? node.normalName() : "";
595            if (!NamespaceHtml.equals(node.tag().namespace()))
596                continue; // only looking for HTML elements here
597
598            switch (name) {
599                case "select":
600                    transition(HtmlTreeBuilderState.InSelect);
601                    // todo - should loop up (with some limit) and check for table or template hits
602                    break LOOP;
603                case "td":
604                case "th":
605                    if (!last) {
606                        transition(HtmlTreeBuilderState.InCell);
607                        break LOOP;
608                    }
609                    break;
610                case "tr":
611                    transition(HtmlTreeBuilderState.InRow);
612                    break LOOP;
613                case "tbody":
614                case "thead":
615                case "tfoot":
616                    transition(HtmlTreeBuilderState.InTableBody);
617                    break LOOP;
618                case "caption":
619                    transition(HtmlTreeBuilderState.InCaption);
620                    break LOOP;
621                case "colgroup":
622                    transition(HtmlTreeBuilderState.InColumnGroup);
623                    break LOOP;
624                case "table":
625                    transition(HtmlTreeBuilderState.InTable);
626                    break LOOP;
627                case "template":
628                    HtmlTreeBuilderState tmplState = currentTemplateMode();
629                    Validate.notNull(tmplState, "Bug: no template insertion mode on stack!");
630                    transition(tmplState);
631                    break LOOP;
632                case "head":
633                    if (!last) {
634                        transition(HtmlTreeBuilderState.InHead);
635                        break LOOP;
636                    }
637                    break;
638                case "body":
639                    transition(HtmlTreeBuilderState.InBody);
640                    break LOOP;
641                case "frameset":
642                    transition(HtmlTreeBuilderState.InFrameset);
643                    break LOOP;
644                case "html":
645                    transition(headElement == null ? HtmlTreeBuilderState.BeforeHead : HtmlTreeBuilderState.AfterHead);
646                    break LOOP;
647            }
648            if (last) {
649                transition(HtmlTreeBuilderState.InBody);
650                break;
651            }
652        }
653        return state != origState;
654    }
655
656    /** Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes */
657    void resetBody() {
658        if (!onStack("body")) {
659            stack.add(doc.body()); // not onNodeInserted, as already seen
660        }
661        transition(HtmlTreeBuilderState.InBody);
662    }
663
664    // todo: tidy up in specific scope methods
665    private final String[] specificScopeTarget = {null};
666
667    private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
668        specificScopeTarget[0] = targetName;
669        return inSpecificScope(specificScopeTarget, baseTypes, extraTypes);
670    }
671
672    private boolean inSpecificScope(String[] targetNames, String[] baseTypes, @Nullable String[] extraTypes) {
673        // https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
674        final int bottom = stack.size() -1;
675        final int top = bottom > MaxScopeSearchDepth ? bottom - MaxScopeSearchDepth : 0;
676        // don't walk too far up the tree
677
678        for (int pos = bottom; pos >= top; pos--) {
679            Element el = stack.get(pos);
680            if (!el.tag().namespace().equals(NamespaceHtml)) continue;
681
682            final String elName = el.normalName();
683            if (inSorted(elName, targetNames))
684                return true;
685            if (inSorted(elName, baseTypes))
686                return false;
687            if (extraTypes != null && inSorted(elName, extraTypes))
688                return false;
689        }
690        //Validate.fail("Should not be reachable"); // would end up false because hitting 'html' at root (basetypes)
691        return false;
692    }
693
694    boolean inScope(String[] targetNames) {
695        return inSpecificScope(targetNames, TagsSearchInScope, null);
696    }
697
698    boolean inScope(String targetName) {
699        return inScope(targetName, null);
700    }
701
702    boolean inScope(String targetName, String[] extras) {
703        return inSpecificScope(targetName, TagsSearchInScope, extras);
704        // todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
705        // todo: in svg namespace: forignOjbect, desc, title
706    }
707
708    boolean inListItemScope(String targetName) {
709        return inScope(targetName, TagSearchList);
710    }
711
712    boolean inButtonScope(String targetName) {
713        return inScope(targetName, TagSearchButton);
714    }
715
716    boolean inTableScope(String targetName) {
717        return inSpecificScope(targetName, TagSearchTableScope, null);
718    }
719
720    boolean inSelectScope(String targetName) {
721        for (int pos = stack.size() -1; pos >= 0; pos--) {
722            Element el = stack.get(pos);
723            String elName = el.normalName();
724            if (elName.equals(targetName))
725                return true;
726            if (!inSorted(elName, TagSearchSelectScope)) // all elements except
727                return false;
728        }
729        Validate.fail("Should not be reachable");
730        return false;
731    }
732
733    /** Tests if there is some element on the stack that is not in the provided set. */
734    boolean onStackNot(String[] allowedTags) {
735        final int bottom = stack.size() -1;
736        final int top = bottom > MaxScopeSearchDepth ? bottom - MaxScopeSearchDepth : 0;
737        // don't walk too far up the tree
738
739        for (int pos = bottom; pos >= top; pos--) {
740            final String elName = stack.get(pos).normalName();
741            if (!inSorted(elName, allowedTags))
742                return true;
743        }
744        return false;
745    }
746
747    void setHeadElement(Element headElement) {
748        this.headElement = headElement;
749    }
750
751    Element getHeadElement() {
752        return headElement;
753    }
754
755    boolean isFosterInserts() {
756        return fosterInserts;
757    }
758
759    void setFosterInserts(boolean fosterInserts) {
760        this.fosterInserts = fosterInserts;
761    }
762
763    @Nullable FormElement getFormElement() {
764        return formElement;
765    }
766
767    void setFormElement(FormElement formElement) {
768        this.formElement = formElement;
769    }
770
771    void resetPendingTableCharacters() {
772        pendingTableCharacters.clear();
773    }
774
775    List<Token.Character> getPendingTableCharacters() {
776        return pendingTableCharacters;
777    }
778
779    void addPendingTableCharacters(Token.Character c) {
780        // make a clone of the token to maintain its state (as Tokens are otherwise reset)
781        Token.Character clone = c.clone();
782        pendingTableCharacters.add(clone);
783    }
784
785    /**
786     13.2.6.3 Closing elements that have implied end tags
787     When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements.
788
789     If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list.
790
791     When the steps below require the UA to generate all implied end tags thoroughly, then, while the current node is a caption element, a colgroup element, a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, an rtc element, a tbody element, a td element, a tfoot element, a th element, a thead element, or a tr element, the UA must pop the current node off the stack of open elements.
792
793     @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
794     process, then the UA must perform the above steps as if that element was not in the above list.
795     */
796    void generateImpliedEndTags(String excludeTag) {
797        while (inSorted(currentElement().normalName(), TagSearchEndTags)) {
798            if (excludeTag != null && currentElementIs(excludeTag))
799                break;
800            pop();
801        }
802    }
803
804    void generateImpliedEndTags() {
805        generateImpliedEndTags(false);
806    }
807
808    /**
809     Pops HTML elements off the stack according to the implied end tag rules
810     @param thorough if we are thorough (includes table elements etc) or not
811     */
812    void generateImpliedEndTags(boolean thorough) {
813        final String[] search = thorough ? TagThoroughSearchEndTags : TagSearchEndTags;
814        while (NamespaceHtml.equals(currentElement().tag().namespace())
815            && inSorted(currentElement().normalName(), search)) {
816            pop();
817        }
818    }
819
820    void closeElement(String name) {
821        generateImpliedEndTags(name);
822        if (!name.equals(currentElement().normalName())) error(state());
823        popStackToClose(name);
824    }
825
826    static boolean isSpecial(Element el) {
827        // todo: mathml's mi, mo, mn
828        // todo: svg's foreigObject, desc, title
829        String name = el.normalName();
830        return inSorted(name, TagSearchSpecial);
831    }
832
833    Element lastFormattingElement() {
834        return formattingElements.size() > 0 ? formattingElements.get(formattingElements.size()-1) : null;
835    }
836
837    int positionOfElement(Element el){
838        for (int i = 0; i < formattingElements.size(); i++){
839            if (el == formattingElements.get(i))
840                return i;
841        }
842        return -1;
843    }
844
845    Element removeLastFormattingElement() {
846        int size = formattingElements.size();
847        if (size > 0)
848            return formattingElements.remove(size-1);
849        else
850            return null;
851    }
852
853    // active formatting elements
854    void pushActiveFormattingElements(Element in) {
855        checkActiveFormattingElements(in);
856        formattingElements.add(in);
857    }
858
859    void pushWithBookmark(Element in, int bookmark){
860        checkActiveFormattingElements(in);
861        // catch any range errors and assume bookmark is incorrect - saves a redundant range check.
862        try {
863            formattingElements.add(bookmark, in);
864        } catch (IndexOutOfBoundsException e) {
865            formattingElements.add(in);
866        }
867    }
868
869    void checkActiveFormattingElements(Element in){
870        int numSeen = 0;
871        final int size = formattingElements.size() -1;
872        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
873
874        for (int pos = size; pos >= ceil; pos--) {
875            Element el = formattingElements.get(pos);
876            if (el == null) // marker
877                break;
878
879            if (isSameFormattingElement(in, el))
880                numSeen++;
881
882            if (numSeen == 3) {
883                formattingElements.remove(pos);
884                break;
885            }
886        }
887    }
888
889    private static boolean isSameFormattingElement(Element a, Element b) {
890        // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
891        return a.normalName().equals(b.normalName()) &&
892                // a.namespace().equals(b.namespace()) &&
893                a.attributes().equals(b.attributes());
894        // todo: namespaces
895    }
896
897    void reconstructFormattingElements() {
898        if (stack.size() > maxQueueDepth)
899            return;
900        Element last = lastFormattingElement();
901        if (last == null || onStack(last))
902            return;
903
904        Element entry = last;
905        int size = formattingElements.size();
906        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
907        int pos = size - 1;
908        boolean skip = false;
909        while (true) {
910            if (pos == ceil) { // step 4. if none before, skip to 8
911                skip = true;
912                break;
913            }
914            entry = formattingElements.get(--pos); // step 5. one earlier than entry
915            if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
916                break; // jump to 8, else continue back to 4
917        }
918        while(true) {
919            if (!skip) // step 7: on later than entry
920                entry = formattingElements.get(++pos);
921            Validate.notNull(entry); // should not occur, as we break at last element
922
923            // 8. create new element from element, 9 insert into current node, onto stack
924            skip = false; // can only skip increment from 4.
925            Element newEl = new Element(tagFor(entry.normalName(), settings), null, entry.attributes().clone());
926            doInsertElement(newEl, null);
927
928            // 10. replace entry with new entry
929            formattingElements.set(pos, newEl);
930
931            // 11
932            if (pos == size-1) // if not last entry in list, jump to 7
933                break;
934        }
935    }
936    private static final int maxUsedFormattingElements = 12; // limit how many elements get recreated
937
938    void clearFormattingElementsToLastMarker() {
939        while (!formattingElements.isEmpty()) {
940            Element el = removeLastFormattingElement();
941            if (el == null)
942                break;
943        }
944    }
945
946    void removeFromActiveFormattingElements(Element el) {
947        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
948            Element next = formattingElements.get(pos);
949            if (next == el) {
950                formattingElements.remove(pos);
951                break;
952            }
953        }
954    }
955
956    boolean isInActiveFormattingElements(Element el) {
957        return onStack(formattingElements, el);
958    }
959
960    @Nullable
961    Element getActiveFormattingElement(String nodeName) {
962        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
963            Element next = formattingElements.get(pos);
964            if (next == null) // scope marker
965                break;
966            else if (next.nameIs(nodeName))
967                return next;
968        }
969        return null;
970    }
971
972    void replaceActiveFormattingElement(Element out, Element in) {
973        replaceInQueue(formattingElements, out, in);
974    }
975
976    void insertMarkerToFormattingElements() {
977        formattingElements.add(null);
978    }
979
980    void insertInFosterParent(Node in) {
981        Element fosterParent;
982        Element lastTable = getFromStack("table");
983        boolean isLastTableParent = false;
984        if (lastTable != null) {
985            if (lastTable.parent() != null) {
986                fosterParent = lastTable.parent();
987                isLastTableParent = true;
988            } else
989                fosterParent = aboveOnStack(lastTable);
990        } else { // no table == frag
991            fosterParent = stack.get(0);
992        }
993
994        if (isLastTableParent) {
995            Validate.notNull(lastTable); // last table cannot be null by this point.
996            lastTable.before(in);
997        }
998        else
999            fosterParent.appendChild(in);
1000    }
1001
1002    // Template Insertion Mode stack
1003    void pushTemplateMode(HtmlTreeBuilderState state) {
1004        tmplInsertMode.add(state);
1005    }
1006
1007    @Nullable HtmlTreeBuilderState popTemplateMode() {
1008        if (tmplInsertMode.size() > 0) {
1009            return tmplInsertMode.remove(tmplInsertMode.size() -1);
1010        } else {
1011            return null;
1012        }
1013    }
1014
1015    int templateModeSize() {
1016        return tmplInsertMode.size();
1017    }
1018
1019    @Nullable HtmlTreeBuilderState currentTemplateMode() {
1020        return (tmplInsertMode.size() > 0) ? tmplInsertMode.get(tmplInsertMode.size() -1)  : null;
1021    }
1022
1023    @Override
1024    public String toString() {
1025        return "TreeBuilder{" +
1026                "currentToken=" + currentToken +
1027                ", state=" + state +
1028                ", currentElement=" + currentElement() +
1029                '}';
1030    }
1031
1032    @Override protected boolean isContentForTagData(final String normalName) {
1033        return (normalName.equals("script") || normalName.equals("style"));
1034    }
1035}