001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005
006import java.util.HashMap;
007import java.util.Map;
008import java.util.function.Consumer;
009
010/**
011 * Tag capabilities.
012 *
013 * @author Jonathan Hedley, jonathan@hedley.net
014 */
015public class Tag implements Cloneable {
016    private static final Map<String, Tag> Tags = new HashMap<>(); // map of known tags
017
018    private String tagName;
019    private final String normalName; // always the lower case version of this tag, regardless of case preservation mode
020    private String namespace;
021    private boolean isBlock = true; // block
022    private boolean formatAsBlock = true; // should be formatted as a block
023    private boolean empty = false; // can hold nothing; e.g. img
024    private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
025    private boolean preserveWhitespace = false; // for pre, textarea, script etc
026    private boolean formList = false; // a control that appears in forms: input, textarea, output etc
027    private boolean formSubmit = false; // a control that can be submitted in a form: input etc
028
029    private Tag(String tagName, String namespace) {
030        this.tagName = tagName;
031        normalName = Normalizer.lowerCase(tagName);
032        this.namespace = namespace;
033    }
034
035    /**
036     * Get this tag's name.
037     *
038     * @return the tag's name
039     */
040    public String getName() {
041        return tagName;
042    }
043
044    /**
045     * Get this tag's normalized (lowercased) name.
046     * @return the tag's normal name.
047     */
048    public String normalName() {
049        return normalName;
050    }
051
052    public String namespace() {
053        return namespace;
054    }
055
056    /**
057     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
058     * <p>
059     * Pre-defined tags (p, div etc) will be ==, but unknown tags are not registered and will only .equals().
060     * </p>
061     * 
062     * @param tagName Name of tag, e.g. "p". Case-insensitive.
063     * @param namespace the namespace for the tag.
064     * @param settings used to control tag name sensitivity
065     * @return The tag, either defined or new generic.
066     */
067    public static Tag valueOf(String tagName, String namespace, ParseSettings settings) {
068        Validate.notEmpty(tagName);
069        Validate.notNull(namespace);
070        Tag tag = Tags.get(tagName);
071        if (tag != null && tag.namespace.equals(namespace))
072            return tag;
073
074        tagName = settings.normalizeTag(tagName); // the name we'll use
075        Validate.notEmpty(tagName);
076        String normalName = Normalizer.lowerCase(tagName); // the lower-case name to get tag settings off
077        tag = Tags.get(normalName);
078        if (tag != null && tag.namespace.equals(namespace)) {
079            if (settings.preserveTagCase() && !tagName.equals(normalName)) {
080                tag = tag.clone(); // get a new version vs the static one, so name update doesn't reset all
081                tag.tagName = tagName;
082            }
083            return tag;
084        }
085
086        // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
087        tag = new Tag(tagName, namespace);
088        tag.isBlock = false;
089
090        return tag;
091    }
092
093    /**
094     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
095     * <p>
096     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
097     * </p>
098     *
099     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
100     * @return The tag, either defined or new generic.
101     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
102     */
103    public static Tag valueOf(String tagName) {
104        return valueOf(tagName, Parser.NamespaceHtml, ParseSettings.preserveCase);
105    }
106
107    /**
108     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
109     * <p>
110     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
111     * </p>
112     *
113     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
114     * @param settings used to control tag name sensitivity
115     * @return The tag, either defined or new generic.
116     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
117     */
118    public static Tag valueOf(String tagName, ParseSettings settings) {
119        return valueOf(tagName, Parser.NamespaceHtml, settings);
120    }
121
122    /**
123     * Gets if this is a block tag.
124     *
125     * @return if block tag
126     */
127    public boolean isBlock() {
128        return isBlock;
129    }
130
131    /**
132     * Gets if this tag should be formatted as a block (or as inline)
133     *
134     * @return if should be formatted as block or inline
135     */
136    public boolean formatAsBlock() {
137        return formatAsBlock;
138    }
139
140    /**
141     * Gets if this tag is an inline tag.
142     *
143     * @return if this tag is an inline tag.
144     */
145    public boolean isInline() {
146        return !isBlock;
147    }
148
149    /**
150     * Get if this is an empty tag
151     *
152     * @return if this is an empty tag
153     */
154    public boolean isEmpty() {
155        return empty;
156    }
157
158    /**
159     * Get if this tag is self-closing.
160     *
161     * @return if this tag should be output as self-closing.
162     */
163    public boolean isSelfClosing() {
164        return empty || selfClosing;
165    }
166
167    /**
168     * Get if this is a pre-defined tag, or was auto created on parsing.
169     *
170     * @return if a known tag
171     */
172    public boolean isKnownTag() {
173        return Tags.containsKey(tagName);
174    }
175
176    /**
177     * Check if this tagname is a known tag.
178     *
179     * @param tagName name of tag
180     * @return if known HTML tag
181     */
182    public static boolean isKnownTag(String tagName) {
183        return Tags.containsKey(tagName);
184    }
185
186    /**
187     * Get if this tag should preserve whitespace within child text nodes.
188     *
189     * @return if preserve whitespace
190     */
191    public boolean preserveWhitespace() {
192        return preserveWhitespace;
193    }
194
195    /**
196     * Get if this tag represents a control associated with a form. E.g. input, textarea, output
197     * @return if associated with a form
198     */
199    public boolean isFormListed() {
200        return formList;
201    }
202
203    /**
204     * Get if this tag represents an element that should be submitted with a form. E.g. input, option
205     * @return if submittable with a form
206     */
207    public boolean isFormSubmittable() {
208        return formSubmit;
209    }
210
211    Tag setSelfClosing() {
212        selfClosing = true;
213        return this;
214    }
215
216    @Override
217    public boolean equals(Object o) {
218        if (this == o) return true;
219        if (!(o instanceof Tag)) return false;
220
221        Tag tag = (Tag) o;
222
223        if (!tagName.equals(tag.tagName)) return false;
224        if (empty != tag.empty) return false;
225        if (formatAsBlock != tag.formatAsBlock) return false;
226        if (isBlock != tag.isBlock) return false;
227        if (preserveWhitespace != tag.preserveWhitespace) return false;
228        if (selfClosing != tag.selfClosing) return false;
229        if (formList != tag.formList) return false;
230        return formSubmit == tag.formSubmit;
231    }
232
233    @Override
234    public int hashCode() {
235        int result = tagName.hashCode();
236        result = 31 * result + (isBlock ? 1 : 0);
237        result = 31 * result + (formatAsBlock ? 1 : 0);
238        result = 31 * result + (empty ? 1 : 0);
239        result = 31 * result + (selfClosing ? 1 : 0);
240        result = 31 * result + (preserveWhitespace ? 1 : 0);
241        result = 31 * result + (formList ? 1 : 0);
242        result = 31 * result + (formSubmit ? 1 : 0);
243        return result;
244    }
245
246    @Override
247    public String toString() {
248        return tagName;
249    }
250
251    @Override
252    protected Tag clone() {
253        try {
254            return (Tag) super.clone();
255        } catch (CloneNotSupportedException e) {
256            throw new RuntimeException(e);
257        }
258    }
259
260    // internal static initialisers:
261    // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
262    private static final String[] blockTags = {
263            "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
264            "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
265            "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
266            "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
267            "td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
268            "svg", "math", "center", "template",
269            "dir", "applet", "marquee", "listing" // deprecated but still known / special handling
270    };
271    private static final String[] inlineTags = {
272            "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
273            "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "rtc", "a", "img", "br", "wbr", "map", "q",
274            "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
275            "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
276            "summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
277            "data", "bdi", "s", "strike", "nobr",
278            "rb", // deprecated but still known / special handling
279            "text", // in SVG NS
280            "mi", "mo", "msup", "mn", "mtext" // in MathML NS, to ensure inline
281    };
282    private static final String[] emptyTags = {
283            "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
284            "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track"
285    };
286    // todo - rework this to format contents as inline; and update html emitter in Element. Same output, just neater.
287    private static final String[] formatAsInlineTags = {
288            "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style",
289            "ins", "del", "s"
290    };
291    private static final String[] preserveWhitespaceTags = {
292            "pre", "plaintext", "title", "textarea"
293            // script is not here as it is a data node, which always preserve whitespace
294    };
295    // todo: I think we just need submit tags, and can scrub listed
296    private static final String[] formListedTags = {
297            "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
298    };
299    private static final String[] formSubmitTags = {
300            "input", "keygen", "object", "select", "textarea"
301    };
302
303    private static final Map<String, String[]> namespaces = new HashMap<>();
304    static {
305        namespaces.put(Parser.NamespaceMathml, new String[]{"math", "mi", "mo", "msup", "mn", "mtext"});
306        namespaces.put(Parser.NamespaceSvg, new String[]{"svg", "text"});
307        // We don't need absolute coverage here as other cases will be inferred by the HtmlTreeBuilder
308    }
309
310    private static void setupTags(String[] tagNames, Consumer<Tag> tagModifier) {
311        for (String tagName : tagNames) {
312            Tag tag = Tags.get(tagName);
313            if (tag == null) {
314                tag = new Tag(tagName, Parser.NamespaceHtml);
315                Tags.put(tag.tagName, tag);
316            }
317            tagModifier.accept(tag);
318        }
319    }
320
321    static {
322        setupTags(blockTags, tag -> {
323            tag.isBlock = true;
324            tag.formatAsBlock = true;
325        });
326
327        setupTags(inlineTags, tag -> {
328            tag.isBlock = false;
329            tag.formatAsBlock = false;
330        });
331
332        setupTags(emptyTags, tag -> tag.empty = true);
333        setupTags(formatAsInlineTags, tag -> tag.formatAsBlock = false);
334        setupTags(preserveWhitespaceTags, tag -> tag.preserveWhitespace = true);
335        setupTags(formListedTags, tag -> tag.formList = true);
336        setupTags(formSubmitTags, tag -> tag.formSubmit = true);
337        for (Map.Entry<String, String[]> ns : namespaces.entrySet()) {
338            setupTags(ns.getValue(), tag -> tag.namespace = ns.getKey());
339        }
340    }
341}