package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import org.jsoup.internal.StringUtil;

import java.io.IOException;

/**
 * A text node.
 *
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class TextNode extends LeafNode {
    /**
     * Create a new TextNode representing the supplied (unencoded) text.
     *
     * @param text raw text
     * @see #createFromEncoded(String)
     */
    public TextNode(String text) {
        value = text;
    }

    /**
     * Get the node name of this node.
     *
     * @return always {@code "#text"}
     */
    @Override
    public String nodeName() {
        return "#text";
    }

    /**
     * Get the text content of this text node.
     *
     * @return Unencoded, normalised text.
     * @see TextNode#getWholeText()
     */
    public String text() {
        return StringUtil.normaliseWhitespace(getWholeText());
    }

    /**
     * Set the text content of this text node.
     *
     * @param text unencoded text
     * @return this, for chaining
     */
    public TextNode text(String text) {
        coreValue(text);
        return this;
    }

    /**
     * Get the (unencoded) text of this text node, including any newlines and spaces present in the original.
     *
     * @return text
     */
    public String getWholeText() {
        return coreValue();
    }

    /**
     * Test if this text node is blank -- that is, empty or only whitespace (including newlines).
     *
     * @return true if this text node is empty or only whitespace, false if it contains any text content.
     */
    public boolean isBlank() {
        return StringUtil.isBlank(coreValue());
    }

    /**
     * Split this text node into two nodes at the specified string offset. After splitting, this node will contain the
     * original text up to the offset, and will have a new text node sibling containing the text after the offset.
     * <p>
     * The offset may be anywhere from {@code 0} up to and including the current text length; splitting at the text
     * length leaves this node unchanged and yields an empty tail node. An offset outside that range is rejected via
     * {@link Validate#isTrue(boolean, String)}.
     * </p>
     *
     * @param offset string offset point to split node at.
     * @return the newly created text node containing the text after the offset.
     */
    public TextNode splitText(int offset) {
        final String text = coreValue();
        Validate.isTrue(offset >= 0, "Split offset must not be negative");
        // An offset equal to the length is valid (empty tail), matching the message below.
        Validate.isTrue(offset <= text.length(), "Split offset must not be greater than current text length");

        final String head = text.substring(0, offset);
        final String tail = text.substring(offset);
        text(head);

        final TextNode tailNode = new TextNode(tail);
        // A detached node has nowhere to insert the tail; it is simply returned to the caller.
        if (parentNode != null) {
            parentNode.addChildren(siblingIndex() + 1, tailNode);
        }

        return tailNode;
    }

    /**
     * Write this node's text to {@code accum}, escaped via {@link Entities}.
     * <p>
     * When pretty-printing is on and the parent does not preserve whitespace, whitespace is normalised, leading and
     * trailing whitespace may be trimmed depending on the node's position in a block-like parent, blank text that
     * would only duplicate upcoming indentation is skipped entirely, and an indent may be emitted before the text.
     * </p>
     *
     * @param accum destination for the output
     * @param depth current nesting depth, passed through to {@code indent}
     * @param out output settings controlling pretty-printing and outline mode
     * @throws IOException if writing to {@code accum} fails
     */
    @Override
    void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
        final boolean prettyPrint = out.prettyPrint();
        final Element parent = parentNode instanceof Element ? ((Element) parentNode) : null;
        final boolean normaliseWhite = prettyPrint && !Element.preserveWhitespace(parentNode);
        final boolean trimLikeBlock = parent != null && (parent.tag().isBlock() || parent.tag().formatAsBlock());
        boolean trimLeading = false;
        boolean trimTrailing = false;

        if (normaliseWhite) {
            final Node next = nextSibling();
            final Node prev = previousSibling();
            final boolean isBlank = isBlank();

            trimLeading = (trimLikeBlock && siblingIndex == 0) || parentNode instanceof Document;
            trimTrailing = trimLikeBlock && next == null;

            // if this text is just whitespace, and the next node will cause an indent, skip this text:
            final boolean couldSkip =
                (next instanceof Element && ((Element) next).shouldIndent(out)) // next will indent
                || (next instanceof TextNode && ((TextNode) next).isBlank()) // next is blank text, from re-parenting
                // br is a bit special - make sure we don't get a dangling blank line, but not a block otherwise
                // wraps in head
                || (prev instanceof Element && (((Element) prev).isBlock() || prev.nameIs("br")));
            if (couldSkip && isBlank) {
                return;
            }

            final boolean wantsIndent =
                (prev == null && parent != null && parent.tag().formatAsBlock() && !isBlank)
                || (out.outline() && !siblingNodes().isEmpty() && !isBlank)
                // special case wrap on inline <br> - doesn't make sense as a block tag
                || (prev != null && prev.nameIs("br"));
            if (wantsIndent) {
                indent(accum, depth, out);
            }
        }

        // NOTE(review): the literal false is presumably the "in attribute" flag of Entities.escape - confirm.
        Entities.escape(accum, coreValue(), out, false, normaliseWhite, trimLeading, trimTrailing);
    }

    /**
     * Text nodes have no closing markup, so nothing is written here.
     */
    @Override
    void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {}

    /**
     * Get the outer HTML of this node.
     *
     * @return the result of {@code outerHtml()}
     */
    @Override
    public String toString() {
        return outerHtml();
    }

    /**
     * Create a copy of this node, typed as a TextNode.
     *
     * @return the clone produced by {@code super.clone()}
     */
    @Override
    public TextNode clone() {
        return (TextNode) super.clone();
    }

    /**
     * Create a new TextNode from HTML encoded (aka escaped) data.
     *
     * @param encodedText Text containing encoded HTML (e.g. {@code &lt;})
     * @return TextNode containing unencoded data (e.g. {@code <})
     */
    public static TextNode createFromEncoded(String encodedText) {
        final String text = Entities.unescape(encodedText);
        return new TextNode(text);
    }

    /**
     * Normalise the whitespace in the supplied text by delegating to {@link StringUtil#normaliseWhitespace(String)}.
     *
     * @param text text to normalise
     * @return the normalised text
     */
    static String normaliseWhitespace(String text) {
        return StringUtil.normaliseWhitespace(text);
    }

    /**
     * Remove any run of whitespace (regex {@code \s}) from the start of the text.
     *
     * @param text text to strip
     * @return the text without leading whitespace
     */
    static String stripLeadingWhitespace(String text) {
        return text.replaceFirst("^\\s+", "");
    }

    /**
     * Test whether the builder's final character is a space. Only the plain space character {@code ' '} counts;
     * other whitespace such as tabs or newlines does not.
     *
     * @param sb builder to inspect
     * @return true if the builder is non-empty and ends with a space
     */
    static boolean lastCharIsWhitespace(StringBuilder sb) {
        return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' ';
    }
}