001package org.jsoup.select; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.internal.StringUtil; 005import org.jsoup.nodes.Comment; 006import org.jsoup.nodes.DataNode; 007import org.jsoup.nodes.Element; 008import org.jsoup.nodes.FormElement; 009import org.jsoup.nodes.Node; 010import org.jsoup.nodes.TextNode; 011import org.jspecify.annotations.Nullable; 012 013import java.util.ArrayList; 014import java.util.Arrays; 015import java.util.Collection; 016import java.util.HashSet; 017import java.util.Iterator; 018import java.util.LinkedHashSet; 019import java.util.List; 020import java.util.function.Predicate; 021import java.util.function.UnaryOperator; 022 023/** 024 A list of {@link Element}s, with methods that act on every element in the list. 025 <p>To get an {@code Elements} object, use the {@link Element#select(String)} method.</p> 026 <p>Methods that {@link #set(int, Element) set}, {@link #remove(int) remove}, or {@link #replaceAll(UnaryOperator) 027 replace} Elements in the list will also act on the underlying {@link org.jsoup.nodes.Document DOM}.</p> 028 029 @author Jonathan Hedley, jonathan@hedley.net */ 030public class Elements extends ArrayList<Element> { 031 public Elements() { 032 } 033 034 public Elements(int initialCapacity) { 035 super(initialCapacity); 036 } 037 038 public Elements(Collection<Element> elements) { 039 super(elements); 040 } 041 042 public Elements(List<Element> elements) { 043 super(elements); 044 } 045 046 public Elements(Element... elements) { 047 super(Arrays.asList(elements)); 048 } 049 050 /** 051 * Creates a deep copy of these elements. 052 * @return a deep copy 053 */ 054 @Override 055 public Elements clone() { 056 Elements clone = new Elements(size()); 057 058 for(Element e : this) 059 clone.add(e.clone()); 060 061 return clone; 062 } 063 064 // attribute methods 065 /** 066 Get an attribute value from the first matched element that has the attribute. 067 @param attributeKey The attribute key. 068 @return The attribute value from the first matched element that has the attribute. If no elements were matched (isEmpty() == true), 069 or if the no elements have the attribute, returns empty string. 070 @see #hasAttr(String) 071 */ 072 public String attr(String attributeKey) { 073 for (Element element : this) { 074 if (element.hasAttr(attributeKey)) 075 return element.attr(attributeKey); 076 } 077 return ""; 078 } 079 080 /** 081 Checks if any of the matched elements have this attribute defined. 082 @param attributeKey attribute key 083 @return true if any of the elements have the attribute; false if none do. 084 */ 085 public boolean hasAttr(String attributeKey) { 086 for (Element element : this) { 087 if (element.hasAttr(attributeKey)) 088 return true; 089 } 090 return false; 091 } 092 093 /** 094 * Get the attribute value for each of the matched elements. If an element does not have this attribute, no value is 095 * included in the result set for that element. 096 * @param attributeKey the attribute name to return values for. You can add the {@code abs:} prefix to the key to 097 * get absolute URLs from relative URLs, e.g.: {@code doc.select("a").eachAttr("abs:href")} . 098 * @return a list of each element's attribute value for the attribute 099 */ 100 public List<String> eachAttr(String attributeKey) { 101 List<String> attrs = new ArrayList<>(size()); 102 for (Element element : this) { 103 if (element.hasAttr(attributeKey)) 104 attrs.add(element.attr(attributeKey)); 105 } 106 return attrs; 107 } 108 109 /** 110 * Set an attribute on all matched elements. 111 * @param attributeKey attribute key 112 * @param attributeValue attribute value 113 * @return this 114 */ 115 public Elements attr(String attributeKey, String attributeValue) { 116 for (Element element : this) { 117 element.attr(attributeKey, attributeValue); 118 } 119 return this; 120 } 121 122 /** 123 * Remove an attribute from every matched element. 124 * @param attributeKey The attribute to remove. 125 * @return this (for chaining) 126 */ 127 public Elements removeAttr(String attributeKey) { 128 for (Element element : this) { 129 element.removeAttr(attributeKey); 130 } 131 return this; 132 } 133 134 /** 135 Add the class name to every matched element's {@code class} attribute. 136 @param className class name to add 137 @return this 138 */ 139 public Elements addClass(String className) { 140 for (Element element : this) { 141 element.addClass(className); 142 } 143 return this; 144 } 145 146 /** 147 Remove the class name from every matched element's {@code class} attribute, if present. 148 @param className class name to remove 149 @return this 150 */ 151 public Elements removeClass(String className) { 152 for (Element element : this) { 153 element.removeClass(className); 154 } 155 return this; 156 } 157 158 /** 159 Toggle the class name on every matched element's {@code class} attribute. 160 @param className class name to add if missing, or remove if present, from every element. 161 @return this 162 */ 163 public Elements toggleClass(String className) { 164 for (Element element : this) { 165 element.toggleClass(className); 166 } 167 return this; 168 } 169 170 /** 171 Determine if any of the matched elements have this class name set in their {@code class} attribute. 172 @param className class name to check for 173 @return true if any do, false if none do 174 */ 175 public boolean hasClass(String className) { 176 for (Element element : this) { 177 if (element.hasClass(className)) 178 return true; 179 } 180 return false; 181 } 182 183 /** 184 * Get the form element's value of the first matched element. 185 * @return The form element's value, or empty if not set. 186 * @see Element#val() 187 */ 188 public String val() { 189 if (size() > 0) 190 //noinspection ConstantConditions 191 return first().val(); // first() != null as size() > 0 192 else 193 return ""; 194 } 195 196 /** 197 * Set the form element's value in each of the matched elements. 198 * @param value The value to set into each matched element 199 * @return this (for chaining) 200 */ 201 public Elements val(String value) { 202 for (Element element : this) 203 element.val(value); 204 return this; 205 } 206 207 /** 208 * Get the combined text of all the matched elements. 209 * <p> 210 * Note that it is possible to get repeats if the matched elements contain both parent elements and their own 211 * children, as the Element.text() method returns the combined text of a parent and all its children. 212 * @return string of all text: unescaped and no HTML. 213 * @see Element#text() 214 * @see #eachText() 215 */ 216 public String text() { 217 StringBuilder sb = StringUtil.borrowBuilder(); 218 for (Element element : this) { 219 if (sb.length() != 0) 220 sb.append(" "); 221 sb.append(element.text()); 222 } 223 return StringUtil.releaseBuilder(sb); 224 } 225 226 /** 227 Test if any matched Element has any text content, that is not just whitespace. 228 @return true if any element has non-blank text content. 229 @see Element#hasText() 230 */ 231 public boolean hasText() { 232 for (Element element: this) { 233 if (element.hasText()) 234 return true; 235 } 236 return false; 237 } 238 239 /** 240 * Get the text content of each of the matched elements. If an element has no text, then it is not included in the 241 * result. 242 * @return A list of each matched element's text content. 243 * @see Element#text() 244 * @see Element#hasText() 245 * @see #text() 246 */ 247 public List<String> eachText() { 248 ArrayList<String> texts = new ArrayList<>(size()); 249 for (Element el: this) { 250 if (el.hasText()) 251 texts.add(el.text()); 252 } 253 return texts; 254 } 255 256 /** 257 * Get the combined inner HTML of all matched elements. 258 * @return string of all element's inner HTML. 259 * @see #text() 260 * @see #outerHtml() 261 */ 262 public String html() { 263 StringBuilder sb = StringUtil.borrowBuilder(); 264 for (Element element : this) { 265 if (sb.length() != 0) 266 sb.append("\n"); 267 sb.append(element.html()); 268 } 269 return StringUtil.releaseBuilder(sb); 270 } 271 272 /** 273 * Get the combined outer HTML of all matched elements. 274 * @return string of all element's outer HTML. 275 * @see #text() 276 * @see #html() 277 */ 278 public String outerHtml() { 279 StringBuilder sb = StringUtil.borrowBuilder(); 280 for (Element element : this) { 281 if (sb.length() != 0) 282 sb.append("\n"); 283 sb.append(element.outerHtml()); 284 } 285 return StringUtil.releaseBuilder(sb); 286 } 287 288 /** 289 * Get the combined outer HTML of all matched elements. Alias of {@link #outerHtml()}. 290 * @return string of all element's outer HTML. 291 * @see #text() 292 * @see #html() 293 */ 294 @Override 295 public String toString() { 296 return outerHtml(); 297 } 298 299 /** 300 * Update (rename) the tag name of each matched element. For example, to change each {@code <i>} to a {@code <em>}, do 301 * {@code doc.select("i").tagName("em");} 302 * 303 * @param tagName the new tag name 304 * @return this, for chaining 305 * @see Element#tagName(String) 306 */ 307 public Elements tagName(String tagName) { 308 for (Element element : this) { 309 element.tagName(tagName); 310 } 311 return this; 312 } 313 314 /** 315 * Set the inner HTML of each matched element. 316 * @param html HTML to parse and set into each matched element. 317 * @return this, for chaining 318 * @see Element#html(String) 319 */ 320 public Elements html(String html) { 321 for (Element element : this) { 322 element.html(html); 323 } 324 return this; 325 } 326 327 /** 328 * Add the supplied HTML to the start of each matched element's inner HTML. 329 * @param html HTML to add inside each element, before the existing HTML 330 * @return this, for chaining 331 * @see Element#prepend(String) 332 */ 333 public Elements prepend(String html) { 334 for (Element element : this) { 335 element.prepend(html); 336 } 337 return this; 338 } 339 340 /** 341 * Add the supplied HTML to the end of each matched element's inner HTML. 342 * @param html HTML to add inside each element, after the existing HTML 343 * @return this, for chaining 344 * @see Element#append(String) 345 */ 346 public Elements append(String html) { 347 for (Element element : this) { 348 element.append(html); 349 } 350 return this; 351 } 352 353 /** 354 * Insert the supplied HTML before each matched element's outer HTML. 355 * @param html HTML to insert before each element 356 * @return this, for chaining 357 * @see Element#before(String) 358 */ 359 public Elements before(String html) { 360 for (Element element : this) { 361 element.before(html); 362 } 363 return this; 364 } 365 366 /** 367 * Insert the supplied HTML after each matched element's outer HTML. 368 * @param html HTML to insert after each element 369 * @return this, for chaining 370 * @see Element#after(String) 371 */ 372 public Elements after(String html) { 373 for (Element element : this) { 374 element.after(html); 375 } 376 return this; 377 } 378 379 /** 380 Wrap the supplied HTML around each matched elements. For example, with HTML 381 {@code <p><b>This</b> is <b>Jsoup</b></p>}, 382 <code>doc.select("b").wrap("<i></i>");</code> 383 becomes {@code <p><i><b>This</b></i> is <i><b>jsoup</b></i></p>} 384 @param html HTML to wrap around each element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 385 @return this (for chaining) 386 @see Element#wrap 387 */ 388 public Elements wrap(String html) { 389 Validate.notEmpty(html); 390 for (Element element : this) { 391 element.wrap(html); 392 } 393 return this; 394 } 395 396 /** 397 * Removes the matched elements from the DOM, and moves their children up into their parents. This has the effect of 398 * dropping the elements but keeping their children. 399 * <p> 400 * This is useful for e.g removing unwanted formatting elements but keeping their contents. 401 * </p> 402 * 403 * E.g. with HTML: <p>{@code <div><font>One</font> <font><a href="/">Two</a></font></div>}</p> 404 * <p>{@code doc.select("font").unwrap();}</p> 405 * <p>HTML = {@code <div>One <a href="/">Two</a></div>}</p> 406 * 407 * @return this (for chaining) 408 * @see Node#unwrap 409 */ 410 public Elements unwrap() { 411 for (Element element : this) { 412 element.unwrap(); 413 } 414 return this; 415 } 416 417 /** 418 * Empty (remove all child nodes from) each matched element. This is similar to setting the inner HTML of each 419 * element to nothing. 420 * <p> 421 * E.g. HTML: {@code <div><p>Hello <b>there</b></p> <p>now</p></div>}<br> 422 * <code>doc.select("p").empty();</code><br> 423 * HTML = {@code <div><p></p> <p></p></div>} 424 * @return this, for chaining 425 * @see Element#empty() 426 * @see #remove() 427 */ 428 public Elements empty() { 429 for (Element element : this) { 430 element.empty(); 431 } 432 return this; 433 } 434 435 /** 436 * Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing. 437 * <p>The elements will still be retained in this list, in case further processing of them is desired.</p> 438 * <p> 439 * E.g. HTML: {@code <div><p>Hello</p> <p>there</p> <img /></div>}<br> 440 * <code>doc.select("p").remove();</code><br> 441 * HTML = {@code <div> <img /></div>} 442 * <p> 443 * Note that this method should not be used to clean user-submitted HTML; rather, use {@link org.jsoup.safety.Cleaner} to clean HTML. 444 * @return this, for chaining 445 * @see Element#empty() 446 * @see #empty() 447 * @see #clear() 448 */ 449 public Elements remove() { 450 for (Element element : this) { 451 element.remove(); 452 } 453 return this; 454 } 455 456 // filters 457 458 /** 459 * Find matching elements within this element list. 460 * @param query A {@link Selector} query 461 * @return the filtered list of elements, or an empty list if none match. 462 */ 463 public Elements select(String query) { 464 return Selector.select(query, this); 465 } 466 467 /** 468 * Remove elements from this list that match the {@link Selector} query. 469 * <p> 470 * E.g. HTML: {@code <div class=logo>One</div> <div>Two</div>}<br> 471 * <code>Elements divs = doc.select("div").not(".logo");</code><br> 472 * Result: {@code divs: [<div>Two</div>]} 473 * <p> 474 * @param query the selector query whose results should be removed from these elements 475 * @return a new elements list that contains only the filtered results 476 */ 477 public Elements not(String query) { 478 Elements out = Selector.select(query, this); 479 return Selector.filterOut(this, out); 480 } 481 482 /** 483 * Get the <i>nth</i> matched element as an Elements object. 484 * <p> 485 * See also {@link #get(int)} to retrieve an Element. 486 * @param index the (zero-based) index of the element in the list to retain 487 * @return Elements containing only the specified element, or, if that element did not exist, an empty list. 488 */ 489 public Elements eq(int index) { 490 return size() > index ? new Elements(get(index)) : new Elements(); 491 } 492 493 /** 494 * Test if any of the matched elements match the supplied query. 495 * @param query A selector 496 * @return true if at least one element in the list matches the query. 497 */ 498 public boolean is(String query) { 499 Evaluator eval = QueryParser.parse(query); 500 for (Element e : this) { 501 if (e.is(eval)) 502 return true; 503 } 504 return false; 505 } 506 507 /** 508 * Get the immediate next element sibling of each element in this list. 509 * @return next element siblings. 510 */ 511 public Elements next() { 512 return siblings(null, true, false); 513 } 514 515 /** 516 * Get the immediate next element sibling of each element in this list, filtered by the query. 517 * @param query CSS query to match siblings against 518 * @return next element siblings. 519 */ 520 public Elements next(String query) { 521 return siblings(query, true, false); 522 } 523 524 /** 525 * Get each of the following element siblings of each element in this list. 526 * @return all following element siblings. 527 */ 528 public Elements nextAll() { 529 return siblings(null, true, true); 530 } 531 532 /** 533 * Get each of the following element siblings of each element in this list, that match the query. 534 * @param query CSS query to match siblings against 535 * @return all following element siblings. 536 */ 537 public Elements nextAll(String query) { 538 return siblings(query, true, true); 539 } 540 541 /** 542 * Get the immediate previous element sibling of each element in this list. 543 * @return previous element siblings. 544 */ 545 public Elements prev() { 546 return siblings(null, false, false); 547 } 548 549 /** 550 * Get the immediate previous element sibling of each element in this list, filtered by the query. 551 * @param query CSS query to match siblings against 552 * @return previous element siblings. 553 */ 554 public Elements prev(String query) { 555 return siblings(query, false, false); 556 } 557 558 /** 559 * Get each of the previous element siblings of each element in this list. 560 * @return all previous element siblings. 561 */ 562 public Elements prevAll() { 563 return siblings(null, false, true); 564 } 565 566 /** 567 * Get each of the previous element siblings of each element in this list, that match the query. 568 * @param query CSS query to match siblings against 569 * @return all previous element siblings. 570 */ 571 public Elements prevAll(String query) { 572 return siblings(query, false, true); 573 } 574 575 private Elements siblings(@Nullable String query, boolean next, boolean all) { 576 Elements els = new Elements(); 577 Evaluator eval = query != null? QueryParser.parse(query) : null; 578 for (Element e : this) { 579 do { 580 Element sib = next ? e.nextElementSibling() : e.previousElementSibling(); 581 if (sib == null) break; 582 if (eval == null) 583 els.add(sib); 584 else if (sib.is(eval)) 585 els.add(sib); 586 e = sib; 587 } while (all); 588 } 589 return els; 590 } 591 592 /** 593 * Get all of the parents and ancestor elements of the matched elements. 594 * @return all of the parents and ancestor elements of the matched elements 595 */ 596 public Elements parents() { 597 HashSet<Element> combo = new LinkedHashSet<>(); 598 for (Element e: this) { 599 combo.addAll(e.parents()); 600 } 601 return new Elements(combo); 602 } 603 604 // list-like methods 605 /** 606 Get the first matched element. 607 @return The first matched element, or <code>null</code> if contents is empty. 608 */ 609 public @Nullable Element first() { 610 return isEmpty() ? null : get(0); 611 } 612 613 /** 614 Get the last matched element. 615 @return The last matched element, or <code>null</code> if contents is empty. 616 */ 617 public @Nullable Element last() { 618 return isEmpty() ? null : get(size() - 1); 619 } 620 621 /** 622 * Perform a depth-first traversal on each of the selected elements. 623 * @param nodeVisitor the visitor callbacks to perform on each node 624 * @return this, for chaining 625 */ 626 public Elements traverse(NodeVisitor nodeVisitor) { 627 NodeTraversor.traverse(nodeVisitor, this); 628 return this; 629 } 630 631 /** 632 * Perform a depth-first filtering on each of the selected elements. 633 * @param nodeFilter the filter callbacks to perform on each node 634 * @return this, for chaining 635 */ 636 public Elements filter(NodeFilter nodeFilter) { 637 NodeTraversor.filter(nodeFilter, this); 638 return this; 639 } 640 641 /** 642 * Get the {@link FormElement} forms from the selected elements, if any. 643 * @return a list of {@link FormElement}s pulled from the matched elements. The list will be empty if the elements contain 644 * no forms. 645 */ 646 public List<FormElement> forms() { 647 ArrayList<FormElement> forms = new ArrayList<>(); 648 for (Element el: this) 649 if (el instanceof FormElement) 650 forms.add((FormElement) el); 651 return forms; 652 } 653 654 /** 655 * Get {@link Comment} nodes that are direct child nodes of the selected elements. 656 * @return Comment nodes, or an empty list if none. 657 */ 658 public List<Comment> comments() { 659 return childNodesOfType(Comment.class); 660 } 661 662 /** 663 * Get {@link TextNode} nodes that are direct child nodes of the selected elements. 664 * @return TextNode nodes, or an empty list if none. 665 */ 666 public List<TextNode> textNodes() { 667 return childNodesOfType(TextNode.class); 668 } 669 670 /** 671 * Get {@link DataNode} nodes that are direct child nodes of the selected elements. DataNode nodes contain the 672 * content of tags such as {@code script}, {@code style} etc and are distinct from {@link TextNode}s. 673 * @return Comment nodes, or an empty list if none. 674 */ 675 public List<DataNode> dataNodes() { 676 return childNodesOfType(DataNode.class); 677 } 678 679 private <T extends Node> List<T> childNodesOfType(Class<T> tClass) { 680 ArrayList<T> nodes = new ArrayList<>(); 681 for (Element el: this) { 682 for (int i = 0; i < el.childNodeSize(); i++) { 683 Node node = el.childNode(i); 684 if (tClass.isInstance(node)) 685 nodes.add(tClass.cast(node)); 686 } 687 } 688 return nodes; 689 } 690 691 // list methods that update the DOM: 692 693 /** 694 Replace the Element at the specified index in this list, and in the DOM. 695 * @param index index of the element to replace 696 * @param element element to be stored at the specified position 697 * @return the old Element at this index 698 * @since 1.17.1 699 */ 700 @Override public Element set(int index, Element element) { 701 Validate.notNull(element); 702 Element old = super.set(index, element); 703 old.replaceWith(element); 704 return old; 705 } 706 707 /** 708 Remove the Element at the specified index in this ist, and from the DOM. 709 * @param index the index of the element to be removed 710 * @return the old element at this index 711 * @since 1.17.1 712 */ 713 @Override public Element remove(int index) { 714 Element old = super.remove(index); 715 old.remove(); 716 return old; 717 } 718 719 /** 720 Remove the specified Element from this list, and from th DOM 721 * @param o element to be removed from this list, if present 722 * @return if this list contained the Element 723 * @since 1.17.1 724 */ 725 @Override public boolean remove(Object o) { 726 int index = super.indexOf(o); 727 if (index == -1) { 728 return false; 729 } else { 730 remove(index); 731 return true; 732 } 733 } 734 735 /** 736 Removes all the elements from this list, and each of them from the DOM. 737 * @since 1.17.1 738 * @see #remove() 739 */ 740 @Override public void clear() { 741 remove(); 742 super.clear(); 743 } 744 745 /** 746 Removes from this list, and from the DOM, each of the elements that are contained in the specified collection and 747 are in this list. 748 * @param c collection containing elements to be removed from this list 749 * @return {@code true} if elements were removed from this list 750 * @since 1.17.1 751 */ 752 @Override public boolean removeAll(Collection<?> c) { 753 boolean anyRemoved = false; 754 for (Object o : c) { 755 anyRemoved |= this.remove(o); 756 } 757 return anyRemoved; 758 } 759 760 /** 761 Retain in this list, and in the DOM, only the elements that are in the specified collection and are in this list. 762 In other words, remove elements from this list and the DOM any item that is in this list but not in the specified 763 collection. 764 * @param c collection containing elements to be retained in this list 765 * @return {@code true} if elements were removed from this list 766 * @since 1.17.1 767 */ 768 @Override public boolean retainAll(Collection<?> c) { 769 boolean anyRemoved = false; 770 for (Iterator<Element> it = this.iterator(); it.hasNext(); ) { 771 Element el = it.next(); 772 if (!c.contains(el)) { 773 it.remove(); 774 anyRemoved = true; 775 } 776 } 777 return anyRemoved; 778 } 779 780 /** 781 Remove from the list, and from the DOM, all elements in this list that mach the given filter. 782 * @param filter a predicate which returns {@code true} for elements to be removed 783 * @return {@code true} if elements were removed from this list 784 * @since 1.17.1 785 */ 786 @Override public boolean removeIf(Predicate<? super Element> filter) { 787 boolean anyRemoved = false; 788 for (Iterator<Element> it = this.iterator(); it.hasNext(); ) { 789 Element el = it.next(); 790 if (filter.test(el)) { 791 it.remove(); 792 anyRemoved = true; 793 } 794 } 795 return anyRemoved; 796 } 797 798 /** 799 Replace each element in this list with the result of the operator, and update the DOM. 800 * @param operator the operator to apply to each element 801 * @since 1.17.1 802 */ 803 @Override public void replaceAll(UnaryOperator<Element> operator) { 804 for (int i = 0; i < this.size(); i++) { 805 this.set(i, operator.apply(this.get(i))); 806 } 807 } 808}