// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "components/autofill/content/renderer/form_autofill_util.h" #include #include "base/command_line.h" #include "base/logging.h" #include "base/memory/scoped_vector.h" #include "base/metrics/field_trial.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "components/autofill/core/common/autofill_switches.h" #include "components/autofill/core/common/form_data.h" #include "components/autofill/core/common/form_field_data.h" #include "components/autofill/core/common/web_element_descriptor.h" #include "third_party/WebKit/public/platform/WebString.h" #include "third_party/WebKit/public/platform/WebVector.h" #include "third_party/WebKit/public/web/WebDocument.h" #include "third_party/WebKit/public/web/WebElement.h" #include "third_party/WebKit/public/web/WebExceptionCode.h" #include "third_party/WebKit/public/web/WebFormControlElement.h" #include "third_party/WebKit/public/web/WebFormElement.h" #include "third_party/WebKit/public/web/WebFrame.h" #include "third_party/WebKit/public/web/WebInputElement.h" #include "third_party/WebKit/public/web/WebLabelElement.h" #include "third_party/WebKit/public/web/WebNode.h" #include "third_party/WebKit/public/web/WebNodeList.h" #include "third_party/WebKit/public/web/WebOptionElement.h" #include "third_party/WebKit/public/web/WebSelectElement.h" #include "third_party/WebKit/public/web/WebTextAreaElement.h" using blink::WebDocument; using blink::WebElement; using blink::WebExceptionCode; using blink::WebFormControlElement; using blink::WebFormElement; using blink::WebFrame; using blink::WebInputElement; using blink::WebLabelElement; using blink::WebNode; using blink::WebNodeList; using blink::WebOptionElement; using blink::WebSelectElement; using blink::WebTextAreaElement; using blink::WebString; using blink::WebVector; 
namespace autofill { namespace { // The maximum length allowed for form data. const size_t kMaxDataLength = 1024; // A bit field mask for FillForm functions to not fill some fields. enum FieldFilterMask { FILTER_NONE = 0, FILTER_DISABLED_ELEMENTS = 1 << 0, FILTER_READONLY_ELEMENTS = 1 << 1, FILTER_NON_FOCUSABLE_ELEMENTS = 1 << 2, FILTER_ALL_NON_EDITIABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS | FILTER_NON_FOCUSABLE_ELEMENTS, }; bool IsOptionElement(const WebElement& element) { CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option")); return element.hasTagName(kOption); } bool IsScriptElement(const WebElement& element) { CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script")); return element.hasTagName(kScript); } bool IsNoScriptElement(const WebElement& element) { CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript")); return element.hasTagName(kNoScript); } bool HasTagName(const WebNode& node, const blink::WebString& tag) { return node.isElementNode() && node.toConst().hasHTMLTagName(tag); } bool IsAutofillableElement(const WebFormControlElement& element) { const WebInputElement* input_element = toWebInputElement(&element); return IsAutofillableInputElement(input_element) || IsSelectElement(element) || IsTextAreaElement(element); } // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement. bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) { return input_element.autoComplete(); } // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed // to a single space. If |force_whitespace| is true, then the resulting string // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the // result includes a space only if |prefix| has trailing whitespace or |suffix| // has leading whitespace. 
// A few examples: // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar" // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar" // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar" // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar" // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar" // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar" // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar " // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar " const base::string16 CombineAndCollapseWhitespace( const base::string16& prefix, const base::string16& suffix, bool force_whitespace) { base::string16 prefix_trimmed; TrimPositions prefix_trailing_whitespace = TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed); // Recursively compute the children's text. base::string16 suffix_trimmed; TrimPositions suffix_leading_whitespace = TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed); if (prefix_trailing_whitespace || suffix_leading_whitespace || force_whitespace) { return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed; } else { return prefix_trimmed + suffix_trimmed; } } // This is a helper function for the FindChildText() function (see below). // Search depth is limited with the |depth| parameter. base::string16 FindChildTextInner(const WebNode& node, int depth) { if (depth <= 0 || node.isNull()) return base::string16(); // Skip over comments. if (node.nodeType() == WebNode::CommentNode) return FindChildTextInner(node.nextSibling(), depth - 1); if (node.nodeType() != WebNode::ElementNode && node.nodeType() != WebNode::TextNode) return base::string16(); // Ignore elements known not to contain inferable labels. 
if (node.isElementNode()) { const WebElement element = node.toConst(); if (IsOptionElement(element) || IsScriptElement(element) || IsNoScriptElement(element) || (element.isFormControlElement() && IsAutofillableElement(element.toConst()))) { return base::string16(); } } // Extract the text exactly at this node. base::string16 node_text = node.nodeValue(); // Recursively compute the children's text. // Preserve inter-element whitespace separation. base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); // Recursively compute the siblings' text. // Again, preserve inter-element whitespace separation. base::string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); return node_text; } // Returns the aggregated values of the descendants of |element| that are // non-empty text nodes. This is a faster alternative to |innerText()| for // performance critical operations. It does a full depth-first search so can be // used when the structure is not directly known. However, unlike with // |innerText()|, the search depth and breadth are limited to a fixed threshold. // Whitespace is trimmed from text accumulated at descendant nodes. base::string16 FindChildText(const WebNode& node) { if (node.isTextNode()) return node.nodeValue(); WebNode child = node.firstChild(); const int kChildSearchDepth = 10; base::string16 node_text = FindChildTextInner(child, kChildSearchDepth); TrimWhitespace(node_text, TRIM_ALL, &node_text); return node_text; } // Helper for |InferLabelForElement()| that infers a label, if possible, from // a previous sibling of |element|, // e.g. Some Text // or Some Text // or

Some Text

// or // or Some Text // or Some Text
. base::string16 InferLabelFromPrevious(const WebFormControlElement& element) { base::string16 inferred_label; WebNode previous = element; while (true) { previous = previous.previousSibling(); if (previous.isNull()) break; // Skip over comments. WebNode::NodeType node_type = previous.nodeType(); if (node_type == WebNode::CommentNode) continue; // Otherwise, only consider normal HTML elements and their contents. if (node_type != WebNode::TextNode && node_type != WebNode::ElementNode) break; // A label might be split across multiple "lightweight" nodes. // Coalesce any text contained in multiple consecutive // (a) plain text nodes or // (b) inline HTML elements that are essentially equivalent to text nodes. CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b")); CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong")); CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span")); CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font")); if (previous.isTextNode() || HasTagName(previous, kBold) || HasTagName(previous, kStrong) || HasTagName(previous, kSpan) || HasTagName(previous, kFont)) { base::string16 value = FindChildText(previous); // A text node's value will be empty if it is for a line break. bool add_space = previous.isTextNode() && value.empty(); inferred_label = CombineAndCollapseWhitespace(value, inferred_label, add_space); continue; } // If we have identified a partial label and have reached a non-lightweight // element, consider the label to be complete. base::string16 trimmed_label; TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label); if (!trimmed_label.empty()) break; // and
tags often appear between the input element and its // label text, so skip over them. CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img")); CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br")); if (HasTagName(previous, kImage) || HasTagName(previous, kBreak)) continue; // We only expect

and

  // Helper for |InferLabelForElement()| that infers a label, if possible, from
  // an enclosing list item,
  // e.g. <li>Some Text<input ...></li>
  // Returns the aggregated text of the nearest "li" ancestor, or an empty
  // string if the upward walk ends without reaching one.
  base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
    WebNode parent = element.parentNode();
    CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
    // Climb through element ancestors until an <li> is found or the chain of
    // element nodes ends.
    while (!parent.isNull() && parent.isElementNode() &&
           !parent.to<WebElement>().hasTagName(kListItem)) {
      parent = parent.parentNode();
    }

    if (!parent.isNull() && HasTagName(parent, kListItem))
      return FindChildText(parent);

    return base::string16();
  }

  // Helper for |InferLabelForElement()| that infers a label, if possible, from
  // surrounding table structure,
  // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
  // or   <tr><th>Some Text</th><td><input ...></td></tr>
  // or   <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
  // or   <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
  base::string16 InferLabelFromTableColumn(
      const WebFormControlElement& element) {
    CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
    WebNode parent = element.parentNode();
    // Climb through element ancestors until a <td> is found or the chain of
    // element nodes ends.
    while (!parent.isNull() && parent.isElementNode() &&
           !parent.to<WebElement>().hasTagName(kTableCell)) {
      parent = parent.parentNode();
    }

    if (parent.isNull())
      return base::string16();

    // Check all previous siblings, skipping non-element nodes, until we find a
    // non-empty text block.
    base::string16 inferred_label;
    WebNode previous = parent.previousSibling();
    CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
    while (inferred_label.empty() && !previous.isNull()) {
      if (HasTagName(previous, kTableCell) ||
          HasTagName(previous, kTableHeader))
        inferred_label = FindChildText(previous);

      previous = previous.previousSibling();
    }

    return inferred_label;
  }

  // Helper for |InferLabelForElement()| that infers a label, if possible, from
  // surrounding table structure,
  // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
  base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
    CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
    WebNode parent = element.parentNode();
    // Climb through element ancestors until a <tr> is found or the chain of
    // element nodes ends.
    while (!parent.isNull() && parent.isElementNode() &&
           !parent.to<WebElement>().hasTagName(kTableRow)) {
      parent = parent.parentNode();
    }

    if (parent.isNull())
      return base::string16();

    // Check all previous siblings, skipping non-element nodes, until we find a
    // non-empty text block.
    base::string16 inferred_label;
    WebNode previous = parent.previousSibling();
    while (inferred_label.empty() && !previous.isNull()) {
      if (HasTagName(previous, kTableRow))
        inferred_label = FindChildText(previous);

      previous = previous.previousSibling();
    }

    return inferred_label;
  }

  // Helper for |InferLabelForElement()| that infers a label, if possible, from
  // a surrounding div table,
  // e.g.
    //      <div>Some Text<span><input ...></span></div>
    // e.g.
    //      <div>Some Text</div><div><input ...></div>
    base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
      CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
      CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
      CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));

      // Search the sibling and parent <div>s until we find a candidate label.
      // |climbing| selects the direction of the next step: up to the parent
      // while true, back to the previous sibling while false.
      base::string16 label;
      bool climbing = true;
      for (WebNode current = element.parentNode();
           label.empty() && !current.isNull();
           current = climbing ? current.parentNode()
                              : current.previousSibling()) {
        if (HasTagName(current, kDiv)) {
          // Once a <div> has been seen, switch to scanning backwards through
          // siblings.
          climbing = false;
          label = FindChildText(current);
        } else if (climbing && (HasTagName(current, kTable) ||
                                HasTagName(current, kFieldSet))) {
          // If the element is in a table or fieldset, its label most likely
          // is too.
          break;
        }

        // If there are no more siblings, continue walking up the tree.
        if (current.previousSibling().isNull())
          climbing = true;
      }

      return label;
    }

    // Helper for |InferLabelForElement()| that infers a label, if possible, from
    // a surrounding definition list,
    // e.g.
    //      <dl><dt>Some Text</dt><dd><input ...></dd></dl>
    // e.g.
    //      <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
    base::string16 InferLabelFromDefinitionList(
        const WebFormControlElement& element) {
      CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
      WebNode parent = element.parentNode();
      // Climb through element ancestors until a <dd> is found or the chain of
      // element nodes ends.
      while (!parent.isNull() && parent.isElementNode() &&
             !parent.to<WebElement>().hasTagName(kDefinitionData))
        parent = parent.parentNode();

      if (parent.isNull() || !HasTagName(parent, kDefinitionData))
        return base::string16();

      // Skip by any intervening text nodes.
      WebNode previous = parent.previousSibling();
      while (!previous.isNull() && previous.isTextNode())
        previous = previous.previousSibling();

      // The label is only taken from a <dt> that directly precedes the <dd>
      // (ignoring the text nodes skipped above).
      CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
      if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
        return base::string16();

      return FindChildText(previous);
    }

    // Infers corresponding label for |element| from surrounding context in the DOM,
    // e.g. the contents of the preceding

    tag or text element. base::string16 InferLabelForElement(const WebFormControlElement& element) { base::string16 inferred_label = InferLabelFromPrevious(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for list item case. inferred_label = InferLabelFromListItem(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for table cell case. inferred_label = InferLabelFromTableColumn(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for table row case. inferred_label = InferLabelFromTableRow(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for definition list case. inferred_label = InferLabelFromDefinitionList(element); if (!inferred_label.empty()) return inferred_label; // If we didn't find a label, check for div table case. return InferLabelFromDivTable(element); } // Fills |option_strings| with the values of the