|
|
package cascadia
import ( "bytes" "fmt" "regexp" "strings"
"golang.org/x/net/html" )
// the Selector type, and functions for creating them
// A Selector is a function which tells whether a node matches or not.
// It is called with a single *html.Node and reports true when that node
// satisfies the selector.
type Selector func(*html.Node) bool
// hasChildMatch returns whether n has any child that matches a.
func hasChildMatch(n *html.Node, a Selector) bool { for c := n.FirstChild; c != nil; c = c.NextSibling { if a(c) { return true } } return false }
// hasDescendantMatch performs a depth-first search of n's descendants,
// testing whether any of them match a. It returns true as soon as a match is
// found, or false if no match is found.
func hasDescendantMatch(n *html.Node, a Selector) bool { for c := n.FirstChild; c != nil; c = c.NextSibling { if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { return true } } return false }
// Compile parses a selector and returns, if successful, a Selector object
// that can be used to match against html.Node objects.
func Compile(sel string) (Selector, error) { p := &parser{s: sel} compiled, err := p.parseSelectorGroup() if err != nil { return nil, err }
if p.i < len(sel) { return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) }
return compiled, nil }
// MustCompile is like Compile, but panics instead of returning an error.
func MustCompile(sel string) Selector { compiled, err := Compile(sel) if err != nil { panic(err) } return compiled }
// MatchAll returns a slice of the nodes that match the selector, considering
// n itself and all of its descendants. Each node is reported before its
// descendants. The result is nil when nothing matches.
func (s Selector) MatchAll(n *html.Node) []*html.Node {
	var matches []*html.Node
	return s.matchAllInto(n, matches)
}
// matchAllInto appends n (if it matches s) and then every matching node in
// the subtree below n to storage, and returns the extended slice.
func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
	if s(n) {
		storage = append(storage, n)
	}

	c := n.FirstChild
	for c != nil {
		storage = s.matchAllInto(c, storage)
		c = c.NextSibling
	}

	return storage
}
// Match reports whether the node n matches the selector.
func (s Selector) Match(n *html.Node) bool {
	matched := s(n)
	return matched
}
// MatchFirst returns the first node that matches s, from n and its children.
func (s Selector) MatchFirst(n *html.Node) *html.Node { if s.Match(n) { return n }
for c := n.FirstChild; c != nil; c = c.NextSibling { m := s.MatchFirst(c) if m != nil { return m } } return nil }
// Filter returns the nodes in nodes that match the selector.
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) { for _, n := range nodes { if s(n) { result = append(result, n) } } return result }
// typeSelector returns a Selector that matches elements with a given tag name.
func typeSelector(tag string) Selector { tag = toLowerASCII(tag) return func(n *html.Node) bool { return n.Type == html.ElementNode && n.Data == tag } }
// toLowerASCII returns s with every ASCII capital letter ('A'-'Z') replaced
// by its lowercase form. Bytes outside that range, including multi-byte
// UTF-8 sequences, are left untouched. When s contains no ASCII capitals it
// is returned as-is, without copying.
func toLowerASCII(s string) string {
	// Locate the first byte that needs changing; most inputs have none.
	first := -1
	for i := 0; i < len(s); i++ {
		if 'A' <= s[i] && s[i] <= 'Z' {
			first = i
			break
		}
	}
	if first < 0 {
		return s
	}

	lowered := []byte(s)
	for i := first; i < len(lowered); i++ {
		if ch := lowered[i]; 'A' <= ch && ch <= 'Z' {
			lowered[i] = ch + ('a' - 'A')
		}
	}
	return string(lowered)
}
// attributeSelector returns a Selector that matches elements
// where the attribute named key satisifes the function f.
func attributeSelector(key string, f func(string) bool) Selector { key = toLowerASCII(key) return func(n *html.Node) bool { if n.Type != html.ElementNode { return false } for _, a := range n.Attr { if a.Key == key && f(a.Val) { return true } } return false } }
// attributeExistsSelector returns a Selector that matches elements carrying
// an attribute named key, whatever its value.
func attributeExistsSelector(key string) Selector {
	always := func(string) bool { return true }
	return attributeSelector(key, always)
}
// attributeEqualsSelector returns a Selector that matches elements whose
// attribute named key has exactly the value val (byte-for-byte comparison).
func attributeEqualsSelector(key, val string) Selector {
	equalsVal := func(got string) bool {
		return got == val
	}
	return attributeSelector(key, equalsVal)
}
// attributeIncludesSelector returns a Selector that matches elements where
// the attribute named key is a whitespace-separated list that includes val.
func attributeIncludesSelector(key, val string) Selector { return attributeSelector(key, func(s string) bool { for s != "" { i := strings.IndexAny(s, " \t\r\n\f") if i == -1 { return s == val } if s[:i] == val { return true } s = s[i+1:] } return false }) }
// attributeDashmatchSelector returns a Selector that matches elements where
// the attribute named key equals val or starts with val plus a hyphen.
func attributeDashmatchSelector(key, val string) Selector { return attributeSelector(key, func(s string) bool { if s == val { return true } if len(s) <= len(val) { return false } if s[:len(val)] == val && s[len(val)] == '-' { return true } return false }) }
// attributePrefixSelector returns a Selector that matches elements where
// the attribute named key starts with val.
//
// Per the CSS Selectors specification for [att^=val], an empty val does not
// represent anything, so the returned Selector never matches in that case
// (a plain prefix test would otherwise accept every value).
func attributePrefixSelector(key, val string) Selector {
	return attributeSelector(key, func(s string) bool {
		return val != "" && strings.HasPrefix(s, val)
	})
}
// attributeSuffixSelector returns a Selector that matches elements where
// the attribute named key ends with val.
//
// Per the CSS Selectors specification for [att$=val], an empty val does not
// represent anything, so the returned Selector never matches in that case
// (a plain suffix test would otherwise accept every value).
func attributeSuffixSelector(key, val string) Selector {
	return attributeSelector(key, func(s string) bool {
		return val != "" && strings.HasSuffix(s, val)
	})
}
// attributeSubstringSelector returns a Selector that matches nodes where
// the attribute named key contains val.
//
// Per the CSS Selectors specification for [att*=val], an empty val does not
// represent anything, so the returned Selector never matches in that case
// (a plain substring test would otherwise accept every value).
func attributeSubstringSelector(key, val string) Selector {
	return attributeSelector(key, func(s string) bool {
		return val != "" && strings.Contains(s, val)
	})
}
// attributeRegexSelector returns a Selector that matches nodes whose
// attribute named key has a value matched by the regular expression rx.
func attributeRegexSelector(key string, rx *regexp.Regexp) Selector {
	// The bound method value has exactly the func(string) bool shape that
	// attributeSelector expects.
	return attributeSelector(key, rx.MatchString)
}
// intersectionSelector returns a selector that matches nodes accepted by
// both a and b. b is only consulted when a matches.
func intersectionSelector(a, b Selector) Selector {
	return func(n *html.Node) bool {
		if !a(n) {
			return false
		}
		return b(n)
	}
}
// unionSelector returns a selector that matches nodes accepted by either a
// or b. b is only consulted when a does not match.
func unionSelector(a, b Selector) Selector {
	return func(n *html.Node) bool {
		if a(n) {
			return true
		}
		return b(n)
	}
}
// negatedSelector returns a selector that matches elements that do not match a.
func negatedSelector(a Selector) Selector { return func(n *html.Node) bool { if n.Type != html.ElementNode { return false } return !a(n) } }
// writeNodeText writes the text contained in n and its descendants to b.
func writeNodeText(n *html.Node, b *bytes.Buffer) { switch n.Type { case html.TextNode: b.WriteString(n.Data) case html.ElementNode: for c := n.FirstChild; c != nil; c = c.NextSibling { writeNodeText(c, b) } } }
// nodeText returns the text contained in n and its descendants.
func nodeText(n *html.Node) string { var b bytes.Buffer writeNodeText(n, &b) return b.String() }
// nodeOwnText returns the contents of the text nodes that are direct
// children of n.
func nodeOwnText(n *html.Node) string { var b bytes.Buffer for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type == html.TextNode { b.WriteString(c.Data) } } return b.String() }
// textSubstrSelector returns a selector that matches nodes that
// contain the given text.
func textSubstrSelector(val string) Selector { return func(n *html.Node) bool { text := strings.ToLower(nodeText(n)) return strings.Contains(text, val) } }
// ownTextSubstrSelector returns a selector that matches nodes that
// directly contain the given text
func ownTextSubstrSelector(val string) Selector { return func(n *html.Node) bool { text := strings.ToLower(nodeOwnText(n)) return strings.Contains(text, val) } }
// textRegexSelector returns a selector that matches nodes whose text
// (including the text of descendants) is matched by the regular
// expression rx.
func textRegexSelector(rx *regexp.Regexp) Selector {
	return func(n *html.Node) bool {
		text := nodeText(n)
		return rx.MatchString(text)
	}
}
// ownTextRegexSelector returns a selector that matches nodes whose own text
// (the text nodes that are direct children) is matched by the regular
// expression rx.
func ownTextRegexSelector(rx *regexp.Regexp) Selector {
	return func(n *html.Node) bool {
		text := nodeOwnText(n)
		return rx.MatchString(text)
	}
}
// hasChildSelector returns a selector that matches elements
// with a child that matches a.
func hasChildSelector(a Selector) Selector { return func(n *html.Node) bool { if n.Type != html.ElementNode { return false } return hasChildMatch(n, a) } }
// hasDescendantSelector returns a selector that matches elements
// with any descendant that matches a.
func hasDescendantSelector(a Selector) Selector { return func(n *html.Node) bool { if n.Type != html.ElementNode { return false } return hasDescendantMatch(n, a) } }
// nthChildSelector returns a selector that implements :nth-child(an+b).
// If last is true, implements :nth-last-child instead.
// If ofType is true, implements :nth-of-type instead.
func nthChildSelector(a, b int, last, ofType bool) Selector { return func(n *html.Node) bool { if n.Type != html.ElementNode { return false }
parent := n.Parent if parent == nil { return false }
i := -1 count := 0 for c := parent.FirstChild; c != nil; c = c.NextSibling { if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { continue } count++ if c == n { i = count if !last { break } } }
if i == -1 { // This shouldn't happen, since n should always be one of its parent's children.
return false }
if last { i = count - i + 1 }
i -= b if a == 0 { return i == 0 }
return i%a == 0 && i/a >= 0 } }
// simpleNthChildSelector returns a selector that implements :nth-child(b).
// If ofType is true, implements :nth-of-type instead.
func simpleNthChildSelector(b int, ofType bool) Selector { return func(n *html.Node) bool { if n.Type != html.ElementNode { return false }
parent := n.Parent if parent == nil { return false }
count := 0 for c := parent.FirstChild; c != nil; c = c.NextSibling { if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { continue } count++ if c == n { return count == b } if count >= b { return false } } return false } }
// simpleNthLastChildSelector returns a selector that implements
// :nth-last-child(b). If ofType is true, implements :nth-last-of-type
// instead.
func simpleNthLastChildSelector(b int, ofType bool) Selector { return func(n *html.Node) bool { if n.Type != html.ElementNode { return false }
parent := n.Parent if parent == nil { return false }
count := 0 for c := parent.LastChild; c != nil; c = c.PrevSibling { if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { continue } count++ if c == n { return count == b } if count >= b { return false } } return false } }
// onlyChildSelector returns a selector that implements :only-child.
// If ofType is true, it implements :only-of-type instead.
func onlyChildSelector(ofType bool) Selector { return func(n *html.Node) bool { if n.Type != html.ElementNode { return false }
parent := n.Parent if parent == nil { return false }
count := 0 for c := parent.FirstChild; c != nil; c = c.NextSibling { if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { continue } count++ if count > 1 { return false } }
return count == 1 } }
// inputSelector is a Selector that matches input, select, textarea and
// button elements.
func inputSelector(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	switch n.Data {
	case "input", "select", "textarea", "button":
		return true
	}
	return false
}
// emptyElementSelector is a Selector that matches empty elements: element
// nodes with no element or text children. Children of other node types do
// not prevent a match.
func emptyElementSelector(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}

	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if child.Type == html.ElementNode || child.Type == html.TextNode {
			return false
		}
	}

	return true
}
// descendantSelector returns a Selector that matches a node when the node
// matches d and at least one of its ancestors matches a.
func descendantSelector(a, d Selector) Selector {
	return func(n *html.Node) bool {
		if !d(n) {
			return false
		}

		// Climb towards the root looking for an ancestor accepted by a.
		ancestor := n.Parent
		for ancestor != nil {
			if a(ancestor) {
				return true
			}
			ancestor = ancestor.Parent
		}

		return false
	}
}
// childSelector returns a Selector that matches a node when the node
// matches d and its parent exists and matches a.
func childSelector(a, d Selector) Selector {
	return func(n *html.Node) bool {
		if !d(n) {
			return false
		}
		parent := n.Parent
		if parent == nil {
			return false
		}
		return a(parent)
	}
}
// siblingSelector returns a Selector that matches an element
// if it matches s2 and in is preceded by an element that matches s1.
// If adjacent is true, the sibling must be immediately before the element.
func siblingSelector(s1, s2 Selector, adjacent bool) Selector { return func(n *html.Node) bool { if !s2(n) { return false }
if adjacent { for n = n.PrevSibling; n != nil; n = n.PrevSibling { if n.Type == html.TextNode || n.Type == html.CommentNode { continue } return s1(n) } return false }
// Walk backwards looking for element that matches s1
for c := n.PrevSibling; c != nil; c = c.PrevSibling { if s1(c) { return true } }
return false } }
|