You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

135 lines
4.1 KiB

  1. package goquery
  2. import (
  3. "errors"
  4. "io"
  5. "net/http"
  6. "net/url"
  7. "github.com/andybalholm/cascadia"
  8. "golang.org/x/net/html"
  9. )
  10. // Document represents an HTML document to be manipulated. Unlike jQuery, which
  11. // is loaded as part of a DOM document, and thus acts upon its containing
  12. // document, GoQuery doesn't know which HTML document to act upon. So it needs
  13. // to be told, and that's what the Document class is for. It holds the root
  14. // document node to manipulate, and can make selections on this document.
  15. type Document struct {
  16. *Selection
  17. Url *url.URL
  18. rootNode *html.Node
  19. }
  20. // NewDocumentFromNode is a Document constructor that takes a root html Node
  21. // as argument.
  22. func NewDocumentFromNode(root *html.Node) *Document {
  23. return newDocument(root, nil)
  24. }
  25. // NewDocument is a Document constructor that takes a string URL as argument.
  26. // It loads the specified document, parses it, and stores the root Document
  27. // node, ready to be manipulated.
  28. func NewDocument(url string) (*Document, error) {
  29. // Load the URL
  30. res, e := http.Get(url)
  31. if e != nil {
  32. return nil, e
  33. }
  34. return NewDocumentFromResponse(res)
  35. }
  36. // NewDocumentFromReader returns a Document from a generic reader.
  37. // It returns an error as second value if the reader's data cannot be parsed
  38. // as html. It does *not* check if the reader is also an io.Closer, so the
  39. // provided reader is never closed by this call, it is the responsibility
  40. // of the caller to close it if required.
  41. func NewDocumentFromReader(r io.Reader) (*Document, error) {
  42. root, e := html.Parse(r)
  43. if e != nil {
  44. return nil, e
  45. }
  46. return newDocument(root, nil), nil
  47. }
  48. // NewDocumentFromResponse is another Document constructor that takes an http response as argument.
  49. // It loads the specified response's document, parses it, and stores the root Document
  50. // node, ready to be manipulated. The response's body is closed on return.
  51. func NewDocumentFromResponse(res *http.Response) (*Document, error) {
  52. if res == nil {
  53. return nil, errors.New("Response is nil")
  54. }
  55. defer res.Body.Close()
  56. if res.Request == nil {
  57. return nil, errors.New("Response.Request is nil")
  58. }
  59. // Parse the HTML into nodes
  60. root, e := html.Parse(res.Body)
  61. if e != nil {
  62. return nil, e
  63. }
  64. // Create and fill the document
  65. return newDocument(root, res.Request.URL), nil
  66. }
  67. // CloneDocument creates a deep-clone of a document.
  68. func CloneDocument(doc *Document) *Document {
  69. return newDocument(cloneNode(doc.rootNode), doc.Url)
  70. }
  71. // Private constructor, make sure all fields are correctly filled.
  72. func newDocument(root *html.Node, url *url.URL) *Document {
  73. // Create and fill the document
  74. d := &Document{nil, url, root}
  75. d.Selection = newSingleSelection(root, d)
  76. return d
  77. }
  78. // Selection represents a collection of nodes matching some criteria. The
  79. // initial Selection can be created by using Document.Find, and then
  80. // manipulated using the jQuery-like chainable syntax and methods.
  81. type Selection struct {
  82. Nodes []*html.Node
  83. document *Document
  84. prevSel *Selection
  85. }
  86. // Helper constructor to create an empty selection
  87. func newEmptySelection(doc *Document) *Selection {
  88. return &Selection{nil, doc, nil}
  89. }
  90. // Helper constructor to create a selection of only one node
  91. func newSingleSelection(node *html.Node, doc *Document) *Selection {
  92. return &Selection{[]*html.Node{node}, doc, nil}
  93. }
  94. // Matcher is an interface that defines the methods to match
  95. // HTML nodes against a compiled selector string. Cascadia's
  96. // Selector implements this interface.
  97. type Matcher interface {
  98. Match(*html.Node) bool
  99. MatchAll(*html.Node) []*html.Node
  100. Filter([]*html.Node) []*html.Node
  101. }
  102. // compileMatcher compiles the selector string s and returns
  103. // the corresponding Matcher. If s is an invalid selector string,
  104. // it returns a Matcher that fails all matches.
  105. func compileMatcher(s string) Matcher {
  106. cs, err := cascadia.Compile(s)
  107. if err != nil {
  108. return invalidMatcher{}
  109. }
  110. return cs
  111. }
  112. // invalidMatcher is a Matcher that always fails to match.
  113. type invalidMatcher struct{}
  114. func (invalidMatcher) Match(n *html.Node) bool { return false }
  115. func (invalidMatcher) MatchAll(n *html.Node) []*html.Node { return nil }
  116. func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }