You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

183 lines
5.2 KiB

  1. // Package wikipedia implements a Service which adds !commands for Wikipedia search.
  2. package wikipedia
  import (
  	"encoding/json"
  	"fmt"
  	"io/ioutil"
  	"net/http"
  	"net/url"
  	"strings"
  	"time"

  	"github.com/jaytaylor/html2text"
  	"github.com/matrix-org/go-neb/types"
  	"github.com/matrix-org/gomatrix"
  	log "github.com/sirupsen/logrus"
  )
  // ServiceType of the Wikipedia service
  const ServiceType = "wikipedia"

  // maxExtractLength is the max length of the extract string in bytes.
  const maxExtractLength = 1024

  // httpClient is the shared client used for Wikipedia API requests.
  // It carries an explicit timeout: the zero-value http.Client never times out,
  // so a stalled connection would otherwise block the request forever.
  var httpClient = &http.Client{Timeout: 30 * time.Second}
  19. // Search results (returned by search query)
  20. type wikipediaSearchResults struct {
  21. Query wikipediaQuery `json:"query"` // Containter for the query response
  22. }
  23. // Wikipeda pages returned in search results
  24. type wikipediaQuery struct {
  25. Pages map[string]wikipediaPage `json:"pages"` // Map of wikipedia page IDs to page objects
  26. }
  // wikipediaPage represents an individual Wikipedia page as decoded from the
  // search response JSON.
  type wikipediaPage struct {
  	// PageID is the unique ID for the Wikipedia page.
  	PageID int64 `json:"pageid"`
  	// NS is the namespace ID.
  	NS int `json:"ns"`
  	// Title is the page title text.
  	Title string `json:"title"`
  	// Touched is the date that the page was last touched / modified.
  	Touched string `json:"touched"`
  	// LastRevID is decoded from the "lastrevid" field.
  	// NOTE(review): presumably the ID of the page's latest revision - confirm.
  	LastRevID int64 `json:"lastrevid"`
  	// Extract is the page extract text.
  	Extract string `json:"extract"`
  }
  36. // Service contains the Config fields for the Wikipedia service.
  37. type Service struct {
  38. types.DefaultService
  39. }
  40. // Commands supported:
  41. // !wikipedia some_search_query_without_quotes
  42. // Responds with a suitable article extract and link to the referenced page into the same room as the command.
  43. func (s *Service) Commands(client *gomatrix.Client) []types.Command {
  44. return []types.Command{
  45. types.Command{
  46. Path: []string{"wikipedia"},
  47. Command: func(roomID, userID string, args []string) (interface{}, error) {
  48. return s.cmdWikipediaSearch(client, roomID, userID, args)
  49. },
  50. },
  51. }
  52. }
  53. // usageMessage returns a matrix TextMessage representation of the service usage
  54. func usageMessage() *gomatrix.TextMessage {
  55. return &gomatrix.TextMessage{"m.notice",
  56. `Usage: !wikipedia search_text`}
  57. }
  58. func (s *Service) cmdWikipediaSearch(client *gomatrix.Client, roomID, userID string, args []string) (interface{}, error) {
  59. // Check for query text
  60. if len(args) < 1 {
  61. return usageMessage(), nil
  62. }
  63. // Get the query text and per,form search
  64. querySentence := strings.Join(args, " ")
  65. searchResultPage, err := s.text2Wikipedia(querySentence)
  66. if err != nil {
  67. return nil, err
  68. }
  69. // No article extracts
  70. if searchResultPage == nil || searchResultPage.Extract == "" {
  71. return gomatrix.TextMessage{
  72. MsgType: "m.notice",
  73. Body: "No results",
  74. }, nil
  75. }
  76. // Convert article HTML to text
  77. extractText, err := html2text.FromString(searchResultPage.Extract)
  78. if err != nil {
  79. return gomatrix.TextMessage{
  80. MsgType: "m.notice",
  81. Body: "Failed to convert extract to plain text - " + err.Error(),
  82. }, nil
  83. }
  84. // Truncate the extract text, if necessary
  85. if len(extractText) > maxExtractLength {
  86. extractText = extractText[:maxExtractLength] + "..."
  87. }
  88. // Add a link to the bottom of the extract
  89. extractText += fmt.Sprintf("\nhttp://en.wikipedia.org/?curid=%d", searchResultPage.PageID)
  90. // Return article extract
  91. return gomatrix.TextMessage{
  92. MsgType: "m.notice",
  93. Body: extractText,
  94. }, nil
  95. }
  96. // text2Wikipedia returns a Wikipedia article summary
  97. func (s *Service) text2Wikipedia(query string) (*wikipediaPage, error) {
  98. log.Info("Searching Wikipedia for: ", query)
  99. u, err := url.Parse("https://en.wikipedia.org/w/api.php")
  100. if err != nil {
  101. return nil, err
  102. }
  103. // Example query - https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&exintro=&titles=RMS+Titanic
  104. q := u.Query()
  105. q.Set("action", "query") // Action - query for articles
  106. q.Set("prop", "extracts") // Return article extracts
  107. q.Set("format", "json")
  108. q.Set("redirects", "")
  109. // q.Set("exintro", "")
  110. q.Set("titles", query) // Text to search for
  111. u.RawQuery = q.Encode()
  112. // log.Info("Request URL: ", u)
  113. // Perform wikipedia search request
  114. res, err := httpClient.Get(u.String())
  115. if res != nil {
  116. defer res.Body.Close()
  117. }
  118. if err != nil {
  119. return nil, err
  120. }
  121. if res.StatusCode < 200 || res.StatusCode >= 300 {
  122. return nil, fmt.Errorf("Request error: %d, %s", res.StatusCode, response2String(res))
  123. }
  124. // Parse search results
  125. var searchResults wikipediaSearchResults
  126. // log.Info(response2String(res))
  127. if err := json.NewDecoder(res.Body).Decode(&searchResults); err != nil {
  128. return nil, fmt.Errorf("ERROR - %s", err.Error())
  129. } else if len(searchResults.Query.Pages) < 1 {
  130. return nil, fmt.Errorf("No articles found")
  131. }
  132. // Return only the first search result with an extract
  133. for _, page := range searchResults.Query.Pages {
  134. if page.Extract != "" {
  135. return &page, nil
  136. }
  137. }
  138. return nil, fmt.Errorf("No articles with extracts found")
  139. }
  // response2String reads the whole body of res and returns it as a string.
  // If the body cannot be read, a fixed failure message is returned instead.
  // The body is not closed here.
  func response2String(res *http.Response) string {
  	body, readErr := ioutil.ReadAll(res.Body)
  	if readErr == nil {
  		return string(body)
  	}
  	return "Failed to decode response body"
  }
  149. // Initialise the service
  150. func init() {
  151. types.RegisterService(func(serviceID, serviceUserID, webhookEndpointURL string) types.Service {
  152. return &Service{
  153. DefaultService: types.NewDefaultService(serviceID, serviceUserID, ServiceType),
  154. }
  155. })
  156. }