You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

186 lines
5.4 KiB

  1. // Package wikipedia implements a Service which adds !commands for Wikipedia search.
  2. package wikipedia
import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/jaytaylor/html2text"
	"github.com/matrix-org/go-neb/types"
	log "github.com/sirupsen/logrus"
	mevt "maunium.net/go/mautrix/event"
	"maunium.net/go/mautrix/id"
)
  16. // ServiceType of the Wikipedia service
  17. const ServiceType = "wikipedia"
  18. const maxExtractLength = 1024 // Max length of extract string in bytes
  19. var httpClient = &http.Client{}
  20. // Search results (returned by search query)
  21. type wikipediaSearchResults struct {
  22. Query wikipediaQuery `json:"query"` // Containter for the query response
  23. }
  24. // Wikipeda pages returned in search results
  25. type wikipediaQuery struct {
  26. Pages map[string]wikipediaPage `json:"pages"` // Map of wikipedia page IDs to page objects
  27. }
  28. // Representation of an individual wikipedia page
  29. type wikipediaPage struct {
  30. PageID int64 `json:"pageid"` // Unique ID for the wikipedia page
  31. NS int `json:"ns"` // Namespace ID
  32. Title string `json:"title"` // Page title text
  33. Touched string `json:"touched"` // Date that the page was last touched / modified
  34. LastRevID int64 `json:"lastrevid"` //
  35. Extract string `json:"extract"` // Page extract text
  36. }
  37. // Service contains the Config fields for the Wikipedia service.
  38. type Service struct {
  39. types.DefaultService
  40. }
  41. // Commands supported:
  42. // !wikipedia some_search_query_without_quotes
  43. // Responds with a suitable article extract and link to the referenced page into the same room as the command.
  44. func (s *Service) Commands(client types.MatrixClient) []types.Command {
  45. return []types.Command{
  46. {
  47. Path: []string{"wikipedia"},
  48. Command: func(roomID id.RoomID, userID id.UserID, args []string) (interface{}, error) {
  49. return s.cmdWikipediaSearch(client, roomID, userID, args)
  50. },
  51. },
  52. }
  53. }
  54. // usageMessage returns a matrix TextMessage representation of the service usage
  55. func usageMessage() *mevt.MessageEventContent {
  56. return &mevt.MessageEventContent{
  57. MsgType: mevt.MsgNotice,
  58. Body: "Usage: !wikipedia search_text",
  59. }
  60. }
  61. func (s *Service) cmdWikipediaSearch(client types.MatrixClient, roomID id.RoomID, userID id.UserID, args []string) (interface{}, error) {
  62. // Check for query text
  63. if len(args) < 1 {
  64. return usageMessage(), nil
  65. }
  66. // Get the query text and per,form search
  67. querySentence := strings.Join(args, " ")
  68. searchResultPage, err := s.text2Wikipedia(querySentence)
  69. if err != nil {
  70. return nil, err
  71. }
  72. // No article extracts
  73. if searchResultPage == nil || searchResultPage.Extract == "" {
  74. return mevt.MessageEventContent{
  75. MsgType: "m.notice",
  76. Body: "No results",
  77. }, nil
  78. }
  79. // Convert article HTML to text
  80. extractText, err := html2text.FromString(searchResultPage.Extract)
  81. if err != nil {
  82. return mevt.MessageEventContent{
  83. MsgType: "m.notice",
  84. Body: "Failed to convert extract to plain text - " + err.Error(),
  85. }, nil
  86. }
  87. // Truncate the extract text, if necessary
  88. if len(extractText) > maxExtractLength {
  89. extractText = extractText[:maxExtractLength] + "..."
  90. }
  91. // Add a link to the bottom of the extract
  92. extractText += fmt.Sprintf("\nhttp://en.wikipedia.org/?curid=%d", searchResultPage.PageID)
  93. // Return article extract
  94. return mevt.MessageEventContent{
  95. MsgType: "m.notice",
  96. Body: extractText,
  97. }, nil
  98. }
  99. // text2Wikipedia returns a Wikipedia article summary
  100. func (s *Service) text2Wikipedia(query string) (*wikipediaPage, error) {
  101. log.Info("Searching Wikipedia for: ", query)
  102. u, err := url.Parse("https://en.wikipedia.org/w/api.php")
  103. if err != nil {
  104. return nil, err
  105. }
  106. // Example query - https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&exintro=&titles=RMS+Titanic
  107. q := u.Query()
  108. q.Set("action", "query") // Action - query for articles
  109. q.Set("prop", "extracts") // Return article extracts
  110. q.Set("format", "json")
  111. q.Set("redirects", "")
  112. // q.Set("exintro", "")
  113. q.Set("titles", query) // Text to search for
  114. u.RawQuery = q.Encode()
  115. // log.Info("Request URL: ", u)
  116. // Perform wikipedia search request
  117. res, err := httpClient.Get(u.String())
  118. if res != nil {
  119. defer res.Body.Close()
  120. }
  121. if err != nil {
  122. return nil, err
  123. }
  124. if res.StatusCode < 200 || res.StatusCode >= 300 {
  125. return nil, fmt.Errorf("Request error: %d, %s", res.StatusCode, response2String(res))
  126. }
  127. // Parse search results
  128. var searchResults wikipediaSearchResults
  129. // log.Info(response2String(res))
  130. if err := json.NewDecoder(res.Body).Decode(&searchResults); err != nil {
  131. return nil, fmt.Errorf("ERROR - %s", err.Error())
  132. } else if len(searchResults.Query.Pages) < 1 {
  133. return nil, fmt.Errorf("No articles found")
  134. }
  135. // Return only the first search result with an extract
  136. for _, page := range searchResults.Query.Pages {
  137. if page.Extract != "" {
  138. return &page, nil
  139. }
  140. }
  141. return nil, fmt.Errorf("No articles with extracts found")
  142. }
  143. // response2String returns a string representation of an HTTP response body
  144. func response2String(res *http.Response) string {
  145. bs, err := ioutil.ReadAll(res.Body)
  146. if err != nil {
  147. return "Failed to decode response body"
  148. }
  149. str := string(bs)
  150. return str
  151. }
  152. // Initialise the service
  153. func init() {
  154. types.RegisterService(func(serviceID string, serviceUserID id.UserID, webhookEndpointURL string) types.Service {
  155. return &Service{
  156. DefaultService: types.NewDefaultService(serviceID, serviceUserID, ServiceType),
  157. }
  158. })
  159. }