|
|
// Package wikipedia implements a Service which adds !commands for Wikipedia search.
package wikipedia
import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/jaytaylor/html2text"
	"github.com/matrix-org/go-neb/types"
	log "github.com/sirupsen/logrus"
	mevt "maunium.net/go/mautrix/event"
	"maunium.net/go/mautrix/id"
)
// Constants used by the Wikipedia service.
const (
	// ServiceType of the Wikipedia service
	ServiceType = "wikipedia"

	// maxExtractLength is the maximum length, in bytes, of the extract
	// string included in a reply.
	maxExtractLength = 1024
)
// httpClient is the HTTP client shared by every Wikipedia API request made by
// this package. It carries an overall per-request timeout: the zero-value
// http.Client never times out, so a stalled connection would otherwise block
// the command handler indefinitely.
var httpClient = &http.Client{Timeout: 30 * time.Second}
// wikipediaSearchResults is the top-level JSON document returned by a search
// query.
type wikipediaSearchResults struct {
	// Query is the container for the query response.
	Query wikipediaQuery `json:"query"`
}
// wikipediaQuery holds the Wikipedia pages returned in the search results.
type wikipediaQuery struct {
	// Pages maps Wikipedia page IDs to page objects.
	Pages map[string]wikipediaPage `json:"pages"`
}
// wikipediaPage is the representation of an individual Wikipedia page as it
// appears in the JSON search results.
type wikipediaPage struct {
	// PageID is the unique ID for the Wikipedia page.
	PageID int64 `json:"pageid"`
	// NS is the namespace ID.
	NS int `json:"ns"`
	// Title is the page title text.
	Title string `json:"title"`
	// Touched is the date that the page was last touched / modified.
	Touched string `json:"touched"`
	// LastRevID is decoded from the "lastrevid" field
	// (presumably the ID of the page's latest revision - TODO confirm).
	LastRevID int64 `json:"lastrevid"`
	// Extract is the page extract text.
	Extract string `json:"extract"`
}
// Service contains the Config fields for the Wikipedia service.
//
// It embeds types.DefaultService and declares no fields of its own.
type Service struct {
	types.DefaultService
}
// Commands returns the !commands supported by this service:
//
//	!wikipedia some_search_query_without_quotes
//
// The command responds with a suitable article extract and a link to the
// referenced page, into the same room as the command.
func (s *Service) Commands(client types.MatrixClient) []types.Command {
	// The handler closes over client so it can be forwarded to the search.
	search := func(roomID id.RoomID, userID id.UserID, args []string) (interface{}, error) {
		return s.cmdWikipediaSearch(client, roomID, userID, args)
	}

	wikipediaCmd := types.Command{
		Path:    []string{"wikipedia"},
		Command: search,
	}
	return []types.Command{wikipediaCmd}
}
// usageMessage builds the notice sent back when the command is invoked
// without any search text. It returns a pointer to a freshly built matrix
// message content describing how to use the service.
func usageMessage() *mevt.MessageEventContent {
	usage := mevt.MessageEventContent{}
	usage.MsgType = mevt.MsgNotice
	usage.Body = "Usage: !wikipedia search_text"
	return &usage
}
func (s *Service) cmdWikipediaSearch(client types.MatrixClient, roomID id.RoomID, userID id.UserID, args []string) (interface{}, error) { // Check for query text
if len(args) < 1 { return usageMessage(), nil }
// Get the query text and per,form search
querySentence := strings.Join(args, " ") searchResultPage, err := s.text2Wikipedia(querySentence) if err != nil { return nil, err }
// No article extracts
if searchResultPage == nil || searchResultPage.Extract == "" { return mevt.MessageEventContent{ MsgType: "m.notice", Body: "No results", }, nil }
// Convert article HTML to text
extractText, err := html2text.FromString(searchResultPage.Extract) if err != nil { return mevt.MessageEventContent{ MsgType: "m.notice", Body: "Failed to convert extract to plain text - " + err.Error(), }, nil }
// Truncate the extract text, if necessary
if len(extractText) > maxExtractLength { extractText = extractText[:maxExtractLength] + "..." }
// Add a link to the bottom of the extract
extractText += fmt.Sprintf("\nhttp://en.wikipedia.org/?curid=%d", searchResultPage.PageID)
// Return article extract
return mevt.MessageEventContent{ MsgType: "m.notice", Body: extractText, }, nil }
// text2Wikipedia returns a Wikipedia article summary
func (s *Service) text2Wikipedia(query string) (*wikipediaPage, error) { log.Info("Searching Wikipedia for: ", query)
u, err := url.Parse("https://en.wikipedia.org/w/api.php") if err != nil { return nil, err }
// Example query - https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&exintro=&titles=RMS+Titanic
q := u.Query() q.Set("action", "query") // Action - query for articles
q.Set("prop", "extracts") // Return article extracts
q.Set("format", "json") q.Set("redirects", "") // q.Set("exintro", "")
q.Set("titles", query) // Text to search for
u.RawQuery = q.Encode() // log.Info("Request URL: ", u)
// Perform wikipedia search request
res, err := httpClient.Get(u.String()) if res != nil { defer res.Body.Close() } if err != nil { return nil, err } if res.StatusCode < 200 || res.StatusCode >= 300 { return nil, fmt.Errorf("Request error: %d, %s", res.StatusCode, response2String(res)) }
// Parse search results
var searchResults wikipediaSearchResults // log.Info(response2String(res))
if err := json.NewDecoder(res.Body).Decode(&searchResults); err != nil { return nil, fmt.Errorf("ERROR - %s", err.Error()) } else if len(searchResults.Query.Pages) < 1 { return nil, fmt.Errorf("No articles found") }
// Return only the first search result with an extract
for _, page := range searchResults.Query.Pages { if page.Extract != "" { return &page, nil } }
return nil, fmt.Errorf("No articles with extracts found") }
// response2String reads the whole body of res and returns it as a string.
// If reading the body fails, a fixed failure message is returned instead of
// the partial content. The body is not closed here.
func response2String(res *http.Response) string {
	body, readErr := ioutil.ReadAll(res.Body)
	if readErr == nil {
		return string(body)
	}
	return "Failed to decode response body"
}
// Initialise the service
func init() { types.RegisterService(func(serviceID string, serviceUserID id.UserID, webhookEndpointURL string) types.Service { return &Service{ DefaultService: types.NewDefaultService(serviceID, serviceUserID, ServiceType), } }) }
|