diff --git a/config.sample.yaml b/config.sample.yaml
index 69a52d3..f447f54 100644
--- a/config.sample.yaml
+++ b/config.sample.yaml
@@ -100,6 +100,16 @@ services:
feeds:
"http://lorem-rss.herokuapp.com/feed?unit=second&interval=60":
rooms: ["!qmElAGdFYCHoCJuaNt:localhost"]
+ must_include:
+ author:
+ - author1
+ description:
+ - lorem
+ - ipsum
+ must_not_include:
+ title:
+ - Lorem
+ - Ipsum
- ID: "github_cmd_service"
Type: "github"
diff --git a/src/github.com/matrix-org/go-neb/services/github/github.go b/src/github.com/matrix-org/go-neb/services/github/github.go
index cb867fb..914f628 100644
--- a/src/github.com/matrix-org/go-neb/services/github/github.go
+++ b/src/github.com/matrix-org/go-neb/services/github/github.go
@@ -450,8 +450,8 @@ func (s *Service) expandCommit(roomID, userID, owner, repo, sha string) interfac
if err != nil {
log.WithError(err).WithFields(log.Fields{
"owner": owner,
- "repo": repo,
- "sha": sha,
+ "repo": repo,
+ "sha": sha,
}).Print("Failed to fetch commit")
return nil
}
diff --git a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go
index 53b09ba..2aa2934 100644
--- a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go
+++ b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go
@@ -7,7 +7,9 @@ import (
"html"
"net/http"
"strconv"
+ "strings"
"time"
+ "unicode"
log "github.com/Sirupsen/logrus"
"github.com/die-net/lrucache"
@@ -34,6 +36,30 @@ var (
const minPollingIntervalSeconds = 60 * 5 // 5 min (News feeds can be genuinely spammy)
+// includeRules contains the rules for including or excluding a feed item. For the fields Author, Title
+// and Description in a feed item, there can be some words specified in the config that determine whether
+// the item will be displayed or not, depending on whether these words are included in that field.
+//
+// - If specified in the `must_include` field, the feed item must include at least one word for each field
+// that has been specified. This means that if some words have been specified for both Author and Title,
+// both the Author and Title must contain at least one of their respective words or the item will be skipped.
+// - If specified in the `must_not_include` field, the feed item fields must not contain any of the words
+// that were specified for each field. This means that if some words have been specified for both Author
+// and Title, if either of them includes at least one of their respective words, the item will be skipped,
+// even in the case that the item matched the `must_include` rules.
+//
+// In both cases, specifying an empty list for a field or not specifying anything causes the field to be ignored.
+// The field being checked each time will be split into words (any non-alphanumeric character starts a new word)
+// and they will be checked against the provided list.
+type includeRules struct {
+ // Author is a case-sensitive list of words that the author name must contain or not contain.
+ Author []string `json:"author"`
+ // Title is a case-sensitive list of words that the item title must contain or not contain.
+ Title []string `json:"title"`
+ // Description is a case-sensitive list of words that the item description must contain or not contain.
+ Description []string `json:"description"`
+}
+
// Service contains the Config fields for this service.
//
// Example request:
@@ -62,6 +88,10 @@ type Service struct {
// The time of the last successful poll. This is populated by Go-NEB. Use /getService to retrieve
// this value.
FeedUpdatedTimestampSecs int64 `json:"last_updated_ts_secs"`
+ // Specified fields must each include at least one of these words.
+ MustInclude includeRules `json:"must_include"`
+ // No specified field may include any of these words.
+ MustNotInclude includeRules `json:"must_not_include"`
// Internal field. When we should poll again.
NextPollTimestampSecs int64
// Internal field. The most recently seen GUIDs. Sized to the number of items in the feed.
@@ -302,7 +332,44 @@ func (s *Service) queryFeed(feedURL string) (*gofeed.Feed, []gofeed.Item, error)
return feed, items, nil
}
+// containsAny reports whether at least one entry of filterWords is exactly equal (case-sensitive)
+// to a whole word of item. item is split into words at every rune that is neither a Unicode
+// letter nor a Unicode number, so substrings of a word never match; an empty filterWords yields false.
+func containsAny(item string, filterWords []string) bool {
+ itemWords := strings.FieldsFunc(item, func(c rune) bool {
+ return !unicode.IsLetter(c) && !unicode.IsNumber(c)
+ })
+ for _, itemWord := range itemWords {
+ for _, filterWord := range filterWords {
+ if filterWord == itemWord {
+ return true
+ }
+ }
+ }
+ return false
+}
+
+// itemFiltered reports whether item i must be dropped according to the feed's include rules.
+// It returns true when a field with must-include words contains none of them (an item without an
+// author fails a non-empty author rule), or when any field contains one of its must-not-include words.
+func itemFiltered(i *gofeed.Item, mustInclude, mustNotInclude *includeRules) bool {
+	// At least one word for each field that has been specified must be included for an item to pass the filter.
+	if (len(mustInclude.Author) > 0 && (i.Author == nil || !containsAny(i.Author.Name, mustInclude.Author))) ||
+		(len(mustInclude.Title) > 0 && !containsAny(i.Title, mustInclude.Title)) ||
+		(len(mustInclude.Description) > 0 && !containsAny(i.Description, mustInclude.Description)) {
+		return true
+	}
+
+	// An empty word list never matches, so unspecified fields are ignored; a nil Author has no name to exclude.
+	return (i.Author != nil && containsAny(i.Author.Name, mustNotInclude.Author)) ||
+		containsAny(i.Title, mustNotInclude.Title) ||
+		containsAny(i.Description, mustNotInclude.Description)
+}
+
func (s *Service) newItems(feedURL string, allItems []*gofeed.Item) (items []gofeed.Item) {
+ mustInclude := s.Feeds[feedURL].MustInclude
+ mustNotInclude := s.Feeds[feedURL].MustNotInclude
+
for _, i := range allItems {
if i == nil {
continue
@@ -327,8 +394,14 @@ func (s *Service) newItems(feedURL string, allItems []*gofeed.Item) (items []gof
// This will inevitably break for some people, but that group of people are probably smaller, so *shrug*.
i.Title = html.UnescapeString(i.Title)
i.Description = html.UnescapeString(i.Description)
+ if i.Author != nil {
+ i.Author.Name = html.UnescapeString(i.Author.Name)
+ i.Author.Email = html.UnescapeString(i.Author.Email)
+ }
- items = append(items, *i)
+ if !itemFiltered(i, &mustInclude, &mustNotInclude) {
+ items = append(items, *i)
+ }
}
return
}
@@ -355,18 +428,30 @@ func itemToHTML(feed *gofeed.Feed, item gofeed.Item) gomatrix.HTMLMessage {
if itemTitle == "" {
itemTitle = feed.Title
}
-
+
+ fmtBody := fmt.Sprintf("%s: %s",
+ html.EscapeString(feed.Title), html.EscapeString(item.Link), html.EscapeString(itemTitle))
+ if item.Author != nil {
+ if len(item.Author.Name) > 0 && len(item.Author.Email) > 0 {
+ fmtBody += fmt.Sprintf(" by %s", html.EscapeString(item.Author.Email),
+ html.EscapeString(item.Author.Name))
+ } else if len(item.Author.Name) > 0 {
+ fmtBody += fmt.Sprintf(" by %s", html.EscapeString(item.Author.Name))
+ } else if len(item.Author.Email) > 0 {
+ fmtBody += fmt.Sprintf(" by %s", html.EscapeString(item.Author.Email),
+ html.EscapeString(item.Author.Email))
+ }
+ }
return gomatrix.HTMLMessage{
Body: fmt.Sprintf("%s: %s ( %s )",
- html.EscapeString(feed.Title), html.EscapeString(item.Title), html.EscapeString(item.Link)),
- MsgType: "m.notice",
- Format: "org.matrix.custom.html",
- FormattedBody: fmt.Sprintf("%s: %s",
- html.EscapeString(feed.Title), html.EscapeString(item.Link), html.EscapeString(itemTitle)),
- // FeedTitle:
- //
- // Title of the Entry
- }
+ html.EscapeString(feed.Title), html.EscapeString(itemTitle), html.EscapeString(item.Link)),
+ MsgType: "m.notice",
+ Format: "org.matrix.custom.html",
+ FormattedBody: fmtBody,
+ // FeedTitle:
+ //
+ // Title of the Entry
+ }
}
func ensureItemsHaveGUIDs(feed *gofeed.Feed) {
diff --git a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go
index 5db87e6..b3253dd 100644
--- a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go
+++ b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go
@@ -32,13 +32,13 @@ const rssFeedXML = `
New Item: Majora’s Mask
http://go.neb/rss/majoras-mask
+ The Skullkid!
`
-func TestHTMLEntities(t *testing.T) {
+func createRSSClient(t *testing.T, feedURL string) *Service {
database.SetServiceDB(&database.NopStorage{})
- feedURL := "https://thehappymaskshop.hyrule"
// Replace the cachingClient with a mock so we can intercept RSS requests
rssTrans := testutils.NewRoundTripper(func(req *http.Request) (*http.Response, error) {
if req.URL.String() != feedURL {
@@ -55,9 +55,11 @@ func TestHTMLEntities(t *testing.T) {
srv, err := types.CreateService("id", "rssbot", "@happy_mask_salesman:hyrule", []byte(
`{"feeds": {"`+feedURL+`":{}}}`, // no config yet
))
+
if err != nil {
- t.Fatal("Failed to create RSS bot: ", err)
+ t.Fatal(err)
}
+
rssbot := srv.(*Service)
// Configure the service to force OnPoll to query the RSS feed and attempt to send results
@@ -67,6 +69,14 @@ func TestHTMLEntities(t *testing.T) {
f.NextPollTimestampSecs = time.Now().Unix()
rssbot.Feeds[feedURL] = f
+ return rssbot
+}
+
+func TestHTMLEntities(t *testing.T) {
+ feedURL := "https://thehappymaskshop.hyrule"
+
+ rssbot := createRSSClient(t, feedURL)
+
// Create the Matrix client which will send the notification
wg := sync.WaitGroup{}
wg.Add(1)
@@ -103,3 +113,59 @@ func TestHTMLEntities(t *testing.T) {
// Check that the Matrix client sent a message
wg.Wait()
}
+
+func TestFeedItemFiltering(t *testing.T) {
+ feedURL := "https://thehappymaskshop.hyrule"
+
+ // Create an rssbot service for feedURL via the shared test helper.
+ rssbot := createRSSClient(t, feedURL)
+
+ feed := rssbot.Feeds[feedURL]
+ feed.MustInclude.Title = []string{"Zelda"}
+ rssbot.Feeds[feedURL] = feed
+
+ _, items, _ := rssbot.queryFeed(feedURL)
+ // 'Zelda' is not a word in the fixture title, so expect no items. NOTE(review): queryFeed's error is discarded here and below; a failed query could make this check pass vacuously.
+ if len(items) != 0 {
+ t.Errorf("Expected 0 items, got %v", items)
+ }
+
+ // Recreate rssbot client
+ rssbot = createRSSClient(t, feedURL)
+
+ feed = rssbot.Feeds[feedURL]
+ feed.MustInclude.Title = []string{"Majora"}
+ rssbot.Feeds[feedURL] = feed
+
+ _, items, _ = rssbot.queryFeed(feedURL)
+ // 'Majora' is a whole word of the fixture title, so the single item is expected to pass the must_include rule.
+ if len(items) != 1 {
+ t.Errorf("Expected 1 item, got %d", len(items))
+ }
+
+ // Recreate rssbot client
+ rssbot = createRSSClient(t, feedURL)
+
+ feed = rssbot.Feeds[feedURL]
+ feed.MustNotInclude.Author = []string{"kid"}
+ rssbot.Feeds[feedURL] = feed
+
+ _, items, _ = rssbot.queryFeed(feedURL)
+ // 'kid' is only a substring of the author word 'Skullkid', not a whole word, so the item is expected to be kept.
+ if len(items) != 1 {
+ t.Errorf("Expected 1 item, got %d", len(items))
+ }
+
+ // Recreate rssbot client
+ rssbot = createRSSClient(t, feedURL)
+
+ feed = rssbot.Feeds[feedURL]
+ feed.MustNotInclude.Author = []string{"Skullkid"}
+ rssbot.Feeds[feedURL] = feed
+
+ _, items, _ = rssbot.queryFeed(feedURL)
+ // 'Skullkid' is a whole word of the author name, so the must_not_include rule is expected to drop the item.
+ if len(items) != 0 {
+ t.Errorf("Expected 0 items, got %v", items)
+ }
+}