feat(backend): added black list for assets in sink service (#885)
This commit is contained in:
parent
2d15619027
commit
cce7a7bdad
2 changed files with 53 additions and 2 deletions
|
|
@ -21,6 +21,7 @@ type Config struct {
|
||||||
ProducerCloseTimeout int `env:"PRODUCER_CLOSE_TIMEOUT,default=15000"`
|
ProducerCloseTimeout int `env:"PRODUCER_CLOSE_TIMEOUT,default=15000"`
|
||||||
CacheThreshold int64 `env:"CACHE_THRESHOLD,default=5"`
|
CacheThreshold int64 `env:"CACHE_THRESHOLD,default=5"`
|
||||||
CacheExpiration int64 `env:"CACHE_EXPIRATION,default=120"`
|
CacheExpiration int64 `env:"CACHE_EXPIRATION,default=120"`
|
||||||
|
CacheBlackList string `env:"CACHE_BLACK_LIST,default="`
|
||||||
UseProfiler bool `env:"PROFILER_ENABLED,default=false"`
|
UseProfiler bool `env:"PROFILER_ENABLED,default=false"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import (
|
||||||
"openreplay/backend/pkg/monitoring"
|
"openreplay/backend/pkg/monitoring"
|
||||||
"openreplay/backend/pkg/queue/types"
|
"openreplay/backend/pkg/queue/types"
|
||||||
"openreplay/backend/pkg/url/assets"
|
"openreplay/backend/pkg/url/assets"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
@ -27,6 +28,7 @@ type AssetsCache struct {
|
||||||
rewriter *assets.Rewriter
|
rewriter *assets.Rewriter
|
||||||
producer types.Producer
|
producer types.Producer
|
||||||
cache map[string]*CachedAsset
|
cache map[string]*CachedAsset
|
||||||
|
blackList []string // use "example.com" to filter all domains or ".example.com" to filter only third-level domain
|
||||||
totalAssets syncfloat64.Counter
|
totalAssets syncfloat64.Counter
|
||||||
cachedAssets syncfloat64.Counter
|
cachedAssets syncfloat64.Counter
|
||||||
skippedAssets syncfloat64.Counter
|
skippedAssets syncfloat64.Counter
|
||||||
|
|
@ -61,12 +63,22 @@ func New(cfg *sink.Config, rewriter *assets.Rewriter, producer types.Producer, m
|
||||||
rewriter: rewriter,
|
rewriter: rewriter,
|
||||||
producer: producer,
|
producer: producer,
|
||||||
cache: make(map[string]*CachedAsset, 64),
|
cache: make(map[string]*CachedAsset, 64),
|
||||||
|
blackList: make([]string, 0),
|
||||||
totalAssets: totalAssets,
|
totalAssets: totalAssets,
|
||||||
cachedAssets: cachedAssets,
|
cachedAssets: cachedAssets,
|
||||||
skippedAssets: skippedAssets,
|
skippedAssets: skippedAssets,
|
||||||
assetSize: assetSize,
|
assetSize: assetSize,
|
||||||
assetDuration: assetDuration,
|
assetDuration: assetDuration,
|
||||||
}
|
}
|
||||||
|
// Parse black list for cache layer
|
||||||
|
if len(cfg.CacheBlackList) > 0 {
|
||||||
|
blackList := strings.Split(cfg.CacheBlackList, ",")
|
||||||
|
for _, domain := range blackList {
|
||||||
|
if len(domain) > 0 {
|
||||||
|
assetsCache.blackList = append(assetsCache.blackList, domain)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
go assetsCache.cleaner()
|
go assetsCache.cleaner()
|
||||||
return assetsCache
|
return assetsCache
|
||||||
}
|
}
|
||||||
|
|
@ -98,6 +110,22 @@ func (e *AssetsCache) clearCache() {
|
||||||
log.Printf("cache cleaner: deleted %d/%d assets", deleted, cacheSize)
|
log.Printf("cache cleaner: deleted %d/%d assets", deleted, cacheSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *AssetsCache) shouldSkipAsset(baseURL string) bool {
|
||||||
|
if len(e.blackList) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
host, err := parseHost(baseURL)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, blackHost := range e.blackList {
|
||||||
|
if strings.Contains(host, blackHost) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func (e *AssetsCache) ParseAssets(msg messages.Message) messages.Message {
|
func (e *AssetsCache) ParseAssets(msg messages.Message) messages.Message {
|
||||||
switch m := msg.(type) {
|
switch m := msg.(type) {
|
||||||
case *messages.SetNodeAttributeURLBased:
|
case *messages.SetNodeAttributeURLBased:
|
||||||
|
|
@ -110,6 +138,9 @@ func (e *AssetsCache) ParseAssets(msg messages.Message) messages.Message {
|
||||||
newMsg.SetMeta(msg.Meta())
|
newMsg.SetMeta(msg.Meta())
|
||||||
return newMsg
|
return newMsg
|
||||||
} else if m.Name == "style" {
|
} else if m.Name == "style" {
|
||||||
|
if e.shouldSkipAsset(m.BaseURL) {
|
||||||
|
return msg
|
||||||
|
}
|
||||||
newMsg := &messages.SetNodeAttribute{
|
newMsg := &messages.SetNodeAttribute{
|
||||||
ID: m.ID,
|
ID: m.ID,
|
||||||
Name: m.Name,
|
Name: m.Name,
|
||||||
|
|
@ -119,6 +150,9 @@ func (e *AssetsCache) ParseAssets(msg messages.Message) messages.Message {
|
||||||
return newMsg
|
return newMsg
|
||||||
}
|
}
|
||||||
case *messages.SetCSSDataURLBased:
|
case *messages.SetCSSDataURLBased:
|
||||||
|
if e.shouldSkipAsset(m.BaseURL) {
|
||||||
|
return msg
|
||||||
|
}
|
||||||
newMsg := &messages.SetCSSData{
|
newMsg := &messages.SetCSSData{
|
||||||
ID: m.ID,
|
ID: m.ID,
|
||||||
Data: e.handleCSS(m.SessionID(), m.BaseURL, m.Data),
|
Data: e.handleCSS(m.SessionID(), m.BaseURL, m.Data),
|
||||||
|
|
@ -126,6 +160,9 @@ func (e *AssetsCache) ParseAssets(msg messages.Message) messages.Message {
|
||||||
newMsg.SetMeta(msg.Meta())
|
newMsg.SetMeta(msg.Meta())
|
||||||
return newMsg
|
return newMsg
|
||||||
case *messages.CSSInsertRuleURLBased:
|
case *messages.CSSInsertRuleURLBased:
|
||||||
|
if e.shouldSkipAsset(m.BaseURL) {
|
||||||
|
return msg
|
||||||
|
}
|
||||||
newMsg := &messages.CSSInsertRule{
|
newMsg := &messages.CSSInsertRule{
|
||||||
ID: m.ID,
|
ID: m.ID,
|
||||||
Index: m.Index,
|
Index: m.Index,
|
||||||
|
|
@ -134,6 +171,9 @@ func (e *AssetsCache) ParseAssets(msg messages.Message) messages.Message {
|
||||||
newMsg.SetMeta(msg.Meta())
|
newMsg.SetMeta(msg.Meta())
|
||||||
return newMsg
|
return newMsg
|
||||||
case *messages.AdoptedSSReplaceURLBased:
|
case *messages.AdoptedSSReplaceURLBased:
|
||||||
|
if e.shouldSkipAsset(m.BaseURL) {
|
||||||
|
return msg
|
||||||
|
}
|
||||||
newMsg := &messages.AdoptedSSReplace{
|
newMsg := &messages.AdoptedSSReplace{
|
||||||
SheetID: m.SheetID,
|
SheetID: m.SheetID,
|
||||||
Text: e.handleCSS(m.SessionID(), m.BaseURL, m.Text),
|
Text: e.handleCSS(m.SessionID(), m.BaseURL, m.Text),
|
||||||
|
|
@ -141,6 +181,9 @@ func (e *AssetsCache) ParseAssets(msg messages.Message) messages.Message {
|
||||||
newMsg.SetMeta(msg.Meta())
|
newMsg.SetMeta(msg.Meta())
|
||||||
return newMsg
|
return newMsg
|
||||||
case *messages.AdoptedSSInsertRuleURLBased:
|
case *messages.AdoptedSSInsertRuleURLBased:
|
||||||
|
if e.shouldSkipAsset(m.BaseURL) {
|
||||||
|
return msg
|
||||||
|
}
|
||||||
newMsg := &messages.AdoptedSSInsertRule{
|
newMsg := &messages.AdoptedSSInsertRule{
|
||||||
SheetID: m.SheetID,
|
SheetID: m.SheetID,
|
||||||
Index: m.Index,
|
Index: m.Index,
|
||||||
|
|
@ -180,13 +223,21 @@ func (e *AssetsCache) handleURL(sessionID uint64, baseURL string, urlVal string)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseHost parses baseURL and returns its leading part in the form
// "<scheme>://<host>/", where host is taken from url.URL.Host (so it keeps
// any port). Path, query and fragment are dropped. If baseURL cannot be
// parsed, it returns an empty string together with the parse error.
func parseHost(baseURL string) (string, error) {
	parsed, parseErr := url.Parse(baseURL)
	if parseErr != nil {
		return "", parseErr
	}
	origin := parsed.Scheme + "://"
	origin += parsed.Host + "/"
	return origin, nil
}
|
||||||
|
|
||||||
func (e *AssetsCache) handleCSS(sessionID uint64, baseURL string, css string) string {
|
func (e *AssetsCache) handleCSS(sessionID uint64, baseURL string, css string) string {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
e.totalAssets.Add(ctx, 1)
|
e.totalAssets.Add(ctx, 1)
|
||||||
// Try to find asset in cache
|
// Try to find asset in cache
|
||||||
h := md5.New()
|
h := md5.New()
|
||||||
// Cut first part of url (scheme + host)
|
// Cut first part of url (scheme + host)
|
||||||
u, err := url.Parse(baseURL)
|
justUrl, err := parseHost(baseURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("can't parse url: %s, err: %s", baseURL, err)
|
log.Printf("can't parse url: %s, err: %s", baseURL, err)
|
||||||
if e.cfg.CacheAssets {
|
if e.cfg.CacheAssets {
|
||||||
|
|
@ -194,7 +245,6 @@ func (e *AssetsCache) handleCSS(sessionID uint64, baseURL string, css string) st
|
||||||
}
|
}
|
||||||
return e.getRewrittenCSS(sessionID, baseURL, css)
|
return e.getRewrittenCSS(sessionID, baseURL, css)
|
||||||
}
|
}
|
||||||
justUrl := u.Scheme + "://" + u.Host + "/"
|
|
||||||
// Calculate hash sum of url + css
|
// Calculate hash sum of url + css
|
||||||
io.WriteString(h, justUrl)
|
io.WriteString(h, justUrl)
|
||||||
io.WriteString(h, css)
|
io.WriteString(h, css)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue