From 72b7a9c8695620f9709fe222ff89c749f4771769 Mon Sep 17 00:00:00 2001 From: Alexander Zavorotynskiy Date: Mon, 17 Oct 2022 16:22:12 +0200 Subject: [PATCH] feat(backend): added assets retrier --- backend/internal/assets/cacher/cacher.go | 48 +++++++++++++++--------- backend/internal/assets/cacher/pool.go | 8 +++- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/backend/internal/assets/cacher/cacher.go b/backend/internal/assets/cacher/cacher.go index 4d14a7dc8..28c618446 100644 --- a/backend/internal/assets/cacher/cacher.go +++ b/backend/internal/assets/cacher/cacher.go @@ -70,33 +70,42 @@ func NewCacher(cfg *config.Config, metrics *monitoring.Metrics) *cacher { } func (c *cacher) CacheFile(task *Task) { - c.cacheURL(task.requestURL, task.sessionID, task.depth, task.urlContext, task.cachePath) + c.cacheURL(task) } -func (c *cacher) cacheURL(requestURL string, sessionID uint64, depth byte, urlContext, cachePath string) { - req, _ := http.NewRequest("GET", requestURL, nil) +func (c *cacher) cacheURL(t *Task) { + t.retries-- + //requestURL sessionID uint64, depth byte, urlContext, cachePath string) { + req, _ := http.NewRequest("GET", t.requestURL, nil) req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0") for k, v := range c.requestHeaders { req.Header.Set(k, v) } res, err := c.httpClient.Do(req) if err != nil { - c.Errors <- errors.Wrap(err, urlContext) + c.Errors <- errors.Wrap(err, t.urlContext) return } defer res.Body.Close() if res.StatusCode >= 400 { - // TODO: retry - c.Errors <- errors.Wrap(fmt.Errorf("Status code is %v, ", res.StatusCode), urlContext) + printErr := true + // Retry 403 error + if res.StatusCode == 403 && t.retries > 0 { + c.workers.AddTask(t) + printErr = false + } + if printErr { + c.Errors <- errors.Wrap(fmt.Errorf("Status code is %v, ", res.StatusCode), t.urlContext) + } return } data, err := ioutil.ReadAll(io.LimitReader(res.Body, int64(c.sizeLimit+1))) if err != nil { - c.Errors <- errors.Wrap(err, urlContext) + c.Errors <- errors.Wrap(err, t.urlContext) return } if len(data) > c.sizeLimit { - c.Errors <- errors.Wrap(errors.New("Maximum size exceeded"), urlContext) + c.Errors <- errors.Wrap(errors.New("Maximum size exceeded"), t.urlContext) return } @@ -108,36 +117,37 @@ func (c *cacher) cacheURL(requestURL string, sessionID uint64, depth byte, urlCo strData := string(data) if isCSS { - strData = c.rewriter.RewriteCSS(sessionID, requestURL, strData) // TODO: one method for rewrite and return list + strData = c.rewriter.RewriteCSS(t.sessionID, t.requestURL, strData) // TODO: one method for rewrite and return list } // TODO: implement in streams - err = c.s3.Upload(strings.NewReader(strData), cachePath, contentType, false) + err = c.s3.Upload(strings.NewReader(strData), t.cachePath, contentType, false) if err != nil { - c.Errors <- errors.Wrap(err, urlContext) + c.Errors <- errors.Wrap(err, t.urlContext) return } c.downloadedAssets.Add(context.Background(), 1) if isCSS { - if depth > 0 { + if t.depth > 0 { for _, extractedURL := range assets.ExtractURLsFromCSS(string(data)) { - if fullURL, cachable := assets.GetFullCachableURL(requestURL, extractedURL); cachable { + if fullURL, cachable := assets.GetFullCachableURL(t.requestURL, extractedURL); cachable { c.checkTask(&Task{ requestURL: fullURL, - sessionID: sessionID, - depth: depth - 1, - urlContext: urlContext + "\n -> " + fullURL, + sessionID: t.sessionID, + depth: t.depth - 1, + urlContext: t.urlContext + "\n -> " + fullURL, isJS: false, + retries: 5, }) } } if err != nil { - c.Errors <- errors.Wrap(err, urlContext) + c.Errors <- errors.Wrap(err, t.urlContext) return } } else { - c.Errors <- errors.Wrap(errors.New("Maximum recursion cache depth exceeded"), urlContext) + c.Errors <- errors.Wrap(errors.New("Maximum recursion cache depth exceeded"), t.urlContext) return } } @@ -172,6 +182,7 @@ func (c *cacher) CacheJSFile(sourceURL string) { depth: 0, urlContext: sourceURL, isJS: true, + retries: 5, }) } @@ -182,6 +193,7 @@ func (c *cacher) CacheURL(sessionID uint64, fullURL string) { depth: MAX_CACHE_DEPTH, urlContext: fullURL, isJS: false, + retries: 5, }) } diff --git a/backend/internal/assets/cacher/pool.go b/backend/internal/assets/cacher/pool.go index 00ce1a63d..15ccb74b0 100644 --- a/backend/internal/assets/cacher/pool.go +++ b/backend/internal/assets/cacher/pool.go @@ -12,6 +12,7 @@ type Task struct { urlContext string isJS bool cachePath string + retries int } type WorkerPool struct { @@ -62,8 +63,11 @@ func (p *WorkerPool) worker() { } } -func (p *WorkerPool) AddTask(newTask *Task) { - p.tasks <- newTask +func (p *WorkerPool) AddTask(task *Task) { + if task.retries <= 0 { + return + } + p.tasks <- task } func (p *WorkerPool) Stop() {