[PATCH] crawler: ignore 405 responses for HEAD requests
Export this patch
Header checks are also skipped for these 405 responses, because they
usually lack the headers we need (for example, most omit Content-Type).
Fixes: https://todo.sr.ht/~sircmpwn/searchhut/41
---
crawler/index.go | 2 ++
1 file changed, 2 insertions(+)
diff --git a/crawler/index.go b/crawler/index.go
index 3ea7949..c2e140d 100644
--- a/crawler/index.go
+++ b/crawler/index.go
@@ -194,6 +194,8 @@ func (c *Crawler) checkResponse(resp *http.Response, page scheduledPage) bool {
switch resp.StatusCode {
case http.StatusOK:
// no-op
+ case http.StatusMethodNotAllowed:
+ return resp.Request.Method == "HEAD"
case http.StatusTooManyRequests:
retryAfter := resp.Header.Get("Retry-After")
if retryAfter == "" {
--
2.32.1 (Apple Git-133)
Thanks!
To git@git.sr.ht:~sircmpwn/searchhut
562fecf..2b6407e master -> master