diff options
| author | Tom Smeding <tom@tomsmeding.com> | 2026-05-10 22:05:54 +0200 |
|---|---|---|
| committer | Tom Smeding <tom@tomsmeding.com> | 2026-05-10 22:05:54 +0200 |
| commit | e0e6b516b9dd132e067a226ff7fdf56d3e556559 (patch) | |
| tree | 73e93c4f41e441dbb1d06393ab0faab680b598fe | |
| parent | 50e74b214133adddf252a7fb7962272508d6d03d (diff) | |
meta-externalagent, stop crawling all the ?eid= URLs
| -rw-r--r-- | src/Main.hs | 43 |
1 files changed, 30 insertions, 13 deletions
diff --git a/src/Main.hs b/src/Main.hs index 77dfa29..7cd2c65 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -426,6 +426,20 @@ page404 message = return $ responseLBS [("Content-Type", "text/plain")] message +prefilter :: Request -> Maybe Response +prefilter req + -- Facebook should stop crawling all the individual ?eid=... URLs + | case lookup "user-agent" (reqHeaders req) of + Just ua | BS.take 18 ua == "meta-externalagent" -> True + _ -> False + , any ((== "eid") . fst) (reqQuery req) + = Just $ responseBS (Status 303 "See Other") + [("Location", BS.takeWhile (/= fromIntegral (ord '?')) (reqURI req))] + BS.empty + + | otherwise + = Nothing + mainServe :: FilePath -> IO () mainServe confpath = do config <- enrichConfig <$> readConfig confpath @@ -437,19 +451,22 @@ mainServe confpath = do let settings = defaultSettings { setPort = confPort config } atomicPrintS $ "Listening on port " ++ show (confPort config) - run settings $ \req -> case reqPath req of - [] -> - pageIndex config - ["log", TE.decodeUtf8' -> Right alias] -> - pageLog config index req alias - ["cal", TE.decodeUtf8' -> Right alias] -> - pageCalendar config index alias - ["cal", TE.decodeUtf8' -> Right alias, TE.decodeUtf8' -> Right date] -> - pageCalendarDay config index req alias date - [fname] | fname `elem` staticFiles -> - return $ responseFile status200 [] ("pages/" ++ BS8.unpack fname) - _ -> - page404 "URL not found" + run settings $ \req -> + case prefilter req of + Just res -> return res + Nothing -> case reqPath req of + [] -> + pageIndex config + ["log", TE.decodeUtf8' -> Right alias] -> + pageLog config index req alias + ["cal", TE.decodeUtf8' -> Right alias] -> + pageCalendar config index alias + ["cal", TE.decodeUtf8' -> Right alias, TE.decodeUtf8' -> Right date] -> + pageCalendarDay config index req alias date + [fname] | fname `elem` staticFiles -> + return $ responseFile status200 [] ("pages/" ++ BS8.unpack fname) + _ -> + page404 "URL not found" testParseLog :: FilePath -> IO () testParseLog fname = print =<< parseLog <$> BS.readFile fname |
