From b4b56253d6cbdb4d2184db0775fceafdd3531bcb Mon Sep 17 00:00:00 2001 From: Kisaragi Marine Date: Fri, 17 May 2024 18:54:42 +0900 Subject: [PATCH] feat!: reject some crawler access --- Cargo.lock | 13 ++++++++++++ packages/toy-blog/Cargo.toml | 1 + packages/toy-blog/src/service/rest.rs | 30 ++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index dd0a8ed..912f677 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -672,6 +672,17 @@ version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2acedae88d38235936c3922476b10fced7b2b68136f5e3c03c2d5be348a1115" +[[package]] +name = "futures-macro" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0db9cce532b0eae2ccf2766ab246f114b56b9cf6d445e00c2549fbc100ca045d" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.105", +] + [[package]] name = "futures-sink" version = "0.3.23" @@ -691,6 +702,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0828a5471e340229c11c77ca80017937ce3c58cb788a17e5f1c2d5c485a9577" dependencies = [ "futures-core", + "futures-macro", "futures-task", "pin-project-lite", "pin-utils", @@ -1470,6 +1482,7 @@ dependencies = [ "clap", "fern", "fs2", + "futures-util", "log", "maplit", "once_cell", diff --git a/packages/toy-blog/Cargo.toml b/packages/toy-blog/Cargo.toml index 0fade2d..4776564 100644 --- a/packages/toy-blog/Cargo.toml +++ b/packages/toy-blog/Cargo.toml @@ -25,6 +25,7 @@ strum = { version = "0.26.2", features = ["derive"] } thiserror = "1.0.59" tokio = { version = "1.37.0", features = ["time", "macros"] } toy-blog-endpoint-model = { path = "../toy-blog-endpoint-model" } +futures-util = "0.3.23" [features] diff --git a/packages/toy-blog/src/service/rest.rs b/packages/toy-blog/src/service/rest.rs index f643ac7..9a6ecd4 100644 --- a/packages/toy-blog/src/service/rest.rs +++ b/packages/toy-blog/src/service/rest.rs @@ -7,8 
+7,11 @@ mod header; use std::fs::File; use std::io::stdin; +use std::net::{IpAddr, Ipv4Addr}; use std::path::Path; -use actix_web::{App, HttpServer}; +use actix_web::{App, HttpResponseBuilder, HttpServer}; +use actix_web::dev::{ServiceRequest, ServiceResponse}; +use actix_web::http::StatusCode; use actix_web::middleware::Logger; use anyhow::Context; use log::info; @@ -21,6 +24,8 @@ use crate::service::rest::auth::WRITE_TOKEN; use crate::service::rest::repository::GLOBAL_FILE; use actix_web::web::scope as prefixed_service; use actix_web_httpauth::extractors::bearer::Config as BearerAuthConfig; +use futures_util::future::LocalBoxFuture; +use futures_util::FutureExt; mod inner_no_leak { use std::error::Error; @@ -110,6 +115,29 @@ pub async fn boot_http_server(port: u16, host: &str, proxied_by_cloudflare: bool .realm("Perform write operation") .scope("article:write"), ) + .wrap_fn(move |req, srv| { + let cloudflare_support = proxied_by_cloudflare; + + const HATENA_BOOKMARK_CRAWLER: Ipv4Addr = Ipv4Addr::new(133, 242, 243, 6); + let extract_real_ip = |req: &ServiceRequest, cloudflare_support: bool| { + if cloudflare_support { + req.headers().get("CF-Connecting-IP")?.to_str().ok()?.parse::<IpAddr>().ok() + } else { + req.peer_addr().map(|x| x.ip()) + } + }; + + if extract_real_ip(&req, cloudflare_support).is_some_and(|x| x == HATENA_BOOKMARK_CRAWLER) { + Box::pin(async { + Ok(ServiceResponse::new(req.into_parts().0, HttpResponseBuilder::new(StatusCode::FORBIDDEN).body("Forbidden"))) + }) as LocalBoxFuture<Result<ServiceResponse, actix_web::Error>> + } else { + use actix_web::dev::Service; + + Box::pin(srv.call(req).map(|x| x.map(|y| y.map_into_boxed_body()))) + as LocalBoxFuture<Result<ServiceResponse, actix_web::Error>> + } + }) .wrap(Logger::new(logger_format)) .wrap(crate::service::rest::cors::middleware_factory()) };