Le mar. 11 nov. 2025 à 21:05, Daniel Baumann <[email protected]> a écrit :

> retitle 1117591 add support for nginx
> severity 1117591 wishlist
> thanks
>
> Hi Jeremy,
>
> thank you for your offer, please send attach a patch to this bug report
> based on https://forgejo.debian.net/web/ai.robots.txt


Here it is!
From f5b6ae3a04a29ede01b146069d907f614ae9017a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lal?= <[email protected]>
Date: Sun, 30 Nov 2025 22:30:21 +0100
Subject: [PATCH] nginx-ai-bots

---
 debian/control                           | 17 +++++++++++++
 debian/local/nginx/block-ai-bots.conf    |  5 ++++
 debian/local/nginx/block-ai-bots.conf.in |  6 +++++
 debian/nginx-ai-bots.README.Debian       | 31 ++++++++++++++++++++++++
 debian/nginx-ai-bots.postinst            | 28 +++++++++++++++++++++
 debian/rules                             | 21 +++++++++++++---
 6 files changed, 104 insertions(+), 4 deletions(-)
 create mode 100644 debian/local/nginx/block-ai-bots.conf
 create mode 100644 debian/local/nginx/block-ai-bots.conf.in
 create mode 100644 debian/nginx-ai-bots.README.Debian
 create mode 100755 debian/nginx-ai-bots.postinst

diff --git a/debian/control b/debian/control
index 61ca8f0..c3a9a7b 100644
--- a/debian/control
+++ b/debian/control
@@ -25,3 +25,20 @@ Description: list of AI agents and robots to block (apache2)
  .
  This package contains the apache2 integration,
  please see /usr/share/doc/apache2-ai-bots/README.Debian on how to enable it.
+
+Package: nginx-ai-bots
+Section: web
+Architecture: all
+Depends:
+ nginx,
+ ${misc:Depends},
+Description: list of AI agents and robots to block (nginx)
+ ai.robots.txt is a list containing AI-related crawlers of all types, regardless
+ of purpose.
+ .
+ Blocking access based on the user agent does not block all crawlers, but it is
+ a simple and low overhead way of blocking most crawlers.
+ .
+ This package contains the nginx integration,
+ please see /usr/share/doc/nginx-ai-bots/README.Debian on how to enable it.
+
diff --git a/debian/local/nginx/block-ai-bots.conf b/debian/local/nginx/block-ai-bots.conf
new file mode 100644
index 0000000..bc6146c
--- /dev/null
+++ b/debian/local/nginx/block-ai-bots.conf
@@ -0,0 +1,5 @@
+# Needs /etc/nginx/conf.d/robots-ai-bots.conf (installed by this package; defines $block_ai_bots)
+
+if ($block_ai_bots) {
+    return 403;
+}
diff --git a/debian/local/nginx/block-ai-bots.conf.in b/debian/local/nginx/block-ai-bots.conf.in
new file mode 100644
index 0000000..c21bf18
--- /dev/null
+++ b/debian/local/nginx/block-ai-bots.conf.in
@@ -0,0 +1,6 @@
+# To use this, include /etc/nginx/snippets/block-ai-bots.conf in a server or location block
+
+map $http_user_agent $block_ai_bots {
+    default 0;
+    ~*(@AI_BOTS@) 1;
+}
diff --git a/debian/nginx-ai-bots.README.Debian b/debian/nginx-ai-bots.README.Debian
new file mode 100644
index 0000000..245d0df
--- /dev/null
+++ b/debian/nginx-ai-bots.README.Debian
@@ -0,0 +1,31 @@
+ai.robots.txt for nginx
+=======================
+
+Bots are reported to now make up half of the web's traffic. There are several
+ways of blocking these.
+
+While there are more advanced tools like Anubis[0], for smaller sites often the
+simpler approach of blocking access by user-agent is currently still enough.
+
+Here is how to enable this blocklist on your website.
+
+
+Usage
+-----
+
+In a server or location block, do:
+
+```
+include /etc/nginx/snippets/block-ai-bots.conf;
+```
+
+Then reload nginx.
+
+Robots.txt
+----------
+
+An example RFC 9309 robots.txt file can be found at
+
+/usr/share/nginx-ai-bots/robots.txt
+
+It is usually meant to be appended to the server's pre-existing robots.txt.
diff --git a/debian/nginx-ai-bots.postinst b/debian/nginx-ai-bots.postinst
new file mode 100755
index 0000000..e1efe3e
--- /dev/null
+++ b/debian/nginx-ai-bots.postinst
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+set -e
+
+case "${1}" in
+	configure)
+		if [ -d /run/systemd/system ]
+		then
+			if [ -n "${2}" ]
+			then
+				deb-systemd-invoke reload 'nginx.service' >/dev/null || true
+			fi
+		fi
+		;;
+
+	abort-upgrade|abort-remove|abort-deconfigure)
+
+		;;
+
+	*)
+		echo "postinst called with unknown argument \`${1}'" >&2
+		exit 1
+		;;
+esac
+
+#DEBHELPER#
+
+exit 0
diff --git a/debian/rules b/debian/rules
index c6f1fe6..3d10835 100755
--- a/debian/rules
+++ b/debian/rules
@@ -1,5 +1,8 @@
 #!/usr/bin/make -f
 
+apache_conf = debian/apache2-ai-bots/etc/apache2/conf-available/block-ai-bots.conf
+nginx_conf = debian/nginx-ai-bots/etc/nginx/conf.d/robots-ai-bots.conf
+
 %:
 	dh ${@}
 
@@ -9,16 +12,26 @@ override_dh_auto_install:
 	cp .htaccess debian/apache2-ai-bots/usr/share/apache2-ai-bots/htaccess
 	cp robots.txt debian/apache2-ai-bots//usr/share/apache2-ai-bots
 
+	mkdir -p debian/nginx-ai-bots/etc/nginx/snippets/
+	cp debian/local/nginx/block-ai-bots.conf debian/nginx-ai-bots/etc/nginx/snippets/block-ai-bots.conf
+
 	# apache2
-	mkdir -p debian/apache2-ai-bots/etc/apache2/conf-available
-	cp debian/local/apache2/block-ai-bots.conf.in debian/apache2-ai-bots/etc/apache2/conf-available/block-ai-bots.conf
+	mkdir -p $(dir $(apache_conf))
+	cp debian/local/apache2/block-ai-bots.conf.in $(apache_conf)
+
+	# nginx
+	mkdir -p $(dir $(nginx_conf))
+	cp debian/local/nginx/block-ai-bots.conf.in $(nginx_conf)
 
 	for BOT in $(shell jq 'keys[]' robots.json); \
 	do \
-		sed -i -e "s|@AI_BOTS@|BrowserMatchNoCase $${BOT} block_ai_bot=true\n@AI_BOTS@|" debian/apache2-ai-bots/etc/apache2/conf-available/block-ai-bots.conf; \
+		sed -i -e "s|@AI_BOTS@|BrowserMatchNoCase $${BOT} block_ai_bot=true\n@AI_BOTS@|" $(apache_conf); \
+		escaBOT=$$(echo $${BOT} | sed 's/ /\\\\s/g' | sed 's/\./\\\\./g'); \
+		sed -i -e "s|@AI_BOTS@|$${escaBOT}\\|@AI_BOTS@|" $(nginx_conf); \
 	done
 
-	sed -i -e '/@AI_BOTS@/d' debian/apache2-ai-bots/etc/apache2/conf-available/block-ai-bots.conf
+	sed -i -e '/@AI_BOTS@/d' $(apache_conf)
+	sed -i -e 's/|@AI_BOTS@//' $(nginx_conf)
 
 upstream:
 	rm -rf assets/images/noai-logo.png
-- 
2.51.0

Reply via email to