From 737467597d5ce647ed52cece85d284f3330cff25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?=
Date: Mon, 10 Feb 2025 18:07:27 +0000
Subject: [PATCH] Forgejo: serve a robots.txt (#233)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Graciously borrowed from https://codeberg.org/robots.txt

I checked, and forgejo is being hit by crawlers trying dead links and whatnot.

Reviewed-on: https://git.data.coop/data.coop/ansible/pulls/233
Reviewed-by: valberg
Co-authored-by: Reynir Björnsson
Co-committed-by: Reynir Björnsson
---
 roles/docker/tasks/pre_deploy/forgejo.yml    |  12 ++
 roles/docker/templates/forgejo/robots.txt.j2 | 112 +++++++++++++++++++
 2 files changed, 124 insertions(+)
 create mode 100644 roles/docker/tasks/pre_deploy/forgejo.yml
 create mode 100644 roles/docker/templates/forgejo/robots.txt.j2

diff --git a/roles/docker/tasks/pre_deploy/forgejo.yml b/roles/docker/tasks/pre_deploy/forgejo.yml
new file mode 100644
index 0000000..5fa6a1b
--- /dev/null
+++ b/roles/docker/tasks/pre_deploy/forgejo.yml
@@ -0,0 +1,12 @@
+---
+# The robots.txt template is written into this directory, so it has to exist first.
+- name: Create subfolder
+  file:
+    name: "{{ services.forgejo.volume_folder }}/gitea/public"
+    state: directory
+
+# Render the crawler rules from templates/forgejo/robots.txt.j2 into the Forgejo volume.
+- name: Upload robots.txt for Forgejo
+  template:
+    src: forgejo/robots.txt.j2
+    dest: "{{ services.forgejo.volume_folder }}/gitea/public/robots.txt"
diff --git a/roles/docker/templates/forgejo/robots.txt.j2 b/roles/docker/templates/forgejo/robots.txt.j2
new file mode 100644
index 0000000..9c48d3f
--- /dev/null
+++ b/roles/docker/templates/forgejo/robots.txt.j2
@@ -0,0 +1,112 @@
+{# Fetched from https://codeberg.org/robots.txt on 2025-02-10 15:48 CET with minor edits #}
+User-agent: *
+Disallow: /api/*
+Disallow: /avatars
+Disallow: /user/*
+Disallow: /*/*/src/commit/*
+Disallow: /*/*/commit/*
+Disallow: /*/*/*/refs/*
+Disallow: /*/*/*/star
+Disallow: /*/*/*/watch
+Disallow: /*/*/labels
+Disallow: /*/*/activity/*
+Disallow: /vendor/*
+Disallow: /swagger.*.json
+
+Disallow: /explore/*?*
+
+Disallow: /repo/create
+Disallow: /repo/migrate
+Disallow: /org/create
+Disallow: /*/*/fork
+
+Disallow: /*/*/watchers
+Disallow: /*/*/stargazers
+Disallow: /*/*/forks
+
+Disallow: /*/*/activity
+Disallow: /*/*/projects
+Disallow: /*/*/commits/
+Disallow: /*/*/branches
+Disallow: /*/*/tags
+Disallow: /*/*/compare
+Disallow: /*/*/lastcommit/*
+
+Disallow: /*/*/issues/new
+Disallow: /*/*/issues/?*
+Disallow: /*/*/issues?*
+Disallow: /*/*/pulls/?*
+Disallow: /*/*/pulls?*
+Disallow: /*/*/pulls/*/files
+
+Disallow: /*/tree/
+Disallow: /*/download
+Disallow: /*/revisions
+Disallow: /*/commits/*?author
+Disallow: /*/commits/*?path
+Disallow: /*/comments
+Disallow: /*/blame/
+Disallow: /*/raw/
+Disallow: /*/cache/
+Disallow: /.git/
+Disallow: */.git/
+Disallow: /*.git
+Disallow: /*.atom
+Disallow: /*.rss
+
+Disallow: /*/*/archive/
+Disallow: *.bundle
+Disallow: */commit/*.patch
+Disallow: */commit/*.diff
+
+Disallow: /*lang=*
+Disallow: /*source=*
+Disallow: /*ref_cta=*
+Disallow: /*plan=*
+Disallow: /*return_to=*
+Disallow: /*ref_loc=*
+Disallow: /*setup_organization=*
+Disallow: /*source_repo=*
+Disallow: /*ref_page=*
+Disallow: /*source=*
+Disallow: /*referrer=*
+Disallow: /*report=*
+Disallow: /*author=*
+Disallow: /*since=*
+Disallow: /*until=*
+Disallow: /*commits?author=*
+Disallow: /*tab=*
+Disallow: /*q=*
+Disallow: /*repo-search-archived=*
+
+Crawl-delay: 2
+
+User-agent: Amazonbot
+User-agent: anthropic-ai
+User-agent: Applebot-Extended
+User-agent: Bytespider
+User-agent: CCBot
+User-agent: ChatGPT-User
+User-agent: ClaudeBot
+User-agent: Claude-Web
+User-agent: cohere-ai
+User-agent: Diffbot
+User-agent: FacebookBot
+User-agent: facebookexternalhit
+User-agent: FriendlyCrawler
+User-agent: Google-Extended
+User-agent: GPTBot
+User-agent: ICC-Crawler
+User-agent: ImagesiftBot
+User-agent: img2dataset
+User-agent: meta-externalagent
+User-agent: OAI-SearchBot
+User-agent: Omgili
+User-agent: Omgilibot
+User-agent: PerplexityBot
+User-agent: PetalBot
+User-agent: Scrapy
+User-agent: Timpibot
+User-agent: VelenPublicWebCrawler
+User-agent: YouBot
+Disallow: /