Forgejo: serve a robots.txt (#233)

Graciously borrowed from https://codeberg.org/robots.txt

I checked, and Forgejo is being hit by crawlers requesting dead links and whatnot.

Reviewed-on: https://git.data.coop/data.coop/ansible/pulls/233
Reviewed-by: valberg <valberg@orn.li>
Co-authored-by: Reynir Björnsson <reynir@reynir.dk>
Co-committed-by: Reynir Björnsson <reynir@reynir.dk>
This commit is contained in:
Reynir Björnsson 2025-02-10 18:07:27 +00:00 committed by valberg
parent 7e3da99411
commit 737467597d
2 changed files with 122 additions and 0 deletions

View file

@ -0,0 +1,10 @@
---
# Install a custom robots.txt for Forgejo.
# NOTE(review): "gitea/public" is presumably the custom public-assets folder
# inside the Forgejo data volume — confirm against the container's volume mapping.

# The destination folder must exist before the template task can write into it.
- name: Create Forgejo public folder
  file:
    path: "{{ services.forgejo.volume_folder }}/gitea/public"
    state: directory

# Render the Jinja2 template and place the result in the folder created above.
- name: Upload robots.txt for Forgejo
  template:
    src: forgejo/robots.txt.j2
    dest: "{{ services.forgejo.volume_folder }}/gitea/public/robots.txt"

View file

@ -0,0 +1,112 @@
{# Fetched from https://codeberg.org/robots.txt on 2025-02-10 15:48 CET with minor edits #}
# Default group: applies to any crawler that is not matched by a more
# specific User-agent group elsewhere in this file.
User-agent: *
Disallow: /api/*
Disallow: /avatars
Disallow: /user/*
Disallow: /*/*/src/commit/*
Disallow: /*/*/commit/*
Disallow: /*/*/*/refs/*
Disallow: /*/*/*/star
Disallow: /*/*/*/watch
Disallow: /*/*/labels
Disallow: /*/*/activity/*
Disallow: /vendor/*
Disallow: /swagger.*.json
Disallow: /explore/*?*
Disallow: /repo/create
Disallow: /repo/migrate
Disallow: /org/create
Disallow: /*/*/fork
Disallow: /*/*/watchers
Disallow: /*/*/stargazers
Disallow: /*/*/forks
Disallow: /*/*/activity
Disallow: /*/*/projects
Disallow: /*/*/commits/
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/compare
Disallow: /*/*/lastcommit/*
Disallow: /*/*/issues/new
Disallow: /*/*/issues/?*
Disallow: /*/*/issues?*
Disallow: /*/*/pulls/?*
Disallow: /*/*/pulls?*
Disallow: /*/*/pulls/*/files
Disallow: /*/tree/
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/commits/*?author
Disallow: /*/commits/*?path
Disallow: /*/comments
Disallow: /*/blame/
Disallow: /*/raw/
Disallow: /*/cache/
Disallow: /.git/
Disallow: */.git/
Disallow: /*.git
Disallow: /*.atom
Disallow: /*.rss
Disallow: /*/*/archive/
Disallow: *.bundle
Disallow: */commit/*.patch
Disallow: */commit/*.diff
Disallow: /*lang=*
Disallow: /*source=*
Disallow: /*ref_cta=*
Disallow: /*plan=*
Disallow: /*return_to=*
Disallow: /*ref_loc=*
Disallow: /*setup_organization=*
Disallow: /*source_repo=*
Disallow: /*ref_page=*
Disallow: /*referrer=*
Disallow: /*report=*
Disallow: /*author=*
Disallow: /*since=*
Disallow: /*until=*
Disallow: /*commits?author=*
Disallow: /*tab=*
Disallow: /*q=*
Disallow: /*repo-search-archived=*
# Crawl-delay is not part of RFC 9309; only some crawlers honor it.
Crawl-delay: 2
# Named crawlers: every User-agent listed below shares the single rule at the
# end of this group, which denies access to the entire site.
User-agent: Amazonbot
User-agent: anthropic-ai
User-agent: Applebot-Extended
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: cohere-ai
User-agent: Diffbot
User-agent: FacebookBot
User-agent: facebookexternalhit
User-agent: FriendlyCrawler
User-agent: Google-Extended
User-agent: GPTBot
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: meta-externalagent
User-agent: OAI-SearchBot
User-agent: Omgili
User-agent: Omgilibot
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: Scrapy
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: YouBot
Disallow: /