Forgejo: serve a robots.txt (#233)
Graciously borrowed from https://codeberg.org/robots.txt. I checked, and Forgejo is being hit by crawlers trying dead links and whatnot. Reviewed-on: https://git.data.coop/data.coop/ansible/pulls/233 Reviewed-by: valberg <valberg@orn.li> Co-authored-by: Reynir Björnsson <reynir@reynir.dk> Co-committed-by: Reynir Björnsson <reynir@reynir.dk>
This commit is contained in:
parent
7e3da99411
commit
737467597d
2 changed files with 122 additions and 0 deletions
10
roles/docker/tasks/pre_deploy/forgejo.yml
Normal file
10
roles/docker/tasks/pre_deploy/forgejo.yml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
---
# Pre-deploy tasks for Forgejo: place a custom robots.txt under
# "{{ services.forgejo.volume_folder }}/gitea/public".

# The template task below does not create parent directories, so make sure
# the destination folder exists first.
- name: Create subfolder
  file:
    path: "{{ services.forgejo.volume_folder }}/gitea/public"
    state: directory

# Render templates/forgejo/robots.txt.j2 into the folder created above.
- name: Upload robots.txt for Forgejo
  template:
    src: forgejo/robots.txt.j2
    dest: "{{ services.forgejo.volume_folder }}/gitea/public/robots.txt"
|
112
roles/docker/templates/forgejo/robots.txt.j2
Normal file
112
roles/docker/templates/forgejo/robots.txt.j2
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
{# Fetched from https://codeberg.org/robots.txt on 2025-02-10 15:48 CET with minor edits #}
{# Local edits: every Disallow pattern starts with "/" (as RFC 9309 requires) and a duplicated "/*source=*" rule was dropped. #}
# Rules for every crawler that is not matched by the more specific group further down.
User-agent: *
Disallow: /api/*
Disallow: /avatars
Disallow: /user/*
Disallow: /*/*/src/commit/*
Disallow: /*/*/commit/*
Disallow: /*/*/*/refs/*
Disallow: /*/*/*/star
Disallow: /*/*/*/watch
Disallow: /*/*/labels
Disallow: /*/*/activity/*
Disallow: /vendor/*
Disallow: /swagger.*.json

Disallow: /explore/*?*

Disallow: /repo/create
Disallow: /repo/migrate
Disallow: /org/create
Disallow: /*/*/fork

Disallow: /*/*/watchers
Disallow: /*/*/stargazers
Disallow: /*/*/forks

Disallow: /*/*/activity
Disallow: /*/*/projects
Disallow: /*/*/commits/
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/compare
Disallow: /*/*/lastcommit/*

Disallow: /*/*/issues/new
Disallow: /*/*/issues/?*
Disallow: /*/*/issues?*
Disallow: /*/*/pulls/?*
Disallow: /*/*/pulls?*
Disallow: /*/*/pulls/*/files

Disallow: /*/tree/
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/commits/*?author
Disallow: /*/commits/*?path
Disallow: /*/comments
Disallow: /*/blame/
Disallow: /*/raw/
Disallow: /*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git
Disallow: /*.atom
Disallow: /*.rss

Disallow: /*/*/archive/
Disallow: /*.bundle
Disallow: /*/commit/*.patch
Disallow: /*/commit/*.diff

Disallow: /*lang=*
Disallow: /*source=*
Disallow: /*ref_cta=*
Disallow: /*plan=*
Disallow: /*return_to=*
Disallow: /*ref_loc=*
Disallow: /*setup_organization=*
Disallow: /*source_repo=*
Disallow: /*ref_page=*
Disallow: /*referrer=*
Disallow: /*report=*
Disallow: /*author=*
Disallow: /*since=*
Disallow: /*until=*
Disallow: /*commits?author=*
Disallow: /*tab=*
Disallow: /*q=*
Disallow: /*repo-search-archived=*

Crawl-delay: 2

# The user agents listed below are denied the entire site.
User-agent: Amazonbot
User-agent: anthropic-ai
User-agent: Applebot-Extended
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: cohere-ai
User-agent: Diffbot
User-agent: FacebookBot
User-agent: facebookexternalhit
User-agent: FriendlyCrawler
User-agent: Google-Extended
User-agent: GPTBot
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: meta-externalagent
User-agent: OAI-SearchBot
User-agent: Omgili
User-agent: Omgilibot
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: Scrapy
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: YouBot
Disallow: /
|
Loading…
Add table
Reference in a new issue