-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrobots.txt
More file actions
137 lines (100 loc) · 1.84 KB
/
Copy pathrobots.txt
File metadata and controls
137 lines (100 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# robots.txt for the Solar PV & Energy Storage Map site (光伏储能地图站)
# https://solar.miyucaicai.cn
# Last Updated: 2026-06-29
#
# How this file is structured:
#   A crawler obeys ONLY the one group whose User-agent line matches it most
#   specifically; it falls back to the "*" group only when no named group
#   matches. Rules are NOT inherited between groups. A named crawler that is
#   given a group containing just "Allow: /" is therefore exempt from every
#   Disallow written under "*". To keep the sensitive paths protected for
#   everyone, each group below carries the complete rule set, and crawlers
#   that share a policy are listed together in one group.

# Sitemap (group-independent; applies to all crawlers)
Sitemap: https://solar.miyucaicai.cn/sitemap.xml

# ---------------------------------------------------------------------------
# Group 1: default policy, plus search, social, SEO and AI crawlers that are
# explicitly welcome. Listing them by name documents the intent; they get
# exactly the same rules as "*".
# ---------------------------------------------------------------------------
User-agent: *
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: Googlebot
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: Applebot
User-agent: AnthropicAI
User-agent: OAI-SearchBot
User-agent: Bytespider
User-agent: Diffbot
User-agent: FacebookBot
User-agent: LinkedInBot
User-agent: Pinterestbot
User-agent: Slackbot
User-agent: TelegramBot
User-agent: TwitterBot
User-agent: YouBot
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: Majestic-12
User-agent: Sitebulb
# Major AI services that are allowed to crawl for search
User-agent: CCBot
User-agent: cohere-ai
User-agent: Meta-ExternalAgent
User-agent: Amazonbot
Allow: /
# Allow province pages for better indexing
Allow: /province/
Allow: /data/
Allow: /components/
# Disallow sensitive paths
Disallow: /admin/
Disallow: /api/
Disallow: /private/
Disallow: /config/
Disallow: /scripts/
# Disallow assets that are not needed to render or index pages.
# Patterns start with "/" as the protocol requires; they are left unanchored
# (no trailing "$") so cache-busted URLs such as font.woff?v=2 still match.
Disallow: /*.js.map
Disallow: /*.woff
Disallow: /*.woff2
Disallow: /*.ttf
Disallow: /*.eot
# NOTE(review): earlier revisions also listed *.css, *.png, *.jpg, *.gif,
# *.svg and *.ico as Disallow rules. They are intentionally not active:
# blocking stylesheets and images prevents search engines from rendering
# pages and removes images from image search. Re-add them here (as
# "Disallow: /*.css" etc.) only if that trade-off is really wanted.
# NOTE(review): per-file-type Allow rules (*.html, *.json, *.xml) were
# removed. "Allow: /" already covers them, and under longest-match
# precedence a rule like "Allow: /*.json" would override "Disallow: /api/"
# and expose /api/*.json.
# Crawl-delay is a non-standard extension: some crawlers honor it, others
# (including Googlebot) ignore it.
Crawl-delay: 1

# ---------------------------------------------------------------------------
# Group 2: AI crawlers that are explicitly allowed but rate-limited.
# Same access rules as Group 1, with a longer crawl delay. Crawl-delay takes
# a single number and applies to the agents of the group it appears in.
# ---------------------------------------------------------------------------
User-agent: GPTBot
User-agent: ClaudeBot
User-agent: PerplexityBot
User-agent: Google-Extended
Allow: /
Allow: /province/
Allow: /data/
Allow: /components/
Disallow: /admin/
Disallow: /api/
Disallow: /private/
Disallow: /config/
Disallow: /scripts/
Disallow: /*.js.map
Disallow: /*.woff
Disallow: /*.woff2
Disallow: /*.ttf
Disallow: /*.eot
Crawl-delay: 10