## http://www.robotstxt.org/wc/norobots.html
#User-agent: ultraseek
#Disallow:
#User-agent: recseek
#Disallow:
## Crawl-delay: 300
## These bots are harvester/collector bots and are
## used to siphon email addresses from websites:
# [DCS - 11/28/06] added urllib because substantial activity from multiple
# sources coincides with increased comments-form spamming.
# One shared record: every agent named here is denied the whole site.
User-agent: Python-urllib/2.4
User-agent: CherryPickerElite/1.0
User-agent: CherryPickerSE/1.0
User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0
User-agent: EmailCollector/1.0
User-agent: EmailSiphon
User-agent: EmailWolf 1.00
User-agent: ExtractorPro
User-agent: Mozilla/2.0 (compatible; NEWT ActiveX; Win32)
User-agent: WebBandit/2.1
User-agent: WebBandit/3.50
User-agent: Webbandit/4.00.0
Disallow: /
#User-agent:
#Disallow: /
## The following bots are ill-behaved and tend to index our site during
## working hours - often with detrimental effects:
#User-agent: ia_archiver
#Disallow: /
#User-agent: mozilla/5.0 (compatible; archive.org_bot/1.0.5-200410121100 http://www.archives.gov/crawl.html)
#Disallow: /
#User-agent: wotbox/0.7-alpha
#Disallow: /
#User-agent: wotbox/0.7
#Disallow: /
#User-agent: archive.org_bot
#Disallow: /
# Turned off the disallow in an attempt to update Google indexes - 12/14/04 DMF.
# NOTE(review): the record below is still active and denies the whole site,
# which contradicts the note above - confirm which of the two is intended.
User-agent: Googlebot/2.1 ( http://www.google.com/bot.html)
Disallow: /
## Googlebot appears to ignore robots.txt
#User-agent: msnbot/0.3
#Disallow: /
#User-agent: athenusbot
#Disallow: /
#User-agent: voilabot
#Disallow: /
#User-agent: npbot
#Disallow: /
#User-agent: psbot/0.1
#Disallow: /
#User-agent: linkbot
#Disallow: /
#User-agent: zealbot
#Disallow: /
#User-agent: BBCi Searchbot - search@bbc.co.uk http://www.bbc.co.uk/
#Disallow: /
#User-agent: bbci
#Disallow: /
#User-agent: Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (wn.dlc@looksmart.net; http://www.WISEnutbot.com)
#Disallow: /
#User-agent: 1Q1.ORG Spider 0.1 beta
#Disallow: /
#User-agent: Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
#Disallow: /
#User-agent: Motoricerca-Robots.txt-Checker/1.0
#Disallow: /
#User-agent: Ask Jeeves/Teoma
#Disallow: /
#User-agent: Openbot/3.0
#Disallow: /
#User-agent: NaverBot-1.0
#Disallow: /
#User-agent: MSIECrawler
#Disallow: /
#User-agent: Vagabondo/2.0
#Disallow: /
#User-agent: Jetbot/1.0
#Disallow: /
#User-agent: FAST-WebCrawler/3.8/Scirus
#Disallow: /
#User-agent: NutchCVS/0.05
#Disallow: /
#User-agent: appie 1.1
#Disallow: /
#User-agent: WebFilter Robot 1.0
#Disallow: /
## Deny the whole site to agents identifying themselves as ColdFusion.
User-agent: ColdFusion
Disallow: /
#User-agent: Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)
#Disallow: /
#User-agent: CCGCrawl/1.16 (CCGCrawl; http://www.myworkbase.com/bot.html; bot at myworkbase dot com)
#Disallow: /
#User-agent: CCGCrawl/1.16
#Disallow: /
#User-agent: FAST-WebCrawler/3.8/Scirus (scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/)
#Disallow: /
#User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT+5.1; SV1; NetCaptor 7.5.2)
#Disallow: /
## ExaBOT appears to ignore the robots.txt
#User-agent: Exabot NG/MimeLive Client (convert/http/0.169)
#Disallow: /
#User-agent: Exabot NG
#Disallow: /
## is NG the Nagios bot?
## User-agent: NG/2.0
## Disallow: /
#User-agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; Girafabot; girafabot at girafa dot com; http://www.girafa.com)
#Disallow: /
#User-agent: NutchCVS/0.06-dev (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@lists.sourceforge.net)
#Disallow: /
#User-agent: psbot/0.1 ( http://www.picsearch.com/bot.html)
#Disallow: /
#User-agent: IlTrovatore-Setaccio/1.2 (It-bot; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)
#Disallow: /
## Seen immediately prior to web server crash.
#User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; MSN 9.0;MSN 9.1; MSNbQ001; MSNmen-us; MSNcOTH; MPLUS)
#Disallow: /
#User-agent: Mozilla/4.0 (compatible; ChangeDetection/1.0 (admin@ChangeDetection.com))
#Disallow: /
## appears to not obey crawl-delay - multiple frequent concurrent hits
#User-agent: Openfind data gatherer, Openbot/3.0 (robot-response@openfind.com.tw; http://www.openfind.com.tw/robot.html)
#Disallow: /
## rumoured to be an experimental Micro$oft bot
## site admins have reported random crashes related to it
#User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)
#Disallow: /
## TurnItIn is an education spider which pulls data from various
## sources to identify plagiarism in student papers.
## It may crawl the whole site but is asked to pause between requests.
## The empty Disallow closes this record explicitly: a parser that skips
## Crawl-delay would otherwise see no rule here and fold this User-agent
## into the next record that does have rules.
User-agent: TurnitinBot/2.0 http://www.turnitin.com/robot/crawlerinfo.html
Disallow:
Crawl-delay: 30
## Below here are the good bots (like OURS) and
## default behaviour for unknown bots & spiders.
## Each record carries an explicit (empty) Disallow so that a parser which
## skips Crawl-delay still sees where the record ends and does not fold the
## User-agent into the record that follows.
User-agent: UltraSeek
Disallow:
Crawl-delay: 45

User-agent: Ocelli
Disallow:
Crawl-delay: 3000

## Default record for every other agent. It is defined exactly once and kept
## last: when "User-agent: *" appears twice, parsers that honour only the
## first occurrence never apply the rules of the second. This single record
## replaces two conflicting ones (Crawl-delay 45 with no rules, and
## Disallow: /webaccess with Crawl-delay 300); the stricter delay is kept.
User-agent: *
Disallow: /webaccess
Crawl-delay: 300