##########################
# Default Access Group
# (NOTE: blank lines are not allowed inside a group record;
#  keep the User-agent line and its Disallow lines contiguous)
##########################
User-agent: *
Disallow: /Templates
Disallow: /russian/general/wruss/akcii.htm
Disallow: /belarusian/general/wruss/akcii.htm

##############################
# Section for misbehaving bots
# The following directives to block specific robots were borrowed from Wikipedia's robots.txt
#
# Every group below is commented out (disabled). To enable one, remove the
# leading '#' from BOTH its User-agent line and its Disallow line.
##############################

# advertising-related bots:
#User-agent: Mediapartners-Google*
#Disallow: /

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
#User-agent: UbiCrawler
#Disallow: /

#User-agent: DOC
#Disallow: /

#User-agent: Zao
#Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
#User-agent: sitecheck.internetseer.com
#Disallow: /

#User-agent: Zealbot
#Disallow: /

#User-agent: MSIECrawler
#Disallow: /

#User-agent: SiteSnagger
#Disallow: /

#User-agent: WebStripper
#Disallow: /

#User-agent: WebCopier
#Disallow: /

#User-agent: Fetch
#Disallow: /

#User-agent: Offline Explorer
#Disallow: /

#User-agent: Teleport
#Disallow: /

#User-agent: TeleportPro
#Disallow: /

#User-agent: WebZIP
#Disallow: /

#User-agent: linko
#Disallow: /

#User-agent: HTTrack
#Disallow: /

#User-agent: Microsoft.URL.Control
#Disallow: /

#User-agent: Xenu
#Disallow: /

#User-agent: larbin
#Disallow: /

#User-agent: libwww
#Disallow: /

#User-agent: ZyBORG
#Disallow: /

#User-agent: Download Ninja
#Disallow: /

# Misbehaving: requests much too fast:
#User-agent: fast
#Disallow: /

#
# If your DSpace is going down because of someone using recursive wget,
# you can activate the following rule.
#
# If your own faculty is bringing down your dspace with recursive wget,
# you can advise them to use the --wait option to set the delay between hits.
#
#User-agent: wget
#Disallow: /

#
# The 'grub' distributed client has been *very* poorly behaved.
#
#User-agent: grub-client
#Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
#User-agent: k2spider
#Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
#User-agent: NPBot
#Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
# (This group is active: WebReaper is denied the entire site.)
User-agent: WebReaper
Disallow: /