# $Id: robots.txt,v 1.9.2.1 2008/12/10 20:12:19 goba Exp $
#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used:    http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/wc/robots.html
#
# For syntax checking, see:
# http://www.sxw.org.uk/computing/robots/check.html
#
# Format notes for maintainers:
#   - One directive per line; a "#" comments out the rest of its line.
#   - Do not put blank lines inside a User-agent group; some parsers
#     treat a blank line as the end of the group.
#   - Every Disallow path must begin with "/".
#
# Sitemap submission (ping) URLs kept for reference:
# http://www.privacydigest.com/gsitemap
# /ping?sitemap=http%3A%2F%2Fwww.privacydigest.com%2Fgsitemap
#
# http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=http%3A%2F%2Fwww.privacydigest.com%2Fgsitemap
# http://search.live.com/ping?sitemap=http%3A%2F%2Fwww.privacydigest.com%2Fgsitemap   MSN NG - Error msg
# http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=http%3A%2F%2Fwww.privacydigest.com%2Fsitemap.xml
# http://search.live.com/ping?sitemap=http%3A%2F%2Fwww.privacydigest.com%2Fsitemap.xml   MSN NG - Error msg
#
# Old - Sitemap: http://www.privacydigest.com/gsitemap PPH 2007-0523
Sitemap: http://www.privacydigest.com/sitemap.xml

# Rules for every crawler that has no more specific group below.
User-agent: *
# Crawl-delay is a non-standard extension; only some crawlers honor it.
Crawl-delay: 10
# Directories
# added my local files directory PPH 2007-0523
Disallow: /PDfilesPD30/
# added my local upgrade directory PPH 2009-0706
Disallow: /Drupal519upgrade/
Disallow: /d610/
Disallow: /old-Drupal55/
Disallow: /Drupal613/
Disallow: /drupal-5.1/
Disallow: /drupal-5.5/
Disallow: /database/
# Original below 2009-0714 PPH
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /profiles/
Disallow: /scripts/
Disallow: /sites/
Disallow: /themes/
# Files
Disallow: /CHANGELOG.txt
Disallow: /cron.php
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /install.php
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
Disallow: /MAINTAINERS.txt
Disallow: /update.php
Disallow: /UPGRADE.txt
Disallow: /xmlrpc.php
# Paths (clean URLs)
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /contact/
Disallow: /logout/
# Since I have PathAuto drop original NODE/ URLs PPH 2009-0714
# Disallow: /node/     Need for Calais' RDF file PPH 2009-0715
Disallow: /node/add/
Disallow: /search/
Disallow: /user/register/
Disallow: /user/password/
Disallow: /user/login/
# Paths (no clean URLs)
Disallow: /?q=admin/
Disallow: /?q=comment/reply/
Disallow: /?q=contact/
Disallow: /?q=logout/
# Since I have PathAuto drop original NODE/ URLs PPH 2009-0714
# Disallow: /?q=node/     Need for Calais' RDF file PPH 2009-0715
Disallow: /?q=node/add/
Disallow: /?q=search/
Disallow: /?q=user/password/
Disallow: /?q=user/register/
Disallow: /?q=user/login/
#
# robots.txt for http://www.PrivacyDigest.com/
# The part below is from my old site, above is from Drupal6x install. PPH
# User-agent: *     Already set by the code copied above from Drupal PPH
# Leading "/" added to the next two rules: a Disallow path that does not
# start with "/" matches no URL, so the rule would have no effect.
Disallow: /.htaccess
Disallow: /.pair
Disallow: /beta/
Disallow: /data/
Disallow: /debug/
Disallow: /cgi-bin/webinator/
Disallow: /cgi-bin/texis/
Disallow: /cgi-bin/MT/
Disallow: /cgi-sys/MT/
Disallow: /images/
Disallow: /junk/
Disallow: /lists/
# blocked as part of the Drupal security fight PPH
Disallow: /masquerade/
Disallow: /private/
Disallow: /static/
Disallow: /test/
Disallow: /temp/
# blocked as part of the trackback spammer fight PPH
Disallow: /trackback/
Disallow: /Tools/
Disallow: /webinator/
Disallow: /WWW_REPORTS/

# Crawlers that are blocked from the entire site.
User-agent: WebZip
Disallow: /

User-agent: EmailCollector
Disallow: /

User-agent: EmailSiphon
Disallow: /

User-agent: WebBandit
Disallow: /

User-agent: EmailWolf
Disallow: /

User-agent: ExtractorPro
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: Harvest/1.5
Disallow: /

# try allowing altavista and teoma for a while 2007-0522 PPH
# I've been getting these database floods and AWS extended transactions 2007-0712 PPH

# AltaVista web page search
User-agent: scooter
Disallow: /store

# Ask Jeeves/Teoma web page search
User-agent: Teoma-agent
Disallow: /

# Ask Jeeves/Teoma web page search
User-agent: Ask Jeeves
Disallow: /

# Ask Jeeves/Teoma web page search
User-agent: Ask Jeeves Teoma
Disallow: /

# Ask Jeeves/Teoma web page search
User-agent: Ask Jeeves/Teoma
Disallow: /