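The fix seems to be to double most of the backslashes in robot_detection.py, so that regex escapes such as `\-`, `\.` and `\/` are no longer written as invalid Python string escape sequences: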
Index: robot-detection-0.4.0/robot_detection.py =================================================================== --- robot-detection-0.4.0.orig/robot_detection.py +++ robot-detection-0.4.0/robot_detection.py @@ -10,7 +10,7 @@ robot_useragents = [ 'contentmatch', 'ferret', 'googlebot', - 'google\-sitemaps', + 'google\\-sitemaps', 'gulliver', 'virus[_+ ]detector', # Must be before harvest 'harvest', @@ -24,23 +24,23 @@ robot_useragents = [ 'nomad', 'scooter', 'slurp', - '^voyager\/', + '^voyager\\/', 'weblayers', # Common robots (Not in robot file) 'antibot', 'bruinbot', 'digout4u', 'echo!', - 'fast\-webcrawler', - 'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa + 'fast\\-webcrawler', + 'ia_archiver\\-web\\.archive\\.org', # Must be before ia_archiver to avoid confusion with alexa 'ia_archiver', 'jennybot', 'mercator', 'netcraft', - 'msnbot\-media', + 'msnbot\\-media', 'msnbot', 'petersnews', - 'relevantnoise\.com', + 'relevantnoise\\.com', 'unlost_web_crawler', 'voila', 'webbase', @@ -52,7 +52,7 @@ robot_useragents = [ # Less common robots (In robot file) '[^a]fish', 'abcdatos', - 'acme\.spider', + 'acme\\.spider', 'ahoythehomepagefinder', 'alkaline', 'anthill', @@ -64,7 +64,7 @@ robot_useragents = [ 'powermarks', 'arks', 'aspider', - 'atn\.txt', + 'atn\\.txt', 'atomz', 'auresys', 'backrub', @@ -73,7 +73,7 @@ robot_useragents = [ 'blackwidow', 'blindekuh', 'bloodhound', - 'borg\-bot', + 'borg\\-bot', 'brightnet', 'bspider', 'cactvschemistryspider', @@ -103,7 +103,7 @@ robot_useragents = [ 'download_express', 'dragonbot', 'dwcp', - 'e\-collector', + 'e\\-collector', 'ebiness', 'elfinbot', 'emacs', @@ -168,7 +168,7 @@ robot_useragents = [ 'kilroy', 'ko[_+ ]yappo[_+ ]robot', 'kummhttp', - 'labelgrabber\.txt', + 'labelgrabber\\.txt', 'larbin', 'legs', 'linkidator', @@ -190,11 +190,11 @@ robot_useragents = [ 'muncher', 'mwdsearch', 'ndspider', - 'nederland\.zoek', + 'nederland\\.zoek', 'netcarta', 'netmechanic', 'netscoop', 
- 'newscan\-online', + 'newscan\\-online', 'nhse', 'northstar', 'nzexplorer', @@ -235,7 +235,7 @@ robot_useragents = [ 'roverbot', 'rules', 'safetynetrobot', - 'search\-info', + 'search\\-info', 'search_au', 'searchprocess', 'senrigan', @@ -244,7 +244,7 @@ robot_useragents = [ 'shaihulud', 'sift', 'simbot', - 'site\-valet', + 'site\\-valet', 'sitetech', 'skymob', 'slcrawler', @@ -310,7 +310,7 @@ robot_useragents = [ 'webwatch', 'whatuseek', 'whowhere', - 'wired\-digital', + 'wired\\-digital', 'wmir', 'wolp', 'wombat', @@ -321,8 +321,8 @@ robot_useragents = [ 'wz101', 'xget', # Other robots reported by users - '1\-more_scanner', - 'accoona\-ai\-agent', + '1\\-more_scanner', + 'accoona\\-ai\\-agent', 'activebookmark', 'adamm_bot', 'almaden', @@ -331,9 +331,9 @@ robot_useragents = [ 'alpha_search_agent', 'allrati', 'aport', - 'archive\.org_bot', + 'archive\\.org_bot', 'argus', # Must be before nutch - 'arianna\.libero\.it', + 'arianna\\.libero\\.it', 'aspseek', 'asterias', 'awbot', @@ -351,10 +351,10 @@ robot_useragents = [ 'blogshares', 'blogslive', 'blogssay', - 'bncf\.firenze\.sbn\.it\/raccolta\.txt', + 'bncf\\.firenze\\.sbn\\.it\\/raccolta\\.txt', 'bobby', - 'boitho\.com\-dc', - 'bookmark\-manager', + 'boitho\\.com\\-dc', + 'bookmark\\-manager', 'boris', 'bumblebee', 'candlelight[_+ ]favorites[_+ ]inspector', @@ -363,7 +363,7 @@ robot_useragents = [ 'cfnetwork', 'cipinetbot', 'checkweb_link_validator', - 'commons\-httpclient', + 'commons\\-httpclient', 'computer_and_automation_research_institute_crawler', 'converamultimediacrawler', 'converacrawler', @@ -372,39 +372,39 @@ robot_useragents = [ 'cuasarbot', 'cursor', 'custo', - 'datafountains\/dmoz_downloader', + 'datafountains\\/dmoz_downloader', 'daviesbot', 'daypopbot', 'deepindex', - 'dipsie\.bot', + 'dipsie\\.bot', 'dnsgroup', 'domainchecker', - 'domainsdb\.net', + 'domainsdb\\.net', 'dulance', 'dumbot', - 'dumm\.de\-bot', - 'earthcom\.info', + 'dumm\\.de\\-bot', + 'earthcom\\.info', 'easydl', - 
'edgeio\-retriever', + 'edgeio\\-retriever', 'ets_v', 'exactseek', 'extreme[_+ ]picture[_+ ]finder', 'eventax', 'everbeecrawler', - 'everest\-vulcan', + 'everest\\-vulcan', 'ezresult', 'enteprise', 'facebook', - 'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de', - 'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de', - 'matrix_s\.p\.a\._\-_fast_enterprise_crawler', # must come before fast enterprise crawler + 'fast_enterprise_crawler.*crawleradmin\\.t\\-info@telekom\\.de', + 'fast_enterprise_crawler.*t\\-info_bi_cluster_crawleradmin\\.t\\-info@telekom\\.de', + 'matrix_s\\.p\\.a\\._\\-_fast_enterprise_crawler', # must come before fast enterprise crawler 'fast_enterprise_crawler', - 'fast\-search\-engine', + 'fast\\-search\\-engine', 'favicon', 'favorg', 'favorites_sweeper', 'feedburner', - 'feedfetcher\-google', + 'feedfetcher\\-google', 'feedflow', 'feedster', 'feedsky', @@ -412,7 +412,7 @@ robot_useragents = [ 'filmkamerabot', 'findlinks', 'findexa_crawler', - 'fooky\.com\/ScorpionBot', + 'fooky\\.com\\/ScorpionBot', 'g2crawler', 'gaisbot', 'geniebot', @@ -420,7 +420,7 @@ robot_useragents = [ 'girafabot', 'global_fetch', 'gnodspider', - 'goforit\.com', + 'goforit\\.com', 'goforitbot', 'gonzo', 'grub', @@ -433,9 +433,9 @@ robot_useragents = [ 'htmlparser', 'html[_+ ]link[_+ ]validator', 'httrack', - 'hundesuche\.com\-bot', + 'hundesuche\\.com\\-bot', 'ichiro', - 'iltrovatore\-setaccio', + 'iltrovatore\\-setaccio', 'infobot', 'infociousbot', 'infomine', @@ -450,37 +450,37 @@ robot_useragents = [ 'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility', 'justview', 'kalambot', - 'kamano\.de_newsfeedverzeichnis', + 'kamano\\.de_newsfeedverzeichnis', 'kazoombot', 'kevin', 'keyoshid', # Must come before Y!J 'kinjabot', - 'kinja\-imagebot', + 'kinja\\-imagebot', 'knowitall', - 'knowledge\.com', + 'knowledge\\.com', 'kouaa_krawler', 'krugle', 'ksibot', 'kurzor', 'lanshanbot', - 'letscrawl\.com', + 'letscrawl\\.com', 'libcrawl', 
'linkbot', 'link_valet_online', - 'metager\-linkchecker', # Must be before linkchecker + 'metager\\-linkchecker', # Must be before linkchecker 'linkchecker', - 'livejournal\.com', + 'livejournal\\.com', 'lmspider', - 'lwp\-request', - 'lwp\-trivial', + 'lwp\\-request', + 'lwp\\-trivial', 'magpierss', - 'mail\.ru', - 'mapoftheinternet\.com', - 'mediapartners\-google', + 'mail\\.ru', + 'mapoftheinternet\\.com', + 'mediapartners\\-google', 'megite', 'metaspinner', 'microsoft[_+ ]url[_+ ]control', - 'mini\-reptile', + 'mini\\-reptile', 'minirank', 'missigua_locator', 'misterbot', @@ -489,7 +489,7 @@ robot_useragents = [ 'mj12bot', 'mojeekbot', 'msiecrawler', - 'ms_search_4\.0_robot', + 'ms_search_4\\.0_robot', 'msrabot', 'msrbot', 'mt::telegraph::agent', @@ -504,12 +504,12 @@ robot_useragents = [ 'noxtrumbot', 'npbot', 'nutchcvs', - 'nutchosu\-vlib', + 'nutchosu\\-vlib', 'nutch', # Must come after other nutch versions 'ocelli', 'octora_beta_bot', 'omniexplorer[_+ ]bot', - 'onet\.pl[_+ ]sa', + 'onet\\.pl[_+ ]sa', 'onfolio', 'opentaggerbot', 'openwebspider', @@ -517,13 +517,13 @@ robot_useragents = [ 'orbiter', 'yodaobot', 'qihoobot', - 'passwordmaker\.org', + 'passwordmaker\\.org', 'pear_http_request_class', 'peerbot', 'perman', 'php[_+ ]version[_+ ]tracker', 'pictureofinternet', - 'ping\.blo\.gs', + 'ping\\.blo\\.gs', 'plinki', 'pluckfeedcrawler', 'pogodak', @@ -531,7 +531,7 @@ robot_useragents = [ 'popdexter', 'port_huron_labs', 'postfavorites', - 'projectwf\-java\-test\-crawler', + 'projectwf\\-java\\-test\\-crawler', 'proodlebot', 'pyquery', 'rambler', @@ -548,11 +548,11 @@ robot_useragents = [ 'seekbot', 'sensis_web_crawler', 'seznambot', - 'shim\-crawler', + 'shim\\-crawler', 'shoutcast', 'slysearch', - 'snap\.com_beta_crawler', - 'sohu\-search', + 'snap\\.com_beta_crawler', + 'sohu\\-search', 'sohu', # "sohu agent" 'snappy', 'sphere_scout', @@ -560,7 +560,7 @@ robot_useragents = [ 'sproose_crawler', 'steeler', 'steroid__download', - 'suchfin\-bot', + 
'suchfin\\-bot', 'superbot', 'surveybot', 'susie', @@ -572,7 +572,7 @@ robot_useragents = [ 'teragramcrawlersurf', 'test_crawler', 'testbot', - 't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e', + 't\\-h\\-u\\-n\\-d\\-e\\-r\\-s\\-t\\-o\\-n\\-e', 'topicblogs', 'turnitinbot', 'turtlescanner', # Must be before turtle @@ -582,24 +582,24 @@ robot_useragents = [ 'ubicrawler', 'ultraseek', 'unchaos_bot_hybrid_web_search_engine', - 'unido\-bot', + 'unido\\-bot', 'updated', - 'ustc\-semantic\-group', - 'vagabondo\-wap', + 'ustc\\-semantic\\-group', + 'vagabondo\\-wap', 'vagabondo', 'vermut', - 'versus_crawler_from_eda\.baykan@epfl\.ch', + 'versus_crawler_from_eda\\.baykan@epfl\\.ch', 'vespa_crawler', 'vortex', - 'vse\/', - 'w3c\-checklink', + 'vse\\/', + 'w3c\\-checklink', 'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa', 'w3c_validator', 'watchmouse', 'wavefire', - 'webclipping\.com', + 'webclipping\\.com', 'webcompass', - 'webcrawl\.net', + 'webcrawl\\.net', 'web_downloader', 'webdup', 'webfilter', @@ -616,29 +616,29 @@ robot_useragents = [ 'xirq', 'y!j', # Must come after keyoshid Y!J 'yacy', - 'yahoo\-blogs', - 'yahoo\-verticalcrawler', + 'yahoo\\-blogs', + 'yahoo\\-verticalcrawler', 'yahoofeedseeker', - 'yahooseeker\-testing', + 'yahooseeker\\-testing', 'yahooseeker', - 'yahoo\-mmcrawler', + 'yahoo\\-mmcrawler', 'yahoo!_mindset', 'yandex', 'flexum', 'yanga', 'yooglifetchagent', - 'z\-add_link_checker', + 'z\\-add_link_checker', 'zealbot', 'zhuaxia', 'zspider', 'zeus', - 'ng\/1\.', # put at end to avoid false positive - 'ng\/2\.', # put at end to avoid false positive + 'ng\\/1\\.', # put at end to avoid false positive + 'ng\\/2\\.', # put at end to avoid false positive 'exabot', # put at end to avoid false positive # Other id that are 99% of robots 'wget', 'libwww', - 'java\/[0-9]' # put at end to avoid false positive + 'java\\/[0-9]' # put at end to avoid false positive # Generic robot 'robot', @@ -649,8 +649,8 @@ robot_useragents = [ 'scanner', 'spider', 'sucker', - 
'bot[\s_+:,\.\;\/\\\-]', - '[\s_+:,\.\;\/\\\-]bot', + 'bot[\\s_+:,\\.\\;\\/\\\\\\-]', + '[\\s_+:,\\.\\;\\/\\\\\\-]bot', 'no_user_agent', # manually added
-- Dr Peter Chubb https://trustworthy.systems/ Trustworthy Systems Group CSE, UNSW Core hours: Mon 8am-3pm; Wed: 8am-5pm; Fri 8am-12pm.