The fix seems to be to double most of the backslashes in robot_detection.py

Index: robot-detection-0.4.0/robot_detection.py
===================================================================
--- robot-detection-0.4.0.orig/robot_detection.py
+++ robot-detection-0.4.0/robot_detection.py
@@ -10,7 +10,7 @@ robot_useragents = [
         'contentmatch',
         'ferret',
         'googlebot',
-        'google\-sitemaps',
+        'google\\-sitemaps',
         'gulliver',
         'virus[_+ ]detector',          # Must be before harvest
         'harvest',
@@ -24,23 +24,23 @@ robot_useragents = [
         'nomad',
         'scooter',
         'slurp',
-        '^voyager\/',
+        '^voyager\\/',
         'weblayers',
         # Common robots (Not in robot file)
         'antibot',
         'bruinbot',
         'digout4u',
         'echo!',
-        'fast\-webcrawler',
-        'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa
+        'fast\\-webcrawler',
+        'ia_archiver\\-web\\.archive\\.org', # Must be before ia_archiver to avoid confusion with alexa
         'ia_archiver',
         'jennybot',
         'mercator',
         'netcraft',
-        'msnbot\-media',
+        'msnbot\\-media',
         'msnbot',
         'petersnews',
-        'relevantnoise\.com',
+        'relevantnoise\\.com',
         'unlost_web_crawler',
         'voila',
         'webbase',
@@ -52,7 +52,7 @@ robot_useragents = [
         # Less common robots (In robot file)
         '[^a]fish',
         'abcdatos',
-        'acme\.spider',
+        'acme\\.spider',
         'ahoythehomepagefinder',
         'alkaline',
         'anthill',
@@ -64,7 +64,7 @@ robot_useragents = [
         'powermarks',
         'arks',
         'aspider',
-        'atn\.txt',
+        'atn\\.txt',
         'atomz',
         'auresys',
         'backrub',
@@ -73,7 +73,7 @@ robot_useragents = [
         'blackwidow',
         'blindekuh',
         'bloodhound',
-        'borg\-bot',
+        'borg\\-bot',
         'brightnet',
         'bspider',
         'cactvschemistryspider',
@@ -103,7 +103,7 @@ robot_useragents = [
         'download_express',
         'dragonbot',
         'dwcp',
-        'e\-collector',
+        'e\\-collector',
         'ebiness',
         'elfinbot',
         'emacs',
@@ -168,7 +168,7 @@ robot_useragents = [
         'kilroy',
         'ko[_+ ]yappo[_+ ]robot',
         'kummhttp',
-        'labelgrabber\.txt',
+        'labelgrabber\\.txt',
         'larbin',
         'legs',
         'linkidator',
@@ -190,11 +190,11 @@ robot_useragents = [
         'muncher',
         'mwdsearch',
         'ndspider',
-        'nederland\.zoek',
+        'nederland\\.zoek',
         'netcarta',
         'netmechanic',
         'netscoop',
-        'newscan\-online',
+        'newscan\\-online',
         'nhse',
         'northstar',
         'nzexplorer',
@@ -235,7 +235,7 @@ robot_useragents = [
         'roverbot',
         'rules',
         'safetynetrobot',
-        'search\-info',
+        'search\\-info',
         'search_au',
         'searchprocess',
         'senrigan',
@@ -244,7 +244,7 @@ robot_useragents = [
         'shaihulud',
         'sift',
         'simbot',
-        'site\-valet',
+        'site\\-valet',
         'sitetech',
         'skymob',
         'slcrawler',
@@ -310,7 +310,7 @@ robot_useragents = [
         'webwatch',
         'whatuseek',
         'whowhere',
-        'wired\-digital',
+        'wired\\-digital',
         'wmir',
         'wolp',
         'wombat',
@@ -321,8 +321,8 @@ robot_useragents = [
         'wz101',
         'xget',
         # Other robots reported by users
-        '1\-more_scanner',
-        'accoona\-ai\-agent',
+        '1\\-more_scanner',
+        'accoona\\-ai\\-agent',
         'activebookmark',
         'adamm_bot',
         'almaden',
@@ -331,9 +331,9 @@ robot_useragents = [
         'alpha_search_agent',
         'allrati',
         'aport',
-        'archive\.org_bot',
+        'archive\\.org_bot',
         'argus',               # Must be before nutch
-        'arianna\.libero\.it',
+        'arianna\\.libero\\.it',
         'aspseek',
         'asterias',
         'awbot',
@@ -351,10 +351,10 @@ robot_useragents = [
         'blogshares',
         'blogslive',
         'blogssay',
-        'bncf\.firenze\.sbn\.it\/raccolta\.txt',
+        'bncf\\.firenze\\.sbn\\.it\\/raccolta\\.txt',
         'bobby',
-        'boitho\.com\-dc',
-        'bookmark\-manager',
+        'boitho\\.com\\-dc',
+        'bookmark\\-manager',
         'boris',
         'bumblebee',
         'candlelight[_+ ]favorites[_+ ]inspector',
@@ -363,7 +363,7 @@ robot_useragents = [
         'cfnetwork',
         'cipinetbot',
         'checkweb_link_validator',
-        'commons\-httpclient',
+        'commons\\-httpclient',
         'computer_and_automation_research_institute_crawler',
         'converamultimediacrawler',
         'converacrawler',
@@ -372,39 +372,39 @@ robot_useragents = [
         'cuasarbot',
         'cursor',
         'custo',
-        'datafountains\/dmoz_downloader',
+        'datafountains\\/dmoz_downloader',
         'daviesbot',
         'daypopbot',
         'deepindex',
-        'dipsie\.bot',
+        'dipsie\\.bot',
         'dnsgroup',
         'domainchecker',
-        'domainsdb\.net',
+        'domainsdb\\.net',
         'dulance',
         'dumbot',
-        'dumm\.de\-bot',
-        'earthcom\.info',
+        'dumm\\.de\\-bot',
+        'earthcom\\.info',
         'easydl',
-        'edgeio\-retriever',
+        'edgeio\\-retriever',
         'ets_v',
         'exactseek',
         'extreme[_+ ]picture[_+ ]finder',
         'eventax',
         'everbeecrawler',
-        'everest\-vulcan',
+        'everest\\-vulcan',
         'ezresult',
         'enteprise',
         'facebook',
-        'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de',
-        'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
-        'matrix_s\.p\.a\._\-_fast_enterprise_crawler', # must come before fast enterprise crawler
+        'fast_enterprise_crawler.*crawleradmin\\.t\\-info@telekom\\.de',
+        'fast_enterprise_crawler.*t\\-info_bi_cluster_crawleradmin\\.t\\-info@telekom\\.de',
+        'matrix_s\\.p\\.a\\._\\-_fast_enterprise_crawler', # must come before fast enterprise crawler
         'fast_enterprise_crawler',
-        'fast\-search\-engine',
+        'fast\\-search\\-engine',
         'favicon',
         'favorg',
         'favorites_sweeper',
         'feedburner',
-        'feedfetcher\-google',
+        'feedfetcher\\-google',
         'feedflow',
         'feedster',
         'feedsky',
@@ -412,7 +412,7 @@ robot_useragents = [
         'filmkamerabot',
         'findlinks',
         'findexa_crawler',
-        'fooky\.com\/ScorpionBot',
+        'fooky\\.com\\/ScorpionBot',
         'g2crawler',
         'gaisbot',
         'geniebot',
@@ -420,7 +420,7 @@ robot_useragents = [
         'girafabot',
         'global_fetch',
         'gnodspider',
-        'goforit\.com',
+        'goforit\\.com',
         'goforitbot',
         'gonzo',
         'grub',
@@ -433,9 +433,9 @@ robot_useragents = [
         'htmlparser',
         'html[_+ ]link[_+ ]validator',
         'httrack',
-        'hundesuche\.com\-bot',
+        'hundesuche\\.com\\-bot',
         'ichiro',
-        'iltrovatore\-setaccio',
+        'iltrovatore\\-setaccio',
         'infobot',
         'infociousbot',
         'infomine',
@@ -450,37 +450,37 @@ robot_useragents = [
         'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility',
         'justview',
         'kalambot',
-        'kamano\.de_newsfeedverzeichnis',
+        'kamano\\.de_newsfeedverzeichnis',
         'kazoombot',
         'kevin',
         'keyoshid', # Must come before Y!J
         'kinjabot',
-        'kinja\-imagebot',
+        'kinja\\-imagebot',
         'knowitall',
-        'knowledge\.com',
+        'knowledge\\.com',
         'kouaa_krawler',
         'krugle',
         'ksibot',
         'kurzor',
         'lanshanbot',
-        'letscrawl\.com',
+        'letscrawl\\.com',
         'libcrawl',
         'linkbot',
         'link_valet_online',
-        'metager\-linkchecker',        # Must be before linkchecker
+        'metager\\-linkchecker',       # Must be before linkchecker
         'linkchecker',
-        'livejournal\.com',
+        'livejournal\\.com',
         'lmspider',
-        'lwp\-request',
-        'lwp\-trivial',
+        'lwp\\-request',
+        'lwp\\-trivial',
         'magpierss',
-        'mail\.ru',
-        'mapoftheinternet\.com',
-        'mediapartners\-google',
+        'mail\\.ru',
+        'mapoftheinternet\\.com',
+        'mediapartners\\-google',
         'megite',
         'metaspinner',
         'microsoft[_+ ]url[_+ ]control',
-        'mini\-reptile',
+        'mini\\-reptile',
         'minirank',
         'missigua_locator',
         'misterbot',
@@ -489,7 +489,7 @@ robot_useragents = [
         'mj12bot',
         'mojeekbot',
         'msiecrawler',
-        'ms_search_4\.0_robot',
+        'ms_search_4\\.0_robot',
         'msrabot',
         'msrbot',
         'mt::telegraph::agent',
@@ -504,12 +504,12 @@ robot_useragents = [
         'noxtrumbot',
         'npbot',
         'nutchcvs',
-        'nutchosu\-vlib',
+        'nutchosu\\-vlib',
         'nutch',  # Must come after other nutch versions
         'ocelli',
         'octora_beta_bot',
         'omniexplorer[_+ ]bot',
-        'onet\.pl[_+ ]sa',
+        'onet\\.pl[_+ ]sa',
         'onfolio',
         'opentaggerbot',
         'openwebspider',
@@ -517,13 +517,13 @@ robot_useragents = [
         'orbiter',
         'yodaobot',
         'qihoobot',
-        'passwordmaker\.org',
+        'passwordmaker\\.org',
         'pear_http_request_class',
         'peerbot',
         'perman',
         'php[_+ ]version[_+ ]tracker',
         'pictureofinternet',
-        'ping\.blo\.gs',
+        'ping\\.blo\\.gs',
         'plinki',
         'pluckfeedcrawler',
         'pogodak',
@@ -531,7 +531,7 @@ robot_useragents = [
         'popdexter',
         'port_huron_labs',
         'postfavorites',
-        'projectwf\-java\-test\-crawler',
+        'projectwf\\-java\\-test\\-crawler',
         'proodlebot',
         'pyquery',
         'rambler',
@@ -548,11 +548,11 @@ robot_useragents = [
         'seekbot',
         'sensis_web_crawler',
         'seznambot',
-        'shim\-crawler',
+        'shim\\-crawler',
         'shoutcast',
         'slysearch',
-        'snap\.com_beta_crawler',
-        'sohu\-search',
+        'snap\\.com_beta_crawler',
+        'sohu\\-search',
         'sohu', # "sohu agent"
         'snappy',
         'sphere_scout',
@@ -560,7 +560,7 @@ robot_useragents = [
         'sproose_crawler',
         'steeler',
         'steroid__download',
-        'suchfin\-bot',
+        'suchfin\\-bot',
         'superbot',
         'surveybot',
         'susie',
@@ -572,7 +572,7 @@ robot_useragents = [
         'teragramcrawlersurf',
         'test_crawler',
         'testbot',
-        't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
+        't\\-h\\-u\\-n\\-d\\-e\\-r\\-s\\-t\\-o\\-n\\-e',
         'topicblogs',
         'turnitinbot',
         'turtlescanner',               # Must be before turtle
@@ -582,24 +582,24 @@ robot_useragents = [
         'ubicrawler',
         'ultraseek',
         'unchaos_bot_hybrid_web_search_engine',
-        'unido\-bot',
+        'unido\\-bot',
         'updated',
-        'ustc\-semantic\-group',
-        'vagabondo\-wap',
+        'ustc\\-semantic\\-group',
+        'vagabondo\\-wap',
         'vagabondo',
         'vermut',
-        'versus_crawler_from_eda\.baykan@epfl\.ch',
+        'versus_crawler_from_eda\\.baykan@epfl\\.ch',
         'vespa_crawler',
         'vortex',
-        'vse\/',
-        'w3c\-checklink',
+        'vse\\/',
+        'w3c\\-checklink',
         'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa',
         'w3c_validator',
         'watchmouse',
         'wavefire',
-        'webclipping\.com',
+        'webclipping\\.com',
         'webcompass',
-        'webcrawl\.net',
+        'webcrawl\\.net',
         'web_downloader',
         'webdup',
         'webfilter',
@@ -616,29 +616,29 @@ robot_useragents = [
         'xirq',
         'y!j', # Must come after keyoshid Y!J
         'yacy',
-        'yahoo\-blogs',
-        'yahoo\-verticalcrawler',
+        'yahoo\\-blogs',
+        'yahoo\\-verticalcrawler',
         'yahoofeedseeker',
-        'yahooseeker\-testing',
+        'yahooseeker\\-testing',
         'yahooseeker',
-        'yahoo\-mmcrawler',
+        'yahoo\\-mmcrawler',
         'yahoo!_mindset',
         'yandex',
         'flexum',
         'yanga',
         'yooglifetchagent',
-        'z\-add_link_checker',
+        'z\\-add_link_checker',
         'zealbot',
         'zhuaxia',
         'zspider',
         'zeus',
-        'ng\/1\.', # put at end to avoid false positive
-        'ng\/2\.', # put at end to avoid false positive
+        'ng\\/1\\.', # put at end to avoid false positive
+        'ng\\/2\\.', # put at end to avoid false positive
         'exabot',  # put at end to avoid false positive
         # Other id that are 99% of robots
         'wget',
         'libwww',
-        'java\/[0-9]'   # put at end to avoid false positive
+        'java\\/[0-9]'   # put at end to avoid false positive
 
         # Generic robot
         'robot',
@@ -649,8 +649,8 @@ robot_useragents = [
         'scanner',
         'spider',
         'sucker',
-        'bot[\s_+:,\.\;\/\\\-]',
-        '[\s_+:,\.\;\/\\\-]bot',
+        'bot[\\s_+:,\\.\\;\\/\\\\\\-]',
+        '[\\s_+:,\\.\\;\\/\\\\\\-]bot',
         'no_user_agent',
 
         # manually added
-- 
Dr Peter Chubb                https://trustworthy.systems/
Trustworthy Systems Group                        CSE, UNSW
Core hours: Mon 8am-3pm; Wed: 8am-5pm; Fri 8am-12pm.

Reply via email to