On 5/19/2009 10:19 AM spir said...
Le Tue, 19 May 2009 11:36:17 +0200,
spir <denis.s...@free.fr> s'exprima ainsi:

[...]

Thank you Albert, Kent, Sanders, Lie, Malcolm.

This time regex wins! I thought it wouldn't because of the additional function
call (too bad we cannot pass a mapping to re.sub). Actually the difference is
very small ;-) The relevant change is indeed using a dict.
Replacing string concatenation with ''.join() is slower (also tested with
strings 10 times and 100 times bigger). Strange...
Membership test in a set is only very slightly faster than in dict keys.

Hmm... this seems faster assuming it does the same thing...

# Start from an identity table for character codes 0..255, then override
# the entries that control_char_map provides.
xlate = dict((ch, ch) for ch in map(chr, range(256)))
xlate.update(control_char_map)

def cleanRepr5(text):
    """Return text with every character replaced by its xlate entry.

    Each character of text is looked up in the module-level xlate dict and
    the results are joined; a character missing from xlate raises KeyError.
    """
    return "".join(map(xlate.__getitem__, text))


Emile




I did a test with random strings of typical length for my app. Timing is
roughly stable.

===================================================
### various funcs ###
# original
def cleanRepr0(text):
    """Return text with control chars replaced by their repr() equivalent.

    A character is treated as a control char when its code is below 32 or
    lies in 127..159; it is replaced by repr(char) without the surrounding
    quotes. Baseline variant: tests ord() per character and builds the
    result by repeated string concatenation.
    """
    result = ""
    for ch in text:
        code = ord(ch)
        if code < 32 or 126 < code < 160:
            # strip the quote characters that repr() puts around the escape
            result += repr(ch)[1:-1]
        else:
            result += ch
    return result

# use list
def cleanRepr1(text):
    """Return text with control chars replaced by their repr() equivalent.

    Same test as the baseline (code below 32, or in 127..159), but the
    pieces are collected in a list and joined once at the end.
    """
    pieces = []
    for ch in text:
        code = ord(ch)
        if code < 32 or 126 < code < 160:
            # strip the quote characters that repr() puts around the escape
            pieces.append(repr(ch)[1:-1])
        else:
            pieces.append(ch)
    return ''.join(pieces)

# Characters treated as control characters: codes 0..31 and 127..159.
# Each range is turned into a list before concatenating, because range
# objects cannot be joined with "+" on Python 3; list() keeps this working
# on both Python 2 and Python 3.
control_chars = set(
    chr(n) for n in (list(range(0, 32)) + list(range(127, 160)))
)
# Maps each control character to its repr() text without the surrounding quotes.
control_char_map = dict((c, repr(c)[1:-1]) for c in control_chars)

# use map
def cleanRepr2(text):
    """Return text with control chars replaced via control_char_map.

    Membership is tested against the dict itself and the result is built
    by repeated string concatenation.
    """
    result = ""
    for ch in text:
        if ch in control_char_map:
            result += control_char_map[ch]
        else:
            result += ch
    return result

# use map & set
def cleanRepr3(text):
    """Return text with control chars replaced via control_char_map.

    Membership is tested against the control_chars set; pieces are
    collected in a list and joined once at the end.

    NOTE: a second ``def cleanRepr3`` follows this one in the file and
    rebinds the name, so this list-based variant is not the one called
    after the module has finished loading.
    """
    pieces = []
    for ch in text:
        if ch in control_chars:
            pieces.append(control_char_map[ch])
        else:
            pieces.append(ch)
    return ''.join(pieces)
def cleanRepr3(text):
    """Return text with control chars replaced via control_char_map.

    Membership is tested against the control_chars set and the result is
    built by repeated string concatenation.

    NOTE: this definition reuses the name of the list-based cleanRepr3
    defined immediately before it and therefore replaces it.
    """
    result = ""
    for ch in text:
        if ch in control_chars:
            result += control_char_map[ch]
        else:
            result += ch
    return result

import re
# Compiled once at module level; matches any single character in the
# ranges \x00-\x1f or \x7f-\x9f.
controlsRe = re.compile(r'[\x00-\x1f\x7f-\x9f]')

# use regex
def substChar(m):
    """Replacement callback for re.sub().

    m is a Match object; the text it matched is used as the key into
    control_char_map and the mapped string is returned.
    """
    matched = m.group(0)
    return control_char_map[matched]
def cleanRepr4(text):
    """Return text with every controlsRe match replaced through substChar."""
    cleaned = controlsRe.sub(substChar, text)
    return cleaned


### timing ###
#helper func to generate random string
from time import time
import random

def randomString():
    """Return a random string of 11 to 110 characters.

    The length is drawn with random.randrange(11, 111); each character is
    chr() of a code drawn with random.randrange(1, 255), i.e. 1..254.
    """
    length = random.randrange(11, 111)
    return ''.join(chr(random.randrange(1, 255)) for _ in range(length))

def timeAll():
        ''' Time cleanRepr0..cleanRepr4 on 9999 random strings and print the totals.

        Each variant is called once per generated string; the elapsed time()
        around each call is added to that variant's running total.
        '''
        # one accumulated total per variant
        t0=t1=t2=t3=t4=0
        for n in range(9999):
                # all five variants process the same random string
                s = randomString()
                t = time() ; cleanRepr0(s) ; t0 += time() - t
                t = time() ; cleanRepr1(s) ; t1 += time() - t
                t = time() ; cleanRepr2(s) ; t2 += time() - t
                t = time() ; cleanRepr3(s) ; t3 += time() - t
                t = time() ; cleanRepr4(s) ; t4 += time() - t
        # report each total with three decimals
        print ( "original: %.3f\n"
                        "list:             %.3f\n"
                        "map:              %.3f\n"
                        "map & set:    %.3f\n"
                        "regex:            %.3f\n"
                        %(t0,t1,t2,t3,t4) )

# Run the benchmark; this executes whenever the module is loaded.
timeAll()
===================================================
==>
original:       0.692
list:           0.829
map:            0.364
map & set:  0.349
regex:          0.341
===================================================

Denis
------
la vita e estrany
_______________________________________________
Tutor maillist  -  Tutor@python.org
http://mail.python.org/mailman/listinfo/tutor


_______________________________________________
Tutor maillist  -  Tutor@python.org
http://mail.python.org/mailman/listinfo/tutor

Reply via email to