Re: Generating a large random string
Paul Rubin wrote:
[color=blue]
> Oops, per other post, it gives strings of bytes and needs filtering.
> The following runs in about 1.2 seconds on my machine, but has a
> small (infinitesimal) chance of failure:
>
> import string,array,time
> t=time.time()
> ttab = string.letters*4 + '\0'*48
> a = array.array('B', open("/dev/urandom").read(1500000).translate(ttab))
> a = array.array('B', filter(abs,a)).tostring()[:1000000]
> print time.time()-t[/color]
from __future__ import division
import array, random, string, sys
identity = string.maketran s("", "")
ld = 256//len(string.lett ers)
rest = 256 % len(string.lett ers)
ttab = string.letters* ld + '\0'*rest
dtab = identity[-rest:]
# a fully functional variant of your approach
def randstrUnix(length, extra=1.25):
    """Return a string of `length` random letters read from /dev/urandom.

    Raw bytes are pushed through the module-level tables ttab/dtab, which
    turn most byte values into letters and delete the remainder, so more
    bytes than `length` have to be read.

    length -- number of characters wanted; values <= 0 yield "".
    extra  -- oversampling factor for the first read, compensating for the
              bytes that dtab deletes.  Top-up reads always use 1.3.
    """
    if length <= 0:
        # A negative size would make read() consume the endless device.
        return ""
    pieces = []
    missing = length
    factor = extra
    source = open("/dev/urandom", "rb")
    try:
        # Keep topping up until enough letters survived the filtering.
        while missing > 0:
            raw = source.read(int(missing * factor))
            piece = raw.translate(ttab, dtab)
            pieces.append(piece)
            missing -= len(piece)
            factor = 1.3
    finally:
        # Always release the device handle, even if a read fails.
        source.close()
    return "".join(pieces)[:length]
# Every ordered pair of letters, built once at module level so that
# randstrPure can produce two characters per random draw.
twoletters = [c+d for c in string.letters for d in string.letters]
# the fastest pure-python version I was able to produce
def randstrPure(length):
    """Return a string of `length` random letters using only the random module.

    The string is assembled from precomputed two-letter pairs (twoletters),
    so only length // 2 random draws are needed; an odd length gets one
    extra single letter appended.
    """
    # Bind the lookups to locals: they are used once per loop iteration.
    r = random.random
    n = len(twoletters)
    l2 = length // 2
    lst = [None] * l2
    for i in xrange(l2):
        # int(r() * n) picks a uniform index in [0, n).
        lst[i] = twoletters[int(r() * n)]
    if length & 1:
        # Odd length: one more single character completes the string.
        lst.append(random.choice(string.letters))
    return "".join(lst)
The timings:
$ timeit.py -s"import randchoice as r" "r.randstrUnix(1000000)"
10 loops, best of 3: 2.29e+05 usec per loop
$ timeit.py -s"import randchoice as r" "r.randstrPure(1000000)"
10 loops, best of 3: 6.51e+05 usec per loop
A factor of 3 would hardly justify the OS-dependency in most cases.
Note that using twoletters[int(r() * n)] as seen in Sean Ross' version
instead of random.choice(twoletters) doubled the speed.
Peter
Paul Rubin wrote:
[color=blue]
> Oops, per other post, it gives strings of bytes and needs filtering.
> The following runs in about 1.2 seconds on my machine, but has a
> small (infinitesimal) chance of failure:
>
> import string,array,time
> t=time.time()
> ttab = string.letters*4 + '\0'*48
> a = array.array('B', open("/dev/urandom").read(1500000).translate(ttab))
> a = array.array('B', filter(abs,a)).tostring()[:1000000]
> print time.time()-t[/color]
from __future__ import division
import array, random, string, sys
identity = string.maketran s("", "")
ld = 256//len(string.lett ers)
rest = 256 % len(string.lett ers)
ttab = string.letters* ld + '\0'*rest
dtab = identity[-rest:]
# a fully functional variant of your approach
def randstrUnix(length, extra=1.25):
    """Return a string of `length` random letters read from /dev/urandom.

    Raw bytes are pushed through the module-level tables ttab/dtab, which
    turn most byte values into letters and delete the remainder, so more
    bytes than `length` have to be read.

    length -- number of characters wanted; values <= 0 yield "".
    extra  -- oversampling factor for the first read, compensating for the
              bytes that dtab deletes.  Top-up reads always use 1.3.
    """
    if length <= 0:
        # A negative size would make read() consume the endless device.
        return ""
    pieces = []
    missing = length
    factor = extra
    source = open("/dev/urandom", "rb")
    try:
        # Keep topping up until enough letters survived the filtering.
        while missing > 0:
            raw = source.read(int(missing * factor))
            piece = raw.translate(ttab, dtab)
            pieces.append(piece)
            missing -= len(piece)
            factor = 1.3
    finally:
        # Always release the device handle, even if a read fails.
        source.close()
    return "".join(pieces)[:length]
# Every ordered pair of letters, built once at module level so that
# randstrPure can produce two characters per random draw.
twoletters = [c+d for c in string.letters for d in string.letters]
# the fastest pure-python version I was able to produce
def randstrPure(length):
    """Return a string of `length` random letters using only the random module.

    The string is assembled from precomputed two-letter pairs (twoletters),
    so only length // 2 random draws are needed; an odd length gets one
    extra single letter appended.
    """
    # Bind the lookups to locals: they are used once per loop iteration.
    r = random.random
    n = len(twoletters)
    l2 = length // 2
    lst = [None] * l2
    for i in xrange(l2):
        # int(r() * n) picks a uniform index in [0, n).
        lst[i] = twoletters[int(r() * n)]
    if length & 1:
        # Odd length: one more single character completes the string.
        lst.append(random.choice(string.letters))
    return "".join(lst)
The timings:
$ timeit.py -s"import randchoice as r" "r.randstrUnix(1000000)"
10 loops, best of 3: 2.29e+05 usec per loop
$ timeit.py -s"import randchoice as r" "r.randstrPure(1000000)"
10 loops, best of 3: 6.51e+05 usec per loop
A factor of 3 would hardly justify the OS-dependency in most cases.
Note that using twoletters[int(r() * n)] as seen in Sean Ross' version
instead of random.choice(twoletters) doubled the speed.
Peter
Comment