faqts : Computers : Programming : Languages : Python : Snippets : Maths

+ Search
Add Entry AlertManage Folder Edit Entry Add page to http://del.icio.us/
Did You Find This Entry Useful?

13 of 17 people (76%) answered Yes
Recently 6 of 10 people (60%) answered Yes

Entry

LongRan - Long random number generator

Jul 5th, 2000 10:01
Nathan Wallace, unknown unknown, Hans Nowak, Snippet 216, Tim Peters


"""
Packages: maths.random
"""
# Released to the public domain,
# by Tim Peters ([email protected]),
# 2 Aug 98.
__version__ = 3
# Version 1:  2 Aug 98
# Version 2:  2 Aug 98
#     Hacked _maketable to break patterns in leading bits.
# Version 3:  2 Aug 98
#     Stopped trying to fill the table directly with the seed,
#     and used seed instead to kick off a good 64-bit MLCG,
#     sucking 8-byte digits out of that.
#     Noticed that the constant in the -i*c generator often had
#     an extremely regular pattern.  Now replace all but its first
#     6 bits by stuff derived from _ran64.  Surprise:  c's
#     relationship to the M2 modulus was *so* simple before that
#     the gcd calculation can take hundreds of times longer now!
__doc__ = """LongRan is a random-number generator for long ints.
LongRan(nbits, seed=12345678987654321L, lags=(97, 33))
creates an object for generating pseudo-random non-negative ints
of (no more than) nbits bits.  Each int in range(2L**nbits) is equally
likely.
If g is an object of class LongRan, then
    g.get() gets "the next" random int.
    g.setseed(seed=12345678987654321L) resets the sequence.
For example,
    g = LongRan(128)
    for i in range(10): print hex(g.get())
    g.setseed()
    print hex(g.get())
produces
    0xC68B960293E04B1E022BE2B5EBDF7CA4L
    0xF7ECB9F8E9C8BC4536F72116DFAE5499L
    0xFE3F25953B8EA30670CBCADF80C2FD1BL
    0xE134AFDB8ED0FAEA8B496FB3C4CB0468L
    0x5005D9D091B659D06C01C3A636FECECAL
    0x162FE74AF33C84A2B703A4FE92B9DCF4L
    0x8B339B47918D28171CDD9CA5A976639BL
    0x58B6B6CA85B9C5957338A91ED1FCC52BL
    0x19AF26542D1B95BB948531EF59266DF1L
    0xB290FD6CCBD746AB3332C49D0E27AD9FL
    0xC68B960293E04B1E022BE2B5EBDF7CA4L # same as first above
seed is any int (short or long), and determines the sequence generated.
Only the last 64 bits of seed are looked at.
lags is a 2-tuple (lag1, lag2), and should have lag1 > lag2 >= 1.
A table of lag1 longs (each with nbits bits) is maintained internally,
and the storage required is proportional to nbits * lag1.  "Good"
values for lag1 and lag2 are crucial.  See e.g. Knuth Volume 2 for
other good pairs (Section 3.2.2, Table 1, "Subscript Pairs Yielding
Long Periods Mod 2").
Period:  Provided the lag pair is well chosen, the period is at least
2**lag1.  It is likely substantially longer.
Method:  A lagged Fibonacci generator F(lag1, lag2, -) is the workhorse.
F(97, 33, -) (LongRan's default) has been widely tested (although this
specific implementation has not!) and reported to produce excellent
results in almost all tests, both for the numbers as a whole and for
slices of bits within each number (e.g., the last bit is as random as the
first).  Marsaglia reports that it fails his stringent "Birthday
Spacings" test, though.  Therefore the main generator is combined with
another of different algebraic structure (a very poor but fast-for-
long-ints degenerate MLCG).
Speed:  This is very fast, linear in the number of bits.  On my P5-166,
a random 32 kilobit int is generated in about 1.1 milliseconds,
resetting a 32 Kbit generator takes about one second, and creating a
32 Kbit generator takes about 20 seconds (the vast bulk of which is spent
computing a long-int gcd!).
CAUTIONS:
+ This has been very lightly tested.  It may be buggy, and may
  not produce anything even remotely resembling random ints.  Use
  at your own risk.
+ Initialization uses a F(2, 1, -) generator, much as Knuth uses for his
  IN55 routine (an F(55, 24, -) generator).  That's a bad generator, but
  after shuffing the table and running 3 full cycles of the real
  generator it's hoped that the bad stuff is washed out. More-modern
  practice is to use an independent random-bit generator to initialize
  the table; alas, trying that for LongRan(2**15) looked like it would
  take about half an hour to finish, while the current method completes
  in about a second.
"""
# mod 2**64 MLCG from Knuth (Vol 2, section 3.3.4, Table 1 line 30)
_ran64seed = None
def _ran64(mask64 = (1L << 64) - 1):
    global _ran64seed
    seed = _ran64seed * 6364136223846793005L + 7
    seed = _ran64seed = seed & mask64
    return seed
def _setran64seed(seed):
    global _ran64seed
    _ran64seed = seed
# Create a list with nwords slightly-random positive ints each no more
# than nbits long.  At least one will be odd.  As random ints, these
# suck!  But they can be generated quickly, and work well in context
# after shuffling and running a few cycles of the real generator.
def _maketable(nbits, nwords, seed):
    assert nbits > 0
    assert nwords > 0
    M = 1L << nbits
    # fill up an nbit word with huge digits from _ran64
    _setran64seed(seed)
    seed = 0L
    while seed < M:
        start_seed = seed
        seed = (seed << 64) | _ran64()
    seed = (seed ^ start_seed) & (M - 1)
    del start_seed
    # and derive another value from that
    next = (seed * _ran64()) & (M - 1)
    next = next | 1
    a = []
    for i in range(nwords):
        try:
            next = int(next)
        except OverflowError:
            pass
        a.append(next)
        next, seed = seed, next - seed
        if seed < 0:
            seed = seed + M
    return a
# Shuffle list in place (i.e. permute it randomly).
def _shuffletable(a, seed):
    _setran64seed(seed)
    for i in range(len(a)-1, 0, -1):
        j = int(((i + 1) * _ran64()) >> 64)
        assert 0 <= j <= i
        a[i], a[j] = a[j], a[i]
# greatest common divisor
def _gcd(a, b):
    assert a >= b
    while b:
        newb = a - b
        if newb >= b:
            newb = newb % b
        a = b
        b = newb
    return a
class LongRan:
    def __init__(self, nbits, seed=12345678987654321L, lags=(97, 33)):
        if nbits < 4:
            raise ValueError("nbits must be >= 4")
        self.nbits = nbits
        # self.M <- modulus for F(lag1, lag2, -) generator
        if nbits <= 30:
            self.M = 1 << nbits
        else:
            self.M = 1L << nbits
        lag1, lag2 = lags
        if not lag1 > lag2 >= 1:
            raise ValueError("lags must have lag1 > lag2 >= 1")
        self.lag1, self.lag2 = lags
        # M2 <- modulus for -i*c generator
        M2 = self.M - 3
        assert M2 & 1
        while M2 & 7 != 5:
            M2 = M2 - 2
        self.M2 = M2
        # c <- about 41% of M2, then search down until
        # gcd(c, M2) is 1.  This so that seed-i*c = seed-j*c mod M2
        # implies i = j mod M2 (i.e., seed-i*c may be a crappy generator,
        # but at least it's full-period).
        c = (M2 * 105L) >> 8   # 105/256 ~= 0.41
        # figure out the number of bits in c
        n = nbits # close upper bound
        mask = self.M >> 1
        while c & mask == 0:
            mask = mask >> 1
            n = n - 1
        del mask
        # fill the tail with junk, but preserve the first 6 bits
        if n > 6:
            n = n - 6
            c = c >> n
            n, leftover = divmod(n, 64)
            _setran64seed(2718281828L)
            if leftover:
                c = c << leftover
                c = c | (_ran64() >> (64 - leftover))
            for i in range(n):
                c = (c << 64) | _ran64()
        # make it odd and coprime to the modulus
        c = c | 1
        while _gcd(M2, c) != 1:
            c = c - 2
        try:
            c = int(c)
        except OverflowError:
            pass
        self.c = c
        assert 1 <= c < M2
        self.setseed(seed)
    def __str__(self):
        return "LongRan" + `(self.nbits, (self.lag1, self.lag2))`
    def setseed(self, seed=12345678987654321L):
        self.a = None  # free up old storage
        self.a = _maketable(self.nbits, self.lag1, seed)
        _shuffletable(self.a, self.nbits)
        self.i = self.lag1 - 1
        self.j = self.lag2 - 1
        self.seed = seed % self.M2
        for i in range(3 * self.lag1):
            self.get()
    def get(self):
        a, i, j, seed, M = self.a, self.i, self.j, self.seed, self.M
        x = a[i] - a[j]
        if x < 0:
            x = x + M
        a[i] = x
        i = i - 1
        j = j - 1
        if i < 0:
            i = len(a) - 1
        elif j < 0:
            j = len(a) - 1
        seed = seed - self.c
        if seed < 0:
            seed = seed + self.M2
        x = x - seed
        if x < 0:
            x = x + M
        self.i, self.j, self.seed = i, j, seed
        return x
def _test():
    g = LongRan(128)
    g1 = g.get()
    g2 = g.get()
    expected1 = 0xC68B960293E04B1E022BE2B5EBDF7CA4L
    expected2 = 0xF7ECB9F8E9C8BC4536F72116DFAE5499L
    assert g1 == expected1
    assert g2 == expected2
    g.setseed()
    g1 = g.get()
    assert g1 == expected1
    if 0:
        from Timer import timeone
        g = LongRan(2**15)
        print timeone(g.get, ())
if __name__ == '__main__':
    _test()
del _test