To help the unfamiliar to understand the application and relevance of this algorithm, consider the following dynamical system derived from an epidemiological problem:

Suppose an infected individual arrives in his home town after his/her vacation carrying an infectious disease entirely new to his fellow citizens. This means no one in town has antibodies for that disease. Let this set of people be called the "susceptibles" and let S be the number of people in this group. Our Infected individual will initially be the only member of a group called the "infectious" which size is denoted by I. The rate of encounters between susceptibles and infectious is proportional to their numbers (S*I). Everytime an infectious meet a susceptible there is a chance b that the disease is transmitted, so the probability of a transmission event is b*S*I. This is called an individual state transition, since the individual went from a susceptible state to an infectious one.

The dynamical system used as an example here contains this and other state transitions described in the code below:

As can be seen there are five different states in this model with the last one R, being an absorbing state, since all individuals converge to that state with time."""Example of an SEIR model with two Infectious classes: subclinical(Is) and clinical(Ic)Is/ \S -> E R\ /IcStates:S: SusceptibleE: ExposedIs: Infectious subclinicalIc: Infectious clinicalR: RecoveredTransition rates:b,ks,kc,rs,rc = (0.001, 0.1, 0.1, 0.01, .01)Transitions:S -> E : b*S*(Is+Ic)E -> Is : ks*EE -> Ic : kc*EIs -> R : rs*IsIc -> R : rc*Ic"""

from gillespie import Model

import time

from numpy import array

vars = ['S','E','Is','Ic','R']#rates: b,ks,kc,rs,rc

r = (0.001, 0.1, 0.1, 0.01, .01)

ini = (490,0,10,0,0)

prop = (lambdar,ini:r[0]*ini[0]*(ini[2]+ini[3]),

lambdar,ini:r[1]*ini[1],

lambdar,ini:r[2]*ini[1],

lambdar,ini:r[3]*ini[2],

lambdar,ini:r[4]*ini[3]

)

tmat = array([[-1,0,0,0,0],

[1,-1,-1,0,0],

[0,1,0,-1,0],

[0,0,1,0,-1],

[0,0,0,1,1]

])#for e in prop:# print e()

M=Model(vnames=vars,rates = r,inits=ini,tmat=tmat,propensity=prop)

t0 = time.time()

M.run(tmax=80,reps=100)

t,series,steps = M.getStats()

from pylab import plot , show, legend, errorbar

plot(t,series.mean(axis=2),'-o')

legend(vars,loc=0)

show()

the code that simulates this stochastic dynamic model is in the gillespie module:

from numpy.random import uniform, multinomial, exponential,random

from numpy import arange, array, empty,zeros,log#from math import log

import time

import multiprocessing

import psyco

psyco.full()#global ini#ini=[500,1,0]classModel:

def__init__(self,vnames,rates,inits, tmat,propensity):'''* vnames: list of strings* rates: list of fixed rate parameters* inits: list of initial values of variables* propensity: list of lambda functions of the form:lambda r,ini: some function of rates ans inits.'''

self.vn = vnames

self.rates = rates

self.inits = inits

self.tm = tmat

self.pv = propensity#[compile(eq,'errmsg','eval') for eq in propensity]

self.pvl = len(self.pv)#length of propensity vector

self.nvars = len(self.inits)#number of variables

self.time=None

self.series=None

self.steps=0

defgetStats(self):

returnself.time,self.series,self.steps

defrun(self,method='SSA', tmax=10, reps=1):

self.res = zeros((tmax,self.nvars,reps),dtype=float)

tvec = arange(tmax, dtype=int)

ifmethod =='SSA':

fori in xrange(reps):

steps = self.GSSA(tmax,i)

elifmethod == 'SSAct':

pass

self.time=tvec

self.series=self.res

self.steps=steps

defGSSA(self, tmax=50,round=0):'''Gillespie Direct algorithm'''

ini = self.inits

r = self.rates

pvi = self.pv

l=self.pvl

pv = zeros(l,dtype=float)

tm = self.tm

tc = 0

steps = 0

self.res[0,:,round]= ini

a0=1

fortim in xrange(1,tmax):

whiletc < tim:

fori in xrange(l):

pv[i] = pvi[i](r,ini)

#pv = abs(array([eq() for eq in pvi]))# #propensity vector

a0 = pv.sum()#sum of all transition probabilities# print tim, pv, a0

tau = (-1/a0)*log(random())

event = multinomial(1,pv/a0)# event which will happen on this iteration

ini += tm[:,event.nonzero()[0][0]]

#print tc, ini

tc += tau

steps +=1

ifa0 == 0:break

self.res[tim,:,round] = ini

ifa0 == 0:break# tvec = tvec[:tim]# self.res = res[:tim,:,round]

returnsteps

defCR(self,pv):"""Composition reaction algorithm"""

pass

defmain():

vars = ['s','i','r']

ini= [500,1,0]

rates = [.001,.1]

tm = array([[-1,0],[1,-1],[0,1]])

prop=[lambdar, ini:r[0]*ini[0]*ini[1],lambdar,ini:r[0]*ini[1]]

M = Model(vnames = vars,rates = rates,inits=ini, tmat=tm,propensity=prop)

t0=time.time()

M.run(tmax=80,reps=1000)

#print res# from pylab import plot , show, legend# plot(t,res,'-.')# legend(M.vn,loc=0)# show()

if__name__=="__main__":

import cProfile

cProfile.run('main()',sort=1)

main()

The Gillespie SSA is a very simple algorithm, so I thought it would be nice to blog about it and have other Pythonistas take a look at my code and possibly find a way to optimize it further. Having the core code run as fast as possible is very important for this application, because you normally have to run thousands of replicates of a every run due to its stochastic nature. This "need for speed", made me try Cython, but due to my inexperience with it, I couldn't squeeze much performance out of it:

That's it. I hope this code will serve to inspire other scientists which need to use this type of simulations. In the next few days I will organize it in a package and make it available somewhere.

from numpy.random import uniform, multinomial, exponential#from numpy import arange, array, empty,zeros

import numpy as np

cimport numpy as np

import time

DTYPE = np.double

ctypedef np.double_t DTYPE_t

ctypedef np.int_t INT_t

cdefclassModel:

cdef object vn,rates,inits,pv

cdef np.ndarray tm,res,time,series

cdef int pvl,nvars,steps

cdef object ts

def__init__(self,vnames,rates,inits, tmat,propensity):'''* vnames: list of strings* rates: list of fixed rate parameters* inits: list of initial values of variables* propensity: list of lambda functions of the form:lambda r,ini: some function of rates ans inits.'''

self.vn = vnames

self.rates = rates

self.inits = inits

self.tm = tmat

self.pv = propensity#[compile(eq,'errmsg','eval') for eq in propensity]

self.pvl = len(self.pv)#length of propensity vector

self.nvars = len(self.inits)#number of variables

self.time = np.zeros(1)

self.series = np.zeros(1)

self.steps = 0

defrun(self, method='SSA', int tmax=10, int reps=1):

cdef np.ndarray[DTYPE_t,ndim=3] res = np.zeros((tmax,self.nvars,reps),dtype=float)

tvec = np.arange(tmax)

self.res = res

cdef int i, steps

ifmethod =='SSA':

fori from 0 <= i<reps:

steps = self.GSSA(tmax,i)

elifmethod == 'SSAct':

pass

self.time=tvec

self.series=self.res

self.steps=steps

defgetStats(self):

returnself.time,self.series,self.steps

cpdef int GSSA(self, int tmax=50,int round=0):'''Gillespie Direct algorithm'''

ini = self.inits

r = self.rates

pvi = self.pv

cdef int l,steps,i,tim

cdef double a0,tc, tau

#cdef np.ndarray[INT_t] tvec

cdef np.ndarray[DTYPE_t] pv

l=self.pvl

pv = np.zeros(l, dtype=float)

tm = self.tm

#tvec = np.arange(tmax,dtype=int)

tc = 0

steps = 0

self.res[0,:,round]= ini

a0=1.

fortim from 1<= tim <tmax:

whiletc < tim:

fori from 0 <= i <l:

pv[i] = pvi[i](r,ini)

#pv = abs(array([eq() for eq in pvi]))# #propensity vector

a0 = a_sum(pv,l)#sum of all transition probabilities

#print ini#,tim, pv, a0

tau = (-1/a0)*np.log(np.random.random())

event = multinomial(1,(pv/a0))# event which will happen on this iteration

ini += tm[:,event.nonzero()[0][0]]

#print tc, ini

tc += tau

steps +=1

ifa0 == 0:break

self.res[tim,:,round] = ini

ifa0 == 0:break# tvec = tvec[:tim]# self.res = res[:tim,:,round]

returnsteps

defCR(self,pv):"""Composition reaction algorithm"""

pass

defmain():

vars = ['s','i','r']

cdef np.ndarray ini= np.array([500,1,0],dtype = int)

cdef np.ndarray rates = np.array([.001,.1],dtype=float)

cdef np.ndarray tm = np.array([[-1,0],[1,-1],[0,1]])

prop = [l1,l2]

M = Model(vnames = vars,rates = rates,inits=ini, tmat=tm,propensity=prop)

t0=time.time()

M.run(tmax=80,reps=1000)

cdef double a_sum(np.ndarray a, int len):

cdef double s

cdef int i

s=0

fori from 0 <=i <len:

s+=a[i]

returns

defl1(np.ndarray r,np.ndarray ini):

returnr[0]*ini[0]*ini[1]defl2(np.ndarray r,np.ndarray ini):

returnr[1]*ini[1]

I would appreciate any suggestions to speed up the gillespie module, both the Python and the Cython versions.