Source code for EoN.simulation

import networkx as nx
import random
import heapq
import numpy as np
import EoN
from collections import defaultdict
from collections import Counter

#######################
#                     #
#   Auxiliary stuff   #
#                     #
#######################



def _truncated_exponential_(rate, T):
    r'''returns a number between 0 and T from an
    exponential distribution conditional on the outcome being between 0 and T'''
    t = random.expovariate(rate)
    L = int(t/T)
    return t - L*T
   
class myQueue(object):
    r'''
    This class is used to store and act on a priority queue of events for 
    event-driven simulations.  It is based on heapq.

    Each queue is given a tmax (default is infinity) so that any event at later 
    time is ignored.
    
    This is a priority queue of 4-tuples of the form 
                   ``(t, counter, function, function_arguments)``

    The ``'counter'`` is present just to break ties, which generally only occur when 
    multiple events are put in place for the initial condition, but could also 
    occur in cases where events tend to happen at discrete times.

    note that the function is understood to have its first argument be t, and 
    the tuple ``function_arguments`` does not include this first t.

    So function is called as 
        ``function(t, *function_arguments)``

    Previously I used a class of events, but sorting using the __lt__ function 
    I wrote was significantly slower than simply using tuples.
    '''
    def __init__(self, tmax=float("Inf")):
        self._Q_ = []
        self.tmax=tmax
        self.counter = 0 #tie-breaker for putting things in priority queue
    def add(self, time, function, args = ()):
        r'''time is the time of the event.  args are the arguments of the
        function not including the first argument which must be time'''
        if time<self.tmax:   
            heapq.heappush(self._Q_, (time, self.counter, function, args))
            self.counter += 1
    def pop_and_run(self):
        r'''Pops the next event off the queue and performs the function'''
        t, counter, function, args = heapq.heappop(self._Q_)
        function(t, *args)
    def __len__(self): 
        r'''this will allow us to use commands like ``while Q:`` '''
        return len(self._Q_)
        
# from sortedcontainers import SortedList 
# 
#         
# class _MySortedList_(object):
#     r'''
#     This code is intended as a possible replacement for the ListDict that I've
#     been using.  
#     
#     I'm leaving it in place, but so far for my tests it is slower than ListDict.
#     It might do better in cases where the probabilities of events have a wider
#     range (in which case the rejection sampling in ListDict would be slow).  
#     
#     Since the cases I've tried may have fairly similar probabilities, the rejection
#     sampling seems to outperform it.  If rejection sampling often resulted in rejections, this
#     data type might perform better.
#      ''' 
#     
#      
#     def __init__(self, weighted = False):
#         if weighted is True:
#             self.weighted = True
#             self.SL = SortedList()
#             self.itemcounter = 0
#             #
#             #the SortedList self.SL will hold tuples that look like
#             #(weight, itemcounter, item).
#             #itemcounter is included to break any ties and make sure that
#             #the comparison algorithm never looks at item (since I don't know
#             #whether comparison is even defined for them.
#             #
#             #The entries will be ordered by weight first, and if there is
#             #a tie, then by itemcounter (which increments whenever something
#             #is added)
#             #
#             
#             self.item2weight = {}
#             #
#             #self.item2weight is a dict where self.item2weight[item] is a tuple
#             # of the form (weight, itemcounter), which will allow us to quickly
#             #find the item in the list if we ever need it.
#             #it also allows for a quick test to see if item has been put in.
#             self._total_weight = 0
#         else:
#             self.weighted = False #need to build up this case.  Should basically be a listdict
#             raise EoN.EoNError("you should be using a ListDict instead")
#             
#     def insert(self, item, weight = None):
#         r'''
#         If not present, then inserts the thing (with weight if appropriate)
#         if already there, replaces the weight unless weight is 0
#         
#         If weight is 0, then it removes the item and doesn't replace.
#         
#         WARNING:
#             if already present, then replaces weight.  To increment weight
#             use update
#         '''
#         
#         if item in self.item2weight:
#             datum = (*self.item2weight.pop(item), item)
#             self.SL.remove(datum)
#         if weight > 0:
#             self.update(item, weight_increment=weight)
# 
#     def update(self, item, weight_increment = None):
#         r'''
#         If not present, then inserts the thing (with weight if appropriate)
#         if already there, increments weight
#         
#         WARNING:
#             if already present, just increments weight.  To overwrite weight 
#             use insert.
#         '''
#         if weight_increment is not None: #will break if passing a weight to unweighted case
#             self._total_weight += weight_increment
#             if item in self.item2weight:
#                 datum = (*self.item2weight.pop(item), item)
#                 self.SL.remove(datum)
#                 new_weight = datum[0]+weight_increment
#             else:
#                 new_weight = weight_increment
#             if new_weight >0:
#                 self.itemcounter += 1
#                 self.SL.add((new_weight, self.itemcounter, item))
#                 self.item2weight[item] = (new_weight, self.itemcounter)
#         elif self.weighted:
#             raise Exception('if weighted, must assign weight_increment')
#     
#     
#     def __len__(self):
#         return len(self.items)
# 
#     def __contains__(self, item):
#         return item in self.item2weight
# 
#     def remove(self, item):
#         datum = (*self.item2weight.pop(item), item)
#         self.SL.remove(datum)
#         self._total_weight -= datum[0]
#         
#     def choose_random(self):
#         #presumes weighted and sorted in increasing order.  So start from end
#         #and work back.
#         current_index = len(self.SL)-1
#         largest_r = 0
#         q=0
#         p=1
#         while current_index >= 0:
#             datum = self.SL[current_index]
#             W = datum[0]
#             r = (q+p*random.random())**(1./W)
#             if r> largest_r:
#                 largest_r = r
#                 returnindex = current_index
#                 
#             q = largest_r**W
#             p = 1-q
#             current_index -= np.random.geometric(p)
#     
#         return self.SL[returnindex][2]
# 
#         
# 
#     def random_removal(self):
#         r'''uses other class methods to choose an entry by weight and then remove it'''
#         choice = self.choose_random()
#         self.remove(choice)
#         return choice
# 
#     def total_weight(self):
#         if self.weighted:
#             return self._total_weight
#         else:
#             return len(self)
# 
#     def update_total_weight(self):
#         self._total_weight = sum(self.weight[item] for item in self.items)


class _ListDict_(object):
    r'''
    The Gillespie algorithm will involve a step that samples a random element 
    from a set based on its weight.  This is awkward in Python.
    
    So I'm introducing a new class based on a stack overflow answer by
    Amber (http://stackoverflow.com/users/148870/amber) 
    for a question by
    tba (http://stackoverflow.com/users/46521/tba) 
    found at
    http://stackoverflow.com/a/15993515/2966723
    
    This will allow me to select a random element uniformly, and then use 
    rejection sampling to make sure it's been selected with the appropriate 
    weight.

    
    I believe a faster data structure can be created with a (binary) tree.
    We add an object with a weight to the tree.  The nodes track their weights
    and the sum of the weights below it.  So choosing a random object (by weight)
    means that we choose a random number between 0 and weight_sum.  Then 
    if it's less than the first node's weight, we choose that.  Otherwise,
    we see if the remaining bit is less than the total under the first child.  
    If so, go there, otherwise, it's the other child.  Then iterate.  Adding
    a node would probably involve placing higher weight nodes higher in
    the tree.  Currently I don't have a fast enough implementation of this
    for my purposes.  So for now I'm sticking to the mixture of lists & 
    dictionaries.
    
    I believe this structure I'm describing is similar to a "partial sum tree"
    or a "Fenwick tree", but they seem subtly different from this.
    '''
    def __init__(self, weighted = False):
        self.item_to_position = {}
        self.items = []

        self.weighted = weighted
        if self.weighted:
            self.weight = defaultdict(int) #presume all weights positive
            self.max_weight = 0
            self._total_weight = 0
            self.max_weight_count = 0
            

    def __len__(self):
        return len(self.items)

    def __contains__(self, item):
        return item in self.item_to_position

    def _update_max_weight(self):
        C = Counter(self.weight.values())  #may be a faster way to do this, we only need to count the max.
        self.max_weight = max(C.keys())
        self.max_weight_count = C[self.max_weight]

        
    def insert(self, item, weight = None):
        r'''
        If not present, then inserts the thing (with weight if appropriate)
        if already there, replaces the weight unless weight is 0
        
        If weight is 0, then it removes the item and doesn't replace.
        
        WARNING:
            replaces weight if already present, does not increment weight.
            
        
        '''
        if self.__contains__(item):
            self.remove(item)
        if weight != 0:
            self.update(item, weight_increment=weight)
        

    def update(self, item, weight_increment = None):
        r'''
        If not present, then inserts the thing (with weight if appropriate)
        if already there, increments weight
        
        WARNING:
            increments weight if already present, cannot overwrite weight.
        '''
        if weight_increment is not None: #will break if passing a weight to unweighted case
            if weight_increment >0 or self.weight[item] != self.max_weight:
                self.weight[item] = self.weight[item] + weight_increment
                self._total_weight += weight_increment
                if self.weight[item] > self.max_weight:
                    self.max_weight_count = 1
                    self.max_weight = self.weight[item]
                elif self.weight[item] == self.max_weight:
                    self.max_weight_count += 1
            else: #it's a negative increment and was at max
                self.max_weight_count -= 1
                self.weight[item] = self.weight[item] + weight_increment
                self._total_weight += weight_increment
                self.max_weight_count -= 1 
                if self.max_weight_count == 0:
                    self._update_max_weight               
        elif self.weighted:
            raise Exception('if weighted, must assign weight_increment')

        if item in self: #we've already got it, do nothing else
            return
        self.items.append(item)
        self.item_to_position[item] = len(self.items)-1

    def remove(self, choice):
        position = self.item_to_position.pop(choice)
        last_item = self.items.pop()
        if position != len(self.items):
            self.items[position] = last_item
            self.item_to_position[last_item] = position
            
        if self.weighted:
            weight = self.weight.pop(choice)
            self._total_weight -= weight
            if weight == self.max_weight:  
                #if we find ourselves in this case often
                #it may be better just to let max_weight be the
                #largest weight *ever* encountered, even if all remaining weights are less
                #
                self.max_weight_count -= 1
                if self.max_weight_count == 0 and len(self)>0:
                    self._update_max_weight()

    def choose_random(self):
        # r'''chooses a random node.  If there is a weight, it will use rejection
        # sampling to choose a random node until it succeeds'''
        if self.weighted:
            while True:
                choice = random.choice(self.items)
                if random.random() < self.weight[choice]/self.max_weight:
                    break
            # r = random.random()*self.total_weight
            # for item in self.items:
            #     r-= self.weight[item]
            #     if r<0:
            #         break
            return choice

        else:
            return random.choice(self.items)
        

    def random_removal(self):
        r'''uses other class methods to choose and then remove a random node'''
        choice = self.choose_random()
        self.remove(choice)
        return choice

    def total_weight(self):
        if self.weighted:
            return self._total_weight
        else:
            return len(self)
    def update_total_weight(self):
        self._total_weight = sum(self.weight[item] for item in self.items)

def _transform_to_node_history_(infection_times, recovery_times, tmin, SIR = True):
    r'''The original (v0.96 and earlier) returned infection_times and recovery_times.
    The new version returns node_history instead. This code transforms
    the former to the latter.
    
    It is only used for the continuous time cases.
    '''
    if SIR:
        node_history = defaultdict(lambda : ([tmin], ['S']))
        for node, time in infection_times.items():
            if time == tmin:
                node_history[node] = ([], [])
            node_history[node][0].append(time)
            node_history[node][1].append('I')
        for node, time in recovery_times.items():
            if time == tmin:
                node_history[node] = ([], [])
            node_history[node][0].append(time)
            node_history[node][1].append('R')        
    else:
        node_history = defaultdict(lambda : ([tmin], ['S']))
        for node, Itimes in infection_times.items():
            Rtimes = recovery_times[node]
            while Itimes:
                time = Itimes.pop(0)
                if time == tmin:
                    node_history[node] = ([], [])
                node_history[node][0].append(time)
                node_history[node][1].append('I')
                if Rtimes:
                    time = Rtimes.pop(0)
                    node_history[node][0].append(time)
                    node_history[node][1].append('S')
        
    return node_history


##########################
#                        #
#    SIMULATION CODE     #
#                        #
##########################

'''
    The code in the region below is used for stochastic simulation of 
    epidemics on networks
'''

def _simple_test_transmission_(u, v, p):
    r'''
    A simple test for whether u transmits to v assuming constant probability p
    
    From figure 6.8 of Kiss, Miller, & Simon.  Please cite the book if 
    using this test_transmission function for basic_discrete_SIR.

    This handles the simple case where transmission occurs with 
    probability p.

    :Arguments:

        u (node)
            the infected node
        v : node
            the susceptible node
        p : number between 0 and 1
            the transmission probability

    :Returns:
        
    
            
            True if u will infect v (given opportunity)
            False otherwise
    '''

    return random.random()<p


[docs]def discrete_SIR(G, test_transmission=_simple_test_transmission_, args=(), 
                initial_infecteds=None, initial_recovereds = None, 
                rho = None, tmin = 0, tmax = float('Inf'),
                return_full_data = False, sim_kwargs = None):
    #tested in test_discrete_SIR
    r'''
    Simulates an SIR epidemic on G in discrete time, allowing user-specified transmission rules
    
    From figure 6.8 of Kiss, Miller, & Simon.  Please cite the book
    if using this algorithm.

    Return details of epidemic curve from a discrete time simulation.
    
    It assumes that individuals are infected for exactly one unit of 
    time and then recover with immunity.

    This is defined to handle a user-defined function
    ``test_transmission(node1,node2,*args)``
    which determines whether transmission occurs.

    So elaborate rules can be created as desired by the user.

    By default it uses 
    ``_simple_test_transmission_``
    in which case args should be entered as (p,)

    :Arguments: 

    **G** NetworkX Graph (or some other structure which quacks like a 
                           NetworkX Graph)
        The network on which the epidemic will be simulated.
        
    **test_transmission** function(u,v,*args)
        (see below for args definition)
        A function that determines whether u transmits to v.
        It returns True if transmission happens and False otherwise.
        The default will return True with probability p, where args=(p,)

        This function can be user-defined.
        It is called like:
        test_transmission(u,v,*args)
        Note that if args is not entered, then args=(), and this call is 
        equivalent to
        test_transmission(u,v)

    **args** a list or tuple
        The arguments of test_transmission coming after the nodes.  If 
        simply having transmission with probability p it should be 
        entered as 
        args=(p,)
            
        [note the comma is needed to tell Python that this is really a 
        tuple]

    **initial_infecteds** node or iterable of nodes (default None)
        if a single node, then this node is initially infected
        if an iterable, then whole set is initially infected
        if None, then choose randomly based on rho.  If rho is also
        None, a random single node is chosen.
        If both initial_infecteds and rho are assigned, then there
        is an error.
       
    **initial_recovereds** as for initial_infecteds, but initially 
            recovered nodes.
            
    **rho** number  (default is None)
        initial fraction infected. initial number infected 
        is int(round(G.order()*rho)).
        
        The default results in a single randomly chosen initial infection.

    **tmin** start time
        
    **tmax** stop time (default Infinity). 

    **return_full_data** boolean (default False)
        Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``
            

    :Returns: 
        
        
    **t, S, I, R** numpy arrays
            
    Or ``if return_full_data is True`` returns
    
    **full_data**  Simulation_Investigation object
        from this we can extract the status history of all nodes
        We can also plot the network at given times
        and even create animations using class methods.
    
    :SAMPLE USE:

    ::

        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        G = nx.fast_gnp_random_graph(1000,0.002)
        t, S, I, R = EoN.discrete_SIR(G, args = (0.6,), 
                                            initial_infecteds=range(20))
        plt.plot(t,I)
    
    
    Because this sample uses the defaults, it is equivalent to a call to 
    basic_discrete_SIR
    '''
    if rho is not None and initial_infecteds is not None:
        raise EoN.EoNError("cannot define both initial_infecteds and rho")

    
    
    if initial_infecteds is None:  #create initial infecteds list if not given
        if rho is None:
            initial_number = 1
        else:
            initial_number = int(round(G.order()*rho))
        initial_infecteds=random.sample(G.nodes(), initial_number)
    elif G.has_node(initial_infecteds):
        initial_infecteds=[initial_infecteds]
    #else it is assumed to be a list of nodes.

    if return_full_data:
        node_history = defaultdict(lambda : ([tmin], ['S']))
        transmissions = []
        for node in initial_infecteds:
            node_history[node] = ([tmin], ['I'])
            transmissions.append((tmin-1, None, node))
        if initial_recovereds is not None:
            for node in initial_recovereds:
                node_history[node] = ([tmin], ['R'])
    
    N=G.order()
    t = [tmin]
    S = [N-len(initial_infecteds)]
    I = [len(initial_infecteds)]
    R = [0]
    
    susceptible = defaultdict(lambda: True)  
    #above line is equivalent to u.susceptible=True for all nodes.
    
    for u in initial_infecteds:
        susceptible[u] = False
    if initial_recovereds is not None:
        for u in initial_recovereds:
            susceptible[u] = False
        
    infecteds = set(initial_infecteds)
    
    while infecteds and t[-1]<tmax:
        new_infecteds = set()
        
        infector = {}  #used for returning full data.  a waste of time otherwise
        for u in infecteds:
            for v in G.neighbors(u):
                if susceptible[v] and test_transmission(u, v, *args): 
                    new_infecteds.add(v)
                    susceptible[v] = False
                    infector[v] = [u]
                elif return_full_data and v in new_infecteds and test_transmission(u, v, *args):
                    #if ``v`` already infected on this round, consider if it is
                    #multiply infected this round.
                    infector[v].append(u)
                    

        if return_full_data:
            for v in infector.keys():
                transmissions.append((t[-1], random.choice(infector[v]), v))
            next_time = t[-1]+1
            if next_time <= tmax:
                for u in infecteds:
                    node_history[u][0].append(next_time)
                    node_history[u][1].append('R')
                for v in new_infecteds:
                    node_history[v][0].append(next_time)
                    node_history[v][1].append('I')

        infecteds = new_infecteds

        R.append(R[-1]+I[-1])
        I.append(len(infecteds))
        S.append(S[-1]-I[-1])
        t.append(t[-1]+1)
    if not return_full_data:
        return np.array(t), np.array(S), np.array(I), \
               np.array(R)
    else:
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, transmissions, 
                                            possible_statuses = ['S', 'I', 'R'], 
                                            **sim_kwargs)



[docs]def basic_discrete_SIR(G, p, initial_infecteds=None, 
                                initial_recovereds = None, rho = None,
                                tmin = 0, tmax=float('Inf'), 
                                return_full_data = False, sim_kwargs = None):
    #tested in test_basic_discrete_SIR   
    r'''
    Performs simple discrete SIR simulation assuming constant transmission 
    probability p.
    
    From figure 6.8 of Kiss, Miller, & Simon.  Please cite the book if 
    using this algorithm.

    Does a simulation of the simple case of all nodes transmitting
    with probability p independently to each neighbor and then
    recovering.

    :Arguments: 

    **G**    networkx Graph
            The network the disease will transmit through.
            
    **p** number
            transmission probability
            
    **initial_infecteds**  node or iterable of nodes (default None)
            if a single node, then this node is initially infected
            if an iterable, then whole set is initially infected
            if None, then choose randomly based on rho.  If rho is also
            None, a random single node is chosen.
            If both initial_infecteds and rho are assigned, then there
            is an error.
       
    **initial_recovereds**  as for initial_infecteds, but for initially 
            recovered nodes.
            
    **rho**  number  (default None)
            initial fraction infected. number initially infected
            is int(round(G.order()*rho))
            
            The default results in a single randomly chosen initial infection.
        
    **tmin**  float  (default 0)
        start time
        
    **tmax**  float  (default infinity)
        stop time (if not extinct first).  

    **return_full_data**  boolean (default False)
        Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``

    :Returns: 
        
    if return_full_data is False returns 
        
        **t, S, I, R**    numpy arrays
        
        these numpy arrays give all the times observed and the number 
        in each state at each time.
            
    Or ``if return_full_data is True`` returns
        **full_data**  Simulation_Investigation object
            
        from this we can extract the status history of all nodes
        We can also plot the network at given times
        and even create animations using class methods.

    
    :SAMPLE USE:

    ::


        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        G = nx.fast_gnp_random_graph(1000,0.002)
        t, S, I, R = EoN.basic_discrete_SIR(G, 0.6)
        plt.plot(t,S)
    
    
        #This sample may be boring if the randomly chosen initial infection
        #doesn't trigger an epidemic.

'''

    return discrete_SIR(G, _simple_test_transmission_, (p,), 
                                    initial_infecteds, initial_recovereds, 
                                    rho, tmin, tmax, return_full_data, sim_kwargs=sim_kwargs)

[docs]def basic_discrete_SIS(G, p, initial_infecteds=None, rho = None,
                                tmin = 0, tmax = 100, return_full_data = False, 
                                sim_kwargs = None):
    
    '''Does a simulation of the simple case of all nodes transmitting
    with probability p independently to each susceptible neighbor and then
    recovering.
    
    This is not directly described in Kiss, Miller, & Simon.
    
    
    
    :Arguments: 

    **G** networkx Graph
            The network the disease will transmit through.
            
    **p** number
            transmission probability
            
    **initial_infecteds**  node or iterable of nodes (default None)
            if a single node, then this node is initially infected
            if an iterable, then whole set is initially infected
            if None, then choose randomly based on rho.  If rho is also
            None, a random single node is chosen.
            If both initial_infecteds and rho are assigned, then there
            is an error.
       
    **rho**  number
            initial fraction infected. number is int(round(G.order()*rho))

    **return_full_data**  boolean (default False)
            Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``

    :Returns: 

    if return_full_data is False
        **t, S, I**
            
        All numpy arrays 

    if return_full_data is True
        **full_data**    Simulation_Investigation object

        from this we can extract the status history of all nodes
        We can also plot the network at given times
        and even create animations using class methods.

    :SAMPLE USE:

    ::

        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        G = nx.fast_gnp_random_graph(1000,0.002)
        t, S, I = EoN.basic_discrete_SIS(G, 0.6, tmax = 20)
        plt.plot(t,S)

    '''
    
    if rho is not None and initial_infecteds is not None:
        raise EoN.EoNError("cannot define both initial_infecteds and rho")

    if initial_infecteds is None:  #create initial infecteds list if not given
        if rho is None:
            initial_number = 1
        else:
            initial_number = int(round(G.order()*rho))
        initial_infecteds=random.sample(G.nodes(), initial_number)
    elif G.has_node(initial_infecteds):
        initial_infecteds=[initial_infecteds]
    #else it is assumed to be a list of nodes.

    if return_full_data:
        transmissions = []
        node_history = defaultdict(lambda : ([tmin], ['S']))
        for u in initial_infecteds:
            node_history[u] = ([tmin], ['I'])
            transmissions.append((tmin-1, None, u))
    N=G.order()
    t = [tmin]
    S = [N-len(initial_infecteds)]
    I = [len(initial_infecteds)]
    
    infecteds = set(initial_infecteds)
    while infecteds and t[-1]<tmax:
        new_infecteds = set()
        infector={}
        for u in infecteds:
            for v in G.neighbors(u):
                if v not in infecteds and random.random()<p:
                    if v not in new_infecteds:
                        new_infecteds.add(v)
                        infector[v] = [u]
                    else:
                        infector[v].append(u)
                        
                        
#            new_infecteds.union({v for v in G.neighbors(u) 
#                                  if random.random()<p and v not in infecteds})

        if return_full_data:
            for v in infector.keys():
                transmissions.append((t[-1], random.choice(infector[v]), v))
            next_time = t[-1]+1
            if next_time<= tmax:
                for u in infecteds:
                    node_history[u][0].append(next_time)
                    node_history[u][1].append('S')
                for v in new_infecteds:
                    node_history[v][0].append(next_time)
                    node_history[v][1].append('I')
        infecteds = new_infecteds
        t.append(t[-1]+1)
        S.append(N-len(infecteds))
        I.append(len(infecteds))
            
        
    if not return_full_data:
        return np.array(t), np.array(S), np.array(I)
    else:
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, transmissions, 
                                            possible_statuses = ['S', 'I'], 
                                            **sim_kwargs)

    
    
[docs]def percolate_network(G, p):
    #tested indirectly in test_basic_discrete_SIR   

    r'''
    Performs percolation on a network G with each edge persisting with 
    probability p
    
    From figure 6.10 of Kiss, Miller, & Simon.  Please cite the book
    if using this algorithm.

    Performs bond percolation on the network G, keeping edges with 
    probability p

    :Arguments: 

    **G**    networkx Graph
        The contact network
    **p** number between 0 and 1
        the probability of keeping edge

    :Returns: 
        
    **H**   NetworkX Graph
        A network with same nodes as G, but with each edge retained 
        independently with probability p.
        
    :SAMPLE USE:

    ::


        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        G = nx.fast_gnp_random_graph(1000,0.002)
        H = EoN.percolate_network(G, 0.6)

        #H is now a graph with about 60% of the edges of G
'''

    H = nx.Graph()
    H.add_nodes_from(G.nodes())
    for edge in G.edges():
        if random.random()<p:
            H.add_edge(*edge)
    return H

def _edge_exists_(u, v, H):
    r'''
    Tests if directed edge ``u``, ``v`` exists in graph ``H``.
    
    From figure 6.10 of Kiss, Miller, & Simon.  Please cite the book
    if using this algorithm.

    Tests whether ``H`` has an edge from ``u`` to ``v``.

    :Arguments: 

    **u** node
    **v** node
    **H** networkx graph

    :Returns: 
        
    **True**  if ``H`` has the edge
    **False**  if ``H`` does not have the edge
    '''
    return H.has_edge(u,v)

[docs]def percolation_based_discrete_SIR(G, p, 
                                    initial_infecteds=None, 
                                    initial_recovereds = None,
                                    rho = None, tmin = 0,
                                    tmax = float('Inf'),
                                    return_full_data = False, sim_kwargs = None):
    #tested in test_basic_discrete_SIR   
    r'''
    perfoms a simple SIR epidemic but using percolation as the underlying 
    method.
    
    From figure 6.10 of Kiss, Miller, & Simon.  Please cite the book
    if using this algorithm.

    
    The simple case of all nodes transmitting with probability p 
    independently to each neighbor and then recovering, but using a 
    percolation-based approach.  
    
    :Warning:
        
    You probably **shouldn't use this algorithm**.
        
    See ``basic_discrete_SIR`` which produces equivalent outputs.  
    
    That algorithm will be faster than this one.  
    
    The value of this function is that by performing many simulations we 
    can see that the outputs of the two are equivalent.  
    
    This algorithm leads to a better understanding of the theory, but
    it's not computationally efficient.


    :Arguments: 

    **G**    networkx Graph
            The network the disease will transmit through.
    **p** number
            transmission probability

    **initial_infecteds**  node or iterable of nodes (default None)
            if a single node, then this node is initially infected
            if an iterable, then whole set is initially infected
            if None, then choose randomly based on rho.  If rho is also
            None, a random single node is chosen.
            If both initial_infecteds and rho are assigned, then there
            is an error.
       
       
    **initial_recovereds**  as for initial_infecteds, but initially 
            recovered nodes.
            
    **rho**  number
            initial fraction infected. number is int(round(G.order()*rho))
        
    **tmin**  start time
        
    **tmax**  stop time (if not extinct first).  Default step is 1.

    **return_full_data**  boolean (default False)
            Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``

    :Returns: 
        
    **t, S, I, R** numpy arrays
        
    OR if ``return_full_data is True``:
            
    **full_data**    Simulation_Investigation object
        from this we can extract the status history of all nodes
        We can also plot the network at given times
        and even create animations using class methods.
    
    :SAMPLE USE:

    ::

        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        G = nx.fast_gnp_random_graph(1000,0.002)
        t, S, I, R = EoN.percolation_based_discrete_SIR(G, p)
        plt.plot(t,S)
    
    This is equivalent to basic_discrete_epidemic (but many simulations
    may be needed before it's clear, since these are stochastic)

'''

    H = percolate_network(G, p)
    return discrete_SIR(H, test_transmission=H.has_edge, 
                                initial_infecteds=initial_infecteds, 
                                initial_recovereds = initial_recovereds,
                                rho = rho, tmin = tmin, tmax = tmax, 
                                return_full_data=return_full_data, 
                                sim_kwargs=sim_kwargs)
                                

[docs]def estimate_SIR_prob_size(G, p):
    #tested in test_estimate_SIR_prob_size
    r'''
    Uses percolation to estimate the probability and size of epidemics 
    assuming constant transmission probability p
    
    From figure 6.12 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.

    Provies an estimate of epidemic probability and size assuming a 
    fixed transmission probability p.  
    
    The estimate is found by performing bond percolation and then 
    finding the largest connected component in the remaining network.  
    
    This assumes that there is a single giant component above threshold.  

    It will not be an appropriate measure if the network is made up of 
    several densely connected components with very weak connections 
    between these components.

    :Arguments: 

    **G**    networkx Graph
            The network the disease will transmit through.
    **p** number
            transmission probability

    :Returns: 
        
    **PE, AR**   both floats between 0 and 1 
        estimates of the probability and proportion 
        infected (attack rate) in epidemics
        (the two are equal, but each given for consistency with 
        ``estimate_directed_SIR_prob_size``)
          
            
    :SAMPLE USE:

    ::

        import networkx as nx
        import EoN
    
        G = nx.fast_gnp_random_graph(1000,0.002)
        PE, AR = EoN.estimate_SIR_prob_size(G, 0.6)

    '''
    H = percolate_network(G, p)
    size = max((len(CC) for CC in nx.connected_components(H)))
    returnval = float(size)/G.order()
    return returnval, returnval


[docs]def directed_percolate_network(G, tau, gamma, weights = True):
    #indirectly tested in test_estimate_SIR_prob_size
    r'''
    performs directed percolation, assuming that transmission and recovery 
    are Markovian
    
    
    From figure 6.13 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.  
    
    This performs directed percolation corresponding to an SIR epidemic
    assuming that transmission is at rate tau and recovery at rate 
    gamma

    :See Also:

    ``nonMarkov_directed_percolate_network`` which allows for duration and 
        time to infect to come from other distributions.
    
    ``nonMarkov_directed_percolate_network`` which allows for more complex 
        rules
    
    :Arguments: 

    **G**    networkx Graph
        The network the disease will transmit through.
    **tau**   positive float 
        transmission rate
    **gamma**   positive float 
        recovery rate
    **weights**   boolean    (default True)
        if True, then includes information on time to recovery
        and delay to transmission.  If False, just the directed graph.

    :Returns: 
        :
    **H**   networkx DiGraph  (directed graph)
        a u->v edge exists in H if u would transmit to v if ever 
        infected.
        
        The edge has a time attribute (time_to_infect) which gives the 
        delay from infection of u until transmission occurs.
        
        Each node u has a time attribute (duration) which gives the 
        duration of its infectious period.
        
    :SAMPLE USE:


    ::

        import networkx as nx
        import EoN
        
        G = nx.fast_gnp_random_graph(1000,0.002)
        H = EoN.directed_percolate_network(G, 2, 1)

    '''
    
    #simply calls directed_percolate_network_with_timing, using markovian rules.
    
    def trans_time_fxn(u, v, tau):
        if tau>0:
            return random.expovariate(tau)
        else:
            return float('Inf')
    trans_time_args = (tau,)
    
    def rec_time_fxn(u, gamma):
        if gamma>0:
            return random.expovariate(gamma)
        else:
            return float('Inf')
    rec_time_args = (gamma,)
    
    return nonMarkov_directed_percolate_network_with_timing(G, trans_time_fxn, 
                                                            rec_time_fxn,  
                                                            trans_time_args,
                                                            rec_time_args, 
                                                            weights=weights)
                
def _out_component_(G, source):
    '''
    rather than following the pseudocode in figure 6.15 of 
        Kiss, Miller & Simon,
    this uses a built-in Networkx command.  

    finds the set of nodes (including source) which are reachable from 
    nodes in source.

    :Arguments: 

    **G**  networkx Graph
        The network the disease will transmit through.
    **source** either a node or an iterable of nodes (set, list, tuple)
        The nodes from which the infections start.  We assume no node
        will ever have a name that is an iterable of other node names.
        It will run, but may not use source user expects.


    :Returns: 
                  
    **reachable_nodes** set
        the set of nodes reachable from source (including source).

    :Warning: 
    if the graph G has nodes like 1, 2, 3, and (1,2,3), then a
    source of (1,2,3) is potentially ambiguous.  This algorithm will interpret
    the source as the single node (1,2,3)


    '''
    if G.has_node(source): 
        source_nodes = {source}
    else:
        source_nodes = set(source)
        
    reachable_nodes = set().union(source_nodes)

    for node in source_nodes:
        reachable_nodes = reachable_nodes.union(
                                        set(nx.descendants(G, node)))

    
    return reachable_nodes

def _in_component_(G, target):
    r'''
    creates the _in_component_ by basically reversing _out_component_.

    :Arguments: 

    **G**  networkx Graph
        The network the disease will transmit through.
            
    **target** a target node (or iterable of target nodes)
        The node whose infection we are interested in.

        In principle target could be an iterable, but in this case we 
        would be finding those possible sources whose infection leads to 
        infection of at least one target, not all.

    :Warning: 
        
    if the graph G has nodes like 1, 2, 3, and (1,2,3), then a
    target of (1,2,3) is potentially ambiguous.  It will interpret
    the target as the single node (1,2,3)

    :Returns: 

    **source_nodes** (set)
        the set of nodes (including target) from which target is 
        reachable


    '''
    if G.has_node(target):
        #potential bug if for example G has  nodes like 1, 2, 3, and also
        #(1,2,3).  Then a target of (1,2,3) is ambiguous.
        target_nodes = {target}
    else:
        target_nodes = set(target)

    source_nodes = set().union(target_nodes)
    
    for node in target_nodes:
        source_nodes = source_nodes.union(set(nx.ancestors(G, node)))

    return source_nodes


[docs]def get_infected_nodes(G, tau, gamma, initial_infecteds=None, 
                initial_recovereds = None):
    r'''
    Finds all eventually infected nodes in an SIR simulation, through a 
    percolation approach 
    
    From figure 6.15 of Kiss, Miller, & Simon.  Please cite the book if 
    using this algorithm

    Assumes that
    the intial infecteds are as given and transmission occurs with rate 
    tau and recovery with rate gamma.          
    

    Note that the output of this algorithm is stochastic.
    
    This code has similar run-time whether an epidemic occurs or not.
    There are much faster ways to implement an algorithm giving the same 
    output, for example by actually running one of the epidemic simulation tools.
    
    :Warning:
    
    why are you using this command? If it's to better understand the
    relationship between percolation and SIR epidemics, that's fine.  
    But this command IS NOT an efficient way to calculate anything.  Don't do 
    it like this.  Use one of the other algorithms.  Try ``fast_SIR``, 
    for example.
    
    :Arguments: 

    **G**    networkx Graph
        The network the disease will transmit through.
    **tau**   positive float 
        transmission rate
    **gamma**   positive float 
        recovery rate
    **initial_infecteds**  node or iterable of nodes
        if a single node, then this node is initially infected
        if an iterable, then whole set is initially infected
        if None, then a randomly chosen node is initially infected.
    **initial_recovereds** node or iterable of nodes
        if a single node, then this node is initially recovered
        if an iterable, then whole set is initially recovered

    :Returns: 
            
    **infected_nodes** set
        the set of nodes infected eventually in a simulation.
        
    :SAMPLE USE:
        
    ::

        import networkx as nx
        import EoN
    
        G = nx.fast_gnp_random_graph(1000,0.002)
        
        finalR = EoN.get_infected_nodes(G, 2, 1, initial_infecteds=[0, 5])
    
        #finds the nodes infected if 0 and 5 are the initial nodes infected
        #and tau=2, gamma=1
    '''    
    if initial_recovereds is None:
        initial_recovereds = set()
    elif G.has_node(initial_recovereds):
        initial_recovereds = set([initial_recovereds])
    else:
        initial_recovereds = set(initial_recovereds)
    if initial_infecteds is None:
        while True:
            node = random.choice(G.nodes())
            if node not in initial_recovereds:
                break
        initial_infecteds=set([node])
    elif G.has_node(initial_infecteds):
        initial_infecteds=set([initial_infecteds])
    else:
        initial_infecteds = set(initial_infecteds)
    if initial_infecteds.intersection(initial_recovereds):
        raise EoN.EoNError("initial infecteds and initial recovereds overlap")
    H = directed_percolate_network(G, tau, gamma)
    for node in initial_recovereds:
        H.remove_node(node)
    infected_nodes = _out_component_(H, initial_infecteds)
    return infected_nodes


[docs]def estimate_directed_SIR_prob_size(G, tau, gamma):
    #tested in test_estimate_SIR_prob_size
    '''
    Predicts probability and attack rate assuming continuous-time Markovian SIR disease on network G
    
    From figure 6.17 of Kiss, Miller, & Simon.  Please cite the book if 
    using this algorithm
    
    
    
    :See Also:

    ``estimate_nonMarkov_SIR_prob_size`` which handles nonMarkovian versions

    :Arguments: 

    **G**    networkx Graph
        The network the disease will transmit through.
    **tau**   positive float 
        transmission rate
    **gamma**   positive float 
        recovery rate

    :Returns: 
        
    **PE, AR**  numbers (between 0 and 1)
        Estimates of epidemic probability and attack rate found by 
        performing directed percolation, finding largest strongly 
        connected component and finding its in/out components.
        
    :SAMPLE USE:

    ::


        import networkx as nx
        import EoN
    
        G = nx.fast_gnp_random_graph(1000,0.003)
        PE, AR = EoN.estimate_directed_SIR_prob_size(G, 2, 1)
    
    '''
    
    H = directed_percolate_network(G, tau, gamma)
    return estimate_SIR_prob_size_from_dir_perc(H)

[docs]def estimate_SIR_prob_size_from_dir_perc(H):
    #indirectly tested in test_estimate_SIR_prob_size
    r'''
    Estimates probability and size of SIR epidemics for an input network after directed percolation
    
    From figure 6.17 of Kiss, Miller, & Simon.  Please cite the book if 
    using this algorithm

    :Arguments: 

    **H**  directed graph 
        The outcome of directed percolation on the contact network G

    :Returns: 
        
    **PE, AR**  numbers
        Estimates of epidemic probability and attack rate found by 
        finding largest strongly connected component and finding in/out 
        components.
        
    :SAMPLE USE:


    ::

        import networkx as nx
        import EoN

        G = nx.fast_gnp_random_graph(1000,0.003)
        H = some_user_defined_operation_to_do_percolation(G, argument)
        PE, AR = EoN.estimate_SIR_prob_size_from_dir_perc(H)

    '''

    Hscc = max((nx.strongly_connected_components(H)), key = len)
    u = list(Hscc)[0]  #random.choice(Hscc)
    inC = _in_component_(H, u) #includes both H_{IN} and H_{SCC}
    outC = _out_component_(H, u) #includes both H_{OUT} and H_{SCC}
    N=float(H.order())
    PE = len(inC)/N
    AR = len(outC)/N
    return PE, AR
 
[docs]def estimate_nonMarkov_SIR_prob_size_with_timing(G, 
                                                trans_time_fxn, 
                                                rec_time_fxn, 
                                                trans_time_args=(),
                                                rec_time_args=()):
    '''
    estimates probability and size for user-input transmission and recovery time functions.
    
    :Arguments: 
    
    **G** Networkx Graph
        the input graph
    **trans_time_fxn** function
        trans_time_fxn(u, v, *trans_time_args) 
        returns the delay from u's infection to transmission to v.
    **rec_time_fxn** function
        rec_time_fxn(u, *rec_time_args)
        returns the delay from u's infection to its recovery.
    **trans_time_args** tuple
        any additional arguments required by trans_time_fxn.  For example
        weights of nodes.
    **rec_time_args** tuple
        any additional arguments required by rec_time_fxn

    :Returns: 
        
    **PE, AR**  numbers (between 0 and 1)
        Estimates of epidemic probability and attack rate found by 
        finding largest strongly connected component and finding in/out 
        components.
        
    :SAMPLE USE:


    ::

        #mimicking the standard version with transmission rate tau
        #and recovery rate gamma
        #equivalent to 
        #PE, AR = EoN.estimate_SIR_prob_size(G, tau, gamma)
    
        import networkx as nx
        import EoN
        import random
        from collections import defaultdict
    
        G=nx.fast_gnp_random_graph(1000,0.002)
        
        tau = 2
        
        gamma = 1
    
        def trans_time_fxn(u,v, tau):
            
            return random.expovariate(tau)
            
        def rec_time_fxn(u, gamma):
            
            return random.expovariate(gamma)
        
        PE, AR = EoN.estimate_nonMarkov_SIR_prob_size(G, 
                                                    trans_time_fxn=trans_time_fxn,
                                                    rec_time_fxn = rec_time_fxn,
                                                    trans_time_args = (tau,),
                                                    rec_time_args = (gamma,)
                                                    )

    '''
    
    H = nonMarkov_directed_percolate_network_with_timing(G, 
                                                        trans_time_fxn,
                                                        rec_time_fxn,
                                                        trans_time_args,
                                                        rec_time_args)
    return estimate_SIR_prob_size_from_dir_perc(H)
    
    
[docs]def estimate_nonMarkov_SIR_prob_size(G, xi, zeta, transmission):
    '''
    Predicts epidemic probability and size using nonMarkov_directed_percolate_network.
    
    This is not directly described in Kiss, Miller, & Simon, but is based on (fig 6.18).
    
    :Warning:
    You probably DON'T REALLY WANT TO USE THIS.
    Check if estimate_nonMarkov_prob_size_with_timing fits your needs better.
    
    :Arguments: 

    **G** (networkx Graph)
            The input graph

    **xi** dict
        xi[u] gives all necessary information to determine what u's 
        infectiousness is.
    **zeta** dict
        zeta[v] gives everything needed about v's susceptibility

    **transmission** user-defined function
        transmission(xi[u], zeta[v]) determines whether u transmits to 
        v.  Returns True or False depending on whether the transmission would
        happen

    :Returns: 
        
    **PE, AR**  numbers (between 0 and 1)
        Estimates of epidemic probability and attack rate found by 
        finding largest strongly connected component and finding in/out 
        components.
        
    :SAMPLE USE:


    ::

        #mimicking the standard version with transmission rate tau
        #and recovery rate gamma
    
        import networkx as nx
        import EoN
        import random
        from collections import defaultdict
    
        G=nx.fast_gnp_random_graph(1000,0.002)
        tau = 2
        gamma = 1
    
        xi = {node:random.expovariate(gamma) for node in G.nodes()}  
        #xi[node] is duration of infection of node.
        
        zeta = defaultdict(lambda : tau) #every node has zeta=tau, so same 
                                        #transmission rate
        
        def my_transmission(infection_duration, trans_rate):
            #infect if duration is longer than time to infection.
            if infection_duration > random.expovariate(trans_rate):
                return True
            else:  
                return False
        
        PE, AR = EoN.estimate_nonMarkov_SIR_prob_size(G, xi, zeta, 
                                                        my_transmission)
            

    '''
    
    H = nonMarkov_directed_percolate_network(G, xi, zeta, transmission)
    return estimate_SIR_prob_size_from_dir_perc(H)
        
[docs]def nonMarkov_directed_percolate_network_with_timing(G, 
                                                    trans_time_fxn, 
                                                    rec_time_fxn,
                                                    trans_time_args=(),
                                                    rec_time_args=(), 
                                                    weights=True):
    r'''
    Performs directed percolation on G for user-specified transmission time
    and recovery time distributions.
    
    A generalization of figure 6.13 of Kiss, Miller & Simon
    
          

    
    :See Also:
        
    **directed_percolate_network**
    if it's just a constant transmission and recovery rate.

    **nonMarkov_directed_percolate_network**
    if your rule for creating the percolated network cannot be expressed
    as simply calculating durations and delays until transmission.
    
    
    :Arguments: 

    The arguments are very much like in fast_nonMarkov_SIR
        
    **G**  Networkx Graph
        the input graph
    **trans_time_fxn** user-defined function
        returns the delay from u's infection to transmission to v.
        
        delay=trans_time_fxn(u, v, *trans_time_args) 
        
    **rec_time_fxn** user-defined function
        returns the duration of from u's infection (delay to its recovery).
        
        duration = rec_time_fxn(u, *rec_time_args)
        

        Note:
            if delay == duration, we assume infection happens.
            
    **trans_time_args** tuple
        any additional arguments required by trans_time_fxn.  For example
        weights of nodes.
    **rec_time_args** tuple
        any additional arguments required by rec_time_fxn
    **weights** boolean
        if true, then return directed network with the delay and duration as
        weights.
    
    :Returns:
        
    **H** directed graph.
        
    The returned graph is built by assigning each node an infection duration 
    and then each edge (in each direction) a delay until transmission.  
    If   delay<duration  it adds directed edge to ``H``.  
    
    if weights is True, then ``H`` contains the duration and delays
    as weights.  Else it's just a directed graph.

    '''
    H = nx.DiGraph()
    if weights:
        for u in G.nodes():
            duration = rec_time_fxn(u, *rec_time_args)
            H.add_node(u, duration = duration)
            for v in G.neighbors(u):
                delay = trans_time_fxn(u, v, *trans_time_args)
                if delay<=duration:
                    H.add_edge(u,v, delay_to_infection = delay)
    else:
        for u in G.nodes():
            duration = rec_time_fxn(u, *rec_time_args)
            H.add_node(u)
            for v in G.neighbors(u):
                delay = trans_time_fxn(u, v, *trans_time_args)
                if delay<=duration:
                    H.add_edge(u,v)
    return H

[docs]def nonMarkov_directed_percolate_network(G, xi, zeta, transmission):
    r'''
    performs directed percolation on a network following user-specified rules.
    
    From figure 6.18 of Kiss, Miller, & Simon.  
    Please cite the book if using this algorithm.
    
    This algorithm is particularly intended for a case where the duration and
    delays from infection to transmission are somehow related to one another.

    :Warning:
    You probably **shouldn't use this**.
    Check if nonMarkov_directed_percolate_with_timing fits your needs better.

    :See Also:
        
    ``nonMarkov_directed_percolate_network_with_timing``
        
        if your rule for creating the percolated network is based on calculating
        a recovery time for each node and then calculating a separate
        transmission time for the edges this will be better.
    
    ``directed_percolate_network``
        
        if it's just a constant transmission and recovery rate.

    
    
    :Arguments: 

    **G** networkx Graph
        The input graph

    **xi** dict
        xi[u] gives all necessary information to determine what us 
        infectiousness is.
    **zeta** dict
        zeta[v] gives everything needed about vs susceptibility

    **transmission** user-defined function
        transmission(xi[u], zeta[v]) determines whether u transmits to v.
        
        returns True if transmission happens and False if it does not

    :Returns: 
        
    **H** networkx DiGraph (directed graph)
        Edge (u,v) exists in H if disease will transmit given the opportunity.
    
    :SAMPLE USE:

    for now, I'm being lazy.  
    Look at the sample for estimate_nonMarkov_SIR_prob_size to infer it.
    
'''
    H = nx.DiGraph()
    for u in G.nodes():
        H.add_node(u)
        for v in G.neighbors(u):
            if transmission(xi[u],zeta[v]):
                H.add_edge(u,v)
    return H
    
    
    
    
### Code starting here does event-driven simulations ###


def _find_trans_and_rec_delays_SIR_(node, sus_neighbors, trans_time_fxn, 
                                    rec_time_fxn,  trans_time_args=(),
                                    rec_time_args=()):

    rec_delay = rec_time_fxn(node, *rec_time_args)
    trans_delay={}
    for target in sus_neighbors:
        trans_delay[target] = trans_time_fxn(node, target, *trans_time_args)
    return trans_delay, rec_delay
    

def _process_trans_SIR_(time, G, source, target, times, S, I, R, Q, status, 
                            rec_time, pred_inf_time, transmissions, 
                            trans_and_rec_time_fxn, 
                            trans_and_rec_time_args = ()):
    r'''
    From figure A.4 of Kiss, Miller, & Simon.  Please cite the book if 
    using this algorithm.

    :Arguments: 

    time : number
        time of transmission
**G**  networkx Graph
    node : node
        node receiving transmission.
    times : list
        list of times at which events have happened
    S, I, R : lists
        lists of numbers of nodes of each status at each time
    Q : myQueue
        the queue of events
    status : dict
        dictionary giving status of each node
    rec_time : dict
        dictionary giving recovery time of each node
    pred_inf_time : dict
        dictionary giving predicted infeciton time of nodes 
    trans_and_rec_time_fxn : function
        trans_and_rec_time_fxn(node, susceptible_neighbors, *trans_and_rec_time_args) 
        returns tuple consisting of
           dict of delays until transmission from node to neighbors and 
           float having delay until recovery of node
        An example of how to use this appears in the code fast_SIR where
        depending on whether inputs are weighted, it constructs different
        versions of this function and then calls fast_nonMarkov_SIR.
    trans_and_rec_time_args : tuple (default empty)
        see trans_and_rec_time_fxn

    :Returns: 
    
    nothing returned

    :MODIFIES:
    
    status : updates status of newly infected node
    rec_time : adds recovery time for node
    times : appends time of event
    S : appends new S (reduced by 1 from last)
    I : appends new I (increased by 1)
    R : appends new R (same as last)
    Q : adds recovery and transmission events for newly infected node.
    pred_inf_time : updated for nodes that will receive transmission

    '''

    if status[target] == 'S':  #nothing happens if already infected.
        status[target] = 'I'
        times.append(time)
        transmissions.append((time, source, target))
        S.append(S[-1]-1) #one less susceptible
        I.append(I[-1]+1) #one more infected
        R.append(R[-1])   #no change to recovered
        
        
        suscep_neighbors = [v for v in G.neighbors(target) if status[v]=='S']

        trans_delay, rec_delay = trans_and_rec_time_fxn(target, suscep_neighbors,
                                                *trans_and_rec_time_args)

                                
        rec_time[target] = time + rec_delay
        if rec_time[target]<=Q.tmax:
            Q.add(rec_time[target], _process_rec_SIR_, 
                            args = (target, times, S, I, R, status))
        for v in trans_delay:
            inf_time = time + trans_delay[v]
            if inf_time<= rec_time[target] and inf_time < pred_inf_time[v] and inf_time<=Q.tmax:
                Q.add(inf_time, _process_trans_SIR_, 
                              args = (G, target, v, times, S, I, R, Q, 
                                        status, rec_time, pred_inf_time, 
                                        transmissions, trans_and_rec_time_fxn,
                                        trans_and_rec_time_args
                                     )
                             )
                pred_inf_time[v] = inf_time
    
def _process_rec_SIR_(time, node, times, S, I, R, status):
    r'''From figure A.3 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.

    :Arguments: 

        event : event
            has details on node and time
        times : list
            list of times at which events have happened
        S, I, R : lists
            lists of numbers of nodes of each status at each time
        status : dict
            dictionary giving status of each node


    :Returns: 
        :
        Nothing

    MODIFIES
    ----------
    status : updates status of newly recovered node
    times : appends time of event
    S : appends new S (same as last)
    I : appends new I (decreased by 1)
    R : appends new R (increased by 1)
    '''
    times.append(time)
    S.append(S[-1])   #no change to number susceptible
    I.append(I[-1]-1) #one less infected
    R.append(R[-1]+1) #one more recovered
    status[node] = 'R'
    
def _trans_and_rec_time_Markovian_const_trans_(node, sus_neighbors, tau, rec_rate_fxn):
    r'''I introduced this with a goal of making the code run faster.  It looks
    like the fancy way of selecting the infectees and then choosing their 
    infection times is slower than just cycling through, finding infection
    times and checking if that time is less than recovery time.  So I've
    commented out the more "sophisticated" approach.
    '''
    
    duration = random.expovariate(rec_rate_fxn(node))

        
    trans_prob = 1-np.exp(-tau*duration)
    number_to_infect = np.random.binomial(len(sus_neighbors),trans_prob)
        #print(len(suscep_neighbors),number_to_infect,trans_prob, tau, duration)
    transmission_recipients = random.sample(sus_neighbors,number_to_infect)
    trans_delay = {}
    for v in transmission_recipients:
        trans_delay[v] = _truncated_exponential_(tau, duration)
    return trans_delay, duration
#     duration = random.expovariate(rec_rate_fxn(node))
#     trans_delay = {}
# 
#         
#     for v in sus_neighbors:
#         if tau == 0:
#             trans_delay[v] = float('Inf')
#         else:
#             trans_delay[v] = random.expovariate(tau)
# #        if delay<duration:
# #            trans_delay[v] = delay
#     return trans_delay, duration
                
##slow approach 1:
#    next_delay = random.expovariate(tau)
#    index, delay = int(next_delay//duration), next_delay%duration
#    while index<len(sus_neighbors):
#        trans_delay[sus_neighbors[index]] = delay
#        next_delay = random.expovariate(tau)
#        jump, delay = int(next_delay//duration), next_delay%duration
#        index += jump

##slow approach 2:
    #trans_prob = 1-np.exp(-tau*duration)
    #number_to_infect = np.random.binomial(len(sus_neighbors),trans_prob)
        #print(len(suscep_neighbors),number_to_infect,trans_prob, tau, duration)
    #transmission_recipients = random.sample(sus_neighbors,number_to_infect)
    #trans_delay = {}
    #for v in transmission_recipients:
    #    trans_delay[v] = _truncated_exponential_(tau, duration)
    return trans_delay, duration

[docs]def fast_SIR(G, tau, gamma, initial_infecteds = None, initial_recovereds = None, 
                rho = None, tmin = 0, tmax=float('Inf'), transmission_weight = None, 
                recovery_weight = None, return_full_data = False, sim_kwargs = None):
    r'''
    fast SIR simulation for exponentially distributed infection and 
    recovery times
    
    From figure A.3 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.

    
    

    :Arguments: 

    **G** networkx Graph
        The underlying network

    **tau** number
        transmission rate per edge

    **gamma** number
        recovery rate per node
        
    **initial_infecteds** node or iterable of nodes
        if a single node, then this node is initially infected
        
        if an iterable, then whole set is initially infected
        
        if None, then choose randomly based on rho.  
        
        If rho is also None, a random single node is chosen.
        
        If both initial_infecteds and rho are assigned, then there
        is an error.
       
    **initial_recovereds** iterable of nodes (default None)
        this whole collection is made recovered.
        Currently there is no test for consistency with initial_infecteds.
        Understood that everyone who isn't infected or recovered initially
        is initially susceptible.

    **rho** number
        initial fraction infected. number is int(round(G.order()*rho))

    **tmin** number (default 0)
        starting time
            
    **tmax** number  (default Infinity)
        maximum time after which simulation will stop.
        the default of running to infinity is okay for SIR, 
        but not for SIS.

    **transmission_weight**    string  (default None)
        the label for a weight given to the edges.
        transmission rate is
        G.adj[i][j][transmission_weight]*tau

    **recovery_weight**   string (default None))
        a label for a weight given to the nodes to scale their 
        recovery rates
        gamma_i = G.nodes[i][recovery_weight]*gamma

    **return_full_data**   boolean (default False)
        Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``

        
    :Returns:
        
    **times, S, I, R** numpy arrays
        
    Or if ``return_full_data is True``
            
    **full_data**  Simulation_Investigation object
            from this we can extract the status history of all nodes.
            We can also plot the network at given times
            and create animations using class methods.
    
    :SAMPLE USE:

    ::


        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        
        G = nx.configuration_model([1,5,10]*100000)
        initial_size = 10000
        gamma = 1.
        tau = 0.3
        t, S, I, R = EoN.fast_SIR(G, tau, gamma, 
                                    initial_infecteds = range(initial_size))
                                    
        plt.plot(t, I)
    '''
    #tested in test_SIR_dynamics
    if transmission_weight is not None or tau*gamma == 0:
        trans_rate_fxn, rec_rate_fxn = EoN._get_rate_functions_(G, tau, gamma, 
                                                    transmission_weight,
                                                    recovery_weight)
        def trans_time_fxn(source, target, trans_rate_fxn):
            rate = trans_rate_fxn(source, target)
            if rate >0:
                return random.expovariate(rate)
            else:
                return float('Inf')
        def rec_time_fxn(node, rec_rate_fxn):
            rate = rec_rate_fxn(node)
            if rate >0:
                return random.expovariate(rate)
            else:
                return float('Inf') 

        trans_time_args = (trans_rate_fxn,)
        rec_time_args = (rec_rate_fxn,)
        return fast_nonMarkov_SIR(G, trans_time_fxn = trans_time_fxn, 
                        rec_time_fxn = rec_time_fxn,
                        trans_time_args = trans_time_args, 
                        rec_time_args = rec_time_args, 
                        initial_infecteds = initial_infecteds, 
                        initial_recovereds = initial_recovereds, 
                        rho=rho, tmin = tmin, tmax = tmax, 
                        return_full_data = return_full_data, 
                        sim_kwargs=sim_kwargs)
    else:
        #the transmission rate is tau for all edges.  We can use this
        #to speed up the code.
        
        #get rec_rate_fxn (recovery rate may be variable)
        trans_rate_fxn, rec_rate_fxn = EoN._get_rate_functions_(G, tau, gamma, 
                                                    transmission_weight,
                                                    recovery_weight)
        
        return fast_nonMarkov_SIR(G, 
                        trans_and_rec_time_fxn=_trans_and_rec_time_Markovian_const_trans_,
                        trans_and_rec_time_args=(tau, rec_rate_fxn),
                        initial_infecteds = initial_infecteds, 
                        initial_recovereds = initial_recovereds, 
                        rho=rho, tmin = tmin, tmax = tmax, 
                        return_full_data = return_full_data, 
                        sim_kwargs=sim_kwargs)



[docs]def fast_nonMarkov_SIR(G, trans_time_fxn=None,
                        rec_time_fxn=None,
                        trans_and_rec_time_fxn = None,
                        trans_time_args=(),
                        rec_time_args=(),
                        trans_and_rec_time_args = (),
                        initial_infecteds = None,
                        initial_recovereds = None,
                        rho=None, tmin = 0, tmax = float('Inf'), 
                        return_full_data = False, sim_kwargs = None):
    r'''
    A modification of the algorithm in figure A.3 of Kiss, Miller, & 
    Simon to allow for user-defined rules governing time of 
    transmission.  
    
    Please cite the book if using this algorithm.

    This is useful if the transmission rule is non-Markovian in time, or
    for more elaborate models.  

    Allows the user to define functions (details below) to determine
    the rules of transmission times and recovery times.  There are two ways to do
    this.  The user can define a function that calculates the recovery time
    and another function that calculates the transmission time.  If recovery is after
    transmission, then transmission occurs.  We do this if the time to transmission
    is independent of the time to recovery.
    
    Alternately, the user may want to model a situation where time to transmission
    and time to recovery are not independent.  Then the user can define a single 
    function (details below) that would determine both recovery and transmission times.  
    

    :Arguments: 

    **G** Networkx Graph
        
    **trans_time_fxn** a user-defined function
        returns the delay until transmission for an edge.  May depend 
        on various arguments and need not be Markovian.  
                 
        Returns float

        Will be called using the form
                 
        ``trans_delay = trans_time_fxn(source_node, target_node, *trans_time_args)``
            Here trans_time_args is a tuple of the additional
            arguments the functions needs.

        the source_node is the infected node
        the target_node is the node that may receive transmission
        rec_delay is the duration of source_node's infection, calculated 
        by rec_time_fxn.

    **rec_time_fxn** a user-defined function
        returns the delay until recovery for a node.  May depend on various 
        arguments and need not be Markovian.  
            
        Returns float.

        Called using the form
            
        ``rec_delay = rec_time_fxn(node, *rec_time_args)``
            Here rec_time_args is a uple of additional arguments
            the function needs.
    
    **trans_and_rec_time_fxn** a user-defined function
        returns both a dict giving delay until transmissions for all edges 
        from source to susceptible neighbors and a float giving delay until 
        recovery of the source.  
                                 
        Can only be used **INSTEAD OF** ``trans_time_fxn`` AND ``rec_time_fxn``.
              
        Gives an **ERROR** if these are also defined
            
        Called using the form 
        ``trans_delay_dict, rec_delay = trans_and_rec_time_fxn(
                                           node, susceptible_neighbors,
                                           *trans_and_rec_time_args)``
        here trans_delay_dict is a dict whose keys are those neighbors
        who receive a transmission and rec_delay is a float.
            
    **trans_time_args** tuple
        see trans_time_fxn
        
    **rec_time_args** tuple
        see rec_time_fxn

    **trans_and_rec_time_args** tuple
        see trans_and_rec_time_fxn
        
    **initial_infecteds** node or iterable of nodes
        if a single node, then this node is initially infected
            
        if an iterable, then whole set is initially infected
        
        if None, then choose randomly based on rho.  If rho is also
        None, a random single node is chosen.
            
        If both initial_infecteds and rho are assigned, then there
        is an error.
            
    **initial_recovereds** iterable of nodes (default None)
        this whole collection is made recovered.
        
        Currently there is no test for consistency with initial_infecteds.
        
        Understood that everyone who isn't infected or recovered initially
        is initially susceptible.
    
    **rho** number
        initial fraction infected. number is int(round(G.order()*rho))

    **tmin** number (default 0)
        starting time
            
    **tmax** number (default infinity)
        final time

    **return_full_data** boolean (default False)
        Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``

    
    :Returns: 
        
    **times, S, I, R** numpy arrays
        
    Or if ``return_full_data is True``
            
    **full_data**  Simulation_Investigation object
        from this we can extract the status history of all nodes
        We can also plot the network at given times
        and even create animations using class methods.

    
    :SAMPLE USE:
          
    ::
        
              
        import EoN
        import networkx as nx
        import matplotlib.pyplot as plt
        import random
        
        N=1000000
        G = nx.fast_gnp_random_graph(N, 5/(N-1.))
        

        
        #set up the code to handle constant transmission rate 
        #with fixed recovery time.
        def trans_time_fxn(source, target, rate):
            return random.expovariate(rate)

        def rec_time_fxn(node,D):
            return D
        
        D = 5
        tau = 0.3
        initial_inf_count = 100

        t, S, I, R = EoN.fast_nonMarkov_SIR(G, 
                                trans_time_fxn=trans_time_fxn, 
                                rec_time_fxn=rec_time_fxn,
                                trans_time_args=(tau,), 
                                rec_time_args=(D,),
                                initial_infecteds = range(initial_inf_count))
        
        # note the comma after ``tau`` and ``D``.  This is needed for python
        # to recognize these are tuples

        # initial condition has first 100 nodes in G infected.
    
    '''                                 
    if rho and initial_infecteds:
        raise EoN.EoNError("cannot define both initial_infecteds and rho")
    if rho and initial_recovereds:
        raise EoN.EoNError("cannot define both initial_recovereds and rho")

    if (trans_time_fxn and not rec_time_fxn) or (rec_time_fxn and not trans_time_fxn):
        raise EoN.EoNError("must define both trans_time_fxn and rec_time_fxn or neither")
    elif trans_and_rec_time_fxn and trans_time_fxn:
        raise EoN.EoNError("cannot define trans_and_rec_time_fxn at the same time as trans_time_fxn and rec_time_fxn")
    elif not trans_and_rec_time_fxn and not trans_time_fxn:
        raise EoN.EoNError("if not defining trans_and_rec_time_fxn, must define trans_time_fxn and rec_time_fxn")
        
    if not trans_and_rec_time_fxn: #we define the joint function.
        trans_and_rec_time_fxn =  _find_trans_and_rec_delays_SIR_
        trans_and_rec_time_args = (trans_time_fxn, rec_time_fxn, trans_time_args, rec_time_args)
        
    #now we define the initial setup.
    status = defaultdict(lambda: 'S') #node status defaults to 'S'
    rec_time = defaultdict(lambda: tmin-1) #node recovery time defaults to -1
    if initial_recovereds is not None:
        for node in initial_recovereds:
            status[node] = 'R'
            rec_time[node] = tmin-1 #default value for these.  Ensures that the recovered nodes appear with a time
    pred_inf_time = defaultdict(lambda: float('Inf')) 
        #infection time defaults to \infty  --- this could be set to tmax, 
        #probably with a slight improvement to performance.
    
    Q = myQueue(tmax)

    if initial_infecteds is None:  #create initial infecteds list if not given
        if rho is None:
            initial_number = 1
        else:
            initial_number = int(round(G.order()*rho))
        initial_infecteds=random.sample(G.nodes(), initial_number)
    elif G.has_node(initial_infecteds):
        initial_infecteds=[initial_infecteds]
    #else it is assumed to be a list of nodes.
        
    times, S, I, R= ([tmin], [G.order()], [0], [0])  
    transmissions = []
    
    for u in initial_infecteds:
        pred_inf_time[u] = tmin
        Q.add(tmin, _process_trans_SIR_, args=(G, None, u, times, S, I, R, Q, 
                                                    status, rec_time, 
                                                    pred_inf_time, transmissions, 
                                                    trans_and_rec_time_fxn,
                                                    trans_and_rec_time_args
                                                )
                        )
    
    #Note that when finally infected, pred_inf_time is correct
    #and rec_time is correct.  
    #So if return_full_data is true, these are correct

    while Q:  #all the work is done in this while loop.
        Q.pop_and_run()

    #the initial infections were treated as ordinary infection events at 
    #time 0.
    #So each initial infection added an entry at time 0 to lists.
    #We'd like to get rid these excess events.
    times = times[len(initial_infecteds):]
    S=S[len(initial_infecteds):]
    I=I[len(initial_infecteds):]
    R=R[len(initial_infecteds):]

    if not return_full_data:
        return np.array(times), np.array(S), np.array(I), \
               np.array(R) 
    else:
        #strip pred_inf_time and rec_time down to just the values for nodes 
        #that became infected
        #could use iteritems for Python 2, by   try ... except AttributeError
        infection_times = {node:time for (node,time) in 
                            pred_inf_time.items() if status[node]!='S'}
        recovery_times = {node:time for (node,time) in 
                                rec_time.items() if status[node] =='R'}
                                
                
        node_history = _transform_to_node_history_(infection_times, recovery_times, 
                                                    tmin, SIR = True)
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, transmissions, 
                                            possible_statuses = ['S', 'I', 'R'], 
                                            **sim_kwargs)


def _find_trans_and_rec_delays_SIS_(node, neighbors, trans_time_fxn, 
                                    rec_time_fxn,
                                    trans_time_args=(),
                                    rec_time_args=()):
    rec_delay = rec_time_fxn(node, *rec_time_args)
    trans_delays={}
    for target in neighbors:
        trans_delays[target] = trans_time_fxn(node, target, rec_delay, 
                                                *trans_time_args)
    return trans_delays, rec_delay

def _process_trans_SIS_Markov(time, G, source, target, times, S, I, Q,
                        status, rec_time, infection_times, recovery_times, 
                        transmissions, trans_rate_fxn, rec_rate_fxn):
    r'''From figure A.6 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.

    Does the Markovian version.  So it doesn't take in a 
    list of transmission times
    
    :Arguments: 

    **time** number
        current time
    **G** networkx Graph
    **source**  node
        node causing transmission
    **target**  node
        node receiving transmission.
    **times** list
        list of times at which events have happened
    **S, I** lists
        lists of numbers of nodes of each status at each time
    **Q**  myQueue
        the queue of events
    **status** dict
        dictionary giving status of each node
    **rec_time** dict
        dictionary giving recovery time of each node
    **infection_times** 
    
    **recovery_times** 
        
    **transmissions** list)
        list of tuples (t, source, target) for each transmission event.
        
    **trans_rate_fxn**   User-defined function
        transmission rate trans_rate_fxn(u,v) gives transmission rate 
        from u to v
    **rec_rate_fxn**    User-defined function
        recovery rate rec_rate_fxn(u) is recovery rate of u.

    :Returns: 
        
    nothing returned

    :MODIFIES:
    
    status : updates status of target
    rec_time : adds recovery time for target
    times : appends time of event
    infection_times[node] : appends time of infection.
    S : appends new S (reduced by 1 from last)
    I : appends new I (increased by 1)
    Q : adds recovery and transmission events for target.

    '''

    if status[target] == 'S':
        status[target] = 'I'
        transmissions.append((time, source, target))
        I.append(I[-1]+1) #one more infected
        S.append(S[-1]-1) #one less susceptible
        times.append(time)
        rec_rate = rec_rate_fxn(target)
        if rec_rate>0:
            rec_time[target] = time + random.expovariate(rec_rate_fxn(target))
        elif rec_rate == 0:
            rec_time[target] = float('Inf')
        else:
            raise EoN.EoNError('recovery rate must be non-negative')
        
        if rec_time[target]<Q.tmax:
            Q.add(rec_time[target], _process_rec_SIS_, 
                    args = (target, times, recovery_times, S, I, status))
        for v in G.neighbors(target): #target plays role of source here
            _find_next_trans_SIS_Markov(Q, time, trans_rate_fxn(target, v), 
                                        target, v, status, rec_time,
                                        trans_event_args = 
                                            (G, target, v, times, S, I, Q, 
                                            status, rec_time, infection_times, 
                                            recovery_times, transmissions, 
                                            trans_rate_fxn, rec_rate_fxn
                                            )
                                  )
        infection_times[target].append(time)
    if source is not None:
        _find_next_trans_SIS_Markov(Q, time, trans_rate_fxn(source, target), 
                                source, target, status, rec_time, 
                                trans_event_args = (G, source, target, times, 
                                            S, I, Q, status, 
                                            rec_time, infection_times, 
                                            recovery_times, transmissions, 
                                            trans_rate_fxn, rec_rate_fxn
                                            )
                             )

def _process_trans_SIS_nonMarkov_(time, G, source, target, future_transmissions,
                        times, S, I, Q, status, rec_time, 
                        infection_times, recovery_times, transmissions,
                        trans_and_rec_time_fxn, trans_and_rec_time_args=()):
    r'''From figure A.6 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.

    Does the nonMarkovian version.
    
    :Arguments: 

        time : number
            current time
    **G**  networkx Graph
    **source** node
            node causing transmission
    **target** node
            node receiving transmission.
        times : list
            list of times at which events have happened
        S, I: lists
            lists of numbers of nodes of each status at each time
        Q : myQueue
            the queue of events
        status : dict
            dictionary giving status of each node
        rec_time : dict
            dictionary giving recovery time of each node
        infection_times : 
        
        recovery_times : 
            
        trans_and_rec_time_fxn : function

        trans_and_rec_time_args : 

    :Returns: 
    
    nothing returned

    :MODIFIES:

    status : updates status of target
    rec_time : adds recovery time for target
    times : appends time of event
    infection_times[node] : appends time of infection.
    S : appends new S (reduced by 1 from last)
    I : appends new I (increased by 1)
    Q : adds recovery and transmission events for target.

    '''

    if status[target] == 'S':
        status[target] = 'I'
        transmissions.append((time, source, target))
        infection_times[target].append(time)

        times.append(time)
        I.append(I[-1]+1) #one more infected
        S.append(S[-1]-1) #one less susceptible

        trans_delays, rec_delay = trans_and_rec_time_fxn(target, G.neighbors(target), 
                                                        *trans_and_rec_time_args)
        rec_time[target] = time + rec_delay
        
        if rec_time[target]<Q.tmax:
            Q.add(rec_time[target], _process_rec_SIS_, 
                    args = (target, times, recovery_times, S, I, status))
        for v in G.neighbors(target): #target plays role of source here
            if trans_delays[v]:
                trans_times = [time + td for td in trans_delays[v]] #when do transmissions happen
                if status[v] == 'I':  #only care about those after current infectious period
                    trans_times = [time for time in trans_times if time>rec_time[v]]
                following_transmissions = trans_times[1:]
                if trans_times: #no point adding any if there are none
                    Q.add(trans_times[0], _process_trans_SIS_nonMarkov_, args = (G, target, v, following_transmissions, times, S, I, Q, status, 
                                                                                    rec_time, infection_times, recovery_times, transmissions,
                                                                                    trans_and_rec_time_fxn, trans_and_rec_time_args))
    
    #target is definitely infected now.  It has some future_transmissions stored.  
    #do they happen?
    trans_times = [time for time in future_transmissions if time> rec_time[target]]
    following_transmissions = trans_times[1:]
    if trans_times:
        Q.add(trans_times[0], _process_trans_SIS_nonMarkov_, args = (G, source, target, following_transmissions, times, S, I, Q, status, 
                                                                                    rec_time, infection_times, recovery_times, transmissions,
                                                                                    trans_and_rec_time_fxn, trans_and_rec_time_args))
                                                                                    


def _find_next_trans_SIS_Markov(Q, time, tau, source, target, status, rec_time, 
                            trans_event_args=()):
    r'''From figure A.6 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.


    determines if a transmission from source to target will occur and if 
    so puts into Q

    :Arguments: 

        Q : myQueue
            A priority queue of events
        t : current time
        tau : transmission rate
    **source** infected node that may transmit
    **target** the possibly susceptible node that may receive a 
             transmission
        status : a dict giving the current status of every node
        rec_time : a dict giving the recovery time of every node that has 
               been infected.

    :Returns: 
        :
        nothing returned

    MODIFIES
    --------
    Q : if a transmission time is potentially valid, add the first 
        event.
        when this transmission occurs later we will consider adding 
        another event.
        note that the event includes the source, so we can later check 
        if same source will transmit again.

    Entry requirement:
    -------
    Only enter this if the source node is INFECTED.

    '''
    
    #assert(status[source]=='I')
    if rec_time[target]<rec_time[source]: 
        #if target is susceptible, then rec_time[target]<time
        if tau>0:
            delay = random.expovariate(tau)
        elif tau == 0:
            delay = float('Inf')
        else:
            raise EoN.EoNError('rate must be non-negative')
        #transmission_time = max(time, rec_time[target]) + delay
        transmission_time = time + delay
        if transmission_time<rec_time[target]:
            delay = random.expovariate(tau)
            transmission_time = rec_time[target]+delay
        if transmission_time < rec_time[source] and transmission_time < Q.tmax:
            Q.add(transmission_time, _process_trans_SIS_Markov, 
                                args = trans_event_args
                            )
 
def _process_rec_SIS_(time, node, times, recovery_times, S, I, status):
    r'''From figure A.6 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.

    '''

    times.append(time)
    recovery_times[node].append(time)
    S.append(S[-1]+1)   #one more susceptible
    I.append(I[-1]-1) #one less infected
    status[node] = 'S'

[docs]def fast_SIS(G, tau, gamma, initial_infecteds=None, rho = None, tmin=0, tmax=100, 
                transmission_weight = None, recovery_weight = None, 
                return_full_data = False, sim_kwargs = None):
    r'''Fast SIS simulations for epidemics on weighted or unweighted
    networks, allowing edge and node weights to scale the transmission
    and recovery rates.  Assumes exponentially distributed times to recovery
    and to transmission.
    
    From figure A.5 of Kiss, Miller, & Simon.  Please cite the
    book if using this algorithm.

    :Arguments: 
    
    **G** networkx Graph
        The underlying network

    **tau** positive float
        transmission rate per edge

    **gamma** number
        recovery rate per node

    **initial_infecteds** node or iterable of nodes
        if a single node, then this node is initially infected
        if an iterable, then whole set is initially infected
        if None, then choose randomly based on rho.  If rho is also
        None, a random single node is chosen.
        If both initial_infecteds and rho are assigned, then there
        is an error.
       
    **rho** number
        initial fraction infected. number infected is int(round(G.order()*rho))
       
    **tmin** number (default 0)
        starting time
            
    **tmax** number (default 100)
        stop time

    **transmission_weight** string       (default None)
        the label for a weight given to the edges.
        transmission rate is
        G.adj[i][j][transmission_weight]*tau

    **recovery_weight** string       (default None)
        a label for a weight given to the nodes to scale their 
        recovery rates
        ``gamma_i = G.nodes[i][recovery_weight]*gamma``
    
    **return_full_data** boolean (default False)
        Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``

    :Returns: 
        
    **times, S, I** each a numpy array
        times and number in each status for corresponding time
        
    or if return_full_data=True
    
    **full_data**  Simulation_Investigation object
        from this we can extract the status history of all nodes.
        We can also plot the network at given times
        and even create animations using class methods.
    
    :SAMPLE USE:

    ::


        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        
        G = nx.configuration_model([1,5,10]*100000)
        initial_size = 10000
        gamma = 1.
        tau = 0.2
        t, S, I = EoN.fast_SIS(G, tau, gamma, tmax = 10,
                                    initial_infecteds = range(initial_size))
                                    
        plt.plot(t, I)
            
    '''
    if rho is not None and initial_infecteds is not None:
        raise EoN.EoNError("cannot define both initial_infecteds and rho")
    
    trans_rate_fxn, rec_rate_fxn = EoN._get_rate_functions_(G, tau, gamma, 
                                                transmission_weight,
                                                recovery_weight)

    if initial_infecteds is None:
        if rho is None:
            initial_number = 1
        else:
            initial_number = int(round(G.order()*rho))
        initial_infecteds=random.sample(G.nodes(), initial_number)
    elif G.has_node(initial_infecteds):
        initial_infecteds=[initial_infecteds]

    times = [tmin]
    S = [G.order()]
    I = [0]
    Q = myQueue(tmax)
    status = defaultdict(lambda: 'S') #node status defaults to 'S'
    rec_time = defaultdict(lambda: tmin-1) #node recovery time defaults to -1

    infection_times = defaultdict(lambda: []) #defaults to empty list
    recovery_times = defaultdict(lambda: [])
    transmissions = []
    for u in initial_infecteds:
        Q.add(tmin, _process_trans_SIS_Markov, 
                            args = (G, None, u, times, 
                                    S, I, Q, status, rec_time, infection_times, recovery_times, 
                                    transmissions, trans_rate_fxn, rec_rate_fxn)
                        )
    while Q:
        Q.pop_and_run()

    #the initial infections were treated as ordinary infection events at 
    #time 0.
    #So each initial infection added an entry at time tmin to lists.
    #We'd like to get rid these excess events.
    times = times[len(initial_infecteds):]
    S=S[len(initial_infecteds):]
    I=I[len(initial_infecteds):]

    if not return_full_data:
        return np.array(times), np.array(S), np.array(I)
    else:
        node_history = _transform_to_node_history_(infection_times, recovery_times, tmin, SIR = False)
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, transmissions, 
                                            possible_statuses=['S', 'I'], 
                                            **sim_kwargs)


[docs]def fast_nonMarkov_SIS(G, trans_time_fxn=None, rec_time_fxn=None, 
                        trans_and_rec_time_fxn = None, trans_time_args=(),
                        rec_time_args = (), trans_and_rec_time_args=(),
                        initial_infecteds = None, rho = None, tmin=0, tmax = 100,
                        return_full_data = False, sim_kwargs = None):
                        
    r'''Similar to fast_nonMarkov_SIR. 
    
    :Warning: 
        
    trans_time_fxn (or trans_and_rec_time_fxn) need to return lists of
    times.  Not just the next time. So this is different from the SIR 
    version.

    :Arguments: 
    
    **G** networkx Graph
        The underlying network

    **trans_time_fxn** User-defined function returning a list
    
        **RETURNS A LIST**
        
        **has slightly different arguments than the SIR version**

        a user-defined function that returns list of delays until 
        transmission for an edge.  All delays are before recovery.
        
        Each entry is the delay from time of infection of node to time of the
        given transmission (i.e., it's not looking at delays from one transmission
        to the next)
        
        May depend on various arguments and need not be Markovian.                          
        
        Called using the form
        
        trans_delays = trans_time_fxn(source_node, target_node, rec_delay, *trans_time_args)
        
        the source_node is the infected node
        
        the target_node is the node that may receive transmission


    **rec_time_fxn** user-designed function returning a float
    
        Returns the duration of infection of a node.  May depend on various arguments 
        and need not be Markovian.  
        
        Called using the form
        
        duration = rec_time_fxn(node, *rec_time_args)

    
    **trans_and_rec_time_fxn** user-defined function returning a dict and a float
    
        returns both a dict whose values are lists of delays until 
        transmissions for all edges from source to neighbors and a float 
        giving duration of infection of the source.  
                                 
        can only be **used instead of** 
        ``trans_time_fxn`` and ``rec_time_fxn``.  
        there is an **error** if these are also defined.
        
        Called using the form 
        
        trans_delay_dict, duration = trans_and_rec_time_fxn(node, 
        susceptible_neighbors, *trans_and_rec_time_args)
        
        here 
        
        trans_delay_dict is a dict whose keys are those neighbors
        who receive a transmission and whose values are lists of delays
        
        duration is a float.
        
    **trans_time_args** tuple
        see trans_time_fxn
        
    **rec_time_args** tuple
        see rec_time_fxn
        
    **trans_and_rec_time_args** tuple
        see trans_and_rec_time_fxn
    
    **initial_infecteds** node or iterable of nodes
        if a single node, then this node is initially infected
        if an iterable, then whole set is initially infected
        if None, then choose randomly based on rho.  If rho is also
        None, a random single node is chosen.
        If both initial_infecteds and rho are assigned, then there
        is an error.
       
    **rho** number
        initial fraction infected. number is int(round(G.order()*rho))
       
    **tmin** number (default 0)
        starting time
        
    **tmax** number (default 100)
        stop time

    **return_full_data** boolean (default False)
        Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``

    :Returns: 
        
    **times, S, I** each a numpy array
        giving times and number in each status for corresponding time
    
    or if return_full_data=True:
        
    **full_data** a Simulation_Investigation object
        from this we can extract the status history of all nodes
        We can also plot the network at given times
        and even create animations using class methods.

    :SAMPLE USE:

    ::

                        
    '''
    if rho  and initial_infecteds:
        raise EoN.EoNError("cannot define both initial_infecteds and rho")
    
    if (trans_time_fxn and not rec_time_fxn) or (rec_time_fxn and not trans_time_fxn):
        raise EoN.EoNError("must define both trans_time_fxn and rec_time_fxn or neither")
    if trans_and_rec_time_fxn and trans_time_fxn:
        raise EoN.EoNError("cannot define trans_and_rec_time_fxn at the same time as either trans_time_fxn or rec_time_fxn")
    elif not trans_and_rec_time_fxn and not trans_time_fxn:
        raise EoN.EoNError("if not defining trans_and_rec_time_fxn, must define trans_time_fxn and rec_time_fxn")
    
    if not trans_and_rec_time_fxn: #we define the joint function.
        trans_and_rec_time_fxn =  _find_trans_and_rec_delays_SIS_
        trans_and_rec_time_args = (trans_time_fxn, rec_time_fxn, trans_time_args, rec_time_args)    
                
                
    if initial_infecteds is None:  #create initial infecteds list if not given
        if rho is None:
            initial_number = 1
        else:
            initial_number = int(round(G.order()*rho))
        initial_infecteds=random.sample(G.nodes(), initial_number)
    elif G.has_node(initial_infecteds):
        initial_infecteds=[initial_infecteds]
        
    times, S, I = ([tmin], [G.order()], [0])  

    Q = myQueue(tmax)
    status = defaultdict(lambda: 'S') #node status defaults to 'S'
    rec_time = defaultdict(lambda: tmin-1) #node recovery time defaults to -1

    infection_times = defaultdict(lambda: []) #defaults to empty list
    recovery_times = defaultdict(lambda: [])
    transmissions = []
    
    for u in initial_infecteds:
        Q.add(tmin, _process_trans_SIS_nonMarkov_, args=(G, 
                                                        None, u, [], times, S, I, Q, 
                                                        status, rec_time, 
                                                        infection_times, recovery_times,
                                                        transmissions,
                                                        trans_and_rec_time_fxn,
                                                        trans_and_rec_time_args
                                                    )
                )
                    
    while Q:  #all the work is done in this while loop.
        Q.pop_and_run()

    #the initial infections were treated as ordinary infection events at 
    #time 0.
    #So each initial infection added an entry at time tmin to lists.
    #We'd like to get rid these excess events.
    times = times[len(initial_infecteds):]
    S=S[len(initial_infecteds):]
    I=I[len(initial_infecteds):]

    if not return_full_data:
        return np.array(times), np.array(S), np.array(I)
    else:
        node_history = _transform_to_node_history_(infection_times, recovery_times, tmin, SIR = False)
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, transmissions, possible_statuses = ['S', 'I'], **sim_kwargs)



#####Now dealing with Gillespie code#####


[docs]def Gillespie_SIR(G, tau, gamma, initial_infecteds=None, 
                    initial_recovereds = None, rho = None, tmin = 0, 
                    tmax=float('Inf'), recovery_weight = None, 
                    transmission_weight = None, return_full_data = False, sim_kwargs = None):
    #tested in test_SIR_dynamics
    r'''    
    
    Performs SIR simulations for epidemics.
    
    For unweighted networks, the run time is usually slower than fast_SIR, but 
    they are close.  If we add weights, then this Gillespie implementation 
    slows down much more.  
    
    I think there are better ways to implement the algorithm to remove this.  
    This will need a new data type that allows us to quickly sample a random 
    event with appropriate weight.  I think this is doable through a binary 
    tree and it is in development.
    
    Rather than using figure A.1 of Kiss, Miller, & Simon, this uses a method 
    from Petter Holme 
        "Model versions and fast algorithms for network epidemiology"
    which focuses on SI edges (versions before 0.99.2 used a
    method more like fig A.1).  
    
            
    This approach will not work for nonMarkovian transmission.  Boguna et al
        "Simulating non-Markovian stochastic processes"
    have looked at how to handle nonMarkovian transmission in a Gillespie 
    Algorithm.  At present I don't see a way to efficientl adapt their 
    approach - I think each substep will take O(N) time.  So the full algorithm 
    will be O(N^2).  For this, it will be much better to use fast_SIR
    which I believe is O(N log N)
    
    :See Also:

    **fast_SIR** which has the same inputs but uses a different method to 
    run faster, particularly in the weighted case.
    
    :Arguments:
         
    **G** networkx Graph
        The underlying network
    **tau** positive float
        transmission rate per edge
       
    **gamma** number
        recovery rate per node
    
    **initial_infecteds** node or iterable of nodes
        if a single node, then this node is initially infected
        if an iterable, then whole set is initially infected
        if None, then choose randomly based on rho.  If rho is also
        None, a random single node is chosen.
        If both initial_infecteds and rho are assigned, then there
        is an error.
    
    **initial_recovereds** iterable of nodes (default None)
        this whole collection is made recovered.
        Currently there is no test for consistency with initial_infecteds.
        Understood that everyone who isn't infected or recovered initially
        is initially susceptible.
        
    **rho** number
        initial fraction infected. number is int(round(G.order()*rho))
        
    **tmin** number (default 0)
        starting time
            
    **tmax** number (default Infinity)
        stop time
        
    **recovery_weight** string (default None)
        the string used to define the node attribute for the weight.
        Assumes that the recovery rate is gamma*G.nodes[u][recovery_weight].
        If None, then just uses gamma without scaling.
    
    **transmission_weight** string (default None)
        the string used to define the edge attribute for the weight.
        Assumes that the transmission rate from u to v is 
        tau*G.adj[u][v][transmission_weight]
        If None, then just uses tau without scaling.

    **return_full_data** boolean (default False)
        Tells whether a Simulation_Investigation object should be returned.  

    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``


    :Returns: 
        
    **times, S, I, R** each a numpy array
        giving times and number in each status for corresponding time

    OR if return_full_data=True:
    
    **full_data**  Simulation_Investigation object
        from this we can extract the status history of all nodes
        We can also plot the network at given times
        and even create animations using class methods.
        
    :SAMPLE USE:


    ::

        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        
        G = nx.configuration_model([1,5,10]*100000)
        initial_size = 10000
        gamma = 1.
        tau = 0.3
        t, S, I, R = EoN.Gillespie_SIR(G, tau, gamma, 
                                    initial_infecteds = range(initial_size))
                                    
        plt.plot(t, I)
    
    '''

    if rho is not None and initial_infecteds is not None:
        raise EoN.EoNError("cannot define both initial_infecteds and rho")

    
    if return_full_data:
        infection_times = defaultdict(lambda: []) #defaults to an empty list for each node
        recovery_times = defaultdict(lambda: [])

    if transmission_weight is not None:
        def edgeweight(u,v):
            return G.adj[u][v][transmission_weight]
    else:
        def edgeweight(u,v):
            return None
    
    if recovery_weight is not None:
        def nodeweight(u):
            return G.nodes[u][recovery_weight]
    else:
        def nodeweight(u):
            return None

    tau = float(tau)  #just to avoid integer division problems in python 2.
    gamma = float(gamma)
    
    if initial_infecteds is None:
        if rho is None:
            initial_number = 1
        else:
            initial_number = int(round(G.order()*rho))
        initial_infecteds=random.sample(G.nodes(), initial_number)
    elif G.has_node(initial_infecteds):
        initial_infecteds=[initial_infecteds]
        
    if initial_recovereds is None:
        initial_recovereds = []
        
    I = [len(initial_infecteds)]
    R = [len(initial_recovereds)]
    S = [G.order()-I[0]-R[0]]
    times = [tmin]
    
    transmissions = []
    t = tmin
    
    status = defaultdict(lambda : 'S')
    for node in initial_infecteds:
        status[node] = 'I'
        if return_full_data:
            infection_times[node].append(t)
            transmissions.append((t, None, node))
    for node in initial_recovereds:
        status[node] = 'R'
        if return_full_data:
            recovery_times[node].append(t)

    if recovery_weight is not None:
        infecteds = _ListDict_(weighted=True)
    else:
        infecteds = _ListDict_() #unweighted - code is faster for this case
    if transmission_weight is not None:
        IS_links = _ListDict_(weighted=True)
    else:
        IS_links = _ListDict_()

    for node in initial_infecteds:
        infecteds.update(node, weight_increment = nodeweight(node)) #weight is none if unweighted
        for nbr in G.neighbors(node):  #must have this in a separate loop 
                                       #from assigning status
            if status[nbr] == 'S':
                IS_links.update((node, nbr), weight_increment = edgeweight(node,nbr))
    
    total_recovery_rate = gamma*infecteds.total_weight() #gamma*I_weight_sum
    
    total_transmission_rate = tau*IS_links.total_weight()#IS_weight_sum
        
    total_rate = total_recovery_rate + total_transmission_rate
    delay = random.expovariate(total_rate)
    t += delay
    
    while infecteds and t<tmax:
        if random.random()<total_recovery_rate/total_rate: #recover
            recovering_node = infecteds.random_removal() #does weighted choice and removes it
            status[recovering_node]='R'
            if return_full_data:
                recovery_times[recovering_node].append(t)

            for nbr in G.neighbors(recovering_node):
                if status[nbr] == 'S':
                    IS_links.remove((recovering_node, nbr))
            times.append(t)
            S.append(S[-1])
            I.append(I[-1]-1)
            R.append(R[-1]+1)
        else: #transmit
            transmitter, recipient = IS_links.choose_random() #we don't use remove since that complicates the later removal of edges.
            status[recipient]='I'

            if return_full_data:
                transmissions.append((t, transmitter, recipient))
                infection_times[recipient].append(t)
            infecteds.update(recipient, weight_increment = nodeweight(recipient))

            for nbr in G.neighbors(recipient):
                if status[nbr] == 'S':
                    IS_links.update((recipient, nbr), weight_increment=edgeweight(recipient, nbr))
                elif status[nbr]=='I' and nbr != recipient: #self edge would break this without last test.elif
                    IS_links.remove((nbr, recipient))
                     
            times.append(t)
            S.append(S[-1]-1)
            I.append(I[-1]+1)
            R.append(R[-1])
            
        total_recovery_rate = gamma*infecteds.total_weight()#I_weight_sum
        total_transmission_rate = tau*IS_links.total_weight()#IS_weight_sum
        
                
        total_rate = total_recovery_rate + total_transmission_rate
        if total_rate>0:
            delay = random.expovariate(total_rate)
        else:
            delay = float('Inf')
        t += delay

    if not return_full_data:
        return np.array(times), np.array(S), np.array(I), \
                np.array(R)
    else:
        #print(infection_times)
        #print(recovery_times)
        infection_times = {node: L[0] for node, L in infection_times.items()}
        recovery_times = {node: L[0] for node, L in recovery_times.items()}
        #print(infection_times)
        #print(recovery_times)

        node_history = _transform_to_node_history_(infection_times, recovery_times, tmin, SIR = True)
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, transmissions, possible_statuses = ['S', 'I', 'R'], **sim_kwargs)


[docs]def Gillespie_SIS(G, tau, gamma, initial_infecteds=None, rho = None, tmin = 0,
                    tmax=100, recovery_weight=None, transmission_weight = None, 
                    return_full_data = False, sim_kwargs = None):
    r'''
    Performs SIS simulations for epidemics on networks with or without weighted edges.
    
    It assumes that the edges have a weight associated with them and that the
    transmission rate for an edge is tau*weight[edge]
    
    Based on an algorithm by Petter Holme.  It requires a weighted choice of edges
    and this will be done by tracking the maximum edge weight and then using 
    repeated rejection samples until a successful selection.
    

    :See Also:

    **fast_SIS** which has the same inputs but uses a faster method (esp 
    for weighted graphs).
    
    
    :Arguments: 
        
    **G** (NetworkX Graph)
        The underlying network
    **tau** (positive float) 
        transmission rate per edge
    **gamma** number
        recovery rate per node
    
    **initial_infecteds** node or iterable of nodes
        if a single node, then this node is initially infected
        if an iterable, then whole set is initially infected
        if None, then choose randomly based on rho.  If rho is also
        None, a random single node is chosen.
        If both initial_infecteds and rho are assigned, then there
        is an error.
    
    **rho** number
        initial fraction infected. number is int(round(G.order()*rho))
    
    **tmin** number (default 0)
        starting time
        
    **tmax** number
        stop time
        
    **recovery_weight** string (default None)
        the string used to define the node attribute for the weight.
        Assumes that the recovery rate is gamma*G.nodes[u][recovery_weight].
        If None, then just uses gamma without scaling.
        
    **transmission_weight** string (default None)
        the string used to define the edge attribute for the weight.
        Assumes that the transmission rate from u to v is 
        tau*G.adj[u][v][transmission_weight]

    **return_full_data** boolean (default False)
        Tells whether a Simulation_Investigation object should be returned.  
        
    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``
        
    :Returns: 

    **times, S, I** numpy arrays
        giving times and number in each status for corresponding time

    or if ``return_full_data==True``

    **full_data**  Simulation_Investigation object
        from this we can extract the status history of all nodes
        We can also plot the network at given times
        and even create animations using class methods.

    :SAMPLE USE:

    ::

        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        
        G = nx.configuration_model([1,5,10]*100000)
        initial_size = 10000
        gamma = 1.
        tau = 0.2
        t, S, I = EoN.Gillespie_SIS(G, tau, gamma, tmax = 20,
                                    initial_infecteds = range(initial_size))
                                
        plt.plot(t, I)

    '''
    if rho is not None and initial_infecteds is not None:
        raise EoN.EoNError("cannot define both initial_infecteds and rho")

    if return_full_data:
        infection_times = defaultdict(lambda: []) #defaults to an empty list 
        recovery_times = defaultdict(lambda: [])  #for each node

    if transmission_weight is not None:
        def edgeweight(u,v):
            return G.adj[u][v][transmission_weight]
    else:
        def edgeweight(u,v):
            return None
    
    if recovery_weight is not None:
        def nodeweight(u):
            return G.nodes[u][recovery_weight]
    else:
        def nodeweight(u):
            return None
            
    tau = float(tau)  #just to avoid integer division problems.
    gamma = float(gamma)
    
    if initial_infecteds is None:
        if rho is None:
            initial_number = 1
        else:
            initial_number = int(round(G.order()*rho))
        initial_infecteds=random.sample(G.nodes(), initial_number)
    elif G.has_node(initial_infecteds):
        initial_infecteds=[initial_infecteds]
        
    I = [len(initial_infecteds)]
    S = [G.order()-I[0]]
    times = [tmin]
    
    t = tmin
    transmissions = []
    
    status = defaultdict(lambda : 'S')
    for node in initial_infecteds:
        status[node] = 'I'
        if  return_full_data:
            infection_times[node].append(t)
            transmissions.append((t, None, node))

    if recovery_weight is None:
        infecteds = _ListDict_()
    else:
        infecteds = _ListDict_(weighted=True)

    if transmission_weight is None:
        IS_links = _ListDict_()
    else:
        IS_links = _ListDict_(weighted=True)
        
        
    for node in initial_infecteds:
        infecteds.update(node, weight_increment = nodeweight(node))
        for nbr in G.neighbors(node):  #must have this in a separate loop 
                                       #after assigning status of node
            if status[nbr] == 'S':
                IS_links.update((node, nbr), weight_increment=edgeweight(node, nbr))
    
    total_recovery_rate = gamma*infecteds.total_weight()#I_weight_sum
    total_transmission_rate = tau*IS_links.total_weight()#IS_weight_sum
            
    total_rate = total_recovery_rate + total_transmission_rate
    delay = random.expovariate(total_rate)
    t = t+delay
    
    while infecteds and t<tmax:
        if random.random()<total_recovery_rate/total_rate: #recover
            recovering_node = infecteds.random_removal()
            status[recovering_node]='S'
            if return_full_data:
                recovery_times[recovering_node].append(t)

                                
            for nbr in G.neighbors(recovering_node):
                if nbr == recovering_node:  #move past self edges
                    continue
                elif status[nbr] == 'S':
                    IS_links.remove((recovering_node, nbr))
                else:
                    IS_links.update((nbr, recovering_node), weight_increment = edgeweight(recovering_node, nbr))
                        
            times.append(t)
            S.append(S[-1]+1)
            I.append(I[-1]-1)
        else:
            transmitter, recipient = IS_links.choose_random()
            status[recipient]='I'
            if  return_full_data:
                infection_times[recipient].append(t)
                transmissions.append((t, transmitter, recipient))

            infecteds.update(recipient, weight_increment = nodeweight(recipient))
            for nbr in G.neighbors(recipient):
                if status[nbr] == 'S':
                    IS_links.update((recipient, nbr), weight_increment = edgeweight(recipient, nbr))
                elif nbr != recipient: #otherwise a self-loop breaks the code
                    IS_links.remove((nbr, recipient))
                    
            times.append(t)
            S.append(S[-1]-1)
            I.append(I[-1]+1)

        total_recovery_rate = gamma*infecteds.total_weight()#I_weight_sum
        
        total_transmission_rate = tau*IS_links.total_weight()#IS_weight_sum
        

        total_rate = total_recovery_rate + total_transmission_rate
        if total_rate>0:
            delay = random.expovariate(total_rate)
        else:
            delay = float('Inf')
        t += delay

    if not return_full_data:
        return np.array(times), np.array(S), np.array(I)
    else:
        node_history = _transform_to_node_history_(infection_times, recovery_times, tmin, SIR = False)
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, possible_statuses=['S', 'I'], **sim_kwargs)

[docs]def Gillespie_complex_contagion(G, rate_function, transition_choice, 
    get_influence_set, IC, return_statuses, tmin = 0, tmax=100, parameters = None, 
    return_full_data = False, sim_kwargs = None):
    
    r''' 
    Initially intended for a complex contagion.  However, this can allow influence
    from any nodes, not just immediate neighbors.
    
    The complex contagion must be something that all nodes do something simultaneously
    
    **This is not the same as if node** ``v`` **primes node** ``u`` **and 
    later node** ``w`` **causes** ``u`` **to transition.  This will require 
    that both** ``v`` **and** ``w`` **have the relevant states at the moment**
    **of transition and it has forgotten any previous history.**
    
    :Arguments: 
        
    **G** (NetworkX Graph)
        The underlying network
        
    **rate_function** 
        A function that will take the network, a node, and the statuses of all 
        the nodes and calculate the rate at which the node changes its status.

        The function is called like 
        
        if parameters is None:
            rate_function(G, node, status)
        else:
            rate_function(G, node, status, parameters)
        
        where G is the graph, node is the node, status is a dict such that 
        status[u] returns the status of u, and parameters is the parameters
        passed to the function.
        
        it returns a number, the combined rate at which ``node`` might change 
        status.
        
        
    **transition_choice**
        A function that takes the network, a node, and the statuses of all the
        nodes and chooses which event will happen.  The function should be 
        called [with or without ``parameters``]
        
        if ``parameters`` is ``None``:
            ``transition_choice(G, node, status)``
        else:
            ``transition_choice(G, node, status, parameters)``
        
        where ``G`` is the graph, ``node` is the node, ``status` is a dict 
        such that ``status[u]`` returns the status of u, and ``parameters`` is 
        the parameters passed to the function.
        
        It should return the new status of ``node`` based on the fact that the
        node is changing status.
        
    **get_influence_set**
        When a node ``u`` changes status, we want to know which nodes may have their
        rate altered.  We need to update their rates.  This function returns all
        nodes that may be affected by ``u`` (either in its previous state or its 
        current state).  We will go through and recalculate the rates for all
        of these nodes.  For a contagion, we can simply choose all neighbors,
        but it may be faster to leave out any nodes that it wouldn't have
        affected before or after its transition (e.g., R or I neighbors in SIR).
        
        if ``parameters`` is None:
            ``get_influence_set(G, node, status)``
        else:
            ``get_influence_set(G, node, status, parameters)``
        
        where G is the graph, node is the node, status is a dict such that 
        status[u] returns the status of u, and parameters is the parameters
        passed to the function.
        
        it should return the set of nodes whose rates need to be recalculated.
        
        Most likely, it is 
        
        def get_influence_set(G, node, status):
            return G.neighbors(node)
            
    
    **IC**   
        A dict.  IC[node] returns the status of node.
        
    **return_statuses** list or other iterable (but not a generator)
        The statuses that we will return information for, in the order
        we will return them.
        
    **tmin** number (default 0)
        starting time
            
    **tmax** number (default 100)
        stop time
            
    **return_full_data** boolean
        currently needs to be False.  True raises an error.
        
    **parameters**  list/tuple.
        Any parameters of the functions rate_function, transition_choice, influence_set
        We assume all three functions can accept parameters.  
        Examples: recovery rate, transmission rate, ...
        
        
    :Returns: 

    **(times, status1, status2, ...)**  tuple of numpy arrays
        first entry is the times at which events happen.
        second (etc) entry is an array with the same number of entries as ``times``
        giving the number of nodes of status ordered as they are in ``return_statuses`` 
        

    :SAMPLE USE:
    This simply does an SIR epidemic, by saying that the rate of becoming 
    infected is tau times the number of infected neighbors.  
    

    ::


        import networkx as nx
        import EoN
        import matplotlib.pyplot as plt
        from collections import defaultdict #makes defining the initial condition easier
        
        
        def rate_function(G, node, status, parameters):    
            #This function needs to return the rate at which node changes status.
            #
            tau,gamma = parameters                       
            if status[node] == 'I':
                return gamma         
            elif status[node] == 'S':                                          
                return tau*len([nbr for nbr in G.neighbors(node) if status[nbr] == 'I'])
            else:
                return 0
                
        def transition_choice(G, node, status, parameters):
            #this function needs to return the new status of node.  We already
            #know it is changing status.
            #
            #this function could be more elaborate if there were different
            #possible transitions that could happen.
            if status[node] == 'I':                       
                return 'R'
            elif status[node] == 'S':
                return 'I'         
                
        def get_influence_set(G, node, status, parameters):
            #this function needs to return any node whose rates might change
            #because ``node`` has just changed status.
            #
            #the only neighbors a node might affect are the susceptible ones.
            
            return {nbr for nbr in G.neighbors(node) if status[nbr] == 'S'}
            
        G = nx.fast_gnp_random_graph(100000,0.00005)  
        
        gamma = 1.
        tau = 0.5
        parameters = (tau, gamma)
        
        IC = defaultdict(lambda: 'S')
        for node in range(200):      
            IC[node] = 'I'
            
        t, S, I, R = EoN.Gillespie_complex_contagion(G, rate_function, 
                                   transition_choice, get_influence_set, IC, 
                                   return_statuses=('S', 'I', 'R'), 
                                   parameters=parameters)
                                    
        plt.plot(t, I)
        
    '''
    
    if parameters is None:
        parameters = ()
        

    status = {node: IC[node] for node in G.nodes()}

    if return_full_data:
        node_history = {node:([tmin], [status[node]]) for node in G.nodes()}

    times = [tmin]
    t = tmin
    data = {}
    C = Counter(status.values())
    for return_status in return_statuses:
        data[return_status] = [C[return_status]]                

    nodes_by_rate = _ListDict_(weighted=True)
    
    for u in G.nodes():
        rate = rate_function(G, u, status, parameters)
        if rate>0:
            nodes_by_rate.insert(u, weight = rate)
            
    if nodes_by_rate.total_weight()>0:
        delay = random.expovariate(nodes_by_rate.total_weight())
    else:
        delay = float('Inf')
    t += delay
    while nodes_by_rate.total_weight()>0 and t< tmax:
        
        times.append(t)
#        print(delta_t, nodes_by_rate.total_weight())
        node = nodes_by_rate.choose_random()
        new_status = transition_choice(G, node, status, parameters)

        for x in data.keys():         
            data[x].append(data[x][-1])
        if status[node] in return_statuses:
            data[status[node]][-1] -= 1
        if new_status in return_statuses:
            data[new_status][-1] += 1

        status[node] = new_status
        if return_full_data:
            node_history[node][0].append(t)
            node_history[node][1].append(new_status)

        #update self
        weight = rate_function(G, node, status, parameters)
        nodes_by_rate.insert(node, weight = weight)

        influence_set = get_influence_set(G, node, status, parameters)  
        
        for nbr in influence_set:
            weight = rate_function(G, nbr, status, parameters)
            nodes_by_rate.insert(nbr, weight=weight) 

        if nodes_by_rate.total_weight()>0:
            delay = random.expovariate(nodes_by_rate.total_weight())
        else:
            delay = float('Inf')
        t += delay

    if not return_full_data:
        returnval = []
        times = np.array(times)
        returnval.append(times)
        for return_status in return_statuses:
            data[return_status] = np.array(data[return_status])
            returnval.append(data[return_status])
        return returnval
    else:
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, possible_statuses = return_statuses, **sim_kwargs)
    
[docs]def Gillespie_Arbitrary(G, spontaneous_transition_graph, 
  nbr_induced_transition_graph, IC, return_statuses, tmin = 0,  tmax=100, 
  spont_kwargs = None, nbr_kwargs=None, return_full_data = False, sim_kwargs = None):
  r'''Calls Gillespie_simple_contagion.  This is here for legacy reasons.
  
  Gillespie_Arbitrary has been replaced by Gillespie_simple_contagion.  It
  will be removed in future versions.
  '''
  
  print("Gillespie_Arbitrary has been replaced by Gillespie_simple_contagion.\n",   
        "It will be removed in future versions.")
        
  return Gillespie_simple_contagion(G, spontaneous_transition_graph, 
                                    nbr_induced_transition_graph, IC, 
                                    return_statuses, tmin = tmin,  tmax=tmax, 
                                    return_full_data = return_full_data, 
                                    **sim_kwargs)
  
[docs]def Gillespie_simple_contagion(G, spontaneous_transition_graph, 
  nbr_induced_transition_graph, IC, return_statuses, tmin = 0,  tmax=100, 
  spont_kwargs = None, nbr_kwargs=None, return_full_data = False, sim_kwargs = None):
    r'''
    Performs simulations for epidemics, allowing more flexibility than SIR/SIS.
    
    This does not handle complex contagions.  It assumes that when an individual
    changes status either he/she has received a "transmission" from a *single*
    neighbor or he/she is changing status independently of any neighbors.  So
    this is like SIS or SIR.  Other examples would be like SEIR, SIRS, etc   
    
    There is an example below demonstrating an SEIR epidemic.
    
    We allow for nodes to undergo two types of transitions.  They can be:
    
    - spontaneous - so a node of status A becomes B without any neighbor's 
      influence
    
    - neighbor-induced - so an edge between status A and status B nodes suddenly
      becomes an edge between a status A and a status C node because the status
      B node changes status due to the existence of the edge.  (in principle, 
      both could change status, but the code currently only allows the second
      node to change status).
    
    Both types of transitions can be represented by weighted directed graphs.
    We describe two weighted graphs whose nodes represent the possible statuses
    and whose edges represent possible transitions of statuses.
      
    - The spontaneous transitions can be represented by a graph whose nodes are 
      the possible statuses and an edge from ``'A'`` to ``'B'`` represent that in the 
      absence of any influence from others an indivdiual of status ``'A'`` 
      transitions to status ``'B'`` with default rate given by the weight of the edge 
      in this spontaneous transition graph.  The rate may be modified by
      properties of the nodes in the contact network ``G``.
    
    - The neighbor-induced transitions can be represented by a "transitions 
      graph" whose nodes are length-2 tuples.  The first entry represents the 
      first individual of a partnership and the second represents the second 
      individual.  **only the second individual changes status**.  An edge in 
      the transitions graph from the node ``('A', 'B')`` to the node ``('A', 'C')``
      represents that an ``'AB'`` partnership in the contact network can cause the 
      second individual to transition to status ``'C'``.  The weight of the edge 
      in the represents the default transition rate.  The rate may be modified
      by properties of the nodes or the edge in the contact network ``G``.
      
    [for reference, if you look at Fig 4.3 on pg 122 of Kiss, Miller & Simon
    the graphs for **SIS** would be:
        ``spontaneous_transition_graph``: ``'I'``-> ``'S'`` with the edge weighted by ``gamma`` and
        
        ``nbr_induced_transition_graph``: ``('I', 'S')`` -> ``('I', 'I')`` with the edge weighted by ``tau``.
        
    For **SIR** they would be:
        ``spontaneous_transition_graph``: ``'I'``->``'R'`` with weight ``gamma`` and
        
        ``nbr_induced_transition_graph``: ``('I', 'S')`` -> ``('I', 'I')`` with rate ``tau``.
        ]
        
    These graphs must be defined and then input into the algorithm.  
    
    It is possible to weight edges or nodes in the contact network ``G`` (that is, 
    not the 2 directed networks defined above, but the original contact 
    network) so that some of these transitions have different rates for 
    different individuals/partnerships.  These are included as attributes in the
    contact network.  In the most general case, the transition rate depends
    on some function of the attributes of the nodes or edge in the contact
    network ``G``.  
    
    There are two ways we introduce individual or pair-level heterogeneity in
    the population.  The first way is through introducing weights to individuals
    or their contacts which simply multiply the default rate.  The second is
    through including a function of nodes or pairs of nodes which will explicitly
    calculate the scaling factor, possibly including more information than
    we can include with the weights.  For a given transition, you can use at 
    most one of these. 
    
    - We first describe examples of weighting nodes/edges in the population
    
    So for the SIR case, if some people have higher recovery rate, we might 
    define a node attribute ``'recovery_weight'`` for each node in ``G``, and the 
    recovery would occur with rate G.nodes[node]['recovery_weight']*gamma.  So
    a value of 1.1 would correspond to a 10% increased recovery rate.  Since I
    don't know what name you might choose for the weight label as I write this 
    algorithm, in defining the spontaneous transition graph (H), the ``'I'``->``'R'`` 
    edge would be given an attribute ``'weight_label'`` so that 
    ``H.adj['I']['R']['weight_label'] = 'recovery_weight'``.
    If you define the attribute ``'weight_label'`` for an edge in ``H``, then it will be
    assumed that every node in ``G`` has a corresponding weight.  If no attribute
    is given, then it is assumed that all transitions happen with the original
    rate.
    
    We similarly define the weight_labels as edge attributes in the 
    neighbor-induced transition graph.  The edges of the graph ``'G'`` have a 
    corresponding ``G[u,v]['transmission_weight']``
    
    - Alternately we might introduce a function.
    
    So for the SIR case if the recovery rate depends on two attributes of a node
    (say, age and gender), we define a function ``rate_function(G,node)`` which
    will then look at G.nodes[node]['age'] and G.nodes[node]['gender'] and then
    return a factor which will be multiplied by ``gamma`` to give the recovery 
    rate.  Similarly, if we are considering a neighbor induced transition and
    the rate depends on properties of both nodes we define another function
    ``rate_function(G,u,v)`` which may use attributes of u or v or the edge
    to find the appropriate scaling factor.
    
    
    
    :Arguments: 
        
    **G** NetworkX Graph
        The underlying contact network.  If ``G`` is directed, we assume that
        "transmissions" can only go in the same direction as the edge.
            
    **spontaneous_transition_graph** Directed networkx graph
        The nodes of this graph are the possible statuses of a node in ``G``.
        An edge in this graph is a possible transition in ``G`` that occurs
        without any influence from neighbors.
            
        An edge in this directed graph is labelled with attributes
        
          - ``'rate'``   [a number, the default rate of the transition]
          - ``'weight_label'``  (optional) [a string, giving the label of 
                            a **node** attribute in the contact network ``G``
                            that scales the transition rate]
          - ``'rate_function'`` (optional not combinable with ``'weight_label'``
                            for some edge.)
                            [a user-defined function of the contact network 
                            and node that will scale the transition rate.
                            This cannot depend on the statuses of any nodes - 
                            we must be able to calculate it once at the
                            beginning of the process.]  It will be called as
                            ``rate_function(G, node,**spont_kwargs)`` where
                            ``spont_kwargs`` is described below.
                            
        Only one of ``'weight_label'`` and ``'rate_function'`` can be given.
        
        In the description below, let's use   
          - ``rate = spontaneous_transition_graph.adj[Status1][Status2]['rate']``
          - ``weight_label = spontaneous_transition_graph.adj[Status1][Status2]['weight_label']``
          - ``rate_function = spontaneous_transition_graph.adj[Status1][Status2]['rate_function']``
        
        For a node ``u`` whose status is ``Status1``, the rate at which ``u`` 
        transitions to ``Status2`` is
          - ``rate``    if neither ``weight_label`` nor ``rate_function`` is defined.
          - ``rate*G.nodes[u][weight_label]`` if ``weight_label`` is defined.
          - ``rate*rate_function(G, u, **spont_kwargs)`` if ``rate_function`` is 
             defined.
      
      
        So for example in the case of an SIR disease, this would be a graph
        with an isolated node ``'S'`` and an edge from node ``'I'`` to ``'R'`` with 
        ``rate`` equal to ``gamma`` (the recovery rate).  It would not actually 
        be necessary to have the node ``'S'``.

                    
        Note that the rate_function version is more general, and in principle 
        we don't need the weight_label option.  It's here for backwards 
        compatibility and general simplicity purposes.
        
            
    **nbr_induced_transition_graph** Directed networkx graph
    
        The nodes of this graph are tuples with possible statuses of nodes 
        at the end of an edge. The first node in the tuple is the node that
        could be affecting the second.  So for example for the SIR model
        we would expect a node ``('I', 'S')`` with an edge to ``('I', 'I')``.
        
        An edge in this directed graph is labelled with attributes
        
          - ``'rate'``   [a number, the default rate of the transition]
          - ``'weight_label'``  (optional) [a string, giving the label of 
                            an *edge** attribute in the contact network ``G``
                            that scales the transition rate]
          - ``'rate_function'`` (optional not combinable with ``'weight_label'``
                            for some edge.)
                            [a user-defined function of the contact network 
                            and source and target nodes that will scale the 
                            transition rate.
                            This cannot depend on the statuses of any nodes - 
                            we must be able to calculate it once at the
                            beginning of the process.
                            It will be called as 
                            ``rate_function(G, source, target, *nbr_kwargs)``]
                            
        Only one of ``'weight_label'`` and ``'rate_function'`` can be given.  
        
        In the description below, let's use
        
         - ``rate = spontaneous_transition_graph.adj[Status1][Status2]['rate']``
         - ``weight_label = spontaneous_transition_graph.adj[Status1][Status2]['weight_label']``
         - ``rate_function = spontaneous_transition_graph.adj[Status1][Status2]['rate_function']``        
        
        If the transition is (A,B) to (A,C) and we have a source node of
        status A joined to a target node of status B then the target node
        transitions from B to C with rate
          - ``rate``    if neither ``weight_label`` nor ``rate_function`` is defined.
          - ``rate*G.adj[source][target][weight_label]`` if ``weight_label`` is defined.
          - ``rate*rate_function(G, source, target, **nbr_kwargs)`` if ``rate_function`` is defined
        
        So for example in the case of an SIR disease with transmission rate
        tau, this would be a graph with an edge from the node ``('I','S')`` to 
        ``('I', 'I')``.  The attribute ``'rate'`` for the edge would be ``tau``.
        
    **IC** dict
        states the initial status of each node in the network.
            
    **return_statuses** list or other iterable (but not a generator)
        The statuses that we will return information for, in the order
        we will return them.
        
    **tmin** number (default 0)
        starting time
            
    **tmax** number (default 100)
        stop time
            
    **spont_kwargs**  dict or None (default None)
        Any parameters which might be needed if the user has defined a rate
        function for a spontaneous transition.  
        If any of the spontaneous transition rate functions accepts these, 
        they all need to (even if not used).  It's easiest to define the
        function as def f(..., **kwargs) 
        
    **nbr_kwargs** dict or None (default None)
        Any parameters which might be needed if the user has defined a rate
        function for a neighbor-induced transition.  
        If any of the neighbor-induced transition rate functions accepts these, 
        they all need to (even if not used).  It's easiest to define the
        function as def f(..., **kwargs) 
           
    **return_full_data** boolean
        Tells whether to return a Simulation_Investigation object or not
        
    **sim_kwargs** keyword arguments
        Any keyword arguments to be sent to the Simulation_Investigation object
        Only relevant if ``return_full_data=True``
        
    :Returns: 

    **(times, status1, status2, ...)**  tuple of numpy arrays
        first entry is the times at which events happen.
        second (etc) entry is an array with the same number of entries as ``times``
        giving the number of nodes of status ordered as they are in ``return_statuses`` 
        
    
    :SAMPLE USE:

    This does an SEIR epidemic.  It treats the nodes and edges as weighted.
    So some individuals have a higher E->I transition rate than others, and some
    edges have a higher transmission rate than others.  The recovery rate
    is taken to be the same for all nodes.
    
    There are more examples in the
    online documentation at :ref:``simple-contagion-section``.

    ::

        import EoN
        import networkx as nx
        from collections import defaultdict
        import matplotlib.pyplot as plt
        import random
        
        N = 100000
        G = nx.fast_gnp_random_graph(N, 5./(N-1))
        
        #they will vary in the rate of leaving exposed class.
        #and edges will vary in transition rate.
        #there is no variation in recovery rate.
        
        node_attribute_dict = {node: 0.5+random.random() for node in G.nodes()}
        edge_attribute_dict = {edge: 0.5+random.random() for edge in G.edges()}
        
        nx.set_node_attributes(G, values=node_attribute_dict, name='expose2infect_weight')
        nx.set_edge_attributes(G, values=edge_attribute_dict, name='transmission_weight')
        
        
        H = nx.DiGraph()
        H.add_node('S')
        H.add_edge('E', 'I', rate = 0.6, weight_label='expose2infect_weight')
        H.add_edge('I', 'R', rate = 0.1)
        
        J = nx.DiGraph()
        J.add_edge(('I', 'S'), ('I', 'E'), rate = 0.1, weight_label='transmission_weight')
        IC = defaultdict(lambda: 'S')
        for node in range(200):
            IC[node] = 'I'
        
        return_statuses = ('S', 'E', 'I', 'R')
        
        t, S, E, I, R = EoN.Gillespie_simple_contagion(G, H, J, IC, return_statuses,
                                                tmax = float('Inf'))
        
        plt.semilogy(t, S, label = 'Susceptible')
        plt.semilogy(t, E, label = 'Exposed') 
        plt.semilogy(t, I, label = 'Infected')
        plt.semilogy(t, R, label = 'Recovered')
        plt.legend()

        plt.savefig('SEIR.png')       
'''

    if spont_kwargs is None:
        spont_kwargs = {}
        
    if nbr_kwargs is None:
        nbr_kwargs = {}
        
    if sim_kwargs is None:
        sim_kwargs = {}
        
    status = {node: IC[node] for node in G.nodes()}

    if return_full_data:
        transmissions = []
        node_history = {node:([tmin], [status[node]]) for node in G.nodes()}


    times = [tmin]
    data = {}
    C = Counter(status.values())     
    for return_status in return_statuses:
        data[return_status] = [C[return_status]] #
    try:
        spontaneous_transitions = sorted(spontaneous_transition_graph.edges())
        induced_transitions = sorted(nbr_induced_transition_graph.edges())
    except TypeError:
        print("note that because your states are not sortable, your simulations "+
              "may produce different outcomes even if you specify the random seed")
        spontaneous_transitions = list(spontaneous_transition_graph.edges())
        induced_transitions = list(nbr_induced_transition_graph.edges())
    potential_transitions = {}
    rate = {}# intrinsic rate of a transition
    #weight_sum = defaultdict(lambda: 0)
    #weights = defaultdict(lambda: None)
    #max_weight = defaultdict(lambda: 0)
    get_weight = defaultdict(lambda: defaultdict(lambda:None))


    #SET UP THE POSSIBLE EVENTS, STARTING WITH SPONTANEOUS
    for transition in spontaneous_transitions:
        rate[transition] = spontaneous_transition_graph.adj[transition[0]][transition[1]]['rate']
        if 'weight_label' in spontaneous_transition_graph.adj[transition[0]][transition[1]]:
            if 'rate_function' in spontaneous_transition_graph.adj[transition[0]][transition[1]]:
                raise EoN.EoNError('cannot define both "weight_label" and "rate_function" in',\
                                    'spontaneous_transitions graph')
            wl = spontaneous_transition_graph.adj[transition[0]][transition[1]]['weight_label']
            get_weight[transition] = nx.get_node_attributes(G, wl) #This is a dict mapping node to its weight.
            potential_transitions[transition] = _ListDict_(weighted=True)#max_weight[transition] = max(get_weight[transition].values())
        elif 'rate_function' in spontaneous_transition_graph.adj[transition[0]][transition[1]]:
            rf = spontaneous_transition_graph.adj[transition[0]][transition[1]]['rate_function']
            get_weight[transition] = {node: rf(G, node, **spont_kwargs) for node in G}#This is a dict mapping node to its weight.
            potential_transitions[transition] = _ListDict_(weighted=True)
        else:
            potential_transitions[transition] = _ListDict_()#max_weight[transition]=1
      
    #print(spontaneous_transitions)
    #print([rate[transition] for transition in spontaneous_transitions])
    #CONTINUING SETTING UP POSSIBLE EVENTS, NOW WITH INDUCED TRANSITIONS.      
    for transition in induced_transitions:
        if transition[0][0] != transition[1][0]:
            raise EoN.EoNError("transition {} -> {} not allowed: first node must keep same status".format(transition[0],transition[1]))
        rate[transition] = nbr_induced_transition_graph.adj[transition[0]][transition[1]]['rate']

        if 'weight_label' in nbr_induced_transition_graph.adj[transition[0]][transition[1]]:
            if 'rate_function' in nbr_induced_transition_graph.adj[transition[0]][transition[1]]:
                raise EoN.EoNError('cannot define both "weight_label" and "rate_function" in',\
                                    'nbr_induced_transitions graph')
            wl = nbr_induced_transition_graph.adj[transition[0]][transition[1]]['weight_label']
            get_weight[transition] = nx.get_edge_attributes(G, wl)#a dict mapping edge to its weight.
            #note if G is directed, then this has all edges.  If undirected
            #edge will appear only once.  But we may care about the opposite direction.
            #so we need to add those now.
            if not nx.is_directed(G):
                get_weight[transition].update({(source, target): G.adj[source][target][wl] for target, source in get_weight[transition]})
            potential_transitions[transition] = _ListDict_(weighted=True)
        elif 'rate_function' in nbr_induced_transition_graph.adj[transition[0]][transition[1]]:
            rf = nbr_induced_transition_graph.adj[transition[0]][transition[1]]['rate_function']
            edges = list(G.edges())            
            get_weight[transition] = {(source, target): rf(G, source, target, **nbr_kwargs) for source, target in edges}
            if not nx.is_directed(G):
                get_weight[transition].update({(source, target): rf(G, source, target, **nbr_kwargs) for target, source in edges})
            potential_transitions[transition] = _ListDict_(weighted=True)
        else:
            potential_transitions[transition] = _ListDict_()
                
    #initialize all potential events to start.                
    for node in G.nodes():        
        if spontaneous_transition_graph.has_node(status[node]):# and spontaneous_transition_graph.degree(status[node])>0:
            for transition in spontaneous_transition_graph.edges(status[node]):
                potential_transitions[transition].update(node, weight_increment = get_weight[transition][node])
                #weight increment defaults to None if not present                    
                    
                    
        for nbr in G.neighbors(node):
            #print(status[node],status[nbr])
            if nbr_induced_transition_graph.has_node((status[node],status[nbr])):# and nbr_induced_transition_graph.degree((status[node],status[nbr])) >0:
                for transition in nbr_induced_transition_graph.edges((status[node],status[nbr])):
                    potential_transitions[transition].update((node, nbr), weight_increment = get_weight[transition][(node, nbr)])
    t = tmin
    
    #NOW WE'RE READY TO GET STARTED WITH THE SIMULATING
    
    total_rate = sum(rate[transition]*potential_transitions[transition].total_weight() for transition in spontaneous_transitions+induced_transitions)
    if total_rate>0:
        delay = random.expovariate(total_rate)
    else:
        delay = float('Inf')
    t = t+delay
    while total_rate>0 and t<tmax:
        times.append(t)
        r = random.random()
        for transition in spontaneous_transitions+induced_transitions:
            r -= rate[transition]*potential_transitions[transition].total_weight()/total_rate
            if r<0:
                break
        #either node doing spontaneous or edge doing an induced event
        if transition in spontaneous_transitions:
            spontaneous = True
        else:
            spontaneous = False
            
        actor = potential_transitions[transition].choose_random()
                
        if spontaneous:
            modified_node = actor
            old_status = transition[0]
            new_status = transition[1]
            #node changes status

        else:                    

            source, target = actor
            modified_node = target

            old_status = transition[0][1]
            new_status = transition[1][1]
            
            if return_full_data:
                transmissions.append((t, source, modified_node))

                
            #modified_node changes status

        status[modified_node] = new_status
        if return_full_data:
            node_history[modified_node][0].append(t)
            node_history[modified_node][1].append(new_status)
        
        #it might look like there is a cleaner way to do this, but what if it
        #happens that old_status == status[modified_node]???  This way still works.
        for x in data.keys():         
            data[x].append(data[x][-1])
        if old_status in return_statuses:
            data[old_status][-1] -= 1
        if status[modified_node] in return_statuses:
            data[status[modified_node]][-1] += 1
        
        #UPDATE OUR POTENTIAL TRANSITIONS
        
        for transition in spontaneous_transitions: #can probably make more efficient, but there aren't many
            #remove modified_node from any spontaneous lists
            #add modified_node to any spontaneous lists
            if transition[0] == old_status:
                potential_transitions[transition].remove(modified_node)
            if transition[0] == status[modified_node]:
                potential_transitions[transition].update(modified_node, weight_increment = get_weight[transition][modified_node])
            #roundoff error can kill the calculation, but it's slow to do this right.
            #so we'll only deal with it if the value is small enough that roundoff
            #error might matter.
            if potential_transitions[transition].total_weight() < 10**(-7) and potential_transitions[transition].total_weight()!=0:
                potential_transitions[transition].update_total_weight()
        #print(modified_node, old_status, status[modified_node])
        #print(potential_transitions[
        for transition in induced_transitions:
            if G.is_directed():
                for nbr in G.neighbors(modified_node):
                    #remove edge from any induced lists
                    #add edge to any induced lists

                    nbr_status = status[nbr]
                    
                    if (modified_node, nbr) not in get_weight[transition]:
                        get_weight[transition][(modified_node,nbr)] = get_weight[transition][(nbr,modified_node)]
                    if transition[0] == (old_status, nbr_status):
                        potential_transitions[transition].remove((modified_node, nbr))
                    if transition[0] == (status[modified_node], nbr_status):
                        potential_transitions[transition].update((modified_node, nbr), weight_increment = get_weight[transition][(modified_node, nbr)])
                for pred in G.predecessors(modified_node):
                    #remove edge from any induced lists
                    #add edge to any induced lists

                    pred_status = status[pred]
                    if (pred, modified_node) not in get_weight[transition]:
                        get_weight[transition][(pred, modified_node)] = get_weight[transition][(pred, modified_node)]
                    if transition[0] == (pred_status, old_status):
                        potential_transitions[transition].remove((pred, modified_node))
                    if transition[0] == (pred_status, status[modified_node]):
                        potential_transitions[transition].update((pred, modified_node), weight_increment = get_weight[transition][(pred, modified_node)])                    
            else:
                for nbr in G.neighbors(modified_node):
                    #remove edge from any induced lists
                    #add edge to any induced lists
                    nbr_status = status[nbr]
                    
                    if (modified_node, nbr) not in get_weight[transition]:
                        get_weight[transition][(modified_node,nbr)] = get_weight[transition][(nbr,modified_node)]
                    elif (nbr, modified_node) not in get_weight[transition]:
                        get_weight[transition][(nbr, modified_node)] = get_weight[transition][(modified_node, nbr)]
                        
                    if transition[0] == (nbr_status, old_status):
                        potential_transitions[transition].remove((nbr, modified_node))
                    if transition[0] == (old_status, nbr_status):
                        potential_transitions[transition].remove((modified_node, nbr))
                    
                    if transition[0] == (nbr_status, status[modified_node]):
                        potential_transitions[transition].update((nbr, modified_node), weight_increment = get_weight[transition][(nbr, modified_node)])
                    if transition[0] == (status[modified_node], nbr_status):
                        potential_transitions[transition].update((modified_node, nbr), weight_increment = get_weight[transition][(modified_node, nbr)])
                
            #roundoff error can kill the calculation, but it's slow to do this right.
            #so we'll only deal with it if the value is small enough that roundoff
            #error might matter.
            if potential_transitions[transition].total_weight() < 10**(-7) and potential_transitions[transition].total_weight()!=0:
                potential_transitions[transition].update_total_weight()
        total_rate = sum(rate[transition]*potential_transitions[transition].total_weight() for transition in spontaneous_transitions+induced_transitions)
        if total_rate>0:
            delay = random.expovariate(total_rate)
        else:
            delay = float('Inf')
            
        t += delay

        
    if not return_full_data:
        returnval = []
        times = np.array(times)
        returnval.append(times)
        for return_status in return_statuses:
            data[return_status] = np.array(data[return_status])
            returnval.append(data[return_status])
        return returnval
    else:
        if sim_kwargs is None:
            sim_kwargs = {}
        return EoN.Simulation_Investigation(G, node_history, transmissions, possible_statuses= return_statuses, **sim_kwargs)