BoPeng/simuPOP

Determining subpopulation names

Closed this issue · 6 comments

Hi Bo,
I am trying to simulate a population of fishers that split at T=1 (original population = 1000; T=1 sub-populations= 72, 928). I am additionally trying to simulate age-structuring as well in the population, where only the individuals from ages 2-6 are able to mate with each other.

I was wondering:

  1. the index numbers python has associated with each group
  2. I just wanted to make sure that there is no breeding between the various subgroups in the code I have currently written?
  • please refer to preOps and matingScheme (i've just copied my entire code for clarification).

Thank you so much,
Natasha

My code:

import simuPOP as sim
import simuOpt
#import random for random number generators
import random
#import simupop.utils module (allow for exporting files)
import simuPOP.utils as simp
# seed number set so that each time simulation is run it will output same numbers
sim.setRNG(seed=12349)

def avgHet(pop):
      lociNum=17
      sim.stat(pop, heteroFreq=sim.ALL_AVAIL, step=10),
      with open('HeteroFreqb.txt','a') as f:
          print(pop.dvars().gen, file = f)
      for i in range(lociNum):
          with open('HeteroFreqb.txt','a') as f:
              print(pop.dvars().heteroFreq[i], file = f)
      for x in range(lociNum):
          with open('HeteroFreqb','a') as f:
              print('', file = f)
      return True

def simuOverlappingGeneration(size, maxAge, minMatingAge, gen):
    '''
    size         population size.
    maxAge       maximum age. individuals with age > maxAge will die.
    minMatingAge minimal mating age.
    gen          generations to simulate
    '''

    # set population size to 1000 individuals, ploidy to 2, and a total of 17 loci
    pop=sim.Population(size, ploidy=2, loci=[1]*17, infoFields=['age'])
    #set age of fishers randomly in a pouplation
    pop.setIndInfo([random.randint(0,6) for x in range(1000)], 'age')
    # create age structuring in the population
    pop.setVirtualSplitter(sim.InfoSplitter(field='age', cutoff=[minMatingAge, maxAge+0.1]))
    #evolve the population using specified proportions for sex, genotype frequencies, and ages
    pop.evolve(
            #initialize population with starting M/F ratios and Genotype proportions (applied before evolution)
            #randomly set individual ages between 0 and 6 yers old
        initOps=[
                sim.InitSex(maleProp=0.57),
                sim.InitGenotype(prop=[.057,.093,.321,.150,.379], loci=0), 
                sim.InitGenotype(prop=[.036,.121,.300,.379,.164], loci=1),
                sim.InitGenotype(prop=[.375,.125,.417,.083], loci=2),
                sim.InitGenotype(prop=[.479,.250,.236,.028,.007], loci=3), 
                sim.InitGenotype(prop=[.340,.014,.250,.354,.042], loci=4),
                sim.InitGenotype(prop=[.338, .155, .465, .042], loci=5),
                sim.InitGenotype(prop=[.028, .417, .458, .097], loci=6),
                sim.InitGenotype(prop=[.229, .500, .014, .160, .063, .034], loci=7), 
                sim.InitGenotype(prop=[.014, .410, .465, .111], loci=8),
                sim.InitGenotype(prop=[.965, .014, .021], loci=9),
                sim.InitGenotype(prop=[.042, .218, .049, .120, .106, .141, .169, .042, .092, .014, .007], loci=10),
                sim.InitGenotype(prop=[.160, .104, .257, .347, .111, .014, .007], loci=11),
                sim.InitGenotype(prop=[.431, .500, .069], loci=12),
                sim.InitGenotype(prop=[.028, .035, .250, .632, .035, .006, .014], loci=13),
                sim.InitGenotype(prop=[.069, .771, .063, .097], loci=14),
                sim.InitGenotype(prop=[.022, .123, .087, .312, .181, .152, .123], loci=15),
                sim.InitGenotype(prop=[.375, .625], loci=16),
                                ],
                # every single generation, a population ages by 1. At t=1, a the population splits into 72 and 928
                # individuals to simulate translocation of BC populations
        preOps=[sim.InfoExec('age += 1'),
                sim.SplitSubPops(subPops=0, sizes=[72, 928], at=1),
                sim.Stat(popSize=True),
                sim.PyEval(r'"Gen %d:\t%s\n" % (gen, subPopSize)'),
                ],  
        matingScheme = sim.HeteroMating(matingSchemes=[
                sim.CloneMating(subPops=[(0, 0), (0,1), (1,0), (1,1)], weight=-1),
                sim.RandomMating(subPops=[(0, 1), (1,1)], 
                    ops=[sim.MendelianGenoTransmitter()]
                )
            ]),
        postOps = [
        sim.PyOutput('\n', reps=-1, step = 10),
        sim.PyOperator(avgHet),
        
        #For subpopulation 0:
            
        #Calculation of Number of Alleles
        sim.Stat(alleleFreq=range(17), subPops=[0], vars=['alleleFreq_sp'], step=1),
        
        #Locus 1
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[0][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc01.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc01.txt'), 
        
        #Locus 2
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[1][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc02.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc02.txt'), 
        
        #Locus 3
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[2][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc03.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc03.txt'),
        
        #Locus 4
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[3][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc04.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc04.txt'),
        
       #Locus 5
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[4][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc05.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc05.txt'), 
        
        #Locus 6
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[5][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc06.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc06.txt'),
        
        #Locus 7
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[6][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc07.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc07.txt'),
        
        #Locus 8
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[7][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc08.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc08.txt'),
        
        #Locus 9
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[8][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc09.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc09.txt'),
        
         #Locus 10
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[9][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc10.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc10.txt'),
        
        #Locus 11
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[10][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc11.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc11.txt'),
        
        #Locus 12
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[11][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc12.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc12.txt'),
        
        #Locus 13
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[12][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc13.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc13.txt'),
        
        #Locus 14
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[13][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc14.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc14.txt'),
        
        #Locus 15
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[14][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc15.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc15.txt'),
        
        #Locus 16
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[15][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc16.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc16.txt'),
        
        #Locus 16
        sim.PyEval('gen', step=1, reps=0),
        sim.PyEval(r"'\t%.2f' % (sum([len(subPop[0]['alleleFreq'][[16][0]]) "
            "for x in range(2)])/2.)", step=1, output='>>NA_loc17.txt'),        
        sim.PyOutput('\n', reps=-1, step=1, output='>>NA_loc17.txt'),  
        ],       
        gen=gen
        )
        
simuOverlappingGeneration(1000, 6, 2, 100)

Do not have time to go through your code carefully, but

  1. You do not need so many sim.PyEval('gen', step=1, reps=0),.
  2. Locus 16 etc could be written as list comprehension if you know how to do it, something like

postOps = [whatever] + [sim.PyEval(.... loc) for loc in ...]
3. You are splitting your population so subPops=[(0, 0), (0,1), (1,0), (1,1)], might not work. You can try to use [(ALL_AVAIL, ALL_AVAIL)] to let the mating scheme get all the subpopulations automatically.

Thank you Bo!

for the following code:

sim.Stat(alleleFreq=range(17), subPops=[0], vars=['alleleFreq_sp'], popSize=True, step=1), print(pop.vars()['alleleFreq'][0][0]),

How would I use the pop.vars function to print out all the allele frequencies at all the loci? When I try doing this I keep getting KeyError: 'alleleFreq'.

Thank you,
Natasha

Well, you can always print the entire dictionary pop.vars()` and figure out how to access specific information of interest.

Great, thank you!

Thanks Bo:
For the below, are you referring to using ALL_AVAIL under the clone mating or polygamous mating scheme?

  1. You are splitting your population so subPops=[(0, 0), (0,1), (1,0), (1,1)], might not work. You can try to use [(ALL_AVAIL, ALL_AVAIL)] to let the mating scheme get all the subpopulations automatically.

[(ALL_AVAIL, ALL_AVAIL)] basically means all vsp in all subpopulations. ALL_AVAIL means all subpopulations. The former should be used in placed of

subPops=[(0, 0), (0,1), (1,0), (1,1)],

if subpopulation 1 does not exist before population split.