#include #include #include #ifdef MALLOCDEBUG #include "../gmalloc/malloc.h" #endif /* implementation of binary immune system model * by Nelson Minar * model from Forrest, Lapedes, Hightower, etc. * This code is Copyright (c) 1994 Nelson Minar. Please do not use without * the permission of the author. */ /* {{{ typedefs */ typedef unsigned char Allele; /* one bit */ typedef Allele * Antigen; typedef struct { Allele * genome; unsigned char * pieceMatches; unsigned bestMatch; double trueMatch; double match; double fitness; } * Individual; typedef Antigen * AgUniverse; typedef struct { unsigned best; unsigned worst; double avgMatch; double avgFitness; double sigma; double avgTrueMatch; Individual * individuals; } * Population; /* individual layout: * [A1 A2 .. An B1 B2 .. Bn .. .. K1 K2 .. Kn] * where N is numInLibrary, K is numLibraries, and each token is geneLength * piece matches records the matches of A1, A2,.. B1, B2 etc etc */ /* }}} */ /* {{{ global variables */ /* variables having to do with representation */ unsigned agLength, numLibraries, numInLibrary; /* derived from variables above */ unsigned geneLength, libraryLength, individualLength, numAbStrings; /* number of entities */ unsigned agUniverseSize, populationSize; /* variables having to do with model */ /* abSamplingNoise == 0 implies no noise, otherwise sample that many ab */ unsigned agSamplingNoise, abSamplingNoise; /* generations */ unsigned finalGeneration; /* per bit mutation probability, crossover probability */ double pMutation, pCross; /* counters */ unsigned numMutations = 0, numCrossover = 0; /* output controls */ unsigned printLastFlag, printPopulationInterval, printBestInterval, printWorstInterval, printStatsInterval; /* scaling controls */ double sigmaMultiplier; unsigned randomSeed; /* }}} */ /* {{{ prototypes */ int ipow(int, int); void error(char *); void usage(char *); void setParams(int, char **); Antigen newAntigen(void); AgUniverse newAgUniverse(void); Individual newIndividual(void); void copyIndividual(Individual, Individual); Population allocPopulation(void); Population initPopulation(Population); void printString(Allele * s, unsigned n); void printAntigen(Antigen); void printIndividual(Individual, AgUniverse); void printAgUniverse(AgUniverse); void printPopulation(Population, AgUniverse, unsigned); void analyzeIndividual(Individual, AgUniverse); void updateIndividualMatches(Individual in, AgUniverse agUniverse); unsigned evalPieceMatch(Allele *, Allele *, unsigned); unsigned evalOneMatch(Individual, unsigned, unsigned); unsigned evalTrueMatch(Individual, AgUniverse, unsigned); unsigned evalSampledMatch(Individual, AgUniverse, unsigned, unsigned *); double evalAbMatch(Individual, AgUniverse); double calculatePopFitness(Population, AgUniverse); unsigned rouletteSelect(double, Population); void mutateIndividual(Individual); void makeKids(Population, unsigned, Individual, Individual); /* }}} */ /* {{{ macros to access representation */ #define theLibrary(in,l) ((in) + (l)*libraryLength) #define theGeneInLibrary(l,g) ((l) + (g)*geneLength) #define theGene(in,l,g) (theGeneInLibrary(theLibrary(in,l),g)) #define pieceMatch(in,ag,l,g) (*((in)->pieceMatches + (ag)*numLibraries*numInLibrary + (l)*numInLibrary + (g))) /* }}} */ /* {{{ random number functions */ #undef USE48 #ifdef USE48 double drand48(); long lrand48(); void srand48(long); # define seedRandom(s) (srand48(s)) # define randomAllele() (lrand48() & 0x1) # define randomInt() (lrand48()) # define randomDouble() (drand48()) #else # define seedRandom(s) (srandom(s)) # define randomAllele() ((random() >> 11) & 0x1) # define randomInt() (random()) # ifndef sun # define randomDouble() ((double)random() / (double)RAND_MAX) # else # define randomDouble() ((double)random() / (double)((1U<<31) - 1)) # endif #endif /* }}} */ /* {{{ malloc debugging */ #ifdef MALLOCDEBUG static void *(*old_malloc_hook) (size_t); static void * my_malloc_hook (size_t size) { void *result; __malloc_hook = old_malloc_hook; result = malloc (size); printf ("malloc (%u) returns %p\n", (unsigned int) size, result); __malloc_hook = my_malloc_hook; return result; } #endif /* }}} */ /* {{{ ipow() */ int ipow(int x, int y) { int i, r = 1; if (y < 0) error("Negative exponent given.\n"); for (i = 0; i < y; i++) r *= x; return r; } /* }}} */ /* {{{ error() */ void error(char * s) { fprintf(stderr, s); exit(1); } /* }}} */ /* {{{ usage() */ void usage(char * s) { fprintf(stderr, "Usage: %s\n", s); fprintf(stderr, "\t-S population stats print interval\n"); fprintf(stderr, "\t-L (print last generation\n"); fprintf(stderr, "\t-P population print interval\n"); fprintf(stderr, "\t-B best print interval\n"); fprintf(stderr, "\t-W worst print interval\n"); fprintf(stderr, "\t-g generations\n"); fprintf(stderr, "\t-p population size\n"); fprintf(stderr, "\t-m per individual mutation probability\n"); fprintf(stderr, "\t-c crossover probability\n"); fprintf(stderr, "\t-l antigen length\n"); fprintf(stderr, "\t-a antigen universe size\n"); fprintf(stderr, "\t-e number of entries in antibody library\n"); fprintf(stderr, "\t-n number of libraries\n"); fprintf(stderr, "\t-s samples of Ab to take (0 means all)\n"); fprintf(stderr, "\t-r random seed\n"); exit(1); } /* }}} */ /* {{{ setParams() */ void setParams(int argc, char ** argv) { extern char * optarg; int c; agLength = 16; numLibraries = 1; numInLibrary = 1; agUniverseSize = 1; populationSize = 20; agSamplingNoise = 0; abSamplingNoise = 0; /* how many ab to sample */ finalGeneration = 60; pMutation = 0.1; pCross = 0.7; sigmaMultiplier = 2.0; printLastFlag = 0; printPopulationInterval = 0; printBestInterval = 0; printWorstInterval = 0; printStatsInterval = 1; randomSeed = time(0) + getpid(); while ((c = getopt(argc, argv, "LS:W:P:B:g:p:l:n:m:c:a:e:s:")) != -1) switch(c) { case 'L': printLastFlag = 1; break; case 'S': printStatsInterval = atoi(optarg); break; case 'P': printPopulationInterval = atoi(optarg); break; case 'B': printBestInterval = atoi(optarg); break; case 'W': printWorstInterval = atoi(optarg); break; case 'r': randomSeed = atoi(optarg); break; case 'g': finalGeneration = atoi(optarg); break; case 'p': populationSize = atoi(optarg); break; case 'l': agLength = atoi(optarg); break; case 'm': pMutation = atof(optarg); break; case 'c': pCross = atof(optarg); break; case 'a': agUniverseSize = atoi(optarg); break; case 'e': numInLibrary = atoi(optarg); break; case 'n': numLibraries = atoi(optarg); break; case 's': abSamplingNoise = atoi(optarg); break; default : case '?': usage(argv[0]); break; } printf("Parameters. seed: %u agLen: %u numLib: %u numInLib: %u agUSize: %u agNoise: %u abNoise: %u pop: %u gens: %u pMut: %.4f pCross: %.4f sigmaMult: %.4f\n", randomSeed, agLength, numLibraries, numInLibrary, agUniverseSize, agSamplingNoise, abSamplingNoise, populationSize, finalGeneration, pMutation, pCross, sigmaMultiplier); geneLength = agLength / numLibraries; libraryLength = numInLibrary * geneLength; individualLength = libraryLength * numLibraries; numAbStrings = ipow(numInLibrary, numLibraries); pMutation /= individualLength; seedRandom(randomSeed); if (agLength % numLibraries) error("numLibraries does not divide agLength"); if (populationSize % 2) error("populationSize must be even\n"); } /* }}} */ /* {{{ newAntigen() */ Antigen newAntigen(void) { Antigen ag; unsigned i; ag = malloc(agLength * sizeof(*ag)); if (ag == 0) error("Out of memory in newAntigen"); for (i = 0; i < agLength; i++) #define RANDOMALLELE #ifdef RANDOMALLELE ag[i] = randomAllele(); #else ag[i] = 1; #endif return ag; } /* }}} */ /* {{{ newAgUniverse() */ AgUniverse newAgUniverse(void) { AgUniverse a; unsigned i; a = malloc(sizeof(*a) * agUniverseSize); if (a == NULL) error("Out of memory in newAgUniverse"); for (i = 0; i < agUniverseSize; i++) a[i] = newAntigen(); return a; } /* }}} */ /* {{{ newIndividual */ Individual newIndividual(void) { Individual in; in = malloc(sizeof(*in)); if (in == 0) error("Out of memory in newIndividual"); in->bestMatch = -1; in->match = -99999.0; in->trueMatch = -99999.0; in->fitness = -99999.0; /* error catch */ in->genome = calloc(individualLength, sizeof (*(in->genome))); if (in->genome == 0) error("Out of memory in newIndividual"); in->pieceMatches = malloc(sizeof *(in->pieceMatches) * agUniverseSize * numLibraries * numInLibrary); return in; } /* }}} */ /* {{{ copyIndividual */ void copyIndividual(Individual from, Individual to) { bcopy(from->genome, to->genome, individualLength * sizeof(*(from->genome))); to->bestMatch = from->bestMatch; to->match = from->match; to->fitness = from->fitness; } /* }}} */ /* {{{ allocPopulation */ Population allocPopulation(void) { Population p; p = malloc(sizeof(*p)); if (p == NULL) error("Out of memory in allocPopulation"); p->individuals = malloc(sizeof (*(p->individuals)) * populationSize); if (p->individuals == NULL) error("Out of memory in allocPopulation"); return p; } /* }}} */ /* {{{ initPopulation() */ Population initPopulation(Population p) { unsigned i; for (i = 0; i < populationSize; i++) p->individuals[i] = newIndividual(); p->best = -1; p->worst = -1; p->avgMatch = -99999; p->avgFitness = -99999; p->sigma = -99999; p->avgTrueMatch = -99999; return p; } /* }}} */ /* {{{ printString() */ void printString(Allele * s, unsigned n) { int i; for (i = 0; i < n; i++) putchar(s[i] ? '1' : '0'); } /* }}} */ /* {{{ printAntigen() */ void printAntigen(Antigen ag) { printString(ag, agLength); } /* }}} */ /* {{{ printIndividual() */ void printIndividual(Individual in, AgUniverse agUniverse) { unsigned library, gene, ag; printf("My truematch: %5.5f Fitness: %5.5f Match: %5.5f Matches:", in->trueMatch, in->fitness, in->match); for (ag = 0; ag < agUniverseSize; ag++) printf(" %d", evalTrueMatch(in, agUniverse, ag)); printf("."); #define ANALYZE #ifdef ANALYZE analyzeIndividual(in, agUniverse); printf("."); #endif printf(" ["); for (library = 0; library < numLibraries; library++) { putchar('['); for (gene = 0; gene < numInLibrary; gene++) { printString(theGene(in->genome, library, gene), geneLength); if (gene != numInLibrary - 1) putchar(' '); } putchar(']'); } putchar(']'); } /* }}} */ /* {{{ printAgUniverse */ void printAgUniverse(AgUniverse agUniverse) { unsigned i; for (i = 0; i < agUniverseSize; i++) { printAntigen(agUniverse[i]); putchar('\n'); } } /* }}} */ /* {{{ printPopulation */ void printPopulation(Population pop, AgUniverse agUniverse, unsigned generation) { unsigned i; if ((generation == finalGeneration - 1) || (printStatsInterval && (generation % printStatsInterval == 0))) printf("Avg truematch: %5.5f Best: %5.5f Worst: %5.5f Sigma: %5.5f Avg match: %5.5f Mut: %u Xover: %u Gen: %d\n", pop->avgTrueMatch, pop->individuals[pop->best]->trueMatch, pop->individuals[pop->worst]->trueMatch, pop->sigma, pop->avgMatch, numMutations, numCrossover, generation); if (printBestInterval && (generation % printBestInterval == 0)) { printf("Best: "); printIndividual(pop->individuals[pop->best], agUniverse); printf(" Gen: %d\n", generation); } if (printWorstInterval && (generation % printWorstInterval == 0)) { printf("Worst: "); printIndividual(pop->individuals[pop->worst], agUniverse); printf(" Gen: %d\n", generation); } if ((printLastFlag && (generation == finalGeneration - 1)) || (printPopulationInterval && (generation % printPopulationInterval == 0))) { printf("Population dump for generation %d\n", generation); for (i = 0; i < populationSize; i++) { printIndividual(pop->individuals[i], agUniverse); putchar('\n'); } printf("\n"); } } /* }}} */ /* {{{ analyzeIndividual() */ /* for each antigen, show the best antibody that matches * (code borrowed from evalTrueMatch()) */ void analyzeIndividual(Individual in, AgUniverse agUniverse) { unsigned lib, ag, entry; printf(" Best per antigen: "); for (ag = 0; ag < agUniverseSize; ag++) { for (lib = 0; lib < numLibraries; lib++) { /* sum match over libs */ unsigned bestEntry; int bestEntryMatch = -1; for (entry = 0; entry < numInLibrary; entry++) { /* find best in each */ int entryMatch; entryMatch = pieceMatch(in, ag, lib, entry); if (entryMatch > bestEntryMatch) { bestEntryMatch = entryMatch; bestEntry = entry; } } if (bestEntryMatch == -1) error("calculation error in analyzeIndividual()"); printf("%u,", bestEntry); } printf(" "); } #ifdef PIECEANALYSIS printf(" Best pieces ("); for (lib = 0; lib < numLibraries; lib++) { printf(" lib %u:", lib); for (ag = 0; ag < agUniverseSize; ag++) { int bestEntry = -1; for (entry = 0; entry < numInLibrary; entry++) { int match = pieceMatch(in, ag, lib, entry); if (match > bestEntry) bestEntry = match; } printf(" %u:%u", ag, bestEntry); } } printf(")"); #endif } /* }}} */ /* {{{ evalPieceMatch() */ /* how much do two strings match by? */ unsigned evalPieceMatch(Allele * a, Allele * b, unsigned n) { unsigned i, match; match = 0; for (i = 0; i < n; i++) if (a[i] == b[i]) match++; return match; } /* }}} */ /* {{{ updateIndividualMatches */ /* for each piece of each genome, record how much it matches each antigen */ void updateIndividualMatches(Individual in, AgUniverse agUniverse) { unsigned ag, lib, entry; for (ag = 0; ag < agUniverseSize; ag++) for (lib = 0; lib < numLibraries; lib++) for (entry = 0; entry < numInLibrary; entry++) pieceMatch(in,ag,lib,entry) = evalPieceMatch(agUniverse[ag]+lib*geneLength*sizeof(*(agUniverse[ag])), theGene(in->genome, lib, entry), geneLength); } /* }}} */ /* {{{ evalOneMatch */ /* evaluate the match of one particular instance of an antibody * against one particular antigen. * "which" codes for a particular antibody as a base n system, * where n is the number in each library and the digits correspond to * which entry one chooses in each library. */ unsigned evalOneMatch(Individual in, unsigned which, unsigned ag) { unsigned library, match; match = 0; for (library = 0; library < numLibraries; library++) { unsigned entry = which % numInLibrary; unsigned thisMatch; thisMatch = pieceMatch(in, ag, library, entry); match += thisMatch; which /= numInLibrary; /* now shift */ } return match; } /* }}} */ /* {{{ evalTrueMatch() */ /* a particular individual match against an antigen * is the best of each antibody */ unsigned evalTrueMatch(Individual in, AgUniverse agUniverse, unsigned ag) { unsigned lib, match; match = 0; #ifdef DEBUGMATCHING printf("::"); #endif for (lib = 0; lib < numLibraries; lib++) { /* sum match over libs */ unsigned entry; int bestEntryMatch = -1; for (entry = 0; entry < numInLibrary; entry++) { /* find best in each */ int entryMatch; entryMatch = pieceMatch(in, ag, lib, entry); if (entryMatch > bestEntryMatch) bestEntryMatch = entryMatch; #ifdef DEBUGMATCHING printf("%u,%u,%u:%u ", ag,lib,entry,entryMatch); #endif } if (bestEntryMatch == -1) error("calculation error in evalTrueMatch()"); match += bestEntryMatch; } #ifdef DEBUGMATCHING printf("\n"); #endif return match; } /* }}} */ /* {{{ evalSampledMatch() */ unsigned evalSampledMatch(Individual in, AgUniverse agUniverse, unsigned ag, unsigned * sample) { int bestSampledMatch = -1; unsigned i, which; for (i = 0; i < abSamplingNoise; i++) { int aMatch; which = sample[i]; aMatch = evalOneMatch(in, which, ag); if (aMatch > bestSampledMatch) bestSampledMatch = aMatch; #undef SAMPLEDEBUG #ifdef SAMPLEDEBUG printf("::%u matches %u.\n", which, aMatch); #endif } return bestSampledMatch; } /* }}} */ /* {{{ evalAbMatch() */ /* match of an individual is the average of the matches of each * sampled from the entire agUniverse */ double evalAbMatch(Individual in, AgUniverse agUniverse) { unsigned ag, bestTrueMatch; double totalTrueMatch, totalSampledMatch; static unsigned * sample = 0; if (agSamplingNoise != 0) error("evalAbMatch only works for no sampling noise."); if (sample == 0) /* stupid dynamic memory */ sample = malloc(abSamplingNoise * sizeof(*sample)); updateIndividualMatches(in, agUniverse); totalTrueMatch = 0; bestTrueMatch = 0; /* calculate the total, best match */ for (ag = 0; ag < agUniverseSize; ag++) { unsigned trueMatch; trueMatch = evalTrueMatch(in, agUniverse, ag); totalTrueMatch += trueMatch; if (trueMatch > bestTrueMatch) bestTrueMatch = trueMatch; } in->bestMatch = bestTrueMatch; in->trueMatch = totalTrueMatch / agUniverseSize; /* now calculate the sampled-noise match and put it into in->match */ if (abSamplingNoise == 0) /* no noise, no problem */ in->match = in->trueMatch; else { unsigned i; for (i = 0; i < abSamplingNoise; i++) /* build the ample */ sample[i] = randomInt() % numAbStrings; totalSampledMatch = 0.0; for (ag = 0; ag < agUniverseSize; ag++) totalSampledMatch += evalSampledMatch(in, agUniverse, ag, sample); in->match = totalSampledMatch / agUniverseSize; } return in->match; } /* }}} */ /* {{{ calculatePopFitness() */ /* sigma scaling code. Maps the match of an individual to fitness. * match <= mean - sigmaMultiplier*sigma -> 0 (where mean is mean of matches, * match = mean -> 1 sigma is the SD) * match = mean + sigmaMultiplier*sigma -> 2 * * roughly, * fitness = match - (mean - sigmaMultiplier*sigma) / (sigmaMultiplier * sigma) */ double calculatePopFitness(Population pop, AgUniverse agUniverse) { unsigned i, best, worst; double avgMatch, totalMatch, totalMatchSquared, sigmaMatch;/* distribution */ double totalTrueMatch; double totalFitness; /* find distribution of matches */ totalMatch = 0.0; totalMatchSquared = 0.0; totalTrueMatch = 0.0; best = 0; worst = 0; for (i = 0; i < populationSize; i++) { double match, trueMatch;; match = evalAbMatch(pop->individuals[i], agUniverse); totalMatch += match; totalMatchSquared += match*match; trueMatch = pop->individuals[i]->trueMatch; /* set in evalAbMatch */ totalTrueMatch += trueMatch; if (pop->individuals[best]->trueMatch < trueMatch) best = i; if (pop->individuals[worst]->trueMatch > trueMatch) worst = i; } avgMatch = totalMatch / populationSize; sigmaMatch = sqrt((totalMatchSquared / populationSize) - (avgMatch * avgMatch)); /* now do sigma scaling and truncation */ totalFitness = 0.0; for (i = 0; i < populationSize; i++) { if (sigmaMatch > 0) { double t; t = pop->individuals[i]->match - (avgMatch - sigmaMultiplier * sigmaMatch); if (t < 0.0) /* truncate at 0 */ t = 0.0; pop->individuals[i]->fitness = t / (sigmaMultiplier * sigmaMatch); } else pop->individuals[i]->fitness = 1.0; totalFitness += pop->individuals[i]->fitness; } /* record what we know */ pop->avgMatch = avgMatch; pop->best = best; pop->worst = worst; pop->sigma = sigmaMatch; pop->avgFitness = totalFitness / populationSize; pop->avgTrueMatch = totalTrueMatch / populationSize; return totalFitness; } /* }}} */ /* {{{ rouletteSelect() */ /* roulette wheel selection: returns index of that selected. */ unsigned rouletteSelect(double totalFitness, Population pop) { unsigned where; double stopHere, wheelSum; where = 0; wheelSum = 0.0; stopHere = (randomDouble() * totalFitness); wheelSum = pop->individuals[0]->fitness; /* prime the loop */ while (wheelSum < stopHere) { where++; wheelSum += pop->individuals[where]->fitness; } return where; } /* }}} */ /* {{{ mutateIndividual() */ void mutateIndividual(Individual in) { unsigned i; for (i = 0; i < individualLength; i++) if (randomDouble() < pMutation) { numMutations++; in->genome[i] = (in->genome[i] + 1) % 2; /* flip bit */ } } /* }}} */ /* {{{ makeKids() */ /* makeKids: given two individuals and a population, fill in two * individuals in that population (pop[which] and pop[which+1]) */ void makeKids(Population pop, unsigned which, Individual p1, Individual p2) { if (randomDouble() < pCross) { unsigned crossPoint; crossPoint = randomInt() % (individualLength - 1) + 1; bcopy(p1->genome, pop->individuals[which]->genome, crossPoint * sizeof(*(p1->genome))); bcopy(p2->genome+crossPoint, pop->individuals[which]->genome+crossPoint, (individualLength - crossPoint) * sizeof(*(p1->genome))); bcopy(p2->genome, pop->individuals[which+1]->genome, crossPoint * sizeof(*(p2->genome))); bcopy(p1->genome+crossPoint, pop->individuals[which+1]->genome+crossPoint, (individualLength - crossPoint) * sizeof(*(p2->genome))); numCrossover++; #undef PRINTCROSS #ifdef PRINTCROSS printf("Crossover at %d\n", crossPoint); printIndividual(p1); printf("\n"); printIndividual(p2); printf("\n"); printIndividual(pop[which]); printf("\n"); printIndividual(pop[which+1]); printf("\n"); #endif } else { copyIndividual(p1, pop->individuals[which]); copyIndividual(p2, pop->individuals[which+1]); } mutateIndividual(pop->individuals[which]); mutateIndividual(pop->individuals[which+1]); } /* }}} */ int main(int argc, char ** argv) { unsigned generation; AgUniverse agUniverse; Population pop0, pop1, oldPop, newPop; unsigned whichPop; unsigned i; #ifdef MALLOCDEBUG old_malloc_hook = __malloc_hook; /* malloc debugging */ __malloc_hook = my_malloc_hook; #endif MALLOCDEBUG setParams(argc, argv); agUniverse = newAgUniverse(); printAgUniverse(agUniverse); pop0 = initPopulation(allocPopulation()); pop1 = initPopulation(allocPopulation()); whichPop = 0; for (generation = 0; generation < finalGeneration; generation++) { double totalFitness; if (whichPop == 0) { oldPop = pop0; newPop = pop1; } else { oldPop = pop1; newPop = pop0; } /* calculate fitness */ totalFitness = calculatePopFitness(oldPop, agUniverse); printPopulation(oldPop, agUniverse, generation); /* generate new generation */ for (i = 0; i < populationSize; i+=2) { unsigned parent1, parent2; parent1 = rouletteSelect(totalFitness, oldPop); parent2 = rouletteSelect(totalFitness, oldPop); makeKids(newPop, i, oldPop->individuals[parent1], oldPop->individuals[parent2]); } whichPop = (whichPop + 1) % 2; /* swap which one */ } return 0; }