/****************************************************************************
 * C implementation of the Cascade-Correlation learning algorithm.
 *
 * Written by: R. Scott Crowder, III
 *             School of Computer Science
 *             Carnegie Mellon University
 *             Pittsburgh, PA 15213-3890
 *
 *             Phone:    (412) 268-8139
 *             Internet: rsc@cs.cmu.edu
 *
 * This code has been placed in the public domain by the author.  As a
 * matter of simple courtesy, anyone using or adapting this code is
 * expected to acknowledge the source.  The author would like to hear
 * about any attempts to use this system, successful or not.
 *
 * This code is a port to C from the original Common Lisp implementation
 * written by Scott E. Fahlman.  (Version dated June 1 1990.)
 *
 * For an explanation of this algorithm and some results, see "The
 * Cascade-Correlation Learning Architecture" by Scott E. Fahlman and
 * Christian Lebiere in D. S. Touretzky (ed.), "Advances in Neural
 * Information Processing Systems 2", Morgan Kaufmann, 1990.  A somewhat
 * longer version is available as CMU Computer Science Tech Report
 * CMU-CS-90-100.  Instructions for FTPing this report are given at the
 * end of this file.
 *
 * An example of the network set-up file is provided at the bottom of
 * this file.
 *
 * This code has been successfully compiled on the following machines:
 *
 *   DECstation 3100 using the MIPS compiler version 1.31
 *   Sun 4 using the gcc compiler version 1.23
 *   IBM PC-RT using the cc compiler
 *   IBM RS6000 (Model 520) using the xlc compiler
 *   386 machine using the Turbo C 2.0 compiler
 *
 * The implementation conforms to the ANSI C standard.  Some
 * machine-specific preprocessor commands are required; it is assumed
 * that your system will provide the appropriate preprocessor arguments.
 ****************************************************************************/

/****************************************************************************/
/* Change Log                                                               */
/****************************************************************************/
/****************************************************************************
 * Changes from Release 1 dated Jun-12-90 to Version 1.14 Jul-18-90
 *
 *  bug fix in TYPE_CONVERT.  Thanks to Michael Witbrock for the 1st report.
 *  bug fix in BUILD_NET.  Thanks to Michael Witbrock for the 1st report.
 *  bug fix in GET_ARRAY_ELEMENT.  Thanks to Ken Lang.
 *  bug fix in COMPUTE_CORRELATIONS.  Thanks to Eric Melz.
 *  bug fix in ADJUST_CORRELATIONS.  Thanks to Chris Lebiere.
 *  bug fix in COMPUTE_SLOPES.  Thanks to Chris Lebiere.
 *  removed 2nd call to INIT_GLOBALS.  Thanks to Dimitris Michailidis.
 *  Added UnitType ASYMSIGMOID for users who like sigmoids to go from 0-1;
 *   all learning utility functions changed with this addition.
 *  Added command-line argument option; type 'cascor1 help' for usage info.
 *  Added .net file and on-the-fly parameter adjustment code.  See the new
 *   sample files at the end of this listing for examples.  Functions
 *   main and GET_NETWORK_CONFIGURATION have changed completely.
 *  GET_USER_INPUT replaced by Y_OR_N_P.
 *  <signal.h> included to support on-the-fly parameter updating.
 ****************************************************************************/
/****************************************************************************
 * Changes from Version 1.15 Jul-18-90 to 1.16 Oct-24-90
 *
 *  bug fix in BUILD_NETWORK, INSTALL_NEW_UNIT, and TRAIN to allow
 *   NTestPatterns > NTrainingPatterns.  Thanks to William Stevenson.
 ****************************************************************************/
/****************************************************************************
 * Changes from Version 1.16 Oct-24-90 to 1.17 Nov-12-90
 *
 *  bug fix in TRAIN line 1662: change NtrainingPatterns to
 *   NTrainingPatterns.  Thanks to Merrill Flood for pointing out the
 *   problem.
 ****************************************************************************/
/****************************************************************************
 * Changes from Version 1.17 Nov-12-90 to 1.30 Jan-23-91
 *
 *  Added code to allow the user to save the weights into and load weights
 *   from external files.
 *  Added code to allow saving of .net files to save any changes to
 *   parameters made during interactive learning trials.
 *  Added an alternative main routine that can be used to calculate
 *   predictions using a previously saved set of weights.  To activate
 *   this feature, compile the code with the symbol PREDICT_ONLY defined.
 *  Added code to allow '# comment' lines in the training or test sets.
 *  Added optional code to calculate the number of multiply-accumulates
 *   used during training.  This is useful for comparing
 *   Cascade-Correlation to other learning algorithms in a
 *   machine-independent manner.  To activate this feature, define the
 *   symbol CONNX at compile time.
 *  Added code to calculate the Lapedes and Farber ErrorIndex.  Useful for
 *   problems with real-valued outputs.
 *  Added UnitType VARSIGMOID, which can have an arbitrary output range
 *   defined by SigmoidMin and SigmoidMax.  Thanks to Dimitris
 *   Michailidis.
 *  Added code to allow the training and test data to be read from
 *   separate files.  Thanks to Carlos Puchol.
 *  Added code to save SumError for each output instead of combining it
 *   together for all outputs.  This change helps for multiple-output
 *   problems.  Thanks to Scott Fahlman.
 *  Added code to allow specification of a NonRandomSeed for the random
 *   number generator.  Thanks to Dimitris Michailidis.
 *  Removed useless setting of Ninputs and Noutputs from BUILD_NET.
 *   Thanks to Dimitris Michailidis.
 ****************************************************************************/
/****************************************************************************
 * Changes from Version 1.30 Jan-23-91 to 1.31 Jan-25-91
 *
 *  Fixed typo: include <string.h>, not <strings.h>.  Thanks to Peter
 *   Hancock.
 *
 * Changes from Version 1.31 Jan-25-91 to 1.32 Mar-21-91
 *
 *  BUG FIX in INIT_NET.  Thanks to Boris Gokhman.
 *  BUG FIX in TEST_EPOCH.  Thanks to Boris Gokhman.
 *
 * Changes from Version 1.32 Mar-21-91 to 1.33 Apr-16-92
 *
 *  Prototype correction for strtok.  Thanks to Brian Ripley.
 ****************************************************************************/

/* Header names were lost in transcription; restored here to match the
   library calls used below. */
#include <stdio.h>
#include <math.h>
#include <signal.h>
#include <string.h>
#include <ctype.h>
#include <time.h>

#define VERSION  1.33
#define REL_DATE "Apr-16-92"

/* some stuff to make C code a little more readable */
typedef int BOOLEAN;

#ifdef mips
/* Pmax compiler is almost ANSI, it just doesn't understand 'void *' */
typedef char *VOIDP;
#else
typedef void *VOIDP;
#endif

/*****************************************************************************/
/* Parameter table contains all user-settable parameters.  It is used by the */
/* input routines to change the values of the named parameters.  The list of */
/* parameters must be sorted alphabetically so that the search routine will  */
/* work.  Parameter names are used as keywords in the search.  Keywords are  */
/* lower case to speed the comparison process.                               */
/*****************************************************************************/
struct parmentry {
  char *keyword;  /* variable name in lower case          */
  int vartype;    /* can be INT, FLOAT, or ENUM           */
  VOIDP varptr;   /* cast to correct type before use      */
};
typedef struct parmentry PARMS;

/* general symbols */
#define TRUE    1
#define FALSE   0
#define BOMB    -99
#define LINELEN 80
#define EOL     '\0'

/* switches used in the interface routines */
#define INT   0
#define FLOAT 1
#define ENUM  2
/* integer values that use #defines */
#define BOOLE 3
#define GETTRAINING 4
#define GETTEST     5
#define GO          6
#define INT_NO      7   /* parameters only good in netfile      */
#define FLOAT_NO    8   /* most are used in memory allocation   */
#define ENUM_NO     9   /* and cannot be changed mid-simulation */
#define BOOLE_NO    10
#define VALUE       11
#define GETTRAININGFILE 12
#define GETTESTFILE     13
#define SAVE            14
#define INITFILE        15
#define NEXTLINE 0
#define FAILURE -1

/* switch constants */
#define SIGMOID     0
#define GAUSSIAN    1
#define LINEAR      2
#define ASYMSIGMOID 3
#define VARSIGMOID  4

#define WIN      20
#define STAGNANT 21
#define TIMEOUT  22
#define LOSE     23

#define BITS  30
#define INDEX 31

#define FATAL 0
#define WARN  1

/* Allocate global storage */

/***********************************************************************/
/* Assorted parameters.                                                */
/* These parameters and switches control the quickprop learning        */
/* algorithm used to train the output weights.                         */
/***********************************************************************/
int UnitType;       /* hidden unit type; can be SIGMOID or GAUSSIAN     */
int OutputType;     /* output unit type; can be SIGMOID or LINEAR       */
float SigmoidMax;   /* Maximum output value for sigmoid units.  Used    */
                    /* to alter sigmoid range without having to edit    */
                    /* training values.  Use the symbols "min" and      */
                    /* "max" in the input file.  The input routines     */
                    /* will translate to the appropriate float values.  */
float SigmoidMin;   /* Minimum output value for sigmoid units.          */
float WeightRange;  /* Random-init weights in range [-WR,+WR]           */
float SigmoidPrimeOffset;  /* Add to sigmoid-prime to kill flat spots   */
float WeightMultiplier;    /* Scale candidate correlation to get init weight */
float OutputMu;            /* Mu used to quickprop-train output weights.     */
float OutputShrinkFactor;  /* Used in computing whether the proposed step is */
                           /* too large.  Related to OutputMu.               */
float OutputEpsilon;       /* Controls the amount of linear gradient descent */
                           /* to use in updating output weights.             */
float OutputDecay;         /* This factor times the current weight is added  */
                           /* to the slope at the start of each output epoch.*/
                           /* Keeps weights from growing too big.            */
int OutputPatience;        /* If we go for this many epochs with no real     */
                           /* change, it's time to stop tuning.  If 0, go on */
                           /* forever.                                       */
float OutputChangeThreshold;  /* The error must change by at least this      */
                              /* fraction of its old value to count as a     */
                              /* significant change.                         */
float InputMu;             /* Mu used to quickprop-train input weights.      */
float InputShrinkFactor;   /* Used in computing whether the proposed step is */
                           /* too large.  Related to InputMu.                */
float InputEpsilon;        /* Controls the amount of linear gradient descent */
                           /* to use in updating input weights.              */
float InputDecay;          /* This factor times the current weight is added  */
                           /* to the slope at the start of each input epoch. */
                           /* Keeps weights from growing too big.            */
int InputPatience;         /* If we go for this many epochs with no real     */
                           /* change, it's time to stop tuning.  If 0, go on */
                           /* forever.                                       */
float InputChangeThreshold;   /* The error must change by at least this      */
                              /* fraction of its old value to count as a     */
                              /* significant change.                         */
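/* Illustrative summary, not part of the original comments: for each weight,
 * quickprop combines a linear gradient term with a quadratic step computed
 * from the current slope S, the previous slope P, and the previous step D:
 *
 *     step  = -Epsilon * S            (gradient term; the only term when
 *                                      D == 0, added in when S and D have
 *                                      opposite signs)
 *     step += Mu * D                  (maximum step, when S is within
 *                                      ShrinkFactor of P)
 *     step += D * S / (P - S)         (quadratic estimate, otherwise)
 *
 * QUICKPROP_UPDATE near the end of this file implements this rule, folding
 * Decay * weight into S first.  The Output... and Input... parameters above
 * supply these constants for the output-training and candidate-training
 * phases respectively. */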
/***********************************************************************/
/* Variables related to error and correlation.                         */
/***********************************************************************/
float TrueError;       /* Total output error for one epoch                 */
float ScoreThreshold;  /* This close to desired value => bit is correct    */
int ErrorBits;         /* Total # bits in epoch that were wrong            */
float *SumErrors;      /* Accumulate the sum of the error values used in   */
                       /* the correlation phase.  Sum is stored separately */
                       /* for each output.  Values are converted to        */
                       /* average errors before use in ADJUST_CORRELATION. */
float *DummySumErrors; /* Replace SumErrors with this for test epochs.     */
float SumSqError;      /* Accumulate the sum of the square of the error    */
                       /* values used in the correlation phase.            */
float BestCandidateScore;  /* Best correlation score of all candidate units. */
int BestCandidate;         /* Index of the candidate unit with best score.   */

/***********************************************************************/
/* These variables and switches control the simulation and display.    */
/***********************************************************************/
BOOLEAN UseCache;      /* If TRUE, cache the forward-pass values instead  */
                       /* of repeatedly computing them.                   */
int Epoch;             /* Current epoch number                            */
BOOLEAN Graphics;      /* If TRUE, print progress after each epoch.       */
BOOLEAN NonRandomSeed; /* TRUE => use 1 as the seed for the random        */
                       /* number generator.  Useful when comparing        */
                       /* different parameter settings.  FALSE => use     */
                       /* the system clock to start the random sequence.  */
BOOLEAN Test;          /* If TRUE, run a test epoch and print the result  */
                       /* after each round of output tuning.              */
BOOLEAN SinglePass;    /* TRUE => Pause after forward/backward cycle      */
BOOLEAN SingleEpoch;   /* TRUE => Pause after each training epoch         */
BOOLEAN Step;          /* Turned briefly to TRUE after each pause         */
int Trial;             /* Current trial number, used in log outputs       */

/***********************************************************************/
/* The sets of training inputs and outputs.                            */
/***********************************************************************/
int NTrainingPatterns;  /* !! Not in Lisp version.  Needed here. */
int NTestPatterns;      /* !! Not in Lisp version.  Needed here. */
float **TrainingInputs;
float **TrainingOutputs;
float *Goal;        /* Goal vector for the current training or     */
                    /* testing case.                               */
char *T_T_files;    /* Pointer to Training or Test filenames in    */
                    /* the input line, updated by PROCESS_LINE     */
                    /* each time the user needs a file for input   */
                    /* of training or test data.                   */

/***************************************************************************/
/* For some benchmarks there is a separate set of values used for testing  */
/* the network's ability to generalize.  These values are not used during  */
/* training.                                                               */
/***************************************************************************/
float **TestInputs;
float **TestOutputs;
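/* Illustrative note, not part of the original comments: when UseCache is
 * TRUE, each training case keeps its own activation and error vectors (the
 * ValuesCache and ErrorsCache arrays declared below), so an epoch can point
 * Values at the cached vector for the current case instead of redoing the
 * full forward pass; ExtraValues and ExtraErrors serve as the fallback
 * vectors when the cache is disabled. */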
/***************************************************************************/
/*                                                                         */
/* Fundamental data structures.                                            */
/*                                                                         */
/* Unit outputs and weights are floats.                                    */
/*                                                                         */
/* Instead of representing each unit by a structure, we represent the      */
/* unit by an int.  This is used to index into various arrays that hold    */
/* per-unit information, such as the activation value of each unit.        */
/*                                                                         */
/* Per-connection information for each connection COMING INTO a unit is    */
/* stored in an array of arrays.  The outer array is indexed by the unit   */
/* number, and the inner array is then indexed by connection number.       */
/*                                                                         */
/* Unit 0 is always at a maximum-on value.  Connections from this unit     */
/* supply a bias.  Next come some input units, then some hidden units.     */
/*                                                                         */
/* Output units have their own separate set of data structures, as do      */
/* candidate units whose inputs are currently being trained.               */
/***************************************************************************/
int MaxUnits;     /* Maximum number of input and hidden units in    */
                  /* the network.                                   */
int Nunits;       /* Total number of active units in net            */
int Ninputs;      /* Number of input units                          */
int Noutputs;     /* Number of output units                         */
int Ncandidates;  /* Number of candidate units trained at once.     */
int MaxCases;     /* Maximum number of training cases that can be   */
                  /* accommodated by the current data structures.   */
int Ncases;       /* Number of training cases currently in use.     */
                  /* Assume a contiguous block beginning with       */
                  /* FirstCase.                                     */
int FirstCase;    /* Address of the first training case in the      */
                  /* currently active set.  Usually zero, but may   */
                  /* differ if we are training on different chunks  */
                  /* of the training set at different times.        */

/***************************************************************************/
/* The following vectors hold values related to hidden units in the active */
/* net and their input weights.                                            */
/***************************************************************************/
float *Values;        /* Current activation value for each unit          */
float **ValuesCache;  /* Holds a distinct Values array for each of the   */
                      /* MaxCases training cases.                        */
float *ExtraValues;   /* Extra Values vector to use when no cache.       */
int *Nconnections;    /* # of INCOMING connections per unit              */
int **Connections;    /* C[i][j] lists jth unit projecting to unit i     */
float **Weights;      /* W[i][j] holds weight of C[i][j]                 */

/***************************************************************************/
/* The following arrays of arrays hold values for the outputs of the       */
/* active network and the output-side weights.                             */
/***************************************************************************/
float *Outputs;           /* Network output values                          */
float *Errors;            /* Final error value for each unit                */
float **ErrorsCache;      /* Holds a distinct Errors array for each of the  */
                          /* MaxCases training cases.                       */
float *ExtraErrors;       /* Extra Errors vector to use when no cache.      */
float **OutputWeights;    /* OW[i][j] holds the weight from hidden unit i   */
                          /* to output unit j                               */
float **OutputDeltas;     /* Change between previous OW and current one     */
float **OutputSlopes;     /* Partial derivative of TotalError wrt OW[i][j]  */
float **OutputPrevSlopes; /* Previous value of OutputSlopes[i][j]           */

/***************************************************************************/
/* The following arrays have one entry for each candidate unit in the      */
/* pool of trainees.                                                       */
/***************************************************************************/
float *CandValues;      /* Current output value of each candidate unit.   */
float *CandSumValues;   /* Output value of each candidate unit, summed    */
                        /* over an entire training set.                   */
float **CandCor;        /* Correlation between unit & residual error at   */
                        /* each output, computed over a whole epoch.      */
float **CandPrevCor;    /* Holds the CandCor values from last epoch.      */
float **CandWeights;    /* Current input weights for each candidate unit. */
float **CandDeltas;     /* Input weight deltas for each candidate unit.   */
float **CandSlopes;     /* Input weight slopes for each candidate unit.   */
float **CandPrevSlopes; /* Holds the previous values of CandSlopes.       */

/***************************************************************************/
/* This saves memory if each candidate unit receives a connection from     */
/* each existing unit and input.  That's always true at present, but may   */
/* not be in future.                                                       */
/***************************************************************************/
int *AllConnections;  /* A standard connection that connects a unit to  */
                      /* all previous units, in order, but not to the   */
                      /* bias unit.                                     */

/***************************************************************************/
/* ErrorIndex-specific globals.  Not in release Lisp version.              */
/***************************************************************************/
int NtrainingOutputValues;  /* Number of outputs in the training set.    */
int NtestOutputValues;      /* Number of outputs in the test set.        */
float TrainingStdDev;       /* Std dev of entire training set.  Used to  */
                            /* normalize the ErrorIndex.                 */
float TestStdDev;
float ErrorIndex;           /* Normalized error function for continuous  */
                            /* output training sets.                     */
float ErrorIndexThreshold;  /* Stop training when ErrorIndex is < EIT.   */
int ErrorMeasure;           /* Set to BITS to use ErrorBits to stop,     */
                            /* or INDEX to use ErrorIndex to stop.       */
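/* Illustrative note, not part of the original comments: the Lapedes-Farber
 * style error index normalizes root-mean-square output error by the
 * standard deviation of the target values, so continuous-output problems
 * can be compared across data sets of different scales.  In terms of the
 * globals above, roughly:
 *
 *     ErrorIndex = sqrt(<mean squared output error>) / TrainingStdDev
 *
 * The program's own ERROR_INDEX and STANDARD_DEV routines (prototyped
 * below) compute this. */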
/***************************************************************************/
/* Save and plot file related variables                                    */
/***************************************************************************/
BOOLEAN DumpWeights;           /* Are we dumping weights into a file?    */
char DumpFileRoot[LINELEN+1];  /* Root of the names for the files        */
FILE *WeightFile;              /* Contains weights from the current net. */
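/* Illustrative only, not part of the original program: a minimal sketch of
 * how the per-unit arrays above are traversed.  The weighted input sum of
 * unit j is accumulated over the units listed in Connections[j]; the real
 * COMPUTE_UNIT_VALUE (prototyped below) does this and then applies the
 * ACTIVATION function.  The helper name here is hypothetical. */
static float incoming_sum_sketch(int j)
{
  int i;
  float sum = 0.0;

  for(i = 0; i < Nconnections[j]; i++)   /* each connection INTO unit j */
    sum += Weights[j][i] * Values[Connections[j][i]];
  return (sum);
}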
/*********************************************************************/
/* Keyword table used for updating the simulation parameters without */
/* recompilation.                                                    */
/*********************************************************************/
PARMS ParmTable[] = {
  {"errorindexthreshold",   FLOAT,       (VOIDP)&ErrorIndexThreshold},
  {"errormeasure",          ENUM_NO,     (VOIDP)&ErrorMeasure},
  {"go",                    GO,          (VOIDP)NULL},  /* special keyword */
  {"graphics",              BOOLE,       (VOIDP)&Graphics},
  {"inputchangethreshold",  FLOAT,       (VOIDP)&InputChangeThreshold},
  {"inputdecay",            FLOAT,       (VOIDP)&InputDecay},
  {"inputepsilon",          FLOAT,       (VOIDP)&InputEpsilon},
  {"inputmu",               FLOAT,       (VOIDP)&InputMu},
  {"inputpatience",         INT,         (VOIDP)&InputPatience},
  {"maxunits",              INT_NO,      (VOIDP)&MaxUnits},
  {"ncandidates",           INT_NO,      (VOIDP)&Ncandidates},
  {"ninputs",               INT_NO,      (VOIDP)&Ninputs},
  {"nonrandomseed",         BOOLE,       (VOIDP)&NonRandomSeed},
  {"noutputs",              INT_NO,      (VOIDP)&Noutputs},
  {"ntestpatterns",         INT_NO,      (VOIDP)&NTestPatterns},
  {"ntrainingpatterns",     INT_NO,      (VOIDP)&NTrainingPatterns},
  {"outputchangethreshold", FLOAT,       (VOIDP)&OutputChangeThreshold},
  {"outputdecay",           FLOAT,       (VOIDP)&OutputDecay},
  {"outputepsilon",         FLOAT,       (VOIDP)&OutputEpsilon},
  {"outputmu",              FLOAT,       (VOIDP)&OutputMu},
  {"outputpatience",        INT,         (VOIDP)&OutputPatience},
  {"outputtype",            ENUM,        (VOIDP)&OutputType},
  {"quit",                  BOMB,        (VOIDP)NULL},  /* special keyword */
  {"save",                  SAVE,        (VOIDP)NULL},  /* special keyword */
  {"scorethreshold",        FLOAT,       (VOIDP)&ScoreThreshold},
  {"sigmoidmax",            FLOAT_NO,    (VOIDP)&SigmoidMax},
  {"sigmoidmin",            FLOAT_NO,    (VOIDP)&SigmoidMin},
  {"sigmoidprimeoffset",    FLOAT,       (VOIDP)&SigmoidPrimeOffset},
  {"singleepoch",           BOOLE,       (VOIDP)&SingleEpoch},
  {"singlepass",            BOOLE,       (VOIDP)&SinglePass},
  {"test",                  BOOLE,       (VOIDP)&Test},
  {"testing",               GETTEST,     (VOIDP)NULL},  /* special keyword */
  {"training",              GETTRAINING, (VOIDP)NULL},  /* special keyword */
  {"unittype",              ENUM_NO,     (VOIDP)&UnitType},
  {"usecache",              BOOLE_NO,    (VOIDP)&UseCache},
  {"values",                VALUE,       (VOIDP)NULL},  /* special keyword */
  {"weightfile",            INITFILE,    (VOIDP)NULL},  /* special keyword */
  {"weightmultiplier",      FLOAT,       (VOIDP)&WeightMultiplier},
  {"weightrange",           FLOAT,       (VOIDP)&WeightRange}
};

int Nparameters =   /* Number of entries in ParmTable */
  sizeof(ParmTable)/sizeof(PARMS);

BOOLEAN InterruptPending;  /* TRUE => user has pressed Control-C */
char ErrMsg[1025];         /* general error message buffer       */

/******************** end of global storage allocation **********************/

#ifdef CONNX
long conx;
#endif
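/* Illustrative only, not part of the original program: the alphabetical
 * ordering required of ParmTable exists so the keyword lookup can
 * binary-search the table.  A minimal sketch of such a search; the
 * program's actual FIND_KEY is defined later, and this helper name is
 * hypothetical. */
static int find_key_sketch(char *searchkey)
{
  int lo = 0, hi = Nparameters - 1, mid, cmp;

  while(lo <= hi){
    mid = (lo + hi) / 2;
    cmp = strcmp(searchkey, ParmTable[mid].keyword);
    if(cmp == 0) return (mid);        /* keyword found at index mid */
    else if(cmp < 0) hi = mid - 1;    /* search the lower half      */
    else lo = mid + 1;                /* search the upper half      */
  }
  return (FAILURE);                   /* keyword not in the table   */
}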
/***********************************************************************/
/*                                                                     */
/* function prototypes (ANSI format)                                   */
/*                                                                     */
/***********************************************************************/

/************
 * main routines, mostly C specific
 *************/
void GET_NETWORK_CONFIGURATION(char *fname);
VOIDP GET_ARRAY_MEM(unsigned elt_count, unsigned elt_size, char *fun_name);
void ERROR(int type, char *message);

/************
 * learning utilities
 *************/
float ACTIVATION(float sum);
float ACTIVATION_PRIME(float value, float sum);
float OUTPUT_FUNCTION(float sum);
float OUTPUT_PRIME(float out);

/************
 * Network-building utilities.
 *************/
void BUILD_NET(void);
float RANDOM_WEIGHT(void);
void INIT_NET(void);

/************
 * Interface utilities
 *************/
int FIND_KEY(char *searchkey);
void PRINT_VALUE(int k);
void LIST_ALL_VALUES(void);
void PROMPT_FOR_VALUE(int k);
void GET_TRAINING_DATA(FILE *infile);
void GET_TEST_DATA(FILE *infile);
void GET_TEST_DATA_FILE(void);
void GET_TRAINING_DATA_FILE(void);
void add_extension(char *fname, char *ext);
int PROCESS_LINE(char *line);
void strdncase(char *s);
BOOLEAN Y_OR_N_P(char *prompt);
void INTERACTIVE_PARM_UPDATE(void);
char *TYPE_STRING(int var);
char *BOOLE_STRING(int var);
int TYPE_CONVERT(char *input);
void CHECK_INTERRUPT(void);
void TRAP_CONTROL_C(int sig);

/************
 * Parameter setting function.
 *************/
void INITIALIZE_GLOBALS(void);

/************
 * Candidate training and selecting utilities
 *************/
void INIT_CANDIDATES(void);
void INSTALL_NEW_UNIT(void);
void COMPUTE_CORRELATIONS(void);
void ADJUST_CORRELATIONS(void);
void COMPUTE_SLOPES(void);
void UPDATE_INPUT_WEIGHTS(void);

/************
 * outer training loop
 *************/
void LIST_PARAMETERS(void);
int TRAIN(int outlimit, int inlimit, int rounds, BOOLEAN interact);
void TEST_EPOCH(float test_threshold);
void PRINT_SUMMARY(void);
void OUT_PASS_USER_INTERFACE(void);
void OUT_EPOCH_USER_INTERFACE(void);
void IN_EPOCH_USER_INTERFACE(void);
void OUT_EPOCH_OUTPUT(void);
void IN_EPOCH_OUTPUT(void);
void OUT_PASS_OUTPUT(void);

/************
 * quickprop routine
 *************/
void QUICKPROP_UPDATE(int i, float weights[], float deltas[], float slopes[],
                      float prevs[], float epsilon, float decay, float mu,
                      float shrink_factor);

/************
 * training functions
 *************/
void SETUP_INPUTS(float input[]);
void OUTPUT_FORWARD_PASS(void);
void COMPUTE_UNIT_VALUE(int j);
void FULL_FORWARD_PASS(float input[]);
void COMPUTE_ERRORS(float goal[], BOOLEAN output_slopesp, BOOLEAN statsp);
void UPDATE_OUTPUT_WEIGHTS(void);
void TRAIN_OUTPUTS_EPOCH(void);
int TRAIN_OUTPUTS(int max_epochs);

/************
 * candidate train functions
 *************/
void TRAIN_INPUTS_EPOCH(void);
void CORRELATIONS_EPOCH(void);
int TRAIN_INPUTS(int max_epochs);

/************
 * ErrorIndex routines
 *************/
float ERROR_INDEX(float std_dev, int num);
float STANDARD_DEV(float **outputs, int npatterns, int nvalues);

void INTERACT_SAVE_FILES(void);
void SAVE_NET_FILE(void);
void GET_WEIGHTS(char *realfname);
void INTERACT_GET_WEIGHTS(void);
void DUMP_WEIGHTS(FILE *fout);
void SAVE_ALL_PARMS(FILE *fout);
void SAVE_PARM_VALUE(FILE *fout, int k);
void SAVE_TRAINING_SET(FILE *fout);
void SAVE_TEST_SET(FILE *fout);
void DUMP_PARMS(FILE *fout);
void WRITE_NET_OUTPUT(void);
void WRITE_UNIT_OUTPUT(void);
void INTERACT_DUMP_WEIGHTS(void);
void INIT_DUMP_FILES(char *fname);
void SETUP_DUMP_FILES(void);

/* function prototypes from <stdlib.h> */
extern VOIDP calloc(unsigned elt_count, unsigned elt_size);

#ifdef __STDC__   /* compiler does conform to the standard */
extern double atof(const char *s);
extern char *strtok(char *s, const char *set);
#else             /* compiler doesn't conform to the standard */
extern double atof();
extern char *strtok();
#endif

#ifndef INT_MAX
#define INT_MAX 32767
#endif

/****************** end of prototypes ****************************/

#ifndef PREDICT_ONLY
main(int argc, char *argv[])
{
  int inlim, outlim, rounds, trials;
  int nhidden;   /* number of hidden units used in run */
  int vics, defs, i;
  long total_epochs, total_units, total_trials;
  long min_units, max_units, min_epochs, max_epochs;
  char fname[LINELEN+1];
  BOOLEAN interact = FALSE;
  /***************/

  if((argc != 1) && (argc != 6)){   /* wrong number of args */
    printf("Usage: cascor NetFile InEpochs OutEpochs NewUnits Trials\n");
    printf(" or cascor\n");
    return;
  }
  else if(argc == 1)
    interact = TRUE;

  INITIALIZE_GLOBALS();

  /* initialize testing parms */
  total_epochs = 0;  total_units = 0;
  min_units = INT_MAX;  min_epochs = INT_MAX;
  max_units = 0;  max_epochs = 0;
  total_trials = 0;  vics = 0;  defs = 0;

  /* Get network */
  if(interact){
    printf("Enter name of network: ");
    scanf("%s", fname);
  }
  else
    strcpy(fname, argv[1]);
  GET_NETWORK_CONFIGURATION(fname);

  /* initialize the random number generator before initializing the network */
  if(NonRandomSeed)       /* Does user want a fixed sequence? */
    srand(1);             /* Use a fixed starting point       */
  else
    srand(time(NULL));    /* Use a random starting point      */
  INIT_NET();

  /* Start the main processing loop */
  do {
    if(interact){
      printf("Number of epochs to train inputs: ");
      scanf("%d", &inlim);
      printf("Number of epochs to train outputs: ");
      scanf("%d", &outlim);
      printf("Maximum number of new units: ");
      scanf("%d", &rounds);
      printf("Trials for this problem: ");
      scanf("%d", &trials);
      if(Y_OR_N_P("Change some parameters?"))
        INTERACTIVE_PARM_UPDATE();
    }
    else{
      inlim  = atoi(argv[2]);
      outlim = atoi(argv[3]);
      rounds = atoi(argv[4]);
      trials = atoi(argv[5]);
    }

    printf("Starting run for %s, Ilim %d, Olim %d, MaxUnits %d, Trials %d.\n",
           fname, inlim, outlim, rounds, trials);
    if(NonRandomSeed)
      printf(" Fixed starting point used for random weights.\n\n");
    else
      printf(" Random starting point used for random weights.\n\n");

    for(i = 0; i < trials; i++){
      /* [The opening of this loop body was lost in transcription; it is
         reconstructed here from the statistics gathered below.] */
      Trial = i + 1;
      switch(TRAIN(outlim, inlim, rounds, interact)){
      case WIN:  vics++; break;
      case LOSE: defs++; break;
      }
      total_trials++;
      total_epochs += Epoch;
      nhidden = Nunits - Ninputs - 1;
      total_units += nhidden;
      min_epochs = (Epoch < min_epochs) ? Epoch : min_epochs;
      max_epochs = (Epoch > max_epochs) ? Epoch : max_epochs;
      min_units = (nhidden < min_units) ? nhidden : min_units;
      max_units = (nhidden > max_units) ? nhidden : max_units;
      if(interact && Y_OR_N_P(" Do you want to save the current settings?"))
        SAVE_NET_FILE();
      if(DumpWeights)
        DUMP_WEIGHTS(WeightFile);
      else if(interact &&
              Y_OR_N_P(" Do you want to save the current weights?"))
        INTERACT_DUMP_WEIGHTS();
    }

    /* print out loop stats */
    printf("\n\nTRAINING LOOP STATS\n");
    LIST_PARAMETERS();
    printf("\n Victories: %d, Defeats: %d, \n", vics, defs);
    printf(" Training Epochs - Min: %ld, Avg: %ld, Max: %ld,\n",
           min_epochs, (total_epochs / total_trials), max_epochs);
    printf(" Hidden Units - Min: %ld, Avg: %4.1f, Max: %ld,\n",
           min_units, ((float)total_units / total_trials), max_units);
  } while((interact) && Y_OR_N_P("Do you want to run more trials?"));

  /* Test the sucker. */
  if((interact) && Y_OR_N_P("Do you want to test the last network?"))
    TEST_EPOCH(ScoreThreshold);
  return(TRUE);
}

#else /* PREDICT_ONLY */

main(int argc, char *argv[])
{
  int i, j;
  char nfname[LINELEN+1], wfname[LINELEN+1], dfname[LINELEN+1];
  BOOLEAN interact = FALSE;
  void GET_INPUT_DATA(char *dfname);
  /***************/

  if((argc != 1) && (argc != 4)){   /* wrong number of args */
    printf("Usage: castest NetFile WeightFile DataFile\n");
    printf(" or castest\n");
    return;
  }
  else if(argc == 1)
    interact = TRUE;

  INITIALIZE_GLOBALS();

  /* Get network */
  if(interact){
    printf("Enter name of network file: ");
    scanf("%s", nfname);
    printf("Enter name of weight file: ");
    scanf("%s", wfname);
    printf("Enter name of data file: ");
    scanf("%s", dfname);
  }
  else{
    strcpy(nfname, argv[1]);
    strcpy(wfname, argv[2]);
    strcpy(dfname, argv[3]);
  }

  GET_NETWORK_CONFIGURATION(nfname);
  UseCache = FALSE;   /* no reason to use cache for prediction only */
  INIT_NET();
  GET_WEIGHTS(wfname);
  GET_INPUT_DATA(dfname);

  /* [From here to the middle of BUILD_NET the text was lost in
     transcription.  The prediction loop below is a minimal reconstruction
     consistent with the surrounding code; the definition of GET_INPUT_DATA
     is gone entirely.] */
  for(i = 0; i < NTestPatterns; i++){
    FULL_FORWARD_PASS(TestInputs[i]);
    WRITE_NET_OUTPUT();
  }
  return(TRUE);
}

#endif /* PREDICT_ONLY */

/* Set up the tables and other data structures for the network described
   in the configuration.  [Opening of this function reconstructed.] */
void BUILD_NET(void)
{
  int i;
  char *fn = "BUILD_NET";   /* used in memory-allocation error messages */
  /**********/

  if(NTrainingPatterns > NTestPatterns)
    MaxCases = NTrainingPatterns;
  else
    MaxCases = NTestPatterns;
  Ncases = NTrainingPatterns;
  FirstCase = 0;
  Nunits = 1 + Ninputs;

  /* setup for ErrorIndex */
  NtrainingOutputValues = Noutputs * NTrainingPatterns;
  NtestOutputValues = Noutputs * NTestPatterns;

  if(Nunits > MaxUnits)
    ERROR(FATAL, "MaxUnits must be greater than Ninputs.");

  /* allocate memory for outer arrays */
  ValuesCache = (float **)GET_ARRAY_MEM(MaxCases, sizeof(float *), fn);
  ExtraValues = (float *)GET_ARRAY_MEM(MaxUnits, sizeof(float), fn);
  Values = ExtraValues;
  Nconnections = (int *)GET_ARRAY_MEM(MaxUnits, sizeof(int), fn);
  Connections = (int **)GET_ARRAY_MEM(MaxUnits, sizeof(int *), fn);
  Weights = (float **)GET_ARRAY_MEM(MaxUnits, sizeof(float *), fn);
  ErrorsCache = (float **)GET_ARRAY_MEM(MaxCases, sizeof(float *), fn);
  ExtraErrors = (float *)GET_ARRAY_MEM(Noutputs, sizeof(float), fn);
  SumErrors = (float *)GET_ARRAY_MEM(Noutputs, sizeof(float), fn);
  DummySumErrors = (float *)GET_ARRAY_MEM(Noutputs, sizeof(float), fn);
  Errors = ExtraErrors;
  Outputs = (float *)GET_ARRAY_MEM(Noutputs, sizeof(float), fn);
  OutputWeights = (float **)GET_ARRAY_MEM(Noutputs, sizeof(float *), fn);
  OutputDeltas = (float **)GET_ARRAY_MEM(Noutputs, sizeof(float *), fn);
  OutputSlopes = (float **)GET_ARRAY_MEM(Noutputs, sizeof(float *), fn);
  OutputPrevSlopes = (float **)GET_ARRAY_MEM(Noutputs, sizeof(float *), fn);
  CandValues = (float *)GET_ARRAY_MEM(Ncandidates, sizeof(float), fn);
  CandSumValues = (float *)GET_ARRAY_MEM(Ncandidates, sizeof(float), fn);
  CandCor = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn);
  CandPrevCor = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn);
  CandWeights = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn);
  CandDeltas = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn);
  CandSlopes = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn);
  CandPrevSlopes = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn);
  TrainingInputs = (float **)GET_ARRAY_MEM(NTrainingPatterns,
                                           sizeof(float *), fn);
  TrainingOutputs = (float **)GET_ARRAY_MEM(NTrainingPatterns,
                                            sizeof(float *), fn);
  if(NTestPatterns){
    TestInputs = (float **)GET_ARRAY_MEM(NTestPatterns, sizeof(float *), fn);
    TestOutputs = (float **)GET_ARRAY_MEM(NTestPatterns, sizeof(float *), fn);
  }
  else{   /* no test patterns, so just point at training set */
    TestInputs = TrainingInputs;
    TestOutputs = TrainingOutputs;
  }

  /* Only create the caches if UseCache is on -- may not always have room. */
  if(UseCache){
    for(i = 0; i < MaxCases; i++){
      /* [Inner cache allocations reconstructed to match the vector sizes
         declared above.] */
      ValuesCache[i] = (float *)GET_ARRAY_MEM(MaxUnits, sizeof(float), fn);
      ErrorsCache[i] = (float *)GET_ARRAY_MEM(Noutputs, sizeof(float), fn);
    }
  }
}

/* [The remainder of BUILD_NET, the body of INIT_NET, and the opening of
   ACTIVATION were lost in transcription.  RANDOM_WEIGHT below is a
   stand-in consistent with its documented range, not the author's
   original expression; the opening of ACTIVATION is reconstructed by
   analogy with OUTPUT_FUNCTION further down.] */

/* Return a random weight in the range [-WeightRange, +WeightRange]. */
float RANDOM_WEIGHT(void)
{
  return (float)(WeightRange * ((rand() % 2000) / 1000.0 - 1.0));
}

/* INIT_NET (body lost): prepares the freshly built network for training. */

/* Compute the activation value of a hidden unit, given the weighted sum
 * of its incoming values.  Defined unit types are SIGMOID, GAUSSIAN,
 * ASYMSIGMOID, and VARSIGMOID.
 */
float ACTIVATION(float sum)
{
  float temp;

  switch(UnitType){
  case SIGMOID:
    /* Symmetrical sigmoid function in range -0.5 to +0.5. */
    if (sum < -15.0) return(-0.5);
    else if (sum > 15.0) return(0.5);
    else return (1.0 /(1.0 + exp(-sum)) - 0.5);
  case GAUSSIAN:
    /* Gaussian activation function in range 0.0 to 1.0. */
    temp = -0.5 * sum * sum;
    if (temp < -75.0) return(0.0);
    else return (exp(temp));
  case ASYMSIGMOID:
    /* asymmetrical sigmoid function in range 0.0 to 1.0. */
    if (sum < -15.0) return(0.0);
    else if (sum > 15.0) return(1.0);
    else return (1.0 /(1.0 + exp(-sum)));
  case VARSIGMOID:
    /* Sigmoid function in range SigmoidMin to SigmoidMax. */
    if (sum < -15.0) return(SigmoidMin);
    else if (sum > 15.0) return(SigmoidMax);
    else return ((SigmoidMax - SigmoidMin)/(1.0 + exp(-sum)) + SigmoidMin);
  }
}

/*
 * Given the unit's activation value and sum of weighted inputs, compute
 * the derivative of the activation with respect to the sum.  Defined unit
 * types are SIGMOID, GAUSSIAN, ASYMSIGMOID, and VARSIGMOID.
 *
 * Note: do not use the sigmoid-prime offset here, as it confuses the
 * correlation machinery.  But do use it in OUTPUT_PRIME.
 */
float ACTIVATION_PRIME(float value, float sum)
{
  switch(UnitType){
  case SIGMOID:
    /* Symmetrical sigmoid function. */
    return (0.25 - value*value);
  case GAUSSIAN:
    /* Gaussian activation function. */
    return (sum * (- value));
  case ASYMSIGMOID:
    /* asymmetrical sigmoid function in range 0.0 to 1.0. */
    return (value * (1.0 - value));
  case VARSIGMOID:
    /* Sigmoid function with range SigmoidMin to SigmoidMax. */
    return ((value - SigmoidMin) *
            (1.0 - (value - SigmoidMin) / (SigmoidMax - SigmoidMin)));
  }
}

/* Compute the value of an output, given the weighted sum of incoming values.
 * Defined output types are SIGMOID, ASYMSIGMOID, VARSIGMOID, and LINEAR.
 */
float OUTPUT_FUNCTION(float sum)
{
  switch(OutputType){
  case SIGMOID:
    /* Symmetrical sigmoid function, used for binary functions. */
    if (sum < -15.0) return(-0.5);
    else if (sum > 15.0) return(0.5);
    else return (1.0 /(1.0 + exp(-sum)) - 0.5);
  case LINEAR:
    /* Linear output function, used for continuous functions. */
    return (sum);
  case ASYMSIGMOID:
    /* asymmetrical sigmoid function in range 0.0 to 1.0. */
    if (sum < -15.0) return(0.0);
    else if (sum > 15.0) return(1.0);
    else return (1.0 /(1.0 + exp(-sum)));
  case VARSIGMOID:
    /* Sigmoid function in range SigmoidMin to SigmoidMax. */
    if (sum < -15.0) return(SigmoidMin);
    else if (sum > 15.0) return(SigmoidMax);
    else return ((SigmoidMax - SigmoidMin)/(1.0 + exp(-sum)) + SigmoidMin);
  }
}

/* Compute the derivative of an output with respect to its weighted input
 * sum.  Defined output types are SIGMOID, ASYMSIGMOID, VARSIGMOID, and
 * LINEAR.
 *
 * SigmoidPrimeOffset is used to keep the back-prop error value from going
 * to zero.
 */
float OUTPUT_PRIME(float output)
{
  switch(OutputType){
  case SIGMOID:
    /* Symmetrical sigmoid function, used for binary functions. */
    return (SigmoidPrimeOffset + 0.25 - output*output);
  case LINEAR:
    /* Linear output function, used for continuous functions. */
    return (1.0);
  case ASYMSIGMOID:
    /* asymmetrical sigmoid function in range 0.0 to 1.0. */
    return (SigmoidPrimeOffset + output * (1.0 - output));
  case VARSIGMOID:
    /* Sigmoid function with range SigmoidMin to SigmoidMax. */
    return (SigmoidPrimeOffset +
            (output - SigmoidMin) *
            (1.0 - (output - SigmoidMin) / (SigmoidMax - SigmoidMin)));
  }
}
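/* Worked check, not part of the original comments: VARSIGMOID generalizes
 * the two fixed-range sigmoids above.  With SigmoidMin = 0.0 and
 * SigmoidMax = 1.0 it reduces to ASYMSIGMOID (value 1/(1+exp(-sum)),
 * derivative v*(1-v)); with SigmoidMin = -0.5 and SigmoidMax = 0.5 it
 * reduces to SIGMOID (derivative (v+0.5)*(0.5-v) = 0.25 - v*v).  Either
 * fixed range can thus be recovered by setting sigmoidmin/sigmoidmax in
 * the .net file. */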
/* The basic routine for doing quickprop-style update of weights, given a
 * pair of slopes and a delta.
 *
 * Given arrays holding weights, deltas, slopes, and previous slopes,
 * and an index i, update weight[i] and delta[i] appropriately.  Move
 * slope[i] to prev[i] and zero out slope[i].  Add the weight-decay term
 * to each slope before doing the update.
 */
void QUICKPROP_UPDATE(int i, float weights[], float deltas[], float slopes[],
                      float prevs[], float epsilon, float decay, float mu,
                      float shrink_factor)
{
  float w, d, s, p, next_step;
  /********/

  w = weights[i];
  d = deltas[i];
  s = slopes[i] + decay * w;
  p = prevs[i];
  next_step = 0.0;

  /* The step must always be in direction opposite to the slope. */
  if(d < 0.0){    /* If last step was negative... */
    if(s > 0.0)   /* Add in linear term if current slope is still positive. */
      next_step -= epsilon * s;
    /* If current slope is close to or larger than prev slope... */
    if(s >= (shrink_factor*p))
      next_step += mu * d;            /* Take maximum size negative step. */
    else
      next_step += d * s / (p - s);   /* Else, use quadratic estimate. */
  }
  else if(d > 0.0){  /* If last step was positive... */
    if(s < 0.0)   /* Add in linear term if current slope is still negative. */
      next_step -= epsilon * s;
    /* If current slope is close to or more neg than prev slope... */
    if(s <= (shrink_factor*p))
      next_step += mu * d;            /* Take maximum size positive step. */
    else
      next_step += d * s / (p - s);   /* Else, use quadratic estimate. */
  }
  else   /* Last step was zero, so use only linear term. */
    next_step -= epsilon * s;

  /* update global data arrays */
  deltas[i] = next_step;
  weights[i] = w + next_step;
  prevs[i] = s;
  slopes[i] = 0.0;
}

/* Set up all the inputs from the INPUT vector as the first few entries
   in the Values vector. */
void SETUP_INPUTS(float inputs[])
{
  int i;
  /*********/

  Values[0] = 1.0;   /* bias unit */
  for(i = 0; i < Ninputs; i++)   /* [loop tail reconstructed; the original
                                    text was cut off in transcription] */
    Values[i+1] = inputs[i];
}
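/* Illustrative only, not part of the original program: a minimal sketch of
 * how a trainer drives QUICKPROP_UPDATE once per weight after an epoch has
 * accumulated OutputSlopes.  It assumes the outer index of the output-side
 * arrays is the output unit, matching the Noutputs-sized allocations in
 * BUILD_NET; the program's real UPDATE_OUTPUT_WEIGHTS (prototyped above)
 * plays this role, and the helper name here is hypothetical. */
static void output_update_sketch(void)
{
  int i, j;

  for(j = 0; j < Noutputs; j++)     /* for each output unit...            */
    for(i = 0; i < Nunits; i++)     /* ...update the weight from unit i   */
      QUICKPROP_UPDATE(i, OutputWeights[j], OutputDeltas[j],
                       OutputSlopes[j], OutputPrevSlopes[j],
                       OutputEpsilon, OutputDecay, OutputMu,
                       OutputShrinkFactor);
}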