Additional Code - Zip file 53 KB

Code Sample 1

<!---
  train: folds one parsed document into the
  in-memory corpus under the given category.

  parsedText - query read through its "word"
               and "wordCount" columns, one
               row per word.
  category   - key under which the counts are
               accumulated in
               variables.instance.corpus.

  Returns nothing. Calls updateStatistics()
  once after all rows have been added.
--->
<cffunction
  name="train"
  access="public"
  returntype="VOID"
  hint="trains the AI to categorise
        certain words"
  >
  <cfargument 
    name="parsedText"
    type="query"
    required="yes"
    hint="the parsed text to categorise"
  />
  <cfargument 
    name="category"
    type="string"
    required="yes"
    hint="the category to place it in"
  />
  <cfscript>
  //Init private variables. 
  var i = 1;
  var parsed_len =
    arguments.parsedText.recordCount;

  //this CFC has a private variable to
  //keep the corpus in memory during the
  //life of the object
  var myCorpus = variables.instance.corpus;

  //create the category bucket once, with an
  //explicit existence test instead of a
  //swallowed exception. As before, a category
  //is only created when there is at least
  //one row to put in it.
  if(parsed_len gt 0
     and not structKeyExists(
       myCorpus, arguments.category)){
    structInsert(
      myCorpus,
      arguments.category,
      structNew()
    );
  }

  //add every row's count to the running
  //total for that word in this category
  for(i=1;i lte parsed_len;i=i+1){
    incrementStructElement(
      inputStruct = 
          myCorpus[arguments.category], 
      key  = arguments.parsedText.word[i], 
      value  =
          arguments.parsedText.wordCount[i]
    );
  }
  //update the word counters for the category 
  //and for the corpus
  updateStatistics();
  return;
  </cfscript>
</cffunction>

Code Sample 2

<!---
  classify: scores a parsed document against
  every category found in
  variables.instance.count.

  parsedText - query read through its "word"
               column, one row per word.

  Returns a struct keyed by category name.
  Each value is the sum of
    log(weight / category wordcount)
  over every row of parsedText, plus
    log(category wordcount / corpus wordcount)
  where weight is the stored count of the
  word in that category, or .1 when the word
  has not been seen there.
  NOTE(review): presumably the category with
  the highest score is the suggested one -
  confirm against the caller.
--->
<cffunction 
  name="classify"
  access="public"
  returntype="struct"
  hint="asks the AI to categorise words based
        on what it has learned"
  >
  <cfargument 
    name="parsedText"
    type="query"
    required="yes"
    hint="the parsed text to categorise"
  />
  
  <cfscript>
  var score    = structNew();
  var weight   = 0;
  var category = '';
  var word     = '';
  var i        = 1;
  //loop-invariant lookups, resolved once
  var rowCount =
    arguments.parsedText.recordCount;
  var corpus   = variables.instance.corpus;
  var counts   = variables.instance.count;
  
  for(i=1; i lte rowCount; i=i+1){
    word = arguments.parsedText.word[i];
    //for every category calculate :
    //  (word freq in category) divided by 
    //  (category total wordcount)
    for(category in counts){
      if(structKeyExists(
         corpus[category], 
         word)
      ){
        //number of times this word appears 
        //in this category
        weight = corpus[category][word];
      }else{
        //very small number used to estimate 
        //weight when we have no info
        weight = .1 ;
      }
      //logs are summed rather than multiplying
      //the raw ratios
      incrementStructElement(
         inputStruct = score, 
         key  = category, 
         value  = 
            log(weight / counts[category])
      );
    }
  }
  //for every category adjust the score for 
  //the input text based on 
  //(category wordcount) / (corpus wordcount)
  for(category in counts){
    incrementStructElement(
       inputStruct = score, 
       key= category, 
       value= 
        log(counts[category]
          / variables.instance.total)
    );
  }
  return score;
  </cfscript>
</cffunction>

incrementStructElement adds a value to the 
number stored under a struct key, first 
creating the key with a value of zero if it 
does not exist yet:
<cfscript>
//Adds arguments.value to the number stored
//under arguments.key in arguments.inputStruct.
//
//inputStruct - struct to update; it is
//              modified in place.
//key         - name of the entry to increase.
//value       - amount to add.
//
//A missing key is first created with a value
//of zero, so the entry ends up equal to
//arguments.value. Returns nothing.
function incrementStructElement(inputStruct,
  key,value){
  //explicit existence test rather than relying
  //on structInsert throwing for existing keys
  if(not structKeyExists(
       arguments.inputStruct,
       arguments.key)){
    arguments.inputStruct[arguments.key] = 0;
  }
  arguments.inputStruct[arguments.key] = 
    arguments.inputStruct[arguments.key] + 
    arguments.value;
}
</cfscript>

Code Sample 3

<cfscript>
//Demo: train the classifier on two simulated
//articles, then ask it to classify a third.

//train() and classify() declare parsedText
//as a query and read its "word" and
//"wordCount" columns, so the word/count
//structs below are converted to that shape
//before being passed in.
function wordCountsToQuery(wordCounts){
  var q = queryNew('word,wordCount');
  var w = '';
  for(w in arguments.wordCounts){
    queryAddRow(q);
    querySetCell(q, 'word', w);
    querySetCell(q, 'wordCount',
      arguments.wordCounts[w]);
  }
  return q;
}

//these structures simulate input articles.
//The struct keys are words appearing 
//in the articles; each value is used as
//that word's count.
  
//simulate an article about Flash  
testdata_flash = structNew();
  testdata_flash.flash = 12;
  testdata_flash.skills = 1;
  //add in some noise
  testdata_flash.junk = 200;

//simulate an article about CF
testdata_CF = structNew();
  testdata_CF.coldfusion = 12;
  testdata_CF.skills = 2;
  //add in some noise
  testdata_CF.junk = 200;

//an incoming article we want the system
//to classify for us
testdata = structNew();
  testdata.skills = 1;
  testdata.flash = 2;
  
//create an instance of our new component
 myClassifier = 
   createObject('component','classifier');
  myClassifier.init();
//train it with two documents worth of data
myClassifier.train(
  wordCountsToQuery(testdata_flash),'flash');
myClassifier.train(
  wordCountsToQuery(testdata_CF),'coldfusion');

//based on training, what do you suggest?
recommendations =
myClassifier.classify(
  wordCountsToQuery(testdata));
</cfscript>

Additional Code - Zip file 53 KB