Code Sample 1
<cffunction
  name="train"
  access="public"
  returntype="VOID"
  hint="trains the AI to categorise certain words"
>
  <cfargument
    name="parsedText"
    type="query"
    required="yes"
    hint="the parsed text to categorise; read row by row through its word and wordCount columns"
  />
  <cfargument
    name="category"
    type="string"
    required="yes"
    hint="the category to place it in"
  />
  <cfscript>
    //Init private variables.
    var word = '';
    var wordCount = 0;
    var i = 1;
    var parsed_len = arguments.parsedText.recordCount;
    //this CFC has a private variable to
    //keep the corpus in memory during the
    //life of the object. Structs are handled
    //by reference, so writes through myCorpus
    //land in variables.instance.corpus.
    var myCorpus = variables.instance.corpus;

    //Make sure the category has a bucket before
    //counting into it. An explicit existence test
    //replaces the earlier insert-and-swallow-the-
    //exception approach, which would also have
    //hidden any unrelated error. The bucket is
    //only created when there is at least one row,
    //so an empty query still adds no category.
    if(parsed_len gt 0
       and not structKeyExists(myCorpus, arguments.category)){
      myCorpus[arguments.category] = structNew();
    }

    //add each row's count to the running total
    //for that word within the category
    for(i=1; i lte parsed_len; i=i+1){
      word = arguments.parsedText.word[i];
      wordCount = arguments.parsedText.wordCount[i];
      incrementStructElement(
        inputStruct = myCorpus[arguments.category],
        key = word,
        value = wordCount
      );
    }

    //update the word counters for the category
    //and for the corpus
    updateStatistics();
    return;
  </cfscript>
</cffunction>
Code Sample 2
<cffunction
  name="classify"
  access="public"
  returntype="struct"
  hint="asks the AI to categorise words based on what it has learned; returns a struct with one numeric score per known category"
>
  <cfargument
    name="parsedText"
    type="query"
    required="yes"
    hint="the parsed text to categorise; only its word column is read here"
  />
  <cfscript>
    var score = structNew();
    var weight = 0;
    var category = '';
    var word = '';
    var i = 1;
    var rowTotal = arguments.parsedText.recordCount;
    //local references to the instance data so
    //both loops read it the same way
    var corpus = variables.instance.corpus;
    var categoryCounts = variables.instance.count;
    var corpusTotal = variables.instance.total;

    for(i=1; i lte rowTotal; i=i+1){
      word = arguments.parsedText.word[i];
      //for every category calculate :
      // (word freq in category) divided by
      // (category total wordcount)
      for(category in categoryCounts){
        if(structKeyExists(corpus[category], word)){
          //number of times this word appears
          //in this category
          weight = corpus[category][word];
        }else{
          //very small number used to estimate
          //weight when we have no info
          weight = .1;
        }
        //logs are summed rather than multiplying
        //the raw ratios together
        incrementStructElement(
          inputStruct = score,
          key = category,
          value = log(weight / categoryCounts[category])
        );
      }
    }

    //for every category adjust the score for
    //the input text based on
    //(category wordcount) / (corpus wordcount)
    for(category in categoryCounts){
      incrementStructElement(
        inputStruct = score,
        key = category,
        value = log(categoryCounts[category] / corpusTotal)
      );
    }
    return score;
  </cfscript>
</cffunction>
incrementStructElement adds a value to the
number stored under a key in a struct, first
creating the key with a value of zero if it
does not exist yet:
<cfscript>
  //Adds arguments.value to the number stored under
  //arguments.key in arguments.inputStruct. When the
  //key is missing it is first created with a value
  //of zero, so the result is then just the value.
  //The struct is modified in place; nothing is
  //returned.
  function incrementStructElement(inputStruct,
                                  key, value){
    //explicit existence test instead of inserting
    //and swallowing the "key already exists" error
    if(not structKeyExists(arguments.inputStruct,
                           arguments.key)){
      arguments.inputStruct[arguments.key] = 0;
    }
    arguments.inputStruct[arguments.key] =
      arguments.inputStruct[arguments.key] +
      arguments.value;
  }
</cfscript>
Code Sample 3
<cfscript>
  //Converts a struct of word -> count into the
  //two-column query (word, wordCount) that train()
  //and classify() declare for their parsedText
  //argument. One row is added per struct key.
  function wordCountsToQuery(wordCounts){
    var result = queryNew('word,wordCount');
    var currentWord = '';
    for(currentWord in arguments.wordCounts){
      queryAddRow(result);
      querySetCell(result, 'word', currentWord);
      querySetCell(result, 'wordCount',
                   arguments.wordCounts[currentWord]);
    }
    return result;
  }

  //these structures simulate input articles.
  //The struct keys are words appearing
  //in the articles.
  //simulate an article about Flash
  testdata_flash = structNew();
  testdata_flash.flash = 12;
  testdata_flash.skills = 1;
  //add in some noise
  testdata_flash.junk = 200;
  //simulate an article about CF
  testdata_CF = structNew();
  testdata_CF.coldfusion = 12;
  testdata_CF.skills = 2;
  //add in some noise
  testdata_CF.junk = 200;
  //an incoming article we want the system
  //to classify for us
  testdata = structNew();
  testdata.skills = 1;
  testdata.flash = 2;
  //create an instance of our new component
  myClassifier =
    createObject('component','classifier');
  myClassifier.init();
  //train it with two documents worth of data.
  //The structs are turned into queries first
  //because parsedText is declared type="query".
  myClassifier.train(
    wordCountsToQuery(testdata_flash),'flash');
  myClassifier.train(
    wordCountsToQuery(testdata_CF),'coldfusion');
  //based on training, what do you suggest?
  recommendations =
    myClassifier.classify(
      wordCountsToQuery(testdata));
</cfscript>