Listing 1

<!---
	Listing 1 driver: builds five sample news items, derives the term space and
	one vector per item through vector.cfc, then asks the component for the
	items most similar to the one selected by intItemID.
	intItemID defaults to 1 when no URL variable is present.
--->
<cfparam name="intItemID" default="1">
<cfscript>
	objVector = CreateObject("component", "vector");

	// STEP 1 - Set up a holding array containing test items
	arItems = ArrayNew(1);
	arItems[1] = StructNew();
	arItems[1].intItemID = 1;
	arItems[1].vcTitle = "Blair defends his Iraq strategy";
	arItems[1].txtBody = "Tony Blair told MPs he accepted full
	 responsibility for mistakes highlighted by the Butler Report,
	 which he described as made 'in good faith'. ...";
	
	arItems[2] = StructNew();
	arItems[2].intItemID = 2;
	arItems[2].vcTitle = "MoD denies iPod ban";
	arItems[2].txtBody = "The Ministry of Defence (MoD) has denied Reuters reports
	claiming it has banned high-capacity personal storage
	devices, such as iPods. ...";

	arItems[3] = StructNew();
	arItems[3].intItemID = 3;
	arItems[3].vcTitle = "Blair accepts Butler report findings";
	arItems[3].txtBody = "TONY Blair today welcomed the Butler report
	findings, saying it showed the government and intelligence services
	acted in 'good faith'. ...";

	arItems[4] = StructNew();
	arItems[4].intItemID = 4;
	arItems[4].vcTitle = "UK military denies ban on iPod";
	arItems[4].txtBody = "The Ministry of Defence has denied reports that
	it has banned Apple's iPod due to fears it could be used to steal
	sensitive files. ...";

	arItems[5] = StructNew();
	arItems[5].intItemID = 5;
	arItems[5].vcTitle = "Butler 'A by-Election Boost for Blair'";
	// The apostrophes in this body were mis-encoded in the original listing.
	arItems[5].txtBody = "Lord Butler's report into the intelligence on
	Iraq could help Tony Blair in tomorrow's by-elections, a political
	expert said today. ... ";

	// In this demo an item's ID doubles as its position in arItems and
	// arItemVectors. The value can arrive from the URL, so anything that is
	// not a whole number inside the array bounds falls back to the first item
	// instead of raising an array index error further down.
	if (NOT IsNumeric(intItemID)) {
		intItemID = 1;
	} else if (intItemID NEQ Int(intItemID) OR intItemID LT 1
			OR intItemID GT ArrayLen(arItems)) {
		intItemID = 1;
	}
	intItemID = Int(intItemID);

	// STEP 2 - Set up term space (find unique keywords in all items)
	arTermSpace = objVector.prepareTermSpace(arItems=arItems);

	// STEP 3 - Build Vector for each item (title words count three times)
	arItemVectors = objVector.buildItemVectors(arItems=arItems,
		arTermSpace=arTermSpace, intTitleWeightFactor=3);

	// STEP 4 - Find item matches for current item being viewed
	args = StructNew();
	args.arCurrentItemVector = arItemVectors[intItemID].arVector;
	// Compare against every item except the one being viewed.
	arItemVectorsWithoutCurrent = arItemVectors;
	temp = ArrayDeleteAt(arItemVectorsWithoutCurrent, intItemID);
	args.arItemVectors = arItemVectorsWithoutCurrent;
	args.iMaxRows = 5;
	args.iThreshold = 0;	// 0 keeps every item, even those with no shared terms
	arMatches = objVector.getItemMatches(argumentCollection=args);
</cfscript>

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head><title>Vector Space Example</title></head>
<body>
<!---
	STEP 5 - Display the results.
	Shows the current item followed by a list of related items, each linking
	back to this page with the related item's ID and its similarity score.
--->
<cfoutput>
<!--- Titles are plain text, so escape them before writing them into markup. --->
<h1>#HTMLEditFormat(arItems[intItemID].vcTitle)#</h1>
<!--- NOTE(review): the body is written unescaped as in the original listing;
      confirm whether item bodies are allowed to carry HTML markup. --->
<p>#arItems[intItemID].txtBody#</p>
<ul>
	<cfloop index="cLoop" from="1" to="#ArrayLen(arMatches)#">
		<li>
			<!--- Item IDs are used as positions in arItems to fetch the title. --->
			<a href="default.cfm?intItemID=#arMatches[cLoop].intItemID#">#HTMLEditFormat(arItems[arMatches[cLoop].intItemID].vcTitle)#</a>
			(#arMatches[cLoop].intCosineMeasure#% match)
		</li>
	</cfloop>
</ul>
</cfoutput>
</body>
</html>


Listing 2

<cfcomponent hint="Vector Space routines Component" displayname="vector">
	<!--- Include arrayOfStructsSort UDF by Nathan Dintenfass, available
	 at http://www.cflib.org/udf.cfm?ID=359 --->
	<cfinclude template="arrayOfStructsSort.udf" />

	<!--- PRIVATE METHODS --->
	<!---
		removePunctuation
		Reduces a block of text to bare words separated by single spaces.
		Steps, in order: HTML tags, character entities, punctuation, control
		characters and pound signs are removed or replaced by a space; the result
		is trimmed and runs of whitespace are collapsed.
		@sString  text to clean (may contain HTML)
		@return   cleaned string with no leading or trailing whitespace
	--->
	<cffunction name="removePunctuation" access="private" returnType="string" output="false" hint="Remove punctuation and HTML from input string">
		<cfargument name="sString" type="string" required="true" />
		<cfset var sWords = "" />
		<cfscript>
			// Strip anything that looks like a tag.
			sWords = REReplaceNoCase(arguments.sString, "<(.|\n)+?>", " ", "ALL");
			// Strip character entities only (&name; &##123; &##x1F;). The former
			// pattern "&(.+?);" also swallowed ordinary words lying between a
			// stray ampersand and the next semicolon.
			sWords = REReplace(sWords,
				"&(##[0-9]+|##[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);", " ", "ALL");
			sWords = REReplace(sWords, "[[:punct:]]", " ", "ALL");
			sWords = REReplace(sWords, "[[:cntrl:]]", " ", "ALL");
			// The pound sign is removed separately from the punctuation class above.
			sWords = Replace(sWords, "£", "", "ALL");
			sWords = REReplace(Trim(sWords), "\s+", " ", "ALL");
		</cfscript>
		<cfreturn sWords />
	</cffunction>

	<!---
		removeStopWords
		Drops common English stop words (case-insensitively) from a string of
		whitespace-separated words.
		The input is split into words and each word is checked against a lookup
		struct, so words at the very start or end of the string and immediately
		repeated stop words are handled like any other occurrence.
		@sString  words separated by spaces, tabs or line breaks
		@return   the remaining words joined by single spaces
	--->
	<cffunction name="removeStopWords" access="private" returnType="string" output="false" hint="Removes stop words from input string">
		<cfargument name="sString" type="string" required="true" />
		<!--- The list is concatenated from one-line pieces so that no line break
		      can end up inside a list entry. --->
		<cfset var lstStopWords =
			"$,0,1,2,3,4,5,6,7,8,9,a,able,about,after,again,all,almost,already,"
			& "also,although,am,an,and,another,any,are,around,as,at,b,based,be,because,been,"
			& "before,being,between,both,bring,but,by,c,came,can,com,come,comes,could,d,did,do,"
			& "does,doing,done,e,each,eight,else,etc,even,every,f,five,for,four,from,g,get,gets,"
			& "getting,go,going,got,h,had,has,have,he,her,here,him,himself,his,how,however,"
			& "href,http,i,if,in,including,into,is,it,its,j,just,k,kb,know,l,like,looks,m,"
			& "mailto,make,making,many,may,mb,me,means,might,more,most,move,mr,much,must,my,"
			& "n,need,needs,never,nice,nine,no,not,now,o,of,often,oh,ok,on,one,only,or,org,"
			& "other,our,out,over,own,p,piece,q,r,rather,re,really,s,said,same,say,says,see,"
			& "seven,several,she,should,since,single,six,so,some,something,still,stuff,such,t,"
			& "take,ten,than,that,the,their,them,then,there,these,they,thing,"
			& "things,this,those,three,through,to,too,took,two,u,under,up,us,use,used,using,"
			& "usual,v,ve,very,via,w,want,was,way,we,well,were,what,when,where,whether,which,"
			& "while,whilst,who,why,will,with,within,would,x,y,yes,yet,you,your,z" />
		<cfset var arStopWords = ListToArray(lstStopWords) />
		<cfset var stStopWords = StructNew() />
		<cfset var arWords = ArrayNew(1) />
		<cfset var arKept = ArrayNew(1) />
		<cfset var cStopWord = 0 />
		<cfset var cWord = 0 />
		<cfscript>
			// Struct keys are case-insensitive, which gives the no-case match.
			for (cStopWord=1; cStopWord LTE ArrayLen(arStopWords); cStopWord=cStopWord+1) {
				stStopWords[arStopWords[cStopWord]] = true;
			}

			// Split on any whitespace; empty entries are skipped by ListToArray.
			arWords = ListToArray(arguments.sString, " " & Chr(9) & Chr(10) & Chr(13));
			for (cWord=1; cWord LTE ArrayLen(arWords); cWord=cWord+1) {
				if (NOT StructKeyExists(stStopWords, arWords[cWord])) {
					ArrayAppend(arKept, arWords[cWord]);
				}
			}
		</cfscript>
		<cfreturn ArrayToList(arKept, " ") />
	</cffunction>

	<!---
		getUniqueKeywords
		Extracts the distinct, non-numeric keywords from a piece of text, sorted
		alphabetically without regard to case.
		@txtContent        text to analyse (passed through removePunctuation and
		                   removeStopWords first)
		@bRecordFrequency  false (default): return an array of keyword strings;
		                   true: return an array of structs with keys vcKeyWord
		                   and intFrequency (number of occurrences, ignoring case)
		@return            array as described above; empty when no keywords remain
	--->
	<cffunction name="getUniqueKeywords" access="private" returntype="Array" output="False" hint="Returns a list of unique keywords in a string.">
		<cfargument name="txtContent" type="string" required="true" />
		<cfargument name="bRecordFrequency" type="boolean" required="false" default="false" />
		<cfset var lstKeyWords = "" />
		<cfset var arKeyWords = ArrayNew(1) />
		<cfset var arKeyWordsOutput = ArrayNew(1) />
		<cfset var stKeyWord = "" />
		<cfset var sPrev = "" />
		<cfset var cWord = 0 />
		<cfscript>
			// Remove punctuation & HTML
			lstKeyWords = removePunctuation(arguments.txtContent);

			// Remove stop words. The padding is applied here, after the cleaned
			// text has been trimmed, so the first and last words are also
			// surrounded by spaces when the stop word filter runs.
			lstKeyWords = removeStopWords(" " & lstKeyWords & " ");

			// Convert string to list of single words
			lstKeyWords = REReplace(Trim(lstKeyWords), "\s+", ",", "ALL");

			// Convert list to array and sort into alphabetical order
			arKeyWords = ListToArray(lstKeyWords);
			if (ArrayLen(arKeyWords) GT 1) {
				ArraySort(arKeyWords, "textnocase");
			}

			// Run through the sorted words, removing duplicates and recording
			// the frequency if applicable. Equal words are adjacent after the
			// sort, so each repeat just bumps the count of the last entry.
			for (cWord=1; cWord LTE ArrayLen(arKeyWords); cWord=cWord+1) {
				if (NOT IsNumeric(arKeyWords[cWord])) {
					// CompareNoCase forces a plain string comparison.
					if (CompareNoCase(arKeyWords[cWord], sPrev) NEQ 0) {
						if (arguments.bRecordFrequency) {
							stKeyWord = StructNew();
							stKeyWord.vcKeyWord = arKeyWords[cWord];
							stKeyWord.intFrequency = 1;
							ArrayAppend(arKeyWordsOutput, stKeyWord);
						} else {
							ArrayAppend(arKeyWordsOutput, arKeyWords[cWord]);
						}
						sPrev = arKeyWords[cWord];
					} else if (arguments.bRecordFrequency) {
						arKeyWordsOutput[ArrayLen(arKeyWordsOutput)].intFrequency =
							arKeyWordsOutput[ArrayLen(arKeyWordsOutput)].intFrequency + 1;
					}
				}
			}
		</cfscript>
		<cfreturn arKeyWordsOutput />
	</cffunction>

	<!---
		calculateCosineMeasure
		Computes the cosine of the angle between two vectors, scaled to a
		percentage and rounded to a whole number.
		@arVector1  array of numbers; its length drives the loop
		@arVector2  array of numbers; must have at least as many entries as
		            arVector1 (both are expected to share one term space)
		@return     Round(100 * dot / (|v1| * |v2|)); 0 when either vector has
		            zero length, since the angle is undefined in that case
	--->
	<cffunction name="calculateCosineMeasure" access="private" returntype="numeric" output="False" hint="Returns cosine measure (0 to 100) for two supplied vectors">
		<cfargument name="arVector1" type="array" required="true" />
		<cfargument name="arVector2" type="array" required="true" />
		<cfset var cAxis = 0 />
		<cfset var iNumerator = 0 />
		<cfset var iSumSquares1 = 0 />
		<cfset var iSumSquares2 = 0 />
		<cfset var iDenominator = 0 />
		<cfset var iCosineMeasure = 0 />
		<cfscript>
			// Loop through each axis and keep running totals
			for (cAxis=1; cAxis LTE ArrayLen(arguments.arVector1); cAxis=cAxis+1) {
				iNumerator = iNumerator
					+ (arguments.arVector1[cAxis] * arguments.arVector2[cAxis]);
				iSumSquares1 = iSumSquares1 + arguments.arVector1[cAxis]^2;
				iSumSquares2 = iSumSquares2 + arguments.arVector2[cAxis]^2;
			}

			// An all-zero vector (for example an item made up only of stop words)
			// would otherwise cause a division by zero.
			iDenominator = Sqr(iSumSquares1) * Sqr(iSumSquares2);
			if (iDenominator GT 0) {
				iCosineMeasure = Round(100 * (iNumerator / iDenominator));
			}
		</cfscript>
		<cfreturn iCosineMeasure />
	</cffunction>

	
	<!--- PUBLIC METHODS --->
	<!---
		prepareTermSpace
		Builds the term space: the sorted list of unique keywords found across
		the titles and bodies of all supplied items.
		@arItems  array of structs, each with vcTitle and txtBody keys
		@return   array of keyword strings as produced by getUniqueKeywords
	--->
	<cffunction name="prepareTermSpace" access="public" returntype="Array" output="False" hint="Takes items from array and returns the term space.">
		<cfargument name="arItems" type="array" required="true" />
		<cfset var txtContent = "" />
		<cfset var cItem = 0 />
		<cfset var arTermSpace = ArrayNew(1) />
		<cfscript>
			// Add content of title and body field to holding string
			// for each item in content array
			for (cItem=1; cItem LTE ArrayLen(arguments.arItems); cItem=cItem+1) {
				txtContent = txtContent & arguments.arItems[cItem].vcTitle & " "
					& arguments.arItems[cItem].txtBody & " ";
			}

			// Get list of unique keywords
			arTermSpace = getUniqueKeywords(txtContent=txtContent,
				bRecordFrequency=false);
		</cfscript>
		<cfreturn arTermSpace />
	</cffunction>

	<!---
		buildItemVectors
		Produces one frequency vector per item over the supplied term space.
		@arItems               array of structs with intItemID, vcTitle and txtBody
		@arTermSpace           array of keyword strings defining the axes
		@intTitleWeightFactor  number of times the title is repeated in front of
		                       the body before counting keywords (default 1)
		@return                array of structs, one per item and in item order,
		                       with keys intItemID and arVector; arVector has one
		                       entry per term holding that keyword's frequency in
		                       the item, or 0 when the item does not contain it
	--->
	<cffunction name="buildItemVectors" access="public" returntype="Array" output="False" hint="Builds vectors for each item in provided term space.">
		<cfargument name="arItems" type="array" required="true" />
		<cfargument name="arTermSpace" type="array" required="true" />
		<cfargument name="intTitleWeightFactor" type="numeric" required="false" default="1" />
		<cfset var arItemVectors = ArrayNew(1) />
		<cfset var cItem = 0 />
		<cfset var cTerm = 0 />
		<cfset var cItemTerm = 0 />
		<cfset var txtContent = "" />
		<cfset var arItemKeywords = ArrayNew(1) />
		<cfset var stFrequencies = "" />
		<cfset var stItemVector = "" />
		<cfscript>
			for (cItem=1; cItem LTE ArrayLen(arguments.arItems); cItem=cItem+1) {
				// Get unique keywords and frequencies in item content; the title
				// is repeated so that its words carry extra weight.
				txtContent = RepeatString(arguments.arItems[cItem].vcTitle & " ",
					arguments.intTitleWeightFactor) & " "
					& arguments.arItems[cItem].txtBody & " ";
				arItemKeywords = getUniqueKeywords(txtContent=txtContent,
					bRecordFrequency=true);

				// Index the item's keywords by name. Looking each term up by key
				// (struct keys are case-insensitive) replaces the former lock-step
				// walk over two sorted arrays, which stopped matching as soon as
				// the item held a keyword that was absent from the term space.
				stFrequencies = StructNew();
				for (cItemTerm=1; cItemTerm LTE ArrayLen(arItemKeywords);
						cItemTerm=cItemTerm+1) {
					stFrequencies[arItemKeywords[cItemTerm].vcKeyWord] =
						arItemKeywords[cItemTerm].intFrequency;
				}

				// Record the distance along each axis of the term space
				stItemVector = StructNew();
				stItemVector.intItemID = arguments.arItems[cItem].intItemID;
				stItemVector.arVector = ArrayNew(1);
				for (cTerm=1; cTerm LTE ArrayLen(arguments.arTermSpace); cTerm=cTerm+1) {
					if (StructKeyExists(stFrequencies, arguments.arTermSpace[cTerm])) {
						stItemVector.arVector[cTerm] =
							stFrequencies[arguments.arTermSpace[cTerm]];
					} else {
						stItemVector.arVector[cTerm] = 0;
					}
				}
				ArrayAppend(arItemVectors, stItemVector);
			}
		</cfscript>
		<cfreturn arItemVectors />
	</cffunction>

	<!---
		getItemMatches
		Scores every candidate vector against the current item's vector and
		returns the best matches.
		@arCurrentItemVector  vector of the item being viewed
		@arItemVectors        array of structs with intItemID and arVector keys
		@iMaxRows             maximum number of matches to return (default 10)
		@iThreshold           minimum cosine measure for a match to be kept
		                      (default 10)
		@return               array of structs with keys intItemID and
		                      intCosineMeasure, sorted by intCosineMeasure
		                      descending and cut to at most iMaxRows entries
	--->
	<cffunction name="getItemMatches" access="public" returntype="array" output="False" hint="Return list of item matches">
		<cfargument name="arCurrentItemVector" type="array" required="true" hint="Current item vector" />
		<cfargument name="arItemVectors" type="array" required="true" hint="Full set of item vectors" />
		<cfargument name="iMaxRows" type="numeric" required="false" default="10" />
		<cfargument name="iThreshold" type="numeric" required="false" default="10" />
		<cfset var arItemMatches = ArrayNew(1) />
		<cfset var stMatch = "" />
		<cfset var cItem = 0 />
		<cfset var iKeep = 0 />
		<cfset var intHoldingCosMeasure = 0 />
		<cfscript>
			// Calculate cosine measure for each item array and record if
			// greater than or equal to the provided threshold value
			for (cItem=1; cItem LTE ArrayLen(arguments.arItemVectors); cItem=cItem+1) {
				intHoldingCosMeasure = calculateCosineMeasure(
					arVector1=arguments.arCurrentItemVector,
					arVector2=arguments.arItemVectors[cItem].arVector);
				if (intHoldingCosMeasure GTE arguments.iThreshold) {
					stMatch = StructNew();
					stMatch.intItemID = arguments.arItemVectors[cItem].intItemID;
					stMatch.intCosineMeasure = intHoldingCosMeasure;
					ArrayAppend(arItemMatches, stMatch);
				}
			}

			// Sort array by cosine measure (nothing to reorder for 0 or 1 match)
			if (ArrayLen(arItemMatches) GT 1) {
				arItemMatches = arrayOfStructsSort(arItemMatches,
					"intCosineMeasure", "desc", "numeric");
			}

			// Truncate array if longer than maximum number of rows. A negative
			// limit is treated as 0 so the loop never tries to delete index 0.
			iKeep = Max(arguments.iMaxRows, 0);
			for (cItem=ArrayLen(arItemMatches); cItem GT iKeep; cItem=cItem-1) {
				ArrayDeleteAt(arItemMatches, cItem);
			}
		</cfscript>
		<cfreturn arItemMatches />
	</cffunction>
</cfcomponent>