Read my latest posts!

Thursday, April 28, 2005

VBScript ART implementation

Well, here's the ART implementation as it currently exists. It is currently limited to 31 fields due to Integer limitations. If you want to hack away at it, be my guest. There's lots of improvements that can be made, such as:
use and storage in a real DB, so that when new data is used, the entire process is not being run; expansion of the number of fields it may use; storage of "optimal" parameters; wrap into a Windows Script Component; etc. Comments are in Green. (Sorry, the formatting/indentation got wiped from the orginal.)
Note: I have corrected the sense of the test as my other post points out.

' Working parameters:
' P_sub_I & E - vector bitwise AND;
' ||v|| - Magnitude of v (number of 1s set); This can be found through our code.
' N - Number of Prototype Vectors; This can be found through our code.
' rho - Vigilance parameter (0 <>
' P - Prototype Vector; These are what we are comparing against.
' E - Example Vector; These are what are being classified.
' d - dimension of Vectors (number of features); in other languages could be passed as part of a Struct, we will have to count them
' B - Beta parameter; "A small positive integer"

Option Explicit
' WScript.Quit

Dim objDlg, objF, Outfile
Dim dictExampleVectors 'the data we are feeding this beast
Dim dictPrototypes 'the value to check any classification
Private dictClasses 'Dictionary object to store the results in
Dim dictProtoMembers 'dictionary object to keep track of member count
Dim ProtoKey 'Variable to use as a key
Dim Satisfied 'answer to question to determine if user is satisfied
Dim rho, BetaParameter 'Vigilance parameter & Beta parameter, respectively;determines class size/matching & a tie-breaker
Dim example_vector, Prototype 'The data we are classifying
Dim total 'temporary integer
Dim intMagP, intMagE 'Magnitude of Features (the number of ones set) for Prototype & Example respectively
Dim intMagP_E 'Magnitude of Features (the number of ones set) for Prototype & Example ANDed together
Dim d 'the number of features the Examples have
Dim Matched 'Variable to indicate a match to a prototype
Dim OS
Private objADOConnection, objADOCommand, objADORSData
Dim FileSystem
Dim OutputFile
Dim Out
Dim StartTime, a, nochange
Dim ClassificationTime

Const Title = "Classification Script"
Const rhoPrompt = "Enter a value for Rho (0 < rhoprompt2 = "Rho is proportional to class size. If set low enough (<0.1), vectors must match." betaprompt = "Enter a value for Beta. (Must be a small positive integer.)" forreading =" 1" bif_returnonlyfsdirs =""> ' allow the user to select only folders
Const BIF_dontgobelowdomain = &H0002 ' prevent the user to go below a given domain
Const BIF_statustext = &H0004 ' ???
Const BIF_returnfsancestors = &H0008 ' only file system entries
Const BIF_editbox = &H0010 ' disply a text box to enter a path
Const BIF_validate = &H0020 ' OK will be enabled only, if a valid entry is selected
Const BIF_browseforcomputer = &H1000 ' show the computer branch in the tree
Const BIF_browseforprinter = &H2000 ' show the printer branch in the tree
Const BIF_browseincludefiles = &H4000 ' allow files to be selected
' There is an additional optional parameter.
' This fourth parameter gave the following experimental options.
' When you pass a textstring instead of the numeric BSF-constants, the root
' will be this specific folder or drive.
Const BSF_desktop = 0 'Desktop is the root directory.
'With BIF_returnonlyfsdirs circumvents
'problem with OK-button
Const BSF_internetexplorer = 1 'Internet Explorer is the root
Const BSF_programs = 2 'Programs folder of the start menu is the root
Const BSF_controlpanel = 3 'Control Panel is the root. Needs BIF_browseincludefiles
Const BSF_printers = 4 'Printers folder is the root. Needs BIF_browseincludefiles
Const BSF_documents = 5 'Documentsfolder is the root
Const BSF_favorites = 6 'Favorites is the root
Const BSF_startup = 7 'Startup-folder of the startmenu is the root. Needs BIF_browseincludefiles
Const BSF_recent = 8 'Recentfolder is the root. Needs BIF_browseincludefiles
Const BSF_sendto = 9 'Sendto-folder is the root. Needs BIF_browseincludefiles
Const BSF_recyclebin = 10 'Recycle Bin is the root. Needs BIF_browseincludefiles
Const BSF_startmenu = 11 'Start Menu is the root
Const BSF_desktopdirectory = 16 'The Desktopdirectory is the root directory
Const BSF_drives = 17 'The drives (My computer) folder is the root
Const BSF_network = 18 'The networkneighbourhood is the root
Const BSF_nethood = 19 'The nethoodfolder is the root
Const BSF_fonts = 20 'The fontsfolder is the root
Const BSF_templates = 21 'The templatesfolder is the root

Call Main()
Sub Main() 'this sub should give a generic user interface, provide for changes to the parameters
Dim Key
Dim expander
Dim TmpString
If (MsgBox ("This is a Classification script." & vbCrLf & _
"Currently, this implementation is limited to the use of 31 Boolean fields," & vbCrLf & _
"and one ID field. The ID field MUST be unique." & vbCrLf &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; vbCrLf & vbCrLf & _
"You may press Escape or click Cancel to exit now." & vbCrLf, vbOKCancel, Title) <> vbOK) Then WScript.Quit
Set dictExampleVectors = CreateObject("Scripting.Dictionary")
Set dictPrototypes = CreateObject("Scripting.Dictionary")
Set dictClasses = CreateObject("Scripting.Dictionary")
Set dictProtoMembers = CreateObject("Scripting.Dictionary")
Call LoadExamples() 'Use LoadExamples to set "d" to
While Not (Satisfied = vbYes)
dictPrototypes.RemoveAll 'clear the results/working vars
dictClasses.RemoveAll 'clear the results/working vars
dictProtoMembers.RemoveAll 'clear the results/working vars
ProtoKey = 0 'initilize the Key value
StartTime = Time 'to calculate the classification time
Call ART1() 'the actual algorithm
'display the number of classes returned using the rho, and Beta
ClassificationTime = DateDiff ("s", StartTime, Time) 'calculate classification time in seconds
MsgBox "Classified " & dictExampleVectors.Count & " items."
MsgBox "Classified " & dictExampleVectors.Count & " items." & vbCrLf &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; vbCrLf & vbCrLf & "Time to classify was " & ClassificationTime & " seconds."
TmpString = "Using Rho = " & rho &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; ", and Beta = " & BetaParameter & vbCrLf & ", the returned Number of Classes is " & dictPrototypes.Count & "."
' MsgBox TmpString
Satisfied = MsgBox (TmpString & vbCrLf &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; vbCrLf & vbCrLf & "Are these results satisfactory?",vbYesNo,Title)'try other parameter values
'Write the results to a file
OutPutFile.WriteLine "Classifications:" 'section header
For Each Key In dictClasses 'go through the classes
OutPutFile.WriteLine Key & vbTab & dictClasses.Item(Key) 'write the classification for each example
OutPutFile.WriteLine "Quit after " & a + 1 & " iterations." 'write run information
OutPutFile.WriteLine "Classified " & dictExampleVectors.Count & " items." & vbCrLf & "Time to classify was " & ClassificationTime & " seconds."
OutPutFile.WriteLine Replace (TmpString, vbCrLf, "")
OutPutFile.WriteLine "Prototypes:" 'section header
TmpString = "" 'clear var for reuse
For Each Key In dictPrototypes 'go through the prototypes, useful to see what the criteria was for each class
For expander = 0 To d - 1 'go through the bits of each proto
If (dictPrototypes.Item(Key) And 2 ^ expander) > 0 Then 'calculate the bit value
TmpString = TmpString & "True" & vbTab 'print value
TmpString = TmpString & "False" & vbTab 'print value
End If
OutPutFile.WriteLine Key & vbTab & TmpString & vbTab & dictPrototypes.Item(Key) 'print them, print the number for easy determination of duplication
TmpString = "" 'clear var

Set FileSystem = Nothing
MsgBox WScript.ScriptName & " Finished." &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; vbCrLf & vbCrLf & "Artificial Intelligence Rules!", , Title 'Inform that we are finished.
End Sub

' LoadExamples should create the dictExampleVectors and pass it to ART1(), then give a display or filename, et cetera.
Sub LoadExamples () 'This will be the code to customize for new scripts.
Dim data
Call makeConnection() 'get the file/database/whatever we are using for input
'read the first record, count the Boolean fields
d = intFeaturesSize(objADORSData) 'Call function to determine number of features to use in Proximity Test & in Magnitude function
'read code goes here 'Pass it to intFeatureSize, which will determine the number of boolean fields
Do Until objADORSData.EOF 'go through each record needing classification
data = ConvertToInt(objADORSData) 'function call to make the binary vals to bitfields
dictExampleVectors.Add objADORSData.Fields("ID").Value, data 'populate the dictionary with the record
objADORSData.MoveNext 'move to next record
Loop 'start over
End Sub
' *******************
Function intFeaturesSize (ByRef record) 'Determine the number of Booleans
Dim tmp 'Counter variable
Dim FLDS, field 'record fields
tmp = 0 'initialize tmp
Set FLDS = record.fields 'so we can use the for
For Each field In FLDS 'step through the fields
If VarType (field) = vbBoolean Then tmp = tmp + 1 'increment tmp by 1 for each Boolean
intFeaturesSize = tmp 'return the count of Booleans in the record
End Function

Function ConvertToInt (ByRef stuff) 'nuff said
Dim tmpInt, indexcount, FLDS, field
tmpInt = CLng (0) 'initialize value, make it a long integer
indexcount = -1 'initialize value, so when incremented it starts as 0
Set FLDS = stuff.fields 'so we can use the for
For Each field In FLDS 'go through the fields
If (VarType (field ) = vbBoolean) Then 'data we are using for classification?
indexcount = indexcount + 1 'counter of which bit we're looking at
If (field.value = True) Then 'check the value
tmpInt = CLng (tmpInt + (2 ^ indexcount)) 'store it in the appropriate bitfield
End If
End If
ConvertToInt = CLng (tmpInt) 'return the bitfield populated value
End Function

Sub makeConnection() 'make a connection to an Excel file,database, flatfile, etc.
Dim strComputer, objWMIService, colItems, objItem, DefaultFile
DefaultFile = "C:\Temp\Classification.xls" 'provide a default file for input
strComputer = "." 'figure out OS on THIS computer
Set objWMIService = GetObject("winmgmts:\\" & strComputer & "\root\cimv2") 'connect to the machine
' Set colItems = objWMIService.ExecQuery("Select * from Win32_OperatingSystem",,48) 'query it
Set colItems = objWMIService.ExecQuery("Select * from Win32_OperatingSystem",,48)

' For Each objItem in colItems
OS = colItems(0).Version 'just looking at the OS version
' Next
' WScript.Echo colItems.version
Select Case OS
Case "5.1.2600"
' OS is Windows XP, do the dialogbox.
' File Dialog code ---->
Set objDlg = WScript.CreateObject("UserAccounts.CommonDialog") 'CommonDialog to navigate to the Input
With objDlg 'Setup the Dialogbox to show Excel by default
.Filter = "Excel Files|*.xls|All Files|*.*"
.FilterIndex = 1
.InitialDir = "C:\temp" 'Gotta start somewhere...
.ShowOpen 'Show the Dialog
DefaultFile = .FileName 'Get the filename
End With
Case Else
' OS is Windows 2000 or older, do an inputbox
DefaultFile = InputBox("Enter the Input Filename Desired",Title,DefaultFile)
End Select
Set FileSystem = Wscript.CreateObject("Scripting.FileSystemObject") 'need to work with files
If Not FileSystem.FileExists(DefaultFile) Then 'check if file is there
WScript.Echo "File does not exist." & vbCrLf &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; vbCrLf & vbCrLf & "Exiting." 'if not, tell what's wrong
WScript.Quit 'get out
End If
Set objADOConnection = CreateObject("ADODB.Connection") 'connection to "DB"
With objADOConnection 'save typing, make clear what we're working on/doing
.Provider = "MSDASQL" 'details
.ConnectionString = "Driver={Microsoft Excel Driver (*.xls)};DBQ=" & DefaultFile &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; ";ReadOnly=False;"
.Open 'open the connection
'.Connection.Properties("FirstRowHasNames") = 1 'remove comment to prevent loss of first row's data (no headers), default is true
End With
Set objADOCommand = CreateObject("ADODB.Command") 'command to run on the "DB"
Set objADOCommand.ActiveConnection = objADOConnection 'connect the connection to the command
objADOCommand.CommandText = "Select * From [Sheet1$]" 'the query
Set objADORSData = CreateObject("ADODB.RecordSet") 'recordset to work with
objADORSData.Open objADOCommand.Execute 'populate recordset by execution of command

' File Dialog for OUTPUT ----->
DefaultFile = Right (DefaultFile, Len (DefaultFile) - InStr(DefaultFile, "\")) 'carve off the left part of the string to the slash
Loop Until InStr(DefaultFile, "\") = 0 'til they're all gone
Out = Left ( DefaultFile, InStr (DefaultFile, ".")) 'carve off the file extension
Select Case OS 'dialog is dependant on OS
Case "5.1.2600"
' OS is Windows XP, do the dialogbox.
' File Dialog code ---->
Set objDlg = WScript.CreateObject("UserAccounts.CommonDialog") 'CommonDialog to navigate to the Output
With objDlg 'Setup the Dialogbox to show Tab separated vaule files by default
.Filter = "Tab Separated Values|*.tsv"
.FilterIndex = 1
.InitialDir = "C:\exports" 'Gotta start somewhere...
.ShowOpen 'Show the Dialog
Out = .FileName 'Get the filename
End With
Case Else
' OS is Windows 2000 or older, do an inputbox
Out = Inputbox("Enter a filename for to store the Classification data.", Title,"c:\Exports\" & Out & ".tsv")
End Select
Set OutPutFile = FileSystem.CreateTextFile(Out) 'setup handle to file
End Sub

Sub ART1()
Dim prototypeToCheck
rho = InputBox (rhoPrompt & vbCrLf & rhoPrompt2, Title, "0.11") 'get a value
BetaParameter = InputBox (BetaPrompt, Title, "2") 'get a value
Call create_initial_vector() 'create the first Prototype
For a = 0 To 24 'geesh, hope never to run through this many iterations!
nochange = True 'var to determine when to drop out
For Each example_vector In dictExampleVectors 'go through the data
intMagE = Magnitude (dictExampleVectors.Item(example_vector)) 'get the Magnitude so we dont do it repeatedly
Matched = False 'Variable to let a new Prototype creation
For Each Prototype In dictPrototypes 'go through the protypes
intMagP = Magnitude (dictPrototypes.Item(Prototype)) 'get the Magnitude so we dont do this one repeatedly
intMagP_E = Magnitude ((dictPrototypes.Item(Prototype) And dictExampleVectors.Item(example_vector))) 'Number of common trues, so we dont do this repeatedly
If Promity_function() Then 'Proximity Test function
If Passes_Vigilance_test() Then 'Vigilance Test function
If dictClasses.Exists(example_vector) Then 'its been classified before, if different, change and check the prototype for Empty
prototypeToCheck = dictClasses.item(example_vector) 'check whether the prototype should be deleted.
If dictClasses.item(example_vector) <> prototypeToCheck Then 'check whether to do anything
If (dictProtoMembers.Item(Prototype) - 1 = 0) Then 'check the number of members
dictPrototypes.Remove (prototypeToCheck) 'delete empty class prototype
nochange = False 'dont get out this iteration yet
End If
End If
End If
dictClasses.item(example_vector) = Prototype 'reset value or add if necessary
Call Integrate_example_vector_into_current_prototype_vector (Prototype, example_vector) 'trim the criteria
Exit For 'if here, passed vigilance, found class, don't need to check the other prototypes
End If
End If
Next 'Prototype
If Not Matched Then 'didn't pass tests
Call Create_New_Prototype (example_vector) 'the cheese stands alone
dictClasses.item(example_vector) = ProtoKey 'new class gets this one
End If
' Call check()
Next 'example_vector
If noChange Then Exit For 'if there were no changes, resonance has ceased, no use going anymore
End Sub

Sub create_initial_vector() 'This sub creates the first Prototype from the first example vector
Dim key 'var to store the keys
key = dictExampleVectors.Keys 'new key
dictPrototypes.Item(0) = dictExampleVectors.Item(key(0)) 'just assign numeric keys for ExampleVectors
dictProtoMembers.Item(0) = 1 'if there's a member count it
End Sub

Sub Integrate_example_vector_into_current_prototype_vector(proto,vector)
dictPrototypes.Item(proto) = dictPrototypes.Item(proto) And dictExampleVectors.Item(vector) 'bitwise AND together,
dictProtoMembers.Item(proto) = dictProtoMembers.Item(proto) + 1 'count the member
Matched = True 'to prevent the creation of a new proto
End Sub

Sub Create_New_Prototype(vector)
ProtoKey = ProtoKey + 1 'create a new prototype key
dictPrototypes.Item(ProtoKey) = dictExampleVectors.Item(vector) 'create the new prototype
dictProtoMembers.Item(ProtoKey) = 1 'if there's a member count it
nochange = False 'prevent dropping out this iteration
End Sub

Function Promity_function() 'will use the Magnitude
If intMagP_E /(BetaParameter + intMagP) > intMagE / (BetaParameter + d) Then 'the actual proximity test
Promity_function = True 'passed, its close
Promity_function = False 'failed, its different
End If
End Function

Function Passes_Vigilance_test() 'will use the Magnitude
If intMagP_E/intMagE >= rho Then 'the actual vigilance test
Passes_Vigilance_test = True 'pass
Passes_Vigilance_test = False 'fail
End If
End Function

Function Magnitude(FeatureSet)
Dim total, indexcount
total = 0
For indexcount = 0 to d - 1 'go through the number of bits of data
If (FeatureSet And (2 ^ indexcount)) > 0 Then total = total + 1 'calculate the number of 1s
Magnitude = total 'the number of features with the value "1" in the set.
End Function

' Sub check ()
' Dim thing, tmpString
' tmpString = "Example Vectors:" & vbCrLf & vbCrLf
' For Each thing In dictExampleVectors
' tmpString = tmpString & thing & vbTab & dictExampleVectors.Item(thing) & vbCrLf
' Next
' WScript.Echo tmpString
' tmpString = "Prototype Vectors:" & vbCrLf & vbCrLf
' For Each thing In dictPrototypes
' tmpString = tmpString & thing & vbTab & dictPrototypes.Item(thing) & vbCrLf
' Next
' WScript.Echo tmpString
' tmpString = "Classifications:" & vbCrLf & vbCrLf
' For Each thing In dictClasses
' tmpString = tmpString & thing & vbTab & dictClasses.Item(thing) & vbCrLf
' Next
' WScript.Echo tmpString
' End Sub

Labels: , ,


Post a Comment

<< Home