An F# Wrapper for Weka.

Update on July 22, 2013: Rebuilt with Visual Studio 2012 and F# 3.0. Updated the IKVM runtime. Download it here, or from this link in case your connection to CodePlex is slow. The source code has not been updated because CodePlex's SVN service seems to be down right now. 

 

 

Update on Apr. 18, 2011: Added a C# example to the repository. It shows 1) how to train and test a classifier and 2) how to save and load the model (i.e., serialization).

 

 

Sample Usage

A tutorial is available on my blog, F# and Data Mining:

WekaSharp: Tutorial for running Weka in F#/.Net


Implementation details are covered in:

WekaSharp: An F# wrapper for Weka



The following are examples:

(* playing decision trees on Iris dataset *)

// Load the Iris dataset from its ARFF file, then hand it to
// Dataset.setClassIndexWithLastAttribute (by its name, this selects the last
// attribute as the class attribute).
// The pipeline is indented under the binding: F# light syntax does not accept
// a binding body that starts in column 0.
let iris =
    @"C:\Program Files\Weka-3.6\data\iris.arff"
    |> Dataset.readArff
    |> Dataset.setClassIndexWithLastAttribute

// Three J48 classification tasks on the iris data, all using the default
// J48 parameters:
//   j48Tt - train/test, with iris passed as both datasets
//   j48Cv - cross validation, first argument 5
//   j48Rs - random split, first argument 0.7
let j48Tt =
    TrainTest(iris, iris, ClassifierType.J48, Parameter.J48.DefaultPara)

let j48Cv =
    CrossValidation(5, iris, ClassifierType.J48, Parameter.J48.DefaultPara)

let j48Rs =
    RandomSplit(0.7, iris, ClassifierType.J48, Parameter.J48.DefaultPara)

// Run each task through Eval.evalClassify and read the accuracy off the result.
let ttAccuracy = Eval.getAccuracy (Eval.evalClassify j48Tt)
let cvAccuracy = Eval.getAccuracy (Eval.evalClassify j48Cv)
let rsAccuracy = Eval.getAccuracy (Eval.evalClassify j48Rs)

(* test cluster algorithms *)

// Iris data read with Dataset.readArffLastAttributeAsLabel and then passed
// through Dataset.removeClassAttribute (by its name, this drops the class
// attribute again).
// The binding starts in column 0 like every other top-level binding, and its
// body is indented, as F# light syntax requires.
let irisNolabel =
    @"C:\Program Files\Weka-3.6\data\iris.arff"
    |> Dataset.readArffLastAttributeAsLabel
    |> Dataset.removeClassAttribute

// Iris data read with Dataset.readArffLastAttributeAsLabel; this is the
// dataset handed to the clustering tasks.
// The body is indented under the binding, as F# light syntax requires.
let irisLabeled =
    @"C:\Program Files\Weka-3.6\data\iris.arff"
    |> Dataset.readArffLastAttributeAsLabel

// Three clustering tasks on the labeled iris data:
// k-means and EM are built with MakePara(K=3); DBScan uses its default parameters.
let kmeansTask =
    ClusterWithLabel(irisLabeled, ClustererType.KMeans, Parameter.KMeans.MakePara(K=3))

let emTask =
    ClusterWithLabel(irisLabeled, ClustererType.EM, Parameter.EM.MakePara(K=3))

let dbscanTask =
    ClusterWithLabel(irisLabeled, ClustererType.DBScan, Parameter.DBScan.DefaultPara)


// Evaluate each clustering task and fetch its summary.
let kmeansResult = kmeansTask |> Eval.evalClustering |> Eval.getClusterSummary
let emResult     = emTask     |> Eval.evalClustering |> Eval.getClusterSummary
let dbscanResult = dbscanTask |> Eval.evalClustering |> Eval.getClusterSummary

 

(* bulk & parallel processing example *)


// Load the sonar dataset from its ARFF file, then hand it to
// Dataset.setClassIndexWithLastAttribute (by its name, this selects the last
// attribute as the class attribute).
// The pipeline is indented under the binding, as F# light syntax requires.
let sonar =
    @"D:\temp\datasets-UCI\UCI\sonar.arff"
    |> Dataset.readArff
    |> Dataset.setClassIndexWithLastAttribute

// Candidate values for the SVM parameter C.
let Cs = [0.01; 0.1; 1.; 10.; 50.; 100.; 500.; 1000.; 2000.; 5000. ]

// One cross-validation task (first argument 3) on the sonar data per value of C.
// The pipeline is indented under the binding, as F# light syntax requires.
let tasks =
    Cs
    |> List.map (fun c -> Parameter.SVM.MakePara(C = c))
    |> List.map (fun p -> CrossValidation(3, sonar, ClassifierType.SVM, p))

// Time the sequential run: Profile.tic() before, Profile.toc(...) after.
Profile.tic()
// The accuracy of every task, evaluated with Eval.evalBulkClassify.
// The pipeline is indented under the binding, as F# light syntax requires.
let results =
    tasks
    |> Eval.evalBulkClassify
    |> List.map Eval.getAccuracy
Profile.toc("sequential time: ")


// Time the parallel run: Profile.tic() before, Profile.toc(...) after.
Profile.tic()
// Same tasks as the sequential run, evaluated with Eval.evalBulkClassifyParallel.
// The pipeline is indented under the binding, as F# light syntax requires.
let resultsParallel =
    tasks
    |> Eval.evalBulkClassifyParallel
    |> List.map Eval.getAccuracy
Profile.toc("parallel (PSeq) time: ")

// sequential time: : 9767.804800 ms
// parallel (PSeq) time: : 6154.715500 ms



(* do the plot *)

// Column chart of the accuracies in `results`, one column per value of C.
// `results` comes from the cross-validation tasks on the sonar data, so the
// title names that dataset.
lc.column(y = results, xname = "different C", yname = "Accuracy", title = "SVM on sonar",
          isValueShownAsLabel = true) |> display

 

(* create dataset from F# arrays *)


// Make the data array: four 2-D points stored row by row.
// Continuation lines of the array literal are indented so the literal stays
// inside the binding under F# light syntax.
let data =
    [| 0.; 0.;
       1.; 1.;
       0.; 1.;
       1.; 0. |]

// Reshape the flat array into a 4 x 2 matrix: element (i, j) is data.[i*2 + j].
let xorArray = Array2D.init 4 2 (fun i j -> data.[i*2 + j])

// Build a Weka dataset from the 2-D array.
// NOTE(review): the meaning of the `false` flag is not visible here; confirm
// against Dataset.from2DArray.
let xor0 = Dataset.from2DArray xorArray false

// Attach the class labels: four labels for the four rows.
let xor = Dataset.addClassLabels ["T"; "T"; "F"; "F"] xor0

// Two SVM train/test tasks on the XOR data, each passing `xor` as both datasets:
// one with the default SVM parameters, one with a linear kernel.
let rbfTask =
    TrainTest(xor, xor, ClassifierType.SVM, Parameter.SVM.DefaultPara)

let linearTask =
    TrainTest(xor, xor, ClassifierType.SVM,
              Parameter.SVM.MakePara(kernelType = Parameter.SVMKernelType.LinearKernel))

// rbf svm gets 100% accuracy
let rbfAccuracy = Eval.getAccuracy (Eval.evalClassify rbfTask)
// linear svm does not work on XOR data set
let linearAccuracy = Eval.getAccuracy (Eval.evalClassify linearTask)

 

 

More Datasets

Sample ARFF data files are available at http://www.cs.waikato.ac.nz/ml/weka/index_datasets.html.

Last edited Jul 22, 2013 at 4:47 PM by yinz, version 17