An F# Wrapper for Weka.
Update (Apr. 18, 2011): added a C# example to the repository. It shows 1) how to train and test a classifier and 2) how to save and load a model (i.e. serialization).
Sample Usage
Implementation Details are covered:
The following are examples:
(* Decision trees (J48) on the Iris dataset *)

// Read the ARFF file, then mark the last attribute as the class attribute.
let iris =
    Dataset.setClassIndexWithLastAttribute
        (Dataset.readArff @"C:\Program Files\Weka-3.6\data\iris.arff")

// Three classification task descriptions, all using J48 with its default parameters:
//   j48Tt - train/test, here with the same dataset passed for both roles
//   j48Cv - cross-validation (first argument 5; presumably the number of folds)
//   j48Rs - random split (first argument 0.7; presumably the training fraction)
let j48Tt =
    TrainTest(iris, iris, ClassifierType.J48, Parameter.J48.DefaultPara)
let j48Cv =
    CrossValidation(5, iris, ClassifierType.J48, Parameter.J48.DefaultPara)
let j48Rs =
    RandomSplit(0.7, iris, ClassifierType.J48, Parameter.J48.DefaultPara)

// Evaluate each task, then pull the accuracy out of the evaluation result.
let ttAccuracy = Eval.getAccuracy (Eval.evalClassify j48Tt)
let cvAccuracy = Eval.getAccuracy (Eval.evalClassify j48Cv)
let rsAccuracy = Eval.getAccuracy (Eval.evalClassify j48Rs)
(* Clustering algorithms on the Iris dataset *)

// Iris read with the last attribute as the label, then with the class attribute removed.
let irisNolabel =
    Dataset.removeClassAttribute
        (Dataset.readArffLastAttributeAsLabel @"C:\Program Files\Weka-3.6\data\iris.arff")

// Iris read a second time, keeping the label.
let irisLabeled =
    Dataset.readArffLastAttributeAsLabel @"C:\Program Files\Weka-3.6\data\iris.arff"

// One clustering task per algorithm, each built on the labelled dataset.
// KMeans and EM are given K=3; DBScan uses its default parameters.
let kmeansTask =
    ClusterWithLabel(irisLabeled, ClustererType.KMeans, Parameter.KMeans.MakePara(K=3))
let emTask =
    ClusterWithLabel(irisLabeled, ClustererType.EM, Parameter.EM.MakePara(K=3))
let dbscanTask =
    ClusterWithLabel(irisLabeled, ClustererType.DBScan, Parameter.DBScan.DefaultPara)

// Run each clustering task and get its summary.
let kmeansResult = kmeansTask |> Eval.evalClustering |> Eval.getClusterSummary
let emResult = emTask |> Eval.evalClustering |> Eval.getClusterSummary
let dbscanResult = dbscanTask |> Eval.evalClustering |> Eval.getClusterSummary
(* Bulk and parallel evaluation example *)

// Read the sonar dataset and mark its last attribute as the class attribute.
let sonar =
    Dataset.setClassIndexWithLastAttribute
        (Dataset.readArff @"D:\temp\datasets-UCI\UCI\sonar.arff")

// Values of the SVM parameter C to try.
let Cs =
    [ 0.01; 0.1; 1.; 10.; 50.
      100.; 500.; 1000.; 2000.; 5000. ]

// One cross-validation task per C value (first argument 3; presumably the fold count).
// All parameter objects are built first, then the tasks, in list order.
let tasks =
    let paras = [ for c in Cs -> Parameter.SVM.MakePara(C = c) ]
    [ for p in paras -> CrossValidation(3, sonar, ClassifierType.SVM, p) ]

// Sequential run: evaluate every task and collect the accuracies.
Profile.tic()
let results =
    List.map Eval.getAccuracy (Eval.evalBulkClassify tasks)
Profile.toc("sequential time: ")

// Parallel run over the same task list.
Profile.tic()
let resultsParallel =
    List.map Eval.getAccuracy (Eval.evalBulkClassifyParallel tasks)
Profile.toc("parallel (PSeq) time: ")

// Output recorded by the author:
// sequential time: : 9767.804800 ms
// parallel (PSeq) time: : 6154.715500 ms
(* Plot the accuracy obtained for each value of C *)
// `results` holds the accuracies of the SVM cross-validation tasks, which are
// built on the sonar dataset, so the chart is titled accordingly.
lc.column(y = results, xname = "different C", yname = "Accuracy", title = "SVM on sonar",
          isValueShownAsLabel = true) |> display
(* Create a dataset from F# arrays *)

// Four 2-D points stored row by row: (0,0), (1,1), (0,1), (1,0).
let data = [| 0.; 0.;
              1.; 1.;
              0.; 1.;
              1.; 0.; |]

// Reshape the flat array into a 4x2 matrix: row i, column j reads data.[i*2 + j].
let xorArray = Array2D.init 4 2 (fun i j -> data.[i*2 + j])

// Build a Weka dataset from the matrix.
// NOTE(review): the meaning of the `false` flag is not visible here - confirm against Dataset.from2DArray.
let xor0 = Dataset.from2DArray xorArray false

// Attach the class labels, one per row, following the XOR truth table:
// (0,0) -> F, (1,1) -> F, (0,1) -> T, (1,0) -> T.
let xor = xor0 |> Dataset.addClassLabels ["F"; "F"; "T"; "T"]

// Two SVM tasks that train and test on the same four points:
// one with the default parameters, one with an explicitly linear kernel.
let rbfTask = TrainTest(xor, xor, ClassifierType.SVM, Parameter.SVM.DefaultPara)
let linearTask =
    TrainTest(xor, xor, ClassifierType.SVM,
              Parameter.SVM.MakePara(kernelType = Parameter.SVMKernelType.LinearKernel))

// The author reports 100% accuracy for the RBF SVM (the default-parameter task).
let rbfAccuracy = rbfTask |> Eval.evalClassify |> Eval.getAccuracy

// A linear SVM cannot separate the XOR data set.
let linearAccuracy = linearTask |> Eval.evalClassify |> Eval.getAccuracy
More Datasets
Sample ARFF data files are available at
http://www.cs.waikato.ac.nz/ml/weka/index_datasets.html.