這次介紹一個我現在正在做的半監督算法。由於我只是提出一個框架,半監督算法只是一個工具,但是爲了找這個工具也讓我花了很多功夫。現在介紹的暫時不是Weka的一部分,但開發者來自Waikato大學(也就是開發Weka的那個大學)。下載地址是 http://www.cs.waikato.ac.nz/~fracpete/projects/collective-classification/ 。我下載的是「Classifiers compatible now with Weka 3.5.8」這個版本,別的幾種能不能用我也不知道。然後我也按作者說的把Weka 3.5.8下載下來了(不知道和之前的版本是否兼容)。使用非常簡單:先到Weka目錄下,把weka-src.jar解壓,再把下載的src.tar.gz在Weka目錄下解壓,如果有提示替換就替換;如果沒有替換,證明你的操作有錯。
然後就可以試一下了,把所有Weka源文件拷貝到你新的工程中去。下面我給出了一個測試的例子,不要太相信我的辦法,我還沒細看他的代碼(分成訓練和測試樣本的方法有問題,懶得改了,誰改完了,請發給我)。注意一下,他訓練時同時使用訓練集和測試集。如果不明白,可以下載他的一篇論文《Using Weighted Nearest Neighbor to Benefit from Unlabeled Data》,非常簡單,不用害怕。
補充:Tri-train是屬於Multi-View的半監督算法,可以從周志華的網站上下載到,論文和代碼都不是很難懂,網址是:http://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/annex/TriTrain.htm ,這裏不做解釋。如果還有別的基於Weka的半監督代碼,請告訴我。
package semiTest;

import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

import weka.classifiers.Evaluation;
import weka.classifiers.collective.functions.LLGC;
import weka.classifiers.collective.meta.CollectiveBagging;
import weka.classifiers.collective.meta.YATSI;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.instance.Resample;

/**
 * Small driver that loads an ARFF dataset, splits it into a training part and
 * a test part, and prints the evaluation summary of a supervised baseline
 * (J48) next to several collective classifiers (YATSI, LLGC,
 * CollectiveBagging).
 *
 * <p>The collective classifiers are built with both the training set and the
 * test set, as their {@code buildClassifier(train, test)} calls below show.
 *
 * <p>Typical call order: {@link #getFileInstances(String)}, then
 * {@link #FilterRemovePercentageTest()}, then any of the {@code *Test()}
 * methods.
 */
public class Test {

    /** Dataset used by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_DATASET =
            "F://Program Files//Weka-3-4//data//soybean.arff";

    /** Full dataset as loaded by {@link #getFileInstances(String)}. */
    Instances m_instances = null;

    /** Test part produced by {@link #FilterRemovePercentageTest()}. */
    Instances m_testIns = null;

    /** Training part produced by {@link #FilterRemovePercentageTest()}. */
    Instances m_trainIns = null;

    /**
     * Loads the ARFF file at {@code fileName} into {@code m_instances}.
     *
     * @param fileName path of the ARFF file to read
     * @throws Exception if the file cannot be opened or parsed
     */
    public void getFileInstances( String fileName ) throws Exception {
        FileReader frData = new FileReader( fileName );
        try {
            m_instances = new Instances( frData );
        } finally {
            // The reader is not needed once the instances are in memory.
            frData.close();
        }
    }

    /**
     * Writes the textual form of {@code ins} ({@code ins.toString()}) to a file.
     *
     * @param newFilePath path of the file to create or overwrite
     * @param ins         dataset to write
     * @throws IOException if the file cannot be written
     */
    public void writeToArffFile( String newFilePath, Instances ins ) throws IOException {
        BufferedWriter writer = new BufferedWriter( new FileWriter( newFilePath ) );
        try {
            writer.write( ins.toString() );
        } finally {
            // close() flushes the buffer; doing it in finally avoids leaking
            // the file handle when write() fails.
            writer.close();
        }
    }

    /**
     * Splits {@code m_instances} into {@code m_trainIns} (10%) and
     * {@code m_testIns} (the remaining 90%), writes both parts to
     * {@code TrainData.arff} / {@code TestData.arff}, and marks the last
     * attribute of each part as the class attribute.
     *
     * <p>The previous version drew a 10% and a 90% subsample independently, so
     * the two parts were not disjoint. The test part is now requested as the
     * inverted selection ({@code -V}) of the same 10% draw. Both filters use
     * the filter's default seed, so the parts are expected to be
     * complementary. NOTE(review): relies on Weka's documented {@code -V}
     * option of the unsupervised Resample filter; confirm it is available in
     * the Weka version in use.
     *
     * @throws IllegalStateException if no dataset has been loaded yet
     * @throws Exception             if filtering or writing the files fails
     */
    public void FilterRemovePercentageTest() throws Exception {
        if ( m_instances == null ) {
            throw new IllegalStateException(
                    "No dataset loaded; call getFileInstances() first" );
        }

        m_trainIns = resample( "-Z 10 -no-replacement" );
        writeToArffFile( "TrainData.arff", m_trainIns );

        m_testIns = resample( "-Z 10 -no-replacement -V" );
        writeToArffFile( "TestData.arff", m_testIns );

        m_trainIns.setClassIndex( m_trainIns.numAttributes() - 1 );
        m_testIns.setClassIndex( m_testIns.numAttributes() - 1 );
    }

    /** Runs a fresh Resample filter configured by {@code optionString} over {@code m_instances}. */
    private Instances resample( String optionString ) throws Exception {
        Resample sampler = new Resample();
        sampler.setOptions( Utils.splitOptions( optionString ) );
        sampler.setInputFormat( m_instances );
        return Filter.useFilter( m_instances, sampler );
    }

    /**
     * Builds an LLGC classifier from the training and test parts and prints
     * its evaluation summary on the test part.
     *
     * @throws Exception if building or evaluating the classifier fails
     */
    public void LLGCTest() throws Exception {
        System.out.println( " **************LLGC********** " );

        LLGC llgc = new LLGC();
        llgc.buildClassifier( m_trainIns, m_testIns );

        Evaluation eval = new Evaluation( m_trainIns );
        eval.evaluateModel( llgc, m_testIns );
        System.out.println( eval.toSummaryString() );
    }

    /**
     * Builds a J48 tree from the training part only and prints its evaluation
     * summary on the test part.
     *
     * @throws Exception if building or evaluating the classifier fails
     */
    public void J48Test() throws Exception {
        System.out.println( " **************J48********** " );

        J48 j48 = new J48();
        j48.buildClassifier( m_trainIns );

        Evaluation eval = new Evaluation( m_trainIns );
        eval.evaluateModel( j48, m_testIns );
        System.out.println( eval.toSummaryString() );
    }

    /**
     * Builds a YATSI classifier from the training and test parts and prints
     * its evaluation summary on the test part.
     *
     * @throws Exception if building or evaluating the classifier fails
     */
    public void YATSITest() throws Exception {
        System.out.println( " **************YATSI********** " );

        YATSI yatsi = new YATSI();
        yatsi.buildClassifier( m_trainIns, m_testIns );

        Evaluation eval = new Evaluation( m_trainIns );
        eval.evaluateModel( yatsi, m_testIns );
        System.out.println( eval.toSummaryString() );
    }

    /**
     * Builds a CollectiveBagging classifier from the training and test parts
     * and prints its evaluation summary on the test part.
     *
     * <p>NOTE(review): the method name and the printed banner say "EM" while
     * the classifier actually used is CollectiveBagging; both are kept as-is
     * so existing callers and output stay unchanged.
     *
     * @throws Exception if building or evaluating the classifier fails
     */
    public void CollectiveEMTest() throws Exception {
        System.out.println( " **************EM********** " );

        CollectiveBagging bagging = new CollectiveBagging();
        bagging.buildClassifier( m_trainIns, m_testIns );

        Evaluation eval = new Evaluation( m_trainIns );
        eval.evaluateModel( bagging, m_testIns );
        System.out.println( eval.toSummaryString() );
    }

    /**
     * Loads a dataset, splits it, and runs the J48 and YATSI comparisons.
     *
     * @param args optional; {@code args[0]} is the ARFF file to use. When
     *             absent, the original hard-coded soybean dataset path is used.
     * @throws Exception if loading, splitting, or evaluation fails
     */
    public static void main( String[] args ) throws Exception {
        String datasetPath = ( args != null && args.length > 0 ) ? args[0] : DEFAULT_DATASET;

        Test percentage = new Test();
        percentage.getFileInstances( datasetPath );
        percentage.FilterRemovePercentageTest();

        percentage.J48Test();
        percentage.YATSITest();
        // LLGCTest() was left disabled in the original program; kept disabled here.
        // percentage.LLGCTest();
    }
}