weka學習(三)缺失值處理

/**
 * @author hao.wei
 */
@Service
public class MissingHandleBizImpl implements MissingHandleBiz {

    private static final Logger logger = LoggerFactory.getLogger(MissingHandleBizImpl.class);
    /** 缺失值用該屬性的平均值填充*/
    @Override
    public Instances missingValuesFilledWithAvg(Instances instances, String incompatible) {
        try {
            // 屬性個數(列)
            int dim = instances.numAttributes();
            // 實例個數(行)
            int num = instances.numInstances();
            logger.info("開始將平均值填充入缺失值...");
            double[] meanV = new double[dim];
            for (int line = 0; line < meanV.length; line++) {
                // 第i列平均值
                meanV[line] = 0;
                // 實例個數
                int count = 0;
                for (int row = 0; row < num; row++) {
                    // 計算第i列平均值(缺失值 和 不合條件的值除外)
                    if (!instances.instance(row).isMissing(line) && !instances.instance(row).toString(line).trim().contains(incompatible)) {
                        meanV[line] += instances.instance(row).value(line);
                        count++;
                    }
                }
                meanV[line] = meanV[line] / count;
                logger.info("屬性[{}]的平均值爲[{}]", instances.attribute(line).name(), meanV[line]);
                for (int row = 0; row < num; row++) {
                    // 平均值填充缺失值 和 不符合條件的值
                    if (instances.instance(row).isMissing(line) || instances.instance(row).toString(line).contains(incompatible)) {
                        instances.instance(row).setValue(line, meanV[line]);
                    }
                }
            }
        } catch (Exception e) {
            logger.error("將平均值填充入缺失值發生系統異常,錯誤信息:", e);
        }
        logger.info("平均值填充如缺失值結束...");

        return instances;
    }

    /** 移除掉包含特殊值的屬性的實例*/
    @Override
    public Instances removeMismatchConditionData(Instances instances, String attribute, String incompatible) {
        try {
            logger.info("刪除[{}]屬性包含[{}]的實例", attribute, incompatible);
            // 屬性個數(列)
            int dim = instances.numAttributes();
            // 實例個數(行)
            int num = instances.numInstances();
            for (int i = 0; i < dim; i++) {
                // 屬性名稱和須要處理的屬性名相同
                if (instances.attribute(i).name().equals(attribute)) {
                    for (int j = 0; j < num; j++) {
                        // 實例的該屬性值包含不合條件值 刪除該條實例(行)
                        if (instances.instance(j).isMissing(i)|| instances.instance(j).toString(i).contains(incompatible)) {
                            logger.info("刪除的實例屬性值爲[{}]", instances.instance(j).toStringNoWeight());
                            instances.remove(j);
                            j--;
                            num--;
                        }
                    }
                }
            }
        } catch (Exception e) {
            logger.error("刪除[{}]屬性包含[{}]的實例發生系統異常,錯誤信息[{}]", attribute, incompatible, e);
        }
        return instances;
    }
}
相關文章
相關標籤/搜索