/** * @author hao.wei */ @Service public class MissingHandleBizImpl implements MissingHandleBiz { private static final Logger logger = LoggerFactory.getLogger(MissingHandleBizImpl.class); /** 缺失值用該屬性的平均值填充*/ @Override public Instances missingValuesFilledWithAvg(Instances instances, String incompatible) { try { // 屬性個數(列) int dim = instances.numAttributes(); // 實例個數(行) int num = instances.numInstances(); logger.info("開始將平均值填充入缺失值..."); double[] meanV = new double[dim]; for (int line = 0; line < meanV.length; line++) { // 第i列平均值 meanV[line] = 0; // 實例個數 int count = 0; for (int row = 0; row < num; row++) { // 計算第i列平均值(缺失值 和 不合條件的值除外) if (!instances.instance(row).isMissing(line) && !instances.instance(row).toString(line).trim().contains(incompatible)) { meanV[line] += instances.instance(row).value(line); count++; } } meanV[line] = meanV[line] / count; logger.info("屬性[{}]的平均值爲[{}]", instances.attribute(line).name(), meanV[line]); for (int row = 0; row < num; row++) { // 平均值填充缺失值 和 不符合條件的值 if (instances.instance(row).isMissing(line) || instances.instance(row).toString(line).contains(incompatible)) { instances.instance(row).setValue(line, meanV[line]); } } } } catch (Exception e) { logger.error("將平均值填充入缺失值發生系統異常,錯誤信息:", e); } logger.info("平均值填充如缺失值結束..."); return instances; } /** 移除掉包含特殊值的屬性的實例*/ @Override public Instances removeMismatchConditionData(Instances instances, String attribute, String incompatible) { try { logger.info("刪除[{}]屬性包含[{}]的實例", attribute, incompatible); // 屬性個數(列) int dim = instances.numAttributes(); // 實例個數(行) int num = instances.numInstances(); for (int i = 0; i < dim; i++) { // 屬性名稱和須要處理的屬性名相同 if (instances.attribute(i).name().equals(attribute)) { for (int j = 0; j < num; j++) { // 實例的該屬性值包含不合條件值 刪除該條實例(行) if (instances.instance(j).isMissing(i)|| instances.instance(j).toString(i).contains(incompatible)) { logger.info("刪除的實例屬性值爲[{}]", instances.instance(j).toStringNoWeight()); instances.remove(j); j--; num--; } } } } } catch (Exception e) { logger.error("刪除[{}]屬性包含[{}]的實例發生系統異常,錯誤信息[{}]", attribute, incompatible, e); } return instances; } }