今天幫同學處理數據, 主要是從1w多條記錄中隨機獲取8k條, 然後再從8k條記錄中隨機獲取2k條記錄. 最後將2k條記錄隨機分成10組, 使得每組的記錄都不重複.
下面將我的代碼都貼上來, 方便以後處理csv文件.
package spt.csv;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OptionalDataException;
import java.io.Serializable;
import java.nio.charset.Charset;

import spt.util.PropertyConfig;

/**
 * Base class for CSV file operations: holds the file name, the field
 * delimiter and the character encoding shared by CSV readers and writers.
 *
 * <p>The class is {@link Serializable}. Because {@link Charset} itself is not
 * serializable, the encoding is stored in the stream by its canonical name
 * and looked up again on deserialization.
 */
public abstract class CSVBasic implements Serializable {

    private static final long serialVersionUID = 7916808982930771124L;

    /** Character encoding; transient because {@code Charset} is not serializable. */
    private transient Charset charset;

    /** Field delimiter. */
    private char delimiter;

    private String fileName;

    /** Creates an instance with no file name, delimiter or charset set. */
    public CSVBasic() {
    }

    /**
     * Creates an instance using the default delimiter and charset.
     *
     * @param fileName path of the CSV file
     */
    public CSVBasic(String fileName) {
        this(fileName, getDefaultDelimiter(), getDefaultCharset());
    }

    /**
     * Creates an instance with explicit settings.
     *
     * @param fileName  path of the CSV file
     * @param delimiter field delimiter
     * @param charset   character encoding of the file
     */
    public CSVBasic(String fileName, char delimiter, Charset charset) {
        setFileName(fileName);
        setDelimiter(delimiter);
        setCharset(charset);
    }

    /**
     * Returns the default charset.
     *
     * @return the charset named by the {@code "charset"} configuration property
     */
    public static Charset getDefaultCharset() {
        return Charset.forName(PropertyConfig.getProperty("charset"));
    }

    /**
     * Returns the default delimiter.
     *
     * @return the first character of the {@code "delimiter"} configuration property
     */
    public static char getDefaultDelimiter() {
        return PropertyConfig.getProperty("delimiter").charAt(0);
    }

    public String getFileName() {
        return fileName;
    }

    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    public Charset getCharset() {
        return charset;
    }

    public void setCharset(Charset charset) {
        this.charset = charset;
    }

    public void setDelimiter(char delimiter) {
        this.delimiter = delimiter;
    }

    public char getDelimiter() {
        return delimiter;
    }

    /**
     * Writes the default fields followed by the charset name (or {@code null}).
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();
        out.writeObject(charset == null ? null : charset.name());
    }

    /**
     * Restores the default fields and then the charset from its stored name.
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        String charsetName;
        try {
            charsetName = (String) in.readObject();
        } catch (OptionalDataException legacyStream) {
            // Stream written by the older class layout carries no charset name.
            charsetName = null;
        }
        charset = (charsetName == null) ? null : Charset.forName(charsetName);
    }
}
3.讀取csv文件, 並將記錄映射爲List<Map<String, String>>對象:
package spt.csv;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import spt.util.PropertyConfig;

import com.csvreader.CsvReader;

/**
 * Reads a CSV file and maps each record to a {@code Map<String, String>}.
 */
public class Reader extends CSVBasic {

    private static final long serialVersionUID = -1712774594374451546L;

    private CsvReader reader;

    /**
     * Opens {@code fileName} with the default delimiter and charset.
     *
     * @param fileName path of the CSV file
     * @throws FileNotFoundException if the underlying {@code CsvReader} cannot open the file
     */
    public Reader(String fileName) throws FileNotFoundException {
        this(fileName, getDefaultDelimiter(), getDefaultCharset());
    }

    /**
     * Opens {@code fileName} with explicit settings.
     *
     * @param fileName  path of the CSV file
     * @param delimiter field delimiter
     * @param charset   character encoding of the file
     * @throws FileNotFoundException if the underlying {@code CsvReader} cannot open the file
     */
    public Reader(String fileName, char delimiter, Charset charset) throws FileNotFoundException {
        // The base class must be initialised before the reader is created.
        super(fileName, delimiter, charset);
        setReader(new CsvReader(fileName, delimiter, charset));
    }

    public CsvReader getReader() {
        return reader;
    }

    public void setReader(CsvReader reader) {
        this.reader = reader;
    }

    /**
     * Reads every record and maps it to a {@code Map} from column name to value.
     *
     * <p>Each entry of {@code fieldNames} is a configuration key; the column
     * name actually read from the file (and used as the map key) is the value
     * that {@link PropertyConfig#getProperty(String)} returns for that key.
     * The underlying reader is closed before this method returns, so it can
     * be called only once per instance.
     *
     * @param fieldNames configuration keys naming the columns to extract; must not be null
     * @return one map per record in file order, or {@code null} if an
     *         {@link IOException} occurred while reading
     */
    public List<Map<String, String>> getResult(List<String> fieldNames) {
        List<Map<String, String>> records = new ArrayList<Map<String, String>>();
        CsvReader csv = null;
        try {
            csv = getReader();

            // Resolve the column names once up front: every PropertyConfig lookup
            // re-reads the configuration, so doing it per field per record is wasteful.
            List<String> columns = new ArrayList<String>(fieldNames.size());
            for (String fieldName : fieldNames) {
                columns.add(PropertyConfig.getProperty(fieldName));
            }

            csv.readHeaders();
            while (csv.readRecord()) {
                Map<String, String> record = new HashMap<String, String>();
                for (String column : columns) {
                    record.put(column, csv.get(column));
                }
                records.add(record);
            }
            return records;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (csv != null) {
                csv.close();
            }
        }
    }

    @Override
    public String toString() {
        return getFileName();
    }
}
4.將List<Map<String, String>>輸出爲csv文件的類:
package spt.csv;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;

import com.csvreader.CsvWriter;

/**
 * Writes a list of records (each a {@code Map<String, String>}) to a CSV file.
 */
public class Writer extends CSVBasic {

    private static final long serialVersionUID = -9141083858975437622L;

    private CsvWriter writer = null;

    /**
     * Creates a writer with no target file. A {@link CsvWriter} must be
     * supplied through {@link #setWriter(CsvWriter)} before calling
     * {@link #write(List, List)}.
     */
    public Writer() {
        this(null, getDefaultDelimiter(), getDefaultCharset());
    }

    /**
     * Creates a writer for {@code fileName} with the default delimiter and charset.
     *
     * @param fileName path of the output file
     */
    public Writer(String fileName) {
        this(fileName, getDefaultDelimiter(), getDefaultCharset());
    }

    /**
     * Creates a writer with explicit settings.
     *
     * @param fileName  path of the output file; when {@code null} no
     *                  {@link CsvWriter} is created
     * @param delimiter field delimiter
     * @param charset   character encoding of the output
     */
    public Writer(String fileName, char delimiter, Charset charset) {
        super(fileName, delimiter, charset);
        // CsvWriter rejects a null file name, which made the no-arg constructor
        // unusable; leave the writer unset in that case.
        if (fileName != null) {
            writer = new CsvWriter(fileName, delimiter, charset);
        }
    }

    /**
     * Writes a header row followed by one row per record, then closes the
     * underlying writer (so this method can be used once per instance).
     *
     * <p>The entries of {@code fieldNames} are written verbatim as the header
     * and are also used as the keys to look up each record's values.
     * NOTE(review): {@code Reader.getResult} keys its maps by the configured
     * property values rather than by the field names themselves — confirm the
     * two agree in the configuration in use.
     *
     * @param fieldNames column names, in output order
     * @param mapList    records to write
     * @return {@code true} on success, {@code false} if an {@link IOException} occurred
     * @throws IllegalStateException if no {@link CsvWriter} has been configured
     */
    public boolean write(List<String> fieldNames, List<Map<String, String>> mapList) {
        CsvWriter out = getWriter();
        if (out == null) {
            throw new IllegalStateException(
                    "No CsvWriter configured: construct Writer with a file name or call setWriter first");
        }
        try {
            String[] header = fieldNames.toArray(new String[fieldNames.size()]);
            out.writeRecord(header);

            for (Map<String, String> record : mapList) {
                String[] row = new String[header.length];
                for (int i = 0; i < header.length; i++) {
                    row[i] = record.get(header[i]);
                }
                out.writeRecord(row);
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            out.close();
        }
    }

    public CsvWriter getWriter() {
        return writer;
    }

    public void setWriter(CsvWriter writer) {
        this.writer = writer;
    }
}
5.表中有一個字段NYR, 表示時間, 因爲需要將結果按照時間的先後順序排序, 所以定義一個比較器:
package spt.csv;

import java.text.ParseException;
import java.util.Comparator;
import java.util.Date;
import java.util.Map;

import spt.util.DateService;
import spt.util.PropertyConfig;

/**
 * Orders records (each a {@code Map<String, String>}) chronologically by
 * their date column.
 *
 * <p>The map key of the date column is the value of the {@code "NYR"}
 * configuration property, read once when the comparator is created. Records
 * whose date is missing, empty or unparseable are ordered before all dated
 * records and compare equal to each other, which keeps the ordering
 * consistent for sorting.
 */
public class RecordDateComparator implements Comparator<Map<String, String>> {

    /** Map key of the date column, resolved once instead of on every comparison. */
    private final String dateColumn = PropertyConfig.getProperty("NYR");

    @Override
    public int compare(Map<String, String> m1, Map<String, String> m2) {
        Long first = timeOf(m1);
        Long second = timeOf(m2);

        if (first == null) {
            return (second == null) ? 0 : -1;
        }
        if (second == null) {
            return 1;
        }

        // Compare directly rather than subtracting, which could overflow.
        long a = first.longValue();
        long b = second.longValue();
        if (a < b) {
            return -1;
        }
        return (a > b) ? 1 : 0;
    }

    /**
     * Returns the record's date as epoch milliseconds, or {@code null} when
     * the date is absent, empty or cannot be parsed.
     */
    private Long timeOf(Map<String, String> record) {
        try {
            Date date = DateService.getDate(record.get(dateColumn));
            return (date == null) ? null : Long.valueOf(date.getTime());
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }
}
6.在main類中:
package spt.csv;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;

import spt.util.PropertyConfig;

/**
 * Samples records from a CSV file in stages: picks {@code first_size} records
 * at random from the input, then {@code second_size} of those, and finally
 * splits that second sample into {@code groupCount} disjoint groups, writing
 * each group (sorted by date) to its own CSV file.
 */
public class ReadWriteDemo {

    /**
     * Program entry point. All settings ({@code first_size},
     * {@code second_size}, {@code groupCount}, {@code input_file}) are read
     * from the configuration via {@link PropertyConfig}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("執行中...執行過程請不要關閉此窗口!");

        // Size of the first sample (e.g. 8k).
        final int first_size = Integer.parseInt(PropertyConfig.getProperty("first_size"));
        // Size of the second sample (e.g. 2k).
        final int second_size = Integer.parseInt(PropertyConfig.getProperty("second_size"));
        // Number of groups (e.g. 10).
        final int groupCount = Integer.parseInt(PropertyConfig.getProperty("groupCount"));
        // Path of the source file.
        String file = PropertyConfig.getProperty("input_file");

        try {
            List<String> fieldNames = initFields();
            Reader csv = new Reader(file);

            List<Map<String, String>> totalList = csv.getResult(fieldNames);
            if (totalList == null) {
                // The reader signals an I/O failure with null (it has already
                // printed the stack trace); stop instead of failing with an NPE.
                System.out.println("讀取文件失敗!");
                return;
            }

            List<Map<String, String>> firstTaken = random(totalList, first_size);
            List<Map<String, String>> secondTaken = random(firstTaken, second_size);

            for (int i = 0; i < groupCount; i++) {
                int groupSize = second_size / groupCount;
                if (groupSize <= 0) {
                    throw new IndexOutOfBoundsException("新列表的長度錯誤!");
                }

                // secondTaken is already in random order, so consecutive slices
                // are random, disjoint groups. Slicing by position (rather than
                // removeAll, which is equals-based) keeps records with identical
                // content from being dropped together.
                List<Map<String, String>> group = new ArrayList<Map<String, String>>(
                        secondTaken.subList(i * groupSize, (i + 1) * groupSize));

                Writer writer = new Writer(nextOutputFile());
                Collections.sort(group, new RecordDateComparator());
                writer.write(fieldNames, group);
            }
            System.out.println("done!");
        } catch (FileNotFoundException e) {
            System.out.println("請指定正確的文件路徑!");
            e.printStackTrace();
        }
    }

    /**
     * Returns a new list holding {@code new_size} elements chosen uniformly at
     * random, without repetition of positions, from {@code originalList}.
     * The input list is not modified.
     *
     * @param originalList input list
     * @param new_size     number of elements to pick; must satisfy
     *                     {@code 0 < new_size <= originalList.size()}
     * @return the randomly chosen elements, in random order
     * @throws IndexOutOfBoundsException if {@code new_size} is out of range
     */
    public static List<Map<String, String>> random(
            List<Map<String, String>> originalList, int new_size) {
        if (new_size <= 0 || new_size > originalList.size()) {
            throw new IndexOutOfBoundsException("新列表的長度錯誤!");
        }

        // Shuffle a copy of the WHOLE list and keep its prefix, so every element
        // of the input can be selected (drawing indices below new_size would only
        // ever permute the first new_size elements).
        List<Map<String, String>> shuffled = new ArrayList<Map<String, String>>(originalList);
        Collections.shuffle(shuffled, new Random());
        return new ArrayList<Map<String, String>>(shuffled.subList(0, new_size));
    }

    /**
     * Returns the first name of the form {@code result<N>.csv} (N = 0, 1, ...)
     * that does not yet exist in the current directory.
     */
    private static String nextOutputFile() {
        int fileCount = 0;
        String outputFile;
        do {
            outputFile = "result" + fileCount++ + ".csv";
        } while (new File(outputFile).exists());
        return outputFile;
    }

    /**
     * Returns the names of all fields handled by the program.
     */
    private static List<String> initFields() {
        List<String> fieldNames = new ArrayList<String>(14);
        fieldNames.add("id");
        fieldNames.add("AJMC");
        fieldNames.add("JYAQ");
        fieldNames.add("AJLB");
        fieldNames.add("AJFAB");
        fieldNames.add("AJZT");
        fieldNames.add("BASJ");
        fieldNames.add("FXSJ");
        fieldNames.add("FASJSX");
        fieldNames.add("FASJXX");
        fieldNames.add("AJBH");
        fieldNames.add("ZBX");
        fieldNames.add("ZBY");
        fieldNames.add("NYR");
        return fieldNames;
    }
}
7.用到的自定義工具類爲:
package spt.util;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

/**
 * 2015-2-27 Utility class for date conversions.
 */
public class DateService {

    /**
     * Single formatter shared by all callers, built from the
     * {@code "date_format"} configuration property. {@code SimpleDateFormat}
     * is not thread-safe, so every use is guarded by synchronizing on it.
     */
    private static final DateFormat FORMATTER =
            new SimpleDateFormat(PropertyConfig.getProperty("date_format"));

    /**
     * Parses a date string into a {@link Date}.
     *
     * @param strDate text to parse
     * @return the parsed date, or {@code null} if {@code strDate} is null or empty
     * @throws ParseException if the text does not match the configured format
     */
    public static Date getDate(String strDate) throws ParseException {
        if (Str.isEmpty(strDate)) {
            return null;
        }
        synchronized (FORMATTER) {
            return FORMATTER.parse(strDate);
        }
    }

    /**
     * Converts a {@code java.util.Date} to a {@code java.sql.Date}, e.g. for
     * {@code PreparedStatement.setDate}.
     *
     * @param utilDate date to convert
     * @return the SQL date, or {@code null} if {@code utilDate} is null
     */
    public static java.sql.Date getSQLDate(java.util.Date utilDate) {
        if (utilDate == null) {
            return null;
        }
        return new java.sql.Date(utilDate.getTime());
    }

    /**
     * Formats a date using the configured format.
     *
     * @param date date to format
     * @return the formatted text, or {@code null} if {@code date} is null
     */
    public static String getDateStr(java.util.Date date) {
        if (date == null) {
            return null;
        }
        synchronized (FORMATTER) {
            return FORMATTER.format(date);
        }
    }

    /**
     * Tells whether {@code anotherDate} falls on the calendar day that is
     * {@code disDay} days away from today (0: today, 1: tomorrow,
     * -1: yesterday). The comparison is made against the current time on
     * every call.
     *
     * @param anotherDate date to test
     * @param disDay      offset in days from today
     * @return {@code true} if year, month and day of month all match;
     *         {@code false} otherwise or if {@code anotherDate} is null
     */
    public static boolean isSpecifiedDay(Date anotherDate, int disDay) {
        if (anotherDate == null) {
            return false;
        }

        Calendar target = Calendar.getInstance();
        target.setTime(new Date());
        target.add(Calendar.DAY_OF_MONTH, disDay);

        Calendar other = Calendar.getInstance();
        other.setTime(anotherDate);

        return target.get(Calendar.YEAR) == other.get(Calendar.YEAR)
                && target.get(Calendar.MONTH) == other.get(Calendar.MONTH)
                && target.get(Calendar.DAY_OF_MONTH) == other.get(Calendar.DAY_OF_MONTH);
    }
}
package spt.util;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Properties;

/**
 * 2015-2-27 Access to the application's configuration, stored in the
 * classpath resource {@code raw/properties.properties}.
 *
 * <p>The resource is re-read on every call; callers that need the same value
 * repeatedly (for example inside a loop) should look it up once and reuse it.
 */
public class PropertyConfig {

    /** Classpath location of the configuration file. */
    private static final String CONFIG_FILE_PATH = "raw/properties.properties";

    /**
     * Looks up a single configuration value.
     *
     * @param key property name
     * @return the property value, or {@code null} if the key is absent or the
     *         configuration could not be loaded
     */
    public static String getProperty(String key) {
        Properties properties = getProperties();
        if (properties == null) {
            return null;
        }
        return properties.getProperty(key);
    }

    /**
     * Loads the configuration file from the classpath.
     *
     * @return a freshly loaded {@link Properties}, or {@code null} if the
     *         resource is missing or cannot be read
     */
    public static Properties getProperties() {
        URL url = PropertyConfig.class.getClassLoader().getResource(CONFIG_FILE_PATH);
        if (url == null) {
            System.err.println("Configuration resource not found on classpath: " + CONFIG_FILE_PATH);
            return null;
        }

        InputStream in = null;
        try {
            in = url.openStream();
            Properties props = new Properties();
            props.load(in);
            return props;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            // Always release the stream; it was previously left open.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }
}
package spt.util; /** *字符串工具類. */ public class Str { /** * 判斷一個字符串是否有內容. * * @param str * @return 若是不不爲空,則返回true,不然返回false. */ public static boolean hasLength(String str) { return !isEmpty(str); } /**判斷字符串是否爲空. * @param str * @return */ public static boolean isEmpty(String str) { return str == null || str.isEmpty(); } }
其中, 配置文件"raw/properties.properties"放置在src目錄下.