java csv - 讀寫及其操作.

  今天幫同學處理數據, 主要是從1w多條記錄中隨機獲取8k條, 然後再從8k條記錄中隨機獲取2k條記錄. 最後將這2k條記錄隨機分成10組, 使得每組的記錄都不重複.

  下面將我的代碼都貼上來, 方便以後處理csv文件.

  1.   首先使用第三方的jar文件 javacsv.jar : 鏈接: http://pan.baidu.com/s/1qW5b3u0 密碼: qjmx
  2.   雖然該類庫可以相對方便地提供操作, 但是爲了方便處理, 我將處理的字段都放在配置文件中, 然後將每一條記錄都封裝爲Map<String, String>對象. 我將讀寫的基礎類封裝爲 CSVBasic:
    package spt.csv;
    
    import java.io.Serializable;
    import java.nio.charset.Charset;
    
    import spt.util.PropertyConfig;
    
    /**
     * Base class for CSV file operations.
     *
     * <p>Holds the three settings shared by the CSV reader and writer classes:
     * the file name, the field delimiter and the character set.
     */
    public abstract class CSVBasic implements Serializable {

        private static final long serialVersionUID = 7916808982930771124L;

        private String fileName;
        private char delimiter; // field separator
        private Charset charset; // file encoding

        /** Creates an instance without assigning a file name, delimiter or charset. */
        public CSVBasic() {
        }

        /**
         * Creates an instance that uses the default delimiter and charset.
         *
         * @param fileName path of the CSV file
         */
        public CSVBasic(String fileName) {
            this(fileName, getDefaultDelimiter(), getDefaultCharset());
        }

        /**
         * Creates an instance with every setting given explicitly.
         *
         * @param fileName  path of the CSV file
         * @param delimiter field separator
         * @param charset   file encoding
         */
        public CSVBasic(String fileName, char delimiter, Charset charset) {
            setFileName(fileName);
            setDelimiter(delimiter);
            setCharset(charset);
        }

        /**
         * Returns the default charset, named by the {@code "charset"} entry
         * obtained from {@link PropertyConfig}.
         *
         * @return the charset for the configured name
         */
        public static Charset getDefaultCharset() {
            String charsetName = PropertyConfig.getProperty("charset");
            return Charset.forName(charsetName);
        }

        /**
         * Returns the default delimiter: the first character of the
         * {@code "delimiter"} entry obtained from {@link PropertyConfig}.
         *
         * @return the configured delimiter character
         */
        public static char getDefaultDelimiter() {
            String configured = PropertyConfig.getProperty("delimiter");
            return configured.charAt(0);
        }

        public String getFileName() {
            return fileName;
        }

        public void setFileName(String fileName) {
            this.fileName = fileName;
        }

        public char getDelimiter() {
            return delimiter;
        }

        public void setDelimiter(char delimiter) {
            this.delimiter = delimiter;
        }

        public Charset getCharset() {
            return charset;
        }

        public void setCharset(Charset charset) {
            this.charset = charset;
        }
    }
    View Code

      3.讀取csv文件, 並將記錄映射爲List<Map<String, String>> 對象:

    package spt.csv;
    
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.nio.charset.Charset;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    
    import spt.util.PropertyConfig;
    
    import com.csvreader.CsvReader;
    
    /**
     * Reads a CSV file and maps every record to a {@code Map<String, String>}.
     */
    public class Reader extends CSVBasic {

        private static final long serialVersionUID = -1712774594374451546L;

        private CsvReader reader;

        /**
         * Opens the given file with the default delimiter and charset.
         *
         * @param fileName path of the CSV file to read
         * @throws FileNotFoundException declared by the underlying {@code CsvReader} constructor
         */
        public Reader(String fileName) throws FileNotFoundException {
            this(fileName, getDefaultDelimiter(), getDefaultCharset());
        }

        /**
         * Opens the given file with an explicit delimiter and charset.
         *
         * @param fileName  path of the CSV file to read
         * @param delimiter field separator
         * @param charset   file encoding
         * @throws FileNotFoundException declared by the underlying {@code CsvReader} constructor
         */
        public Reader(String fileName, char delimiter, Charset charset)
                throws FileNotFoundException {
            // Store the settings in the base class before creating the reader.
            super(fileName, delimiter, charset);
            setReader(new CsvReader(fileName, delimiter, charset));
        }

        public CsvReader getReader() {
            return reader;
        }

        public void setReader(CsvReader reader) {
            this.reader = reader;
        }

        /**
         * Reads all records and maps each one to a {@code Map} keyed by column name.
         *
         * <p>Each entry of {@code fieldNames} is a configuration key; the actual
         * column name is the value {@link PropertyConfig#getProperty(String)}
         * returns for it. The column names are resolved once, before the record
         * loop, instead of once per field of every record.
         *
         * <p>The underlying reader is closed before this method returns, whether
         * or not reading succeeded.
         *
         * @param fieldNames configuration keys of the columns to extract
         * @return one map per record, or {@code null} if an {@link IOException}
         *         occurred (its stack trace is printed)
         */
        public List<Map<String, String>> getResult(List<String> fieldNames) {
            List<Map<String, String>> lines = new ArrayList<Map<String, String>>();

            CsvReader r = null;
            try {
                r = getReader();
                r.readHeaders(); // consume the header row

                // Loop-invariant: translate configuration keys to column names once.
                List<String> columns = new ArrayList<String>(fieldNames.size());
                for (String fieldName : fieldNames) {
                    columns.add(PropertyConfig.getProperty(fieldName));
                }

                while (r.readRecord()) {
                    Map<String, String> itemMap = new HashMap<String, String>();
                    for (String column : columns) {
                        itemMap.put(column, r.get(column));
                    }
                    lines.add(itemMap);
                }
                return lines;
            } catch (IOException e) {
                e.printStackTrace();
                return null;
            } finally {
                if (r != null) {
                    r.close();
                }
            }
        }

        /** Returns the name of the file this reader was created for. */
        @Override
        public String toString() {
            return getFileName();
        }
    }
    View Code

    4.將List<Map<String, String>>輸出爲csv文件的類:

    package spt.csv;
    
    import java.io.IOException;
    import java.nio.charset.Charset;
    import java.util.List;
    import java.util.Map;
    
    import com.csvreader.CsvWriter;
    
    /**
     * Writes a list of {@code Map<String, String>} records to a CSV file.
     */
    public class Writer extends CSVBasic {

        private static final long serialVersionUID = -9141083858975437622L;

        private CsvWriter writer = null;

        /**
         * Creates a writer with the default delimiter and charset and no file name.
         * NOTE(review): this passes a null file name on to {@code CsvWriter};
         * confirm that the library accepts it.
         */
        public Writer() {
            this(null, getDefaultDelimiter(), getDefaultCharset());
        }

        /**
         * Creates a writer for the given file with the default delimiter and charset.
         *
         * @param fileName path of the CSV file to write
         */
        public Writer(String fileName) {
            this(fileName, getDefaultDelimiter(), getDefaultCharset());
        }

        /**
         * Creates a writer with every setting given explicitly.
         *
         * @param fileName  path of the CSV file to write
         * @param delimiter field separator
         * @param charset   file encoding
         */
        public Writer(String fileName, char delimiter, Charset charset) {
            super(fileName, delimiter, charset);
            writer = new CsvWriter(fileName, delimiter, charset);
        }

        public CsvWriter getWriter() {
            return writer;
        }

        public void setWriter(CsvWriter writer) {
            this.writer = writer;
        }

        /**
         * Writes a header row followed by one row per record, then closes the
         * underlying writer.
         *
         * <p>The header row is {@code fieldNames} itself, and each cell is looked
         * up in the record map under the same name.
         * NOTE(review): the maps built by the reader class are keyed by the
         * configured column name rather than by the field name; verify that the
         * two coincide in the configuration.
         *
         * @param fieldNames column names, in output order
         * @param mapList    records to write
         * @return {@code true} on success, {@code false} if an {@link IOException}
         *         occurred (its stack trace is printed)
         */
        public boolean write(List<String> fieldNames,
                List<Map<String, String>> mapList) {
            CsvWriter out = null;
            try {
                out = getWriter();

                // Header row.
                String[] header = fieldNames.toArray(new String[fieldNames.size()]);
                out.writeRecord(header);

                // One row per record, cells in header order.
                for (Map<String, String> row : mapList) {
                    String[] cells = new String[fieldNames.size()];
                    int column = 0;
                    for (String fieldName : fieldNames) {
                        cells[column++] = row.get(fieldName);
                    }
                    out.writeRecord(cells);
                }
                return true;
            } catch (IOException e) {
                e.printStackTrace();
                return false;
            } finally {
                if (out != null) {
                    out.close();
                }
            }
        }
    }
    View Code

    5.表中有一個字段NYR, 表示時間, 因爲需要將結果按照時間的先後順序排序, 所以定義一個比較器:

    package spt.csv;
    
    import java.text.ParseException;
    import java.util.Comparator;
    import java.util.Map;
    
    import spt.util.DateService;
    import spt.util.PropertyConfig;
    
    /**
     * Orders records (each a {@code Map<String, String>}) by their date column,
     * earliest first.
     *
     * <p>The date column name is the value {@link PropertyConfig} returns for the
     * key {@code "NYR"}; it is looked up once when the comparator is created.
     * Records whose date is missing, empty or unparseable sort before all dated
     * records and compare equal to each other, so the ordering stays consistent.
     */
    public class RecordDateComparator implements Comparator<Map<String, String>> {

        /** Column name holding the date, resolved once per comparator. */
        private final String dateKey = PropertyConfig.getProperty("NYR");

        /**
         * Compares two records by the time of their date column.
         *
         * @param m1 first record
         * @param m2 second record
         * @return a negative value, zero or a positive value as the first date is
         *         earlier than, equal to or later than the second
         */
        @Override
        public int compare(Map<String, String> m1, Map<String, String> m2) {
            Long first = toMillis(m1.get(dateKey));
            Long second = toMillis(m2.get(dateKey));

            // Undated records come first.
            if (first == null) {
                return second == null ? 0 : -1;
            }
            if (second == null) {
                return 1;
            }

            // Compare directly rather than subtracting, which could overflow.
            long a = first.longValue();
            long b = second.longValue();
            if (a < b) {
                return -1;
            }
            return a > b ? 1 : 0;
        }

        /** Parses a date string to epoch milliseconds, or null if absent or invalid. */
        private static Long toMillis(String text) {
            try {
                java.util.Date parsed = DateService.getDate(text);
                return parsed == null ? null : Long.valueOf(parsed.getTime());
            } catch (ParseException e) {
                e.printStackTrace();
                return null;
            }
        }

    }
    View Code

    6.在main類中:

    package spt.csv;
    
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Map;
    import java.util.Random;
    
    import spt.util.PropertyConfig;
    
    /**
     * Picks a first random sample from the input records (8k out of 10k+ in the
     * original use case), a second random sample from the first (2k), and splits
     * the second sample into groups (10), writing each group to its own CSV file.
     */
    public class ReadWriteDemo {

        /**
         * Runs the sampling. All sizes and the input path come from
         * {@link PropertyConfig}: {@code first_size}, {@code second_size},
         * {@code groupCount} and {@code input_file}.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            System.out.println("執行中...執行過程請不要關閉此窗口!");
            final int first_size = Integer.parseInt(PropertyConfig
                    .getProperty("first_size")); // size of the first sample (8k)
            final int second_size = Integer.parseInt(PropertyConfig
                    .getProperty("second_size")); // size of the second sample (2k)
            final int groupCount = Integer.parseInt(PropertyConfig
                    .getProperty("groupCount")); // number of groups (10)
            String file = PropertyConfig.getProperty("input_file"); // input path

            try {
                List<String> fieldNames = initFields();
                Reader csv = new Reader(file);
                // All records.
                List<Map<String, String>> totalList = csv.getResult(fieldNames);
                if (totalList == null) {
                    // getResult returns null after printing the I/O error.
                    System.out.println("Failed to read input file: " + file);
                    return;
                }
                // First sample (8k).
                List<Map<String, String>> firstTaken = random(totalList, first_size);
                // Second sample (2k).
                List<Map<String, String>> secondTaken = random(firstTaken,
                        second_size);
                // Records still available for grouping; shrinks as groups are drawn.
                List<Map<String, String>> remaining = secondTaken;
                final int groupSize = second_size / groupCount;
                for (int i = 0; i < groupCount; i++) {
                    List<Map<String, String>> group = random(remaining, groupSize);
                    // Remove the records just used so groups do not overlap.
                    remaining.removeAll(group);
                    // Pick the first "resultN.csv" name in the current directory
                    // that does not exist yet.
                    String outputFile = null;
                    int fileCount = 0;
                    do {
                        outputFile = "result" + fileCount++ + ".csv";
                    } while (new File(outputFile).exists());
                    Writer writer = new Writer(outputFile);
                    // Sort the group by date before writing.
                    Collections.sort(group, new RecordDateComparator());
                    writer.write(fieldNames, group);
                }
                System.out.println("done!");
            } catch (FileNotFoundException e) {
                System.out.println("請指定正確的文件路徑!");
                e.printStackTrace();
            }
        }

        /**
         * Returns a random sample, without repetition, of {@code new_size}
         * elements drawn from the whole of {@code originalList}.
         *
         * <p>The input list is not modified; the returned list is a new, mutable
         * list in random order.
         *
         * @param originalList list to sample from
         * @param new_size     number of elements to pick
         * @return a new list holding the sampled elements
         * @throws IndexOutOfBoundsException if {@code new_size} is not in the
         *         range {@code 1..originalList.size()}
         */
        public static List<Map<String, String>> random(
                List<Map<String, String>> originalList, int new_size) {
            if (new_size <= 0 || new_size > originalList.size())
                throw new IndexOutOfBoundsException("新列表的長度錯誤!");
            // Shuffle a copy and keep its prefix, so that every element of the
            // input list is a candidate, not only the first new_size ones.
            List<Map<String, String>> shuffled = new ArrayList<Map<String, String>>(
                    originalList);
            Collections.shuffle(shuffled, new Random());
            return new ArrayList<Map<String, String>>(shuffled.subList(0, new_size));
        }

        /** Builds the list of configuration keys for all fields, in output order. */
        private static List<String> initFields() {
            final String[] names = { "id", "AJMC", "JYAQ", "AJLB", "AJFAB", "AJZT",
                    "BASJ", "FXSJ", "FASJSX", "FASJXX", "AJBH", "ZBX", "ZBY", "NYR" };
            List<String> fieldNames = new ArrayList<String>(names.length);
            for (String name : names) {
                fieldNames.add(name);
            }
            return fieldNames;
        }
    }
    View Code

    7.用到的自定義工具類爲:

    package spt.util;
    
    import java.text.DateFormat;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Calendar;
    import java.util.Date;
    
    /**
     * 2015-2-27 Utility class for date conversion.
     *
     * <p>The date pattern is the {@code "date_format"} entry obtained from
     * {@link PropertyConfig}.
     */
    public class DateService {
        /** Pattern read once when the class is loaded. */
        private static final String DATE_PATTERN = PropertyConfig.getProperty("date_format");

        /**
         * One formatter per thread: {@link SimpleDateFormat} is not safe for
         * concurrent use, so an instance must not be shared between threads, yet
         * creating one on every call is unnecessary.
         */
        private static final ThreadLocal<DateFormat> FORMATTER = new ThreadLocal<DateFormat>() {
            @Override
            protected DateFormat initialValue() {
                return new SimpleDateFormat(DATE_PATTERN);
            }
        };

        /**
         * Parses a date string into a {@link Date}.
         *
         * @param strDate text to parse
         * @return the parsed date, or {@code null} if the input is null or empty
         * @throws ParseException if the text does not match the configured pattern
         */
        public static Date getDate(String strDate) throws ParseException {
            if (Str.isEmpty(strDate))
                return null;
            return FORMATTER.get().parse(strDate);
        }

        /**
         * Converts a {@code java.util.Date} to a {@code java.sql.Date}, for use
         * with methods such as {@code PreparedStatement.setDate}.
         *
         * @param utilDate date to convert
         * @return the converted date, or {@code null} if the input is null
         */
        public static java.sql.Date getSQLDate(java.util.Date utilDate) {
            if (utilDate == null)
                return null;
            return new java.sql.Date(utilDate.getTime());
        }

        /**
         * Formats a date as a string using the configured pattern.
         *
         * @param date date to format
         * @return the formatted text, or {@code null} if the input is null
         */
        public static String getDateStr(java.util.Date date) {
            if (date == null)
                return null;
            return FORMATTER.get().format(date);
        }


        /**
         * Tells whether a date falls on the calendar day that is {@code disDay}
         * days away from today: 0 means today, 1 tomorrow, -1 yesterday.
         *
         * @param anotherDate date to test
         * @param disDay      offset in days from today
         * @return {@code true} if year, month and day of month all match;
         *         {@code false} otherwise or if {@code anotherDate} is null
         */
        public static boolean isSpecifiedDay(Date anotherDate, int disDay) {
            if (anotherDate == null)
                return false;
            Calendar cNow = Calendar.getInstance();
            cNow.setTime(new Date()); // always compare against the current time
            cNow.add(Calendar.DAY_OF_MONTH, disDay);

            Calendar cAnotherDate = Calendar.getInstance();
            cAnotherDate.setTime(anotherDate);

            return cNow.get(Calendar.YEAR) == cAnotherDate.get(Calendar.YEAR)
                    && cNow.get(Calendar.MONTH) == cAnotherDate.get(Calendar.MONTH)
                    && cNow.get(Calendar.DAY_OF_MONTH) == cAnotherDate.get(Calendar.DAY_OF_MONTH);
        }
    }
    View Code
    package spt.util;
    
    import java.io.IOException;
    import java.net.URL;
    import java.util.Properties;
    
    
    /**
     * 2015-2-27 Reads settings from the classpath resource
     * {@code raw/properties.properties}.
     */
    public class PropertyConfig {

        /**
         * Looks up one configuration value. The configuration is loaded afresh on
         * every call.
         *
         * @param key property name
         * @return the value, or {@code null} if the key is absent or the
         *         configuration could not be loaded
         */
        public static String getProperty(String key) {
            Properties properties = getProperties();
            if (properties == null) {
                return null;
            }
            return properties.getProperty(key);
        }

        /**
         * Loads the configuration resource into a new {@link Properties} object.
         *
         * @return the loaded properties, or {@code null} if the resource is not
         *         on the classpath or cannot be read (the problem is reported on
         *         standard error)
         */
        public static Properties getProperties() {
            final String configFilePath = "raw/properties.properties";
            URL url = PropertyConfig.class.getClassLoader().getResource(configFilePath);
            if (url == null) {
                // getResource returns null when the resource is missing.
                System.err.println("Configuration file not found on classpath: " + configFilePath);
                return null;
            }
            Properties props = new Properties();
            java.io.InputStream in = null;
            try {
                in = url.openStream();
                props.load(in);
            } catch (IOException e) {
                e.printStackTrace();
                return null;
            } finally {
                // Always release the stream, even when loading failed.
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException ignored) {
                        // Nothing useful can be done if closing fails.
                    }
                }
            }
            return props;
        }
    }
    View Code
    package spt.util;
    
    /**
     * String helper methods.
     */
    public class Str {
        /**
         * Tells whether a string has any content.
         *
         * @param str string to test, may be null
         * @return {@code true} if the string is neither null nor empty
         */
        public static boolean hasLength(String str) {
            boolean empty = isEmpty(str);
            return !empty;
        }

        /**
         * Tells whether a string is null or empty.
         *
         * @param str string to test, may be null
         * @return {@code true} if the string is null or has length zero
         */
        public static boolean isEmpty(String str) {
            if (str == null) {
                return true;
            }
            return str.length() == 0;
        }
    }
    View Code

    其中,配置文件"raw/properties.properties"是放置在src目錄下.

相關文章
相關標籤/搜索