A project of ours needed to parse JSON files generated from a very large data set and write the contents to a database. We used Alibaba's fastjson and ran into a GC problem along the way; the notes below record what happened.
The data set contained roughly 700,000 records in a file of about 3–4 GB. Following Example 3 of the Stream API usage recommended in the official fastjson docs, memory usage began to climb rapidly once about 300,000 records had been read, CPU usage quickly hit 100%, and at around 400,000 records GC problems set in.
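For context, the element-by-element pattern from the official Stream API docs (Example 3) looks roughly like the sketch below; the file path and the per-record handling are placeholders:

import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.JSONReader;

import java.io.FileReader;

public class StreamApiExample {
    public static void main(String[] args) throws Exception {
        // Stream over a huge top-level JSON array without loading it all at once.
        JSONReader reader = new JSONReader(new FileReader("/path/to/huge.json"));
        reader.startArray();
        while (reader.hasNext()) {
            // Deserialize one array element per iteration.
            JSONObject record = reader.readObject(JSONObject.class);
            // ... handle the record ...
        }
        reader.endArray();
        reader.close();
    }
}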
The code was as follows:
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.JSONReader;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.jdbc.core.namedparam.SqlParameterSourceUtils;
import org.springframework.stereotype.Component;

import java.io.*;
import java.util.*;

@Component
@Slf4j
public class EnterDatabaseUtils {

    @Autowired
    private NamedParameterJdbcTemplate namedParameterJdbcTemplate;

    // Flush to the database every 50,000 records.
    private final int batchTotal = 50000;

    public boolean enterData(String databaseName, String tableName, File file, String[] fields) {
        try {
            JSONReader reader = new JSONReader(
                    new InputStreamReader(new FileInputStream(file.getAbsoluteFile()), "UTF-8"));
            String insertSql = "INSERT INTO `" + databaseName + "`.`" + tableName + "`"
                    + " (`" + StringUtils.join(fields, "`,`") + "`)"
                    + " VALUES(:" + StringUtils.join(fields, ",:") + ")";
            long count = 1;
            ArrayList<Map<String, Object>> recordList = new ArrayList<>();
            reader.startArray();
            while (reader.hasNext()) {
                // Deserialize one array element at a time, as in the official example.
                JSONObject record = reader.readObject(JSONObject.class);
                if (count <= batchTotal) {
                    recordList.add(record);
                    count++;
                }
                if (batchTotal + 1 == count) {
                    namedParameterJdbcTemplate.batchUpdate(insertSql,
                            SqlParameterSourceUtils.createBatch(recordList));
                    count = 1;
                    recordList.clear();
                }
            }
            if (recordList.size() > 0) {
                // Flush the final partial batch.
                namedParameterJdbcTemplate.batchUpdate(insertSql,
                        SqlParameterSourceUtils.createBatch(recordList));
                recordList.clear();
            }
            reader.endArray();
            reader.close();
            return true;
        } catch (Exception e) {
            log.error(databaseName + "." + tableName + ": insert failed");
            log.error("", e);
            return false;
        }
    }
}
Test code:
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import java.io.File;

@RunWith(SpringRunner.class)
@SpringBootTest
public class EnterDatabaseUtilsTest {

    @Autowired
    private EnterDatabaseUtils enterDatabaseUtils;

    @Test
    public void testEnterDatabase() {
        File file = new File("/xxx/xxx/xxx.json");
        String[] fields = {........};
        boolean res = enterDatabaseUtils.enterData("xxxx", "xxxx", file, fields);
    }
}
At first I suspected that namedParameterJdbcTemplate was responsible for the runaway memory usage. But after removing all database operations and keeping only the JSON reading code, memory still ballooned until GC set in.
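The stripped-down reproduction amounted to the loop below (a sketch, with a placeholder path): no JDBC at all, just the streaming parse, and the heap still filled up.

import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.JSONReader;

import java.io.FileInputStream;
import java.io.InputStreamReader;

public class ReadOnlyRepro {
    public static void main(String[] args) throws Exception {
        JSONReader reader = new JSONReader(
                new InputStreamReader(new FileInputStream("/path/to/huge.json"), "UTF-8"));
        long count = 0;
        reader.startArray();
        while (reader.hasNext()) {
            // No database work: deserialize and discard each record.
            JSONObject record = reader.readObject(JSONObject.class);
            count++;
            if (count % 100000 == 0) {
                System.out.println("read " + count + " records");
            }
        }
        reader.endArray();
        reader.close();
    }
}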
That left fastjson misuse as the suspect. After reading a pile of articles, I finally found the answer in the post "Json少許數據解析": reading each element directly with readObject keeps consuming memory!
I changed the code to use startObject and parse each record's keys and values individually; memory and CPU usage then stayed flat with no growth, and the problem was solved.
The improved code is as follows:
import com.alibaba.fastjson.JSONReader;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.jdbc.core.namedparam.SqlParameterSourceUtils;
import org.springframework.stereotype.Component;

import java.io.*;
import java.util.*;

@Component
@Slf4j
public class EnterDatabaseUtils {

    @Autowired
    private NamedParameterJdbcTemplate namedParameterJdbcTemplate;

    // Flush to the database every 50,000 records.
    private final int batchTotal = 50000;

    public boolean enterData(String databaseName, String tableName, File file, String[] fields) {
        try {
            JSONReader reader = new JSONReader(
                    new InputStreamReader(new FileInputStream(file.getAbsoluteFile()), "UTF-8"));
            String insertSql = "INSERT INTO `" + databaseName + "`.`" + tableName + "`"
                    + " (`" + StringUtils.join(fields, "`,`") + "`)"
                    + " VALUES(:" + StringUtils.join(fields, ",:") + ")";
            long count = 1;
            ArrayList<Map<String, Object>> recordList = new ArrayList<>();
            reader.startArray();
            while (reader.hasNext()) {
                // Parse each record key by key instead of deserializing the whole
                // element with readObject(JSONObject.class). A fresh map is created
                // per record so batched entries do not share state.
                Map<String, Object> record = new HashMap<>();
                reader.startObject();
                while (reader.hasNext()) {
                    record.put(reader.readString(), reader.readObject());
                }
                reader.endObject();
                if (count <= batchTotal) {
                    recordList.add(record);
                    count++;
                }
                if (batchTotal + 1 == count) {
                    namedParameterJdbcTemplate.batchUpdate(insertSql,
                            SqlParameterSourceUtils.createBatch(recordList));
                    count = 1;
                    recordList.clear();
                }
            }
            if (recordList.size() > 0) {
                // Flush the final partial batch.
                namedParameterJdbcTemplate.batchUpdate(insertSql,
                        SqlParameterSourceUtils.createBatch(recordList));
                recordList.clear();
            }
            reader.endArray();
            reader.close();
            return true;
        } catch (Exception e) {
            log.error(databaseName + "." + tableName + ": insert failed");
            log.error("", e);
            return false;
        }
    }
}
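As a quick sanity check, heap usage can be logged after every flushed batch with plain Runtime calls (a minimal sketch; the helper name and the call site are assumptions, not part of the original code). With the key-by-key parsing, the used-heap figure should stay roughly flat across the whole 3–4 GB file:

public final class HeapStats {

    private HeapStats() {
    }

    // Log current heap usage; call after each flushed batch, for example.
    public static void log(long recordsSoFar) {
        Runtime rt = Runtime.getRuntime();
        long usedMb = (rt.totalMemory() - rt.freeMemory()) / (1024 * 1024);
        System.out.println(recordsSoFar + " records processed, heap in use: " + usedMb + " MB");
    }
}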