初步分析美國科研項目申報項目

 

將XML轉換爲新格式的XML、excel、和SQlserver數據java

 

package hander;

/**
 * @author ZTX * @function:讀取XML中字段到excel中
 * @Configuration:將文件放在 Xml\data下,或者data改成您的數據文件夾置於 Xml\下
 * @time:2013年11月12日19:37:45--2013年11月12日21:48:22--2013年11月13日17:28:45--2013年11月13日21:00:45
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.dom4j.*;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

import org.apache.poi.*;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;

public class Read { 
	public static String inputXmlDir;//要處理的xml文件的文件夾
	public static String data="2006";//要處理的xml文件的文件夾名稱默認是data
	public static boolean head=false;//表頭是否寫了
	public static String pathPrefix="";//文件路徑前綴
	
	public static long  recordNum=0;//記錄批量
	public static void main(String args[]){
		PreparedStatement ppst = null ;
		ConnectSQLserver mssq=new ConnectSQLserver(true);//連sqlserver
		//建表
		String reCreWord=
				"create table Project"+
				"     "+
				"(                        "+
				"AwardTitle varchar(200) null, "+
				"Time varchar(1000) null, "+
				"Organization varchar(200) null, "+
				"ProgramOfficer varchar(100) null, "+
				"AbstractNarration varchar(8000) null, "+
				"AwardID varchar(10) null, "+
				"PrincipalInvestigator varchar(60) null, "+
				"EmailAddress varchar(40) null, "+
				"Name varchar(80) null, "+
				"CountryName varchar(60) null, "+
				"StateName varchar(60) null, "+
				"CityName varchar(60) null, "+
				"ProgramReference varchar(500) null, "+
				")                        ";
		//插入
		String inSQL="insert into Project(" +
				"AwardTitle,Time,Organization," +
				"ProgramOfficer,AbstractNarration," +
				"AwardID,PrincipalInvestigator,EmailAddress," +
				"Name,CountryName,StateName," +
				"CityName,ProgramReference)values(?,?,?,?,?,?,?,?,?,?,?,?,?);";

		try {
			 mssq.sta.execute(reCreWord);
			System.out.println("操做新表:"+inSQL);
			ppst=mssq.conn.prepareStatement(inSQL);
		} catch (SQLException e1) {
			e1.printStackTrace();
		}
//		try {
//			//mssq.conn.setAutoCommit(false);
//		} catch (SQLException e1) {
//			e1.printStackTrace();
//		}
		
		long start,end=0;
		//建立空表
		Document creDocument = DocumentHelper.createDocument();    
		//添加空字段
		//項目標題
		Element root= creDocument.addElement("root");
		Element element[ ] = new Element[13];
		element[0] =  root.addElement("AwardTitle"); 
		element[1] = root.addElement("Time"); 
		element[2]=  root.addElement("Organization"); 
		element[3] =  root.addElement("ProgramOfficer"); 
		element[4]=  root.addElement("AbstractNarration"); 
		element[5]=  root.addElement("AwardID"); 
		element[6]=  root.addElement("PrincipalInvestigator"); 
		element[7]=  root.addElement("EmailAddress"); 
		element[8] = root.addElement("Name"); 
		element[9]=  root.addElement("CountryName"); 
		element[10] = root.addElement("StateName"); 
		element[11]=  root.addElement("CityName"); 
		element[12]=  root.addElement("ProgramReference");
		
        SAXReader saxReader = new SAXReader();   
		//獲得待處理的文件夾
			//前綴
        File getPrefix = new File("");// 空文件夾 用來獲得 文件路徑前綴
        pathPrefix=getPrefix.getAbsolutePath();//獲得 文件路徑前綴
        	//組合
        inputXmlDir=File.separator + data;
        inputXmlDir=pathPrefix+inputXmlDir;//文件夾絕對路徑
        System.out.println(inputXmlDir);
        File file = new File(data);
        //遍歷文件夾下的每一個文件
	    if(file.isDirectory()){ 
	        File[] files = file.listFiles();
	        start=System.currentTimeMillis();
	        Document document;
	        for(int i = 0; i < files.length; i++){
	        	 System.out.println("第"+(i+1)+"篇");
	            try {
	            	document = saxReader.read(inputXmlDir+File.separator + files[i].getName());
					//拿出一個xml去處理
	        			try {
							String record[]=Handle( document,creDocument ,element);
							//System.out.println("record[0]:"+record[0].toString());
 							for( int k=0;k<record.length;k++ ){
 								 ppst.setString(k+1, record[k]);
 								//System.out.println(k);
 								// System.out.println(record[k]);
 							}
							ppst.execute();
	        			} catch (SQLException e) {
							e.printStackTrace();
						}
	            } catch (DocumentException e) {
	            	e.printStackTrace();
	            }
	        }
	        end=System.currentTimeMillis()-start;
	        System.out.println("所用時間:"+end/1000+"s");
	     }
	    //輸出xml結果文件
	    try{
	        XMLWriter output = new XMLWriter(
	            new FileWriter( new File(inputXmlDir+File.separator+"result.xml") ));
	            output.write( creDocument );
	            output.close();
	    }
	    catch(IOException e){
	    	System.out.println(e.getMessage());
	    }
	}
 
	//處理一個xml 
	public static String[] Handle(Document document, Document creDocument, Element[] element) throws SQLException{
		String [] record=new String [13];
		
		//解析
		Element root=document.getRootElement();    // 獲取根節點
        Iterator Award = root.elementIterator("Award"); //根節點#Award
        // 遍歷Award節點:
        while (Award.hasNext()) {
            Element AwardEle = (Element) Award.next();
            //項目標題
            String AwardTitle = AwardEle.elementTextTrim("AwardTitle"); //  Award#AwardTitle 
              //添加到xml中
            Element Title=element[0].addElement("Title");//AwardTitle的子標籤
            Title.setText(AwardTitle);
            record[0]=AwardTitle;
            //項目時間
            String Time="";
            String AwardEffectiveDate=AwardEle.elementTextTrim("AwardEffectiveDate"); //  Award#AwardEffectiveDate 
            String AwardExpirationDate=AwardEle.elementTextTrim("AwardExpirationDate"); //  Award#AwardExpirationDate 
            Time=AwardEffectiveDate +"-"+AwardExpirationDate;
            
            Element time=element[1].addElement("time"); 
            time.setText(Time);
            record[1]=Time;
            //組織
            String org="";
            Iterator Organization = AwardEle.elementIterator("Organization");//Award#Organization
            	// 遍歷Organization節點:
            while (Organization.hasNext()) {
                Element OrganizationEle = (Element) Organization.next();
                //組織合成
                org= getChild(OrganizationEle,"Division","LongName")+">"+getChild(OrganizationEle,"Directorate","LongName");
            }
            
            Element organization=element[2].addElement("organization"); 
            organization.setText(org);
            record[2]=org;
           //項目主任
           String  proOffice="";
           proOffice=getChild(AwardEle,"ProgramOfficer","SignBlockName");
           
           Element officers=element[3].addElement("officers"); 
           officers.setText(proOffice);
           record[3]=proOffice;
           //摘要敘述
           String  abs="";
           abs=getChild(AwardEle,"","AbstractNarration");
           
           Element abstracts=element[4].addElement("abstracts"); 
           abstracts.setText(abs);
           record[4]=abs;
           //項目編號
           String  Id="";
           Id=getChild(AwardEle,"","AwardID");
           
           Element id=element[5].addElement("id"); 
           id.setText(Id);
           record[5]=Id;
           //主要研究者
           String  Inv="";
           Inv=getChild(AwardEle,"Investigator","FirstName")+" "+getChild(AwardEle,"Investigator","LastName");
           if(getChild(AwardEle,"Investigator","RoleCode").equals("Principal Investigator")){
               Element investigator=element[6].addElement("investigator"); 
               investigator.setText(Inv);
               record[6]=Inv;
           }
           //主要研究者郵箱
           String  Em="";
           Em=getChild(AwardEle,"Investigator","EmailAddress");
           if(getChild(AwardEle,"Investigator","RoleCode").equals("Principal Investigator")){
        	   Element email=element[7].addElement("email"); 
               email.setText(Em);
               record[7]=Em;
           }
           //機構名稱
           String  Name="";
           Name=getChild(AwardEle,"Institution","Name");
           
           Element name=element[8].addElement("name"); 
           name.setText(Name);
           record[8]=Name;
           //機構國家
           String  Nation="";
           Nation=getChild(AwardEle,"Institution","CountryName");
          
           Element nation=element[9].addElement("nation"); 
           nation.setText(Nation);
           record[9]=Nation;
           //機構州
           String  State="";
           State=getChild(AwardEle,"Institution","StateName");
          
           Element state=element[10].addElement("state"); 
           state.setText(State);
           record[10]=State;
           //機構城市
           String  City="";
           City=getChild(AwardEle,"Institution","CityName");
          
           Element city=element[11].addElement("city"); 
           city.setText(City);
           record[11]=City;
           //研究主題
           String  Ref="";
          //(2)子節點的子元素
   		   Iterator iters = AwardEle.elementIterator("ProgramReference");
   		  //遍歷mom父節點
   		   while (iters.hasNext()){
   		       Element itemEle = (Element) iters.next();
   		    Ref+=itemEle.elementTextTrim("Text")+" ;"; // 拿到head下的子節點script下的字節點username的值
   		   }
   		 Element reference=element[12].addElement("reference"); 
   		 reference.setText(Ref);
   		record[12]=Ref;
        } 
       // ppst.addBatch();
       // ppst.executeBatch();
       // ppst.execute();
		recordNum++;
        if(!head){//寫表頭
        	String heads[]=new String [13];
        	heads[0]="項目標題";
        	heads[1]="項目時間";
        	heads[2]="項目主任";
        	heads[3]="組織";
        	heads[4]="摘要敘述";
        	heads[5]="項目編號";
        	heads[6]="主要研究者";
        	heads[7]="主要研究者郵箱";
        	heads[8]="機構名稱";
        	heads[9]="機構國家";
        	heads[10]="機構州";
        	heads[11]="機構城市";
        	heads[12]="研究主題";
        	//writeResult2Excel(heads);
        	head=true;
        }
       
        //writeResult2Excel(record);//數據量大就沒用
        return record;
	}
	//寫入excel一行
	public static void writeResult2Excel(String [] record) {
		String DB=pathPrefix+File.separator+"data/results.xls";	
		HSSFWorkbook wb = getDBwb(DB);//據已有的DB 創建新HSSFWorkbook對象  
		HSSFSheet sheet = wb.getSheet("sheet1");//獲得新的sheet對象	
		int rowNum=sheet.getLastRowNum();//得到總行數
		int coloumNum=13;//總列數
		 HSSFRow rowTo =null;
		 HSSFCell cellTo =null;
			 rowTo = sheet.createRow(rowNum+1);
			 for(int j=0;j<coloumNum;j++){//列循環
				 cellTo=rowTo.createCell(j);
				 cellTo.setCellValue(record[j]);
			 } 
		//要把結果寫到文件中去
		writeBack2Excel(DB,wb);
	}//writeBack
	public static HSSFWorkbook getDBwb(String getDB) {
		HSSFWorkbook workbook=null;
		try {
			  FileInputStream fis=new FileInputStream(getDB);
		       workbook=new HSSFWorkbook(fis); 
		} catch (IOException e) {
			e.printStackTrace();
		}
		return workbook;
	}
	public static void writeBack2Excel(String DBurl,HSSFWorkbook wb) {
		 FileOutputStream fileOut;
		try {
			fileOut = new FileOutputStream(DBurl);		
			wb.write(fileOut);//把Workbook對象輸出到文件中  
			fileOut.flush();              
			fileOut.close(); 
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}  
	}
	private static String getChild(Element AwardEle,String mom,String kid) {
		String target="";
		//(1)直接輸出的是根節點的子元素
		if(mom.equals("")){
		    target= AwardEle.elementTextTrim(kid); 
		}
		//(2)子節點的子元素
		Iterator iters = AwardEle.elementIterator(mom);
		//遍歷mom父節點
		   while (iters.hasNext()){
		       Element itemEle = (Element) iters.next();
		       target= itemEle.elementTextTrim(kid); // 拿到head下的子節點script下的字節點username的值
		   }
		return target;
	}
}
相關文章
相關標籤/搜索