1. 背景mysql
導入150w數據到mysql,數據格式爲tab分隔csv文件。測試索引對導入速度的影響sql
2. 無索引,導入數據後重建bash
CREATE TABLE `tab` ( `ct_id` char(36) DEFAULT NULL COMMENT 'id', `ct_title` varchar(200) DEFAULT NULL COMMENT '標題', `bd_id` int(11) DEFAULT NULL COMMENT 'bdid', `org_unit` int(11) DEFAULT NULL COMMENT '', `poi_id` int(11) DEFAULT NULL, `poi_name` varchar(200) DEFAULT NULL COMMENT 'poi名稱' ) ENGINE=InnoDB DEFAULT CHARSET=utf8
mysql > load data infile '/tmp/ct.txt' into table tab; Query OK, 1928611 rows affected (15.83 sec) Records: 1928611 Deleted: 0 Skipped: 0 Warnings: 0
重建索引ide
mysql > alter table tab add UNIQUE index `uk_ct_poi` (`ct_id`,`poi_id`),函數
-> add index `idx_orgunit` (`org_unit`),測試
-> add index `idx_poi_orgunit` (`poi_id`,`org_unit`),大數據
-> add index idx_bd_id(bd_id),索引
-> add index idx_poi_id(poi_id),ip
-> add index idx_poi_name(poi_name),string
-> add index idx_all(ct_id,ct_title,bd_id,org_unit,poi_name),
-> add index idx_test(ct_title,bd_id,org_unit,poi_name);
Query OK, 0 rows affected (1 min 24.56 sec)
Records: 0 Duplicates: 0 Warnings: 0
3. 表建好索引導入數據
mysql > CREATE TABLE `tab` ( -> `ct_id` char(36) DEFAULT NULL COMMENT 'id', -> `ct_title` varchar(200) DEFAULT NULL COMMENT '標題', -> `bd_id` int(11) DEFAULT NULL COMMENT 'id', -> `org_unit` int(11) DEFAULT NULL COMMENT '', -> `poi_id` int(11) DEFAULT NULL, -> `poi_name` varchar(200) DEFAULT NULL COMMENT 'poi名稱', -> UNIQUE KEY `uk_ct_poi` (`ct_id`,`poi_id`), -> KEY `idx_orgunit` (`org_unit`), -> KEY `idx_poi_orgunit` (`poi_id`,`org_unit`), -> key idx_bd_id(bd_id), -> key idx_poi_id(poi_id), -> key idx_poi_name(poi_name), -> key idx_all(ct_id,ct_title,bd_id,org_unit,poi_name), -> key idx_test(ct_title,bd_id,org_unit,poi_name) -> ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
mysql > load data infile '/tmp/ct.txt' into table mixed_poi_mt_ct; Query OK, 1928611 rows affected (13 min 8.88 sec) Records: 1928611 Deleted: 0 Skipped: 0 Warnings: 0
4. 結論
大數據導入導出先導入數據再重建索引
5. 選擇性插入
無where
mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id
-> limit 100000
-> ;
Query OK, 69517 rows affected (5.02 sec)
Records: 100000 Duplicates: 30483 Warnings: 0
where字段有string類型索引
mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id
-> where ct.ct_id>'0'
-> limit 100000
-> ;
Query OK, 69517 rows affected (45.76 sec)
Records: 100000 Duplicates: 30483 Warnings: 0
where字段有int類型索引
mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id
-> where ct.poi_id>0
-> limit 100000;
Query OK, 69517 rows affected (5.72 sec)
Records: 100000 Duplicates: 30483 Warnings: 0
where字段無索引
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id
-> where poi.close_status=0
-> limit 100000;
Query OK, 69355 rows affected (23 min 21.16 sec)
Records: 100000 Duplicates: 30645 Warnings: 0
join字段使用了函數
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now() -> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id
-> limit 100000;
Query OK, 77348 rows affected (5 min 25.86 sec)
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time) select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now() from mixed_poi_mt_ct ct join poi_crm poi on ct.poi_id=poi.poi_id join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id where ct.ct_id>'0' limit 100000;
Query OK, 77348 rows affected (7 min 11.24 sec)
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id
-> where poi.poi_id>0
-> limit 100000;
Query OK, 64770 rows affected (41.99 sec)
Records: 100000 Duplicates: 35230 Warnings: 0
??? 線下庫不穩定,過了段時間的數據
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id
-> where poi.poi_id>0
-> limit 100000
-> ;
Query OK, 64770 rows affected (25.93 sec)
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id
-> where poi.close_status=0
-> limit 100000
-> ;
Query OK, 77186 rows affected (5 min 0.31 sec)
Records: 100000 Duplicates: 22814 Warnings: 0