以oracle的測試schema爲例,emp和dept表,原始數據以下sql
SQL> select * from emp; EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO ---------- ---------- --------- ---------- -------------- ---------- ---------- ---------- 7369 SMITH CLERK 7902 17-12月-80 800 20 7499 ALLEN SALESMAN 7698 20-2月 -81 1600 300 30 7521 WARD SALESMAN 7698 22-2月 -81 1250 500 30 7566 JONES MANAGER 7839 02-4月 -81 2975 20 7654 MARTIN SALESMAN 7698 28-9月 -81 1250 1400 30 7698 BLAKE MANAGER 7839 01-5月 -81 2850 30 7782 CLARK MANAGER 7839 09-6月 -81 2450 10 7839 KING PRESIDENT 17-11月-81 5000 10 7844 TURNER SALESMAN 7698 08-9月 -81 1500 0 30 7900 JAMES CLERK 7698 03-12月-81 950 30 7902 FORD ANALYST 7566 03-12月-81 3000 20 7934 MILLER CLERK 7782 23-1月 -82 1300 10 已選擇12行。 SQL> select * from dept; DEPTNO DNAME LOC ---------- -------------- ------------- 10 ACCOUNTING NEW YORK 20 RESEARCH DALLAS 30 SALES CHICAGO 40 OPERATIONS BOSTON
完成以下查詢,mongodb
a 列出每一個部門的名稱及其人數
b 列出工資最高的頭三名員工姓名及其工資
c 列出工資比上司高的員工姓名
d 列出平均工資高於公司總平均工資的部門名稱
首先,須要創建mongodb集合,一開始,我採用徹底模擬emp和dept表的方式,發現不少狀況下都須要join,而mongodb並不支持join,作起來比較迂迴。因而決定,把全部數據都放在同一個集合(emp_dept)內,新加元素TABLE用來表示是來源於哪一個table的數據。如下是初始化emp_dept集合的ruby代碼ruby
#!/usr/bin/env ruby # 20140307, init_emp_dept.rb ### ### require "rubygems" require "mongo" mongoconn = Mongo::MongoClient.new("localhost", 27017) db = mongoconn.db("test") # 7369 SMITH CLERK 7902 17-12月-80 800 20 # 7499 ALLEN SALESMAN 7698 20-2月 -81 1600 300 30 # 7521 WARD SALESMAN 7698 22-2月 -81 1250 500 30 # 7566 JONES MANAGER 7839 02-4月 -81 2975 20 # 7654 MARTIN SALESMAN 7698 28-9月 -81 1250 1400 30 # 7698 BLAKE MANAGER 7839 01-5月 -81 2850 30 # 7782 CLARK MANAGER 7839 09-6月 -81 2450 10 # 7839 KING PRESIDENT 17-11月-81 5000 10 # 7844 TURNER SALESMAN 7698 08-9月 -81 1500 0 30 # 7900 JAMES CLERK 7698 03-12月-81 950 30 # 7902 FORD ANALYST 7566 03-12月-81 3000 20 # 7934 MILLER CLERK 7782 23-1月 -82 1300 10 coll = db.collection("emp_dept") coll.insert({ TABLE:"EMP", EMPNO:7369, ENAME:"SMITH", JOB:"CLERK", MGR:7902, HIREDATE:"19801217", SAL:800, DEPTNO:20 }) coll.insert({ TABLE:"EMP", EMPNO:7499, ENAME:"ALLEN", JOB:"SALESMAN", MGR:7698, HIREDATE:"19810220", SAL:1600, COMM:300, DEPTNO:30 }) coll.insert({ TABLE:"EMP", EMPNO:7521, ENAME:"WARD", JOB:"SALESMAN", MGR:7698, HIREDATE:"19810222", SAL:1250, COMM:500, DEPTNO:30 }) coll.insert({ TABLE:"EMP", EMPNO:7566, ENAME:"JONES", JOB:"MANAGER", MGR:7839, HIREDATE:"19810402", SAL:2975, DEPTNO:20 }) coll.insert({ TABLE:"EMP", EMPNO:7654, ENAME:"MARTIN", JOB:"SALESMAN", MGR:7698, HIREDATE:"19810928", SAL:1250, COMM:1400, DEPTNO:30 }) coll.insert({ TABLE:"EMP", EMPNO:7698, ENAME:"BLAKE", JOB:"MANAGER", MGR:7839, HIREDATE:"19810501", SAL:2850, DEPTNO:30 }) coll.insert({ TABLE:"EMP", EMPNO:7782, ENAME:"CLARK", JOB:"MANAGER", MGR:7839, HIREDATE:"19810609", SAL:2450, DEPTNO:10 }) coll.insert({ TABLE:"EMP", EMPNO:7839, ENAME:"KING", JOB:"PRESIDENT", HIREDATE:"19811117", SAL:5000, DEPTNO:10 }) coll.insert({ TABLE:"EMP", EMPNO:7844, ENAME:"TURNER", JOB:"SALESMAN", MGR:7698, HIREDATE:"19810908", SAL:1500, COMM:0, DEPTNO:30 }) coll.insert({ TABLE:"EMP", EMPNO:7900, ENAME:"JAMES", JOB:"CLERK", MGR:7698, HIREDATE:"19811203", SAL:950, DEPTNO:30 }) coll.insert({ TABLE:"EMP", EMPNO:7902, ENAME:"FORD", JOB:"ANALYST", MGR:7566, HIREDATE:"19811203", SAL:3000, DEPTNO:20 }) coll.insert({ TABLE:"EMP", EMPNO:7934, ENAME:"MILLER", JOB:"CLERK", MGR:7782, HIREDATE:"19820123", SAL:1300, DEPTNO:10 }) # coll.find.each { |doc| puts doc.inspect } # 10 ACCOUNTING NEW YORK # 20 RESEARCH DALLAS # 30 SALES CHICAGO # 40 OPERATIONS BOSTON coll.insert({ TABLE:"DEPT", DEPTNO:10, DNAME:"ACCOUNTING", LOC:"NEW YORK"}) coll.insert({ TABLE:"DEPT", DEPTNO:20, DNAME:"RESEARCH", LOC:"DALLAS"}) coll.insert({ TABLE:"DEPT", DEPTNO:30, DNAME:"SALES", LOC:"CHICAGO"}) coll.insert({ TABLE:"DEPT", DEPTNO:40, DNAME:"OPERATIONS", LOC:"BOSTON"}) coll.find.each { |doc| puts doc.inspect } # db.drop_collection("emp_dept")
貼一下輸出,便於看清楚集合的結構oracle
{"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000001'), "TABLE"=>"EMP", "EMPNO"=>7369, "ENAME"=>"SMITH", "JOB"=>"CLERK", "MGR"=>7902, "HIREDATE"=>"19801217", "SAL"=>800, "DEPTNO"=>20} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000002'), "TABLE"=>"EMP", "EMPNO"=>7499, "ENAME"=>"ALLEN", "JOB"=>"SALESMAN", "MGR"=>7698, "HIREDATE"=>"19810220", "SAL"=>1600, "COMM"=>300, "DEPTNO"=>30} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000003'), "TABLE"=>"EMP", "EMPNO"=>7521, "ENAME"=>"WARD", "JOB"=>"SALESMAN", "MGR"=>7698, "HIREDATE"=>"19810222", "SAL"=>1250, "COMM"=>500, "DEPTNO"=>30} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000004'), "TABLE"=>"EMP", "EMPNO"=>7566, "ENAME"=>"JONES", "JOB"=>"MANAGER", "MGR"=>7839, "HIREDATE"=>"19810402", "SAL"=>2975, "DEPTNO"=>20} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000005'), "TABLE"=>"EMP", "EMPNO"=>7654, "ENAME"=>"MARTIN", "JOB"=>"SALESMAN", "MGR"=>7698, "HIREDATE"=>"19810928", "SAL"=>1250, "COMM"=>1400, "DEPTNO"=>30} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000006'), "TABLE"=>"EMP", "EMPNO"=>7698, "ENAME"=>"BLAKE", "JOB"=>"MANAGER", "MGR"=>7839, "HIREDATE"=>"19810501", "SAL"=>2850, "DEPTNO"=>30} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000007'), "TABLE"=>"EMP", "EMPNO"=>7782, "ENAME"=>"CLARK", "JOB"=>"MANAGER", "MGR"=>7839, "HIREDATE"=>"19810609", "SAL"=>2450, "DEPTNO"=>10} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000008'), "TABLE"=>"EMP", "EMPNO"=>7839, "ENAME"=>"KING", "JOB"=>"PRESIDENT", "HIREDATE"=>"19811117", "SAL"=>5000, "DEPTNO"=>10} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000009'), "TABLE"=>"EMP", "EMPNO"=>7844, "ENAME"=>"TURNER", "JOB"=>"SALESMAN", "MGR"=>7698, "HIREDATE"=>"19810908", "SAL"=>1500, "COMM"=>0, "DEPTNO"=>30} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000a'), "TABLE"=>"EMP", "EMPNO"=>7900, "ENAME"=>"JAMES", "JOB"=>"CLERK", "MGR"=>7698, "HIREDATE"=>"19811203", "SAL"=>950, "DEPTNO"=>30} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000b'), "TABLE"=>"EMP", "EMPNO"=>7902, "ENAME"=>"FORD", "JOB"=>"ANALYST", "MGR"=>7566, "HIREDATE"=>"19811203", "SAL"=>3000, "DEPTNO"=>20} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000c'), "TABLE"=>"EMP", "EMPNO"=>7934, "ENAME"=>"MILLER", "JOB"=>"CLERK", "MGR"=>7782, "HIREDATE"=>"19820123", "SAL"=>1300, "DEPTNO"=>10} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000d'), "TABLE"=>"DEPT", "DEPTNO"=>10, "DNAME"=>"ACCOUNTING", "LOC"=>"NEW YORK"} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000e'), "TABLE"=>"DEPT", "DEPTNO"=>20, "DNAME"=>"RESEARCH", "LOC"=>"DALLAS"} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000f'), "TABLE"=>"DEPT", "DEPTNO"=>30, "DNAME"=>"SALES", "LOC"=>"CHICAGO"} {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000010'), "TABLE"=>"DEPT", "DEPTNO"=>40, "DNAME"=>"OPERATIONS", "LOC"=>"BOSTON"}
對於問題a,mongodb的查詢以下測試
/* 20140309 * a 列出每一個部門的名稱及其人數 */ db.emp_dept.group({key: {"DEPTNO":true}, initial: {"DNAME":"", "csum":0}, reduce: function(obj, prev){ if (obj.TABLE == "DEPT"){ prev.DNAME = prev.DNAME + obj.DNAME } else if (obj.TABLE == "EMP"){ prev.csum ++; } }})
輸出以下ui
[ { "DEPTNO" : 20, "DNAME" : "RESEARCH", "csum" : 3 }, { "DEPTNO" : 30, "DNAME" : "SALES", "csum" : 6 }, { "DEPTNO" : 10, "DNAME" : "ACCOUNTING", "csum" : 3 }, { "DEPTNO" : 40, "DNAME" : "OPERATIONS", "csum" : 0 } ]
問題b,比較簡單,代碼以下this
/* 20140309 * b 列出工資最高的頭三名員工姓名及其工資 */ db.emp_dept.find({"TABLE":"EMP"},{"ENAME":1,"SAL":1,"_id":0}).sort({"SAL":-1}).limit(3)
輸出以下spa
{ "ENAME" : "KING", "SAL" : 5000 } { "ENAME" : "FORD", "SAL" : 3000 } { "ENAME" : "JONES", "SAL" : 2975 }
問題c,簡單一點的解法能夠這麼作code
/* 20140309 * c 列出工資比上司高的員工姓名 */ // solution 1 db.emp_dept.find({"$where": function(){ if (this["TABLE"] == "EMP") { if (this["MGR"] != null){ var mgr = db.emp_dept.findOne({"TABLE":"EMP", "EMPNO":this["MGR"]}) if (this["SAL"] > mgr["SAL"]){ return true; }else { return false; } } } }}, {"EMPNO":1, "ENAME":1, "_id":0})
輸出以下blog
{ "EMPNO" : 7902, "ENAME" : "FORD" }
若是但願可以在查出這些員工的同時,也能列出其上司和其上司的工資,可能就須要用map-reduce,若是用map-reduce的話,寫法就有不少種了,我是這樣寫的
/* 20140309 * c 列出工資比上司高的員工姓名 */ // solution 2 get_mgr_sal = function() { if (this["TABLE"] == "EMP") { if (this["MGR"] != null){ var mgr = db.emp_dept.findOne({"TABLE":"EMP", "EMPNO":this["MGR"]}); if (this["SAL"] > mgr["SAL"]){ emit({"EMPNO":this["EMPNO"], "ENAME":this["ENAME"]}, {"SAL" : this["SAL"], "MGR_EMPNO":mgr["EMPNO"], "MGR_NAME":mgr["ENAME"], "MGR_SAL":mgr["SAL"]}); } } } }; get_mgr_sal = function() { if (this["TABLE"] == "EMP") { if (this["MGR"] != null){ var mgr = db.emp_dept.findOne({"TABLE":"EMP", "EMPNO":this["MGR"]}) emit({"EMPNO":this["EMPNO"], "ENAME":this["ENAME"]}, {"SAL" : this["SAL"], "MGR_EMPNO":mgr["EMPNO"], "MGR_NAME":mgr["ENAME"], "MGR_SAL":mgr["SAL"]}); } } }; higher_than_mgr = function(key, emits) { return emits; }; filter_result = function(key, reduced){ if (reduced["SAL"] > reduced["MGR_SAL"]){ return reduced; } } db.runCommand({"mapreduce" : "emp_dept", "map" : get_mgr_sal, "reduce" : higher_than_mgr, "out":"result", "finalize":filter_result}) db.result.find({"value":{"$ne":null}})
輸出以下
{ "_id" : { "EMPNO" : 7902, "ENAME" : "FORD" }, "value" : { "SAL" : 3000, "MGR_EMPNO" : 7566, "MGR_NAME" : "JONES", "MGR_SAL" : 2975 } }
雖然已經有告終果,可是我感受map-reduce這麼用,感受還不是很舒服,但也沒有想出更好的寫法。
問題d,我只想出了用map-reduce的解法
/* 20140309 * d 列出平均工資高於公司總平均工資的部門名稱 */ map_avg_sal_dept = function() { if (this["TABLE"] == "EMP"){ var dept = db.emp_dept.findOne({"TABLE":"DEPT", "DEPTNO":this["DEPTNO"]}); var key = {"DEPTNO":dept["DEPTNO"], "DNAME":dept["DNAME"]}; var value = {"SAL_DEPT":this["SAL"], "DEPT_COUNT":1}; emit(key, value) } } red_avg_sal_dept = function(key, value) { var all_sal_dept = 0.0; var all_dept_count = 0; for (var i in value){ all_sal_dept += value[i]["SAL_DEPT"]; all_dept_count += value[i]["DEPT_COUNT"]; } return {"AVG_SAL_DEPT":all_sal_dept/all_dept_count}; } get_avg_sal = function(){ var all_sal = 0.0; var all_count = 0; db.emp_dept.find({"TABLE":"EMP"}).forEach( function (e) { all_sal += e["SAL"]; all_count ++; }); return all_sal/all_count; } filter_avg_sal_dept = function(key, reduced) { if (reduced["AVG_SAL_DEPT"] > avg_sal){ return reduced; } } db.runCommand({"mapreduce" : "emp_dept", "map" : map_avg_sal_dept, "reduce" : red_avg_sal_dept, "out":"result", "scope":{avg_sal:get_avg_sal()}, "finalize":filter_avg_sal_dept}) db.result.find({"value":{"$ne":null}})
輸出以下
{ "_id" : { "DEPTNO" : 10, "DNAME" : "ACCOUNTING" }, "value" : { "AVG_SAL_DEPT" : 2916.6666666666665 } }
{ "_id" : { "DEPTNO" : 20, "DNAME" : "RESEARCH" }, "value" : { "AVG_SAL_DEPT" : 2258.3333333333335 } }
整體看起來,用mongodb處理結構化數據,仍是顯得不那麼直觀,解法也顯得很迂迴。固然,若是適應了的話,寫這些代碼可能會變快,可是比起SQL對數據的抽象程度和友好性來講,mongodb仍是差了很多,若是能有mongodb的類sql接口,那就會更好了。