码迷,mamicode.com
首页 > 数据库 > 详细

MongoDB聚合查询

时间:2015-05-29 15:20:23      阅读:166      评论:0      收藏:0      [点我收藏+]

标签:

出于对性能的要求,公司希望把Mysql的数据迁移到MongoDB上,于是我开始学习Mongo的一些CRUD操作,由于第一次接触NoSQL,还是有点不习惯。

先吐个槽,公司的Mongo版本是2.6.4,而用的java驱动包版本是超级老物2.4版。当时一个“如何对分组后的文档进行筛选”这个需求头痛了很久,虽然shell命令下可以使用Aggregation很方便地解决,但是java驱动包从2.9.0版本才开始支持该特性,我至今没有找到不用Aggregation解决上述需求的办法。只能推荐公司升级驱动包版本,希望没有后续的兼容问题。

Mongo2.2版本后开始支持Aggregation Pipelinehttp://docs.mongodb.org/manual/core/aggregation-pipeline/),而java驱动包从2.9.0版本才开始支持2.2的特性,2.9版本是12年发布的,mongodb09年就出现了,可见Mongojava的开发者似乎不怎么友好_

话扯到这里,接下来就对我这一周所学的Mongo做一个总结,错误之处还望指教 :-D

 

MongoDB目前提供了三个可以执行聚合操作的命令:aggregatemapReducegroup。三者在性能和操作的优劣比较见官网提供的表格http://docs.mongodb.org/manual/reference/aggregation-commands-comparison/,这里不再赘述细节,先说一下这三个函数的原型及底层封装的命令。

三者在性能和操作的优劣比较见官网提供的表格http://docs.mongodb.org/manual/reference/aggregation-commands-comparison/,这里不再赘述细节,先说一下这三个函数的原型及底层封装的命令。

函数名

函数原型

封装的命令

db.collection.group()

db.collection.group(

    {

        key,

        reduce,

        initial

        [, keyf]

        [, cond]

        [, finalize]

    }

)

db.runCommand(

    {

      group:

       {

        ns: <namespace>,

        key: <key>,

        $reduce: <reduce function>,

        $keyf: <key function>,

        cond: <query>,

        finalize: <finalize function>

       }

    }

)

db.collection.mapReduce()

db.collection.mapReduce(

    <map>,

    <reduce>,

    {

        out: <collection>,

        query: <document>,

        sort: <document>,

        limit: <number>,

        finalize: <function>,

        scope: <document>,

        jsMode: <boolean>,

        verbose: <boolean>

    }

)

db.runCommand(

    {

    mapReduce: <collection>,

    map: <function>,

    reduce: <function>,

    finalize: <function>,

    out: <output>,

    query: <document>,

    sort: <document>,

    limit: <number>,

    scope: <document>,

    jsMode: <boolean>,

    verbose: <boolean>

    }

)

db.collection.aggregate()

db.collection.aggregate(

    pipeline,

    options

)

db.runCommand(

    {

      aggregate: "<collection>",

      pipeline: [ <stage>, <...> ],

      explain: <boolean>,

      allowDiskUse: <boolean>,

      cursor: <document>

    }

)

 

 

好记性不如烂笔头,下面通过操作来了解这几个函数和命令

先准备SQL的测试数据,用来验证结果、比较SQL语句和NoSQL的异同,测试环境是mysql

先创建数据库表

create table dogroup (

_id int,

name varchar(45),

course varchar(45),

score int,

gender int,

primary key(_id)

);

插入数据

insert into dogroup (_id, name, course, score, gender) values (1, "N", "C", 5, 0);

insert into dogroup (_id, name, course, score, gender) values (2, "N", "O", 4, 0);

insert into dogroup (_id, name, course, score, gender) values (3, "A", "C", 5, 1);

insert into dogroup (_id, name, course, score, gender) values (4, "A", "O", 6, 1);

insert into dogroup (_id, name, course, score, gender) values (5, "A", "U", 8, 1);

insert into dogroup (_id, name, course, score, gender) values (6, "A", "R", 8, 1);

insert into dogroup (_id, name, course, score, gender) values (7, "A", "S", 7, 1);

insert into dogroup (_id, name, course, score, gender) values (8, "M", "C", 4, 0);

insert into dogroup (_id, name, course, score, gender) values (9, "M", "U", 7, 0);

insert into dogroup (_id, name, course, score, gender) values (10, "E", "C", 7, 1);

准备MongoDB测试数据

创建Collection(等同于SQL中的表,该行可以不写,Mongo会在插入数据时自动创建Collection

db.createCollection("dogroup") 

插入数据

db.dogroup.insert({"_id": 1,"name": "N",course: "C","score": 5,gender: 0})

db.dogroup.insert({"_id": 2,"name": "N",course: "O","score": 4,gender: 0})

db.dogroup.insert({"_id": 3,"name": "A",course: "C","score": 5,gender: 1})

db.dogroup.insert({"_id": 4,"name": "A",course: "O","score": 6,gender: 1})

db.dogroup.insert({"_id": 5,"name": "A",course: "U","score": 8,gender: 1})

db.dogroup.insert({"_id": 6,"name": "A",course: "R","score": 8,gender: 1})

db.dogroup.insert({"_id": 7,"name": "A",course: "S","score": 7,gender: 1})

db.dogroup.insert({"_id": 8,"name": "M",course: "C","score": 4,gender: 0})

db.dogroup.insert({"_id": 9,"name": "M",course: "U","score": 7,gender: 0})

db.dogroup.insert({"_id": 10,"name": "E",course: "C","score": 7,gender: 1})

 

以下操作可能逻辑上没有实际意义,主要是帮助熟悉指令

1、查询出共有几门课程(course),返回的格式为“课程名、数量”

SQL写法:select course as ‘课程名‘, count(*) as ‘数量‘ from dogroup group by course;

MongoDB写法:

group方式

db.dogroup.group({

key : { course: 1 },

initial : { count: 0 },

reduce : function Reduce(curr, result) {

    result.count += 1;

},

finalize : function Finalize(out) {

    return {"课程名": out.course, "数量": out.count};

}

});

返回的格式如下:

{

        "课程名" : "C",

        "数量" : 4

},

{

        "课程名" : "O",

        "数量" : 2

},

{

        "课程名" : "U",

        "数量" : 2

},

{

        "课程名" : "R",

        "数量" : 1

},

{

        "课程名" : "S",

        "数量" : 1

}

mapReduce方式

db.dogroup.mapReduce(

    function () {

        emit(

            this.course,

            {course: this.course, count: 1}

        );

    },

    function (key, values) {

        var count = 0;

        values.forEach(function(val) {

            count += val.count;

        });

        return {course: key, count: count};

    },

    {

        out: { inline : 1 },

        finalize: function (key, reduced) {

            return {"课程名": reduced.course, "数量": reduced.count};

        }

    }

)

这里把count初始化为1的原因是,MongoDB执行完map函数(第一个函数)后,如果key所对应的values数组的元素个数只有一个,reduce函数(第二个函数)将不会被调用。

 

返回的格式如下:

{

      "_id" : "C",

      "value" : {

              "课程名" : "C",

              "数量" : 4

      }

},

{

      "_id" : "O",

      "value" : {

              "课程名" : "O",

              "数量" : 2

      }

},

{

      "_id" : "R",

      "value" : {

              "课程名" : "R",

              "数量" : 1

      }

},

{

      "_id" : "S",

      "value" : {

              "课程名" : "S",

              "数量" : 1

      }

},

{

      "_id" : "U",

      "value" : {

              "课程名" : "U",

              "数量" : 2

      }

}

aggregate方式

db.dogroup.aggregate(

    {

        $group:

        {

            _id: "$course",

            count: { $sum: 1 }

        }

    }

)

返回格式如下:

{ "_id" : "S", "count" : 1 }

{ "_id" : "R", "count" : 1 }

{ "_id" : "U", "count" : 2 }

{ "_id" : "O", "count" : 2 }

{ "_id" : "C", "count" : 4 }

 

以上三种方式中,group得到了我们想要的结果,mapReduce返回的结果只能嵌套在values里面,aggregate无法为返回结果指定别名。本人才疏学浅,刚接触Mongo,不知道后两者有没有可行的方法获取想要的结果,希望网友指教。

篇幅限制,下面的查询不再贴出返回结果,大家可以自己尝试。

 

2、查询Docouments(等同于SQL中记录)数大于2的课程

SQL写法:select course, count(*) as count from dogroup group by course having count > 2;

aggregate方式(注意$group$match的先后顺序)

db.dogroup.aggregate({

    $group: {

        _id: "$course",

        count: { $sum: 1 }

    }

    },{

    $match: {

        count:{

            $gt: 2

        }

    }

});

目前尚未找到groupmapReduce对分组结果进行筛选的方法,欢迎网友补充

 

3、找出所有分数高于5分的考生数量及分数,返回的格式为“分数、数量”

SQL写法:select score as ‘分数‘, count(distinct(name)) as ‘数量‘ from dogroup where score > 5 group by score;

group方式

db.dogroup.group({

    key : { score: 1 },

    cond : { score: {$gt: 5} },

    initial : { name:[] },

    reduce : function Reduce(curr, result) {

        var flag = true;

        for(i=0;i<result.name.length&&flag;i++){

            if(curr.name==result.name[i]){

                flag = false;

            }

        }

        if(flag){

            result.name.push(curr.name);

        }

    },

    finalize : function Finalize(out) {

        return {"分数": out.score, "数量": out.name.length};

    }

});

mapReduce方式

db.dogroup.mapReduce(

    function () {

        if(this.score > 5){

            emit(

                this.score,

                {score: this.score, name: this.name}

            );

        }

    },

    function (key, values) {

        var reduced = {score: key, names: []};

        var json = {};//利用json对象的key去重

        for(i = 0; i < values.length; i++){

            if(!json[values[i].name]){

                reduced.names.push(values[i].name);

                json[values[i].name] = 1;

            }

        }

        return reduced;

    },

    {

        out: { inline : 1 },

        finalize: function (key, reduced) {

            return {"分数": reduced.score, "数量": reduced.names?reduced.names.length:1};

        }

    }

)

aggregate方式

db.dogroup.aggregate({

        $match: {

            score: {

                $gt: 5

            }

        }

    },{

        $group: {

            _id: {

                score: "$score",

                name: "$name"

            }

        }

    },{

        $group: {

            _id: {

                "分数": "$_id.score"

            },

            "数量": { $sum: 1 }

        }

});

 

搞透上面这几个方法,大部分应用场景应该都没问题了。

MongoDB聚合查询

标签:

原文地址:http://www.cnblogs.com/code4j/p/4538407.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!