SlideShare a Scribd company logo
Zahid Mian
Part of the Brown-bag Series
Basic Aggregate functions available
Count, Distinct, Group
MongoDB doesn’t support SQL syntax
Aggregation requires building of “pipeline”
Essentially, one step/stage at a time, e.g.:
Step 1: Filter
Step 2: Projection
Step 3: Group
http://docs.mongodb.org/getting-started/shell/import-data/
db.restaurants.count();
> db.restaurants.distinct("borough");
[
"Brooklyn",
"Bronx",
"Manhattan",
"Queens",
"Staten Island",
"Missing"
]
> db.restaurants.group( {
... key: { borough: 1 },
... cond: { cuisine: "Bakery"},
... reduce: function(cur, result) { result.count += 1 },
... initial: { count: 0 }
... } );
[
{
"borough" : "Bronx",
"count" : 71
},
{
"borough" : "Manhattan",
"count" : 221
},
{
"borough" : "Brooklyn",
"count" : 173
},
{
"borough" : "Queens",
"count" : 204
},
{
"borough" : "Staten Island",
"count" : 20
},
{
"borough" : "Missing",
"count" : 2
}
]
>
key is equivalent to the group by clause
cond is equivalent to the where clause
reduce function is called for each document in the
collection that passes the condition
reduce function has two parameters: cur and result. cur
stores the current document and result stores the result so
far for that group
In this case the reduce function simply adds 1 to result.count for each document
initial sets the initial value for each group result
> db.restaurants.count();
25359
> db.restaurants.aggregate([{$group:{_id:'$cuisine', total: {$sum:1}}}]);
{ "_id" : "Chilean", "total" : 1 }
{ "_id" : "Californian", "total" : 1 }
{ "_id" : "Creole/Cajun", "total" : 1 }
{ "_id" : "Hawaiian", "total" : 3 }
{ "_id" : "Nuts/Confectionary", "total" : 6 }
{ "_id" : "Chinese/Japanese", "total" : 59 }
{ "_id" : "Soups", "total" : 4 }
{ "_id" : "Bagels/Pretzels", "total" : 168 }
{ "_id" : "Polynesian", "total" : 1 }
{ "_id" : "Delicatessen", "total" : 321 }
{ "_id" : "Eastern European", "total" : 65 }
{ "_id" : "Scandinavian", "total" : 7 }
{ "_id" : "Afghan", "total" : 14 }
{ "_id" : "Iranian", "total" : 2 }
{ "_id" : "Fruits/Vegetables", "total" : 7 }
{ "_id" : "German", "total" : 31 }
{ "_id" : "Creole", "total" : 24 }
{ "_id" : "Steak", "total" : 86 }
{ "_id" : "Czech", "total" : 6 }
{ "_id" : "Peruvian", "total" : 68 }
Type "it" for more
db.restaurants.aggregate(
[ // bracket indicates an array
{ // first "step" or stage
$group:{ // aggregate operator
_id:'$cuisine', // group by cuisine property
total: {$sum:1} // sum or count each “row”
}
}
]
);
> db.restaurants.aggregate(
... [
... {$group:{_id:'$cuisine', total: {$sum:1}}},
... {$sort: {total:-1}}
... ]
... );
{ "_id" : "American ", "total" : 6183 }
{ "_id" : "Chinese", "total" : 2418 }
{ "_id" : "Café/Coffee/Tea", "total" : 1214 }
{ "_id" : "Pizza", "total" : 1163 }
{ "_id" : "Italian", "total" : 1069 }
{ "_id" : "Other", "total" : 1011 }
{ "_id" : "Latin (Cuban, Dominican, Puerto Rican, South & Central American)", "total" : 850 }
{ "_id" : "Japanese", "total" : 760 }
{ "_id" : "Mexican", "total" : 754 }
{ "_id" : "Bakery", "total" : 691 }
{ "_id" : "Caribbean", "total" : 657 }
{ "_id" : "Spanish", "total" : 637 }
{ "_id" : "Donuts", "total" : 479 }
{ "_id" : "Pizza/Italian", "total" : 468 }
{ "_id" : "Sandwiches", "total" : 459 }
{ "_id" : "Hamburgers", "total" : 433 }
{ "_id" : "Chicken", "total" : 410 }
{ "_id" : "Ice Cream, Gelato,Yogurt, Ices", "total" : 348 }
{ "_id" : "French", "total" : 344 }
{ "_id" : "Delicatessen", "total" : 321 }
Type "it" for more
db.restaurants.aggregate(
[ // bracket indicates an array
{ // first "step" or stage
$group:{ // aggregate operator
_id:'$cuisine', // group by cuisine property
total: {$sum:1} // sum or count each “row”
}
},
{ // second "step" or stage
$sort: { // sort operator
total:-1 // sort on total; -1 indicates DESC
}
}
]
);
> db.restaurants.aggregate(
... [
... {$match : {borough: "Bronx"}},
... {$group:{_id:'$cuisine', total: {$sum:1}}},
... {$sort: {total:-1}}
... ]
... );
{ "_id" : "American ", "total" : 411 }
{ "_id" : "Chinese", "total" : 323 }
{ "_id" : "Pizza", "total" : 197 }
{ "_id" : "Latin (Cuban, Dominican, Puerto Rican, South & Central American)", "total" : 187 }
{ "_id" : "Spanish", "total" : 127 }
{ "_id" : "Caribbean", "total" : 110 }
{ "_id" : "Chicken", "total" : 108 }
{ "_id" : "Mexican", "total" : 89 }
{ "_id" : "Other", "total" : 86 }
{ "_id" : "Hamburgers", "total" : 78 }
{ "_id" : "Bakery", "total" : 71 }
{ "_id" : "Donuts", "total" : 68 }
{ "_id" : "Pizza/Italian", "total" : 53 }
{ "_id" : "Italian", "total" : 52 }
{ "_id" : "Sandwiches", "total" : 49 }
{ "_id" : "Café/Coffee/Tea", "total" : 45 }
{ "_id" : "Juice, Smoothies, Fruit Salads", "total" : 35 }
{ "_id" : "African", "total" : 31 }
{ "_id" : "Ice Cream, Gelato,Yogurt, Ices", "total" : 27 }
{ "_id" : "Seafood", "total" : 26 }
Type "it" for more
db.restaurants.aggregate(
[ // bracket indicates an array
{ // first "step" or stage
$match : { // match operator
borough: "Bronx" // where borough = "Bronx"
}
},
{ // second "step" or stage
$group:{ // aggregate operator
_id:'$cuisine', // group by cuisine property
total: {$sum:1} // sum or count each “row”
}
},
{ // third "step" or stage
$sort: {
total:-1 // sort on total; -1 indicates DESC
}
}
]
);
$sum
$avg
$first
$last
$max
$min
$push
$addToSet: similar to $push, but adds unique
values
$push: returns an array of all values that result from applying an expression to each document in a group
> db.restaurants.aggregate(
... [
... {
... $group:
... {
... _id: { cuisine: "$cuisine" },
... restaurantByStreet: { $push: { name: "$name" } }
... }
... },
... {$limit: 4},
... {$skip: 3}
... ]
... ).pretty();
{
"_id" : {
"cuisine" : "Hawaiian"
},
"restaurantByStreet" : [
{
"name" : "Makana"
},
{
"name" : "General Assembly"
},
{
"name" : "Onomea"
}
]
}
>
http://docs.mongodb.org/getting-started/shell/import-data/
http://docs.mongodb.org/getting-started/shell/import-data/
http://docs.mongodb.org/getting-started/shell/import-data/
Sort by borough ASC, cuisine DESC
> db.restaurants.aggregate(
... [
... {$group:{_id:{borough: '$borough', cuisine:'$cuisine' }, total: {$sum:1}}},
... {$sort: {"_id.borough":1, "_id.cuisine":-1}}, // use dot notation
... {$limit: 5 }
... ]
... );
{ "_id" : { "borough" : "Bronx", "cuisine" : "Thai" }, "total" : 2 }
{ "_id" : { "borough" : "Bronx", "cuisine" : "Tex-Mex" }, "total" : 11 }
{ "_id" : { "borough" : "Bronx", "cuisine" : "Steak" }, "total" : 4 }
{ "_id" : { "borough" : "Bronx", "cuisine" : "Spanish" }, "total" : 127 }
{ "_id" : { "borough" : "Bronx", "cuisine" : "Soups & Sandwiches" }, "total" : 1 }
>
$project: controls which fields are included in the output
> db.restaurants.aggregate(
... [
... {$limit:1},
... {$project: {_id:0, // hide the _id value
... restaurant_id:1, // show restaurant_id
... "restaurant_name":"$name", // rename/alias name to restaurant_name
... "grades.grade":1}} // show grades.grade
... ]).pretty();
{
"grades" : [
{
"grade" : "A" // part of output
},
{
"grade" : "B" // part of output
},
{
"grade" : "A" // part of output
},
{
"grade" : "A" // part of output
}
],
"restaurant_name" : "Wendy'S", // part of output; renamed
"restaurant_id" : "30112340" // part of output
}
>
$out: saves the output of a pipeline to a collection
> db.restaurants.aggregate(
... [
... {$match : {borough: "Bronx"}},
... {$group:{_id:'$cuisine', total: {$sum:1}}},
... {$sort: {total:-1}},
... {$limit: 5 },
... {$out: "top5"} // output data to a collection called top5
... ]
... );
> db.top5.find({}); // retrieve all data from top5
{ "_id" : "American ", "total" : 411 }
{ "_id" : "Chinese", "total" : 323 }
{ "_id" : "Pizza", "total" : 197 }
{ "_id" : "Latin (Cuban, Dominican, Puerto Rican, South & Central American)", "total" : 187 }
{ "_id" : "Spanish", "total" : 127 }
>
Motivation: How many A grades did a restaurant get?
> db.restaurants.find({_id: ObjectId("5602b9200a67e499361c05ad")}).pretty();
{
"_id" : ObjectId("5602b9200a67e499361c05ad"),
"address" : {
"street" : "Flatbush Avenue",
"zipcode" : "11225",
"building" : "469",
"coord" : [
-73.961704,
40.662942
]
},
"borough" : "Brooklyn",
"cuisine" : "Hamburgers",
"grades" : [ // this is an array of objects
{
"date" : ISODate("2014-12-30T00:00:00Z"),
"grade" : "A", // A grade
"score" : 8
},
{
"grade" : "B", // B grade
"score" : 23,
"date" : ISODate("2014-07-01T00:00:00Z")
},
{
"score" : 12,
"date" : ISODate("2013-04-30T00:00:00Z"),
"grade" : "A"
},
{
"date" : ISODate("2012-05-08T00:00:00Z"),
"grade" : "A",
"score" : 12
}
],
"name" : "Wendy'S",
"restaurant_id" : "30112340"
}
>
Basic pipeline
Stage 1: unwind grades
Stage 2: match grade of “A”
Stage 3: group by / sum
Stage 4: project (alias)
There is only one document for that restaurant_id, but since there are 4 elements in
grades, the $unwind operator creates 4 documents, one for each grade.
Notice that the result of the following is four documents with the same restaurant_id:
> db.restaurants.aggregate(
... [
... {$unwind: "$grades"}, // unwind the grades array
... {$limit:4}, // limit the output to 4 documents
... {$project: {_id:0, restaurant_id:1, "grades.date":1, "grades.grade":1, "grades.score":1}}
... ]).pretty();
{
"grades" : {
"date" : ISODate("2014-12-30T00:00:00Z"),
"grade" : "A",
"score" : 8
},
"restaurant_id": "30112340"
}
{
"grades" : {
"grade" : "B",
"score" : 23,
"date" : ISODate("2014-07-01T00:00:00Z")
},
"restaurant_id": "30112340"
}
{
"grades" : {
"score" : 12,
"date" : ISODate("2013-04-30T00:00:00Z"),
"grade" : "A"
},
"restaurant_id": "30112340"
}
{
"grades" : {
"date" : ISODate("2012-05-08T00:00:00Z"),
"grade" : "A",
"score" : 12
},
"restaurant_id": "30112340"
}
> db.restaurants.aggregate(
... [
... {$unwind: "$grades"},
... {$project: {_id:0, restaurant_id:1, name:1, "grades.grade":1}},
... {$match: {"grades.grade":"A"} }, // only count A grades
... {$group: {_id:{restaurant_id:'$restaurant_id', name:'$name' }, total: {$sum:1}}},
... {$sort: {total: -1}},
... {$limit: 5},
... // alias output to get nicer printout
... {$project: {_id:0, "rid":"$_id.restaurant_id", "rname":"$_id.name", total:1}}
... ]).pretty();
{ "total" : 8, "rid" : "41382858", "rname" : "TacoVeloz" }
{ "total" : 7, "rid" : "41587378", "rname" : "Lobster Joint" }
{"total" : 7, "rid" : "41611381", "rname" : "Burger King, Popeye'S Chicken & Biscuits"}
{ "total" : 7, "rid" : "41572121", "rname" : "Luke'S Pizza" }
{ "total" : 7, "rid" : "41578481", "rname" : "Top Hot Bagels & Grill" }
>
Mongodb Aggregation Pipeline

More Related Content

PDF
MongoDB Aggregation Framework
PPTX
MongoDB - Aggregation Pipeline
KEY
MongoDB, E-commerce and Transactions
KEY
JSON-LD and MongoDB
PPTX
Multiple inheritance in java3 (1).pptx
PDF
MongoDB and Node.js
PDF
MongodB Internals
PPTX
MongoDB GeoSpatial Feature
MongoDB Aggregation Framework
MongoDB - Aggregation Pipeline
MongoDB, E-commerce and Transactions
JSON-LD and MongoDB
Multiple inheritance in java3 (1).pptx
MongoDB and Node.js
MongodB Internals
MongoDB GeoSpatial Feature

What's hot (20)

PDF
Callback Function
PDF
PHP para Adultos: Clean Code e Object Calisthenics
PPTX
Hash table in java
PPTX
Testing web services
PPTX
XSS - Do you know EVERYTHING?
PPT
Introduction to Javascript
PPT
Introduction To Catalyst - Part 1
ODP
Product catalog using MongoDB
PPTX
Introduction to Spring Boot
PPTX
Random forest
PPTX
Database Connectivity in PHP
PDF
JavaScript Programming
PDF
Redshift performance tuning
PDF
NestJS - O framework progressivo
PDF
Présentation Angular 2
PPTX
Mongo DB Presentation
PPTX
MongoDB Aggregation
PPTX
Full stack devlopment using django main ppt
PPTX
Ensemble methods in machine learning
PDF
A Basic Django Introduction
Callback Function
PHP para Adultos: Clean Code e Object Calisthenics
Hash table in java
Testing web services
XSS - Do you know EVERYTHING?
Introduction to Javascript
Introduction To Catalyst - Part 1
Product catalog using MongoDB
Introduction to Spring Boot
Random forest
Database Connectivity in PHP
JavaScript Programming
Redshift performance tuning
NestJS - O framework progressivo
Présentation Angular 2
Mongo DB Presentation
MongoDB Aggregation
Full stack devlopment using django main ppt
Ensemble methods in machine learning
A Basic Django Introduction
Ad

Viewers also liked (12)

KEY
MongoDB Aggregation Framework
PPTX
Aggregation in MongoDB
PPTX
The Aggregation Framework
ODP
Aggregation Framework in MongoDB Overview Part-1
PPTX
Aggregation Framework
PDF
Mongo db aggregation guide
PPTX
MongoDB World 2016 : Advanced Aggregation
PDF
Aggregation Framework MongoDB Days Munich
PDF
Analytics with MongoDB Aggregation Framework and Hadoop Connector
PPTX
The Aggregation Framework
PPTX
MongoDB Analytics: Learn Aggregation by Example - Exploratory Analytics and V...
PDF
MongoDB Aggregation Framework in action !
MongoDB Aggregation Framework
Aggregation in MongoDB
The Aggregation Framework
Aggregation Framework in MongoDB Overview Part-1
Aggregation Framework
Mongo db aggregation guide
MongoDB World 2016 : Advanced Aggregation
Aggregation Framework MongoDB Days Munich
Analytics with MongoDB Aggregation Framework and Hadoop Connector
The Aggregation Framework
MongoDB Analytics: Learn Aggregation by Example - Exploratory Analytics and V...
MongoDB Aggregation Framework in action !
Ad

Similar to Mongodb Aggregation Pipeline (20)

PDF
Comment faire ses mappings ElasticSearch aux petits oignons ? - LINAGORA
PPTX
MongoDB Analytics: Learn Aggregation by Example - Exploratory Analytics and V...
PDF
Groovy kind of test
PDF
Groovy kind of test
PDF
Angular.js Fundamentals
PDF
Working with web_services
PPTX
Joins and Other MongoDB 3.2 Aggregation Enhancements
PPTX
Webinar: Applikationsentwicklung mit MongoDB : Teil 5: Reporting & Aggregation
PPT
Crash Course to SQL in PHP
PPTX
Powerful Analysis with the Aggregation Pipeline
PDF
Building Glassware with the Glass Development Kit
PDF
Riak 2.0 : For Beginners, and Everyone Else
PDF
"Kto to pisał?!... A, to ja.", czyli sposoby, żeby znienawidzić siebie z prze...
ODP
NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"
ODP
Intravert Server side processing for Cassandra
DOCX
project
PDF
Hello everyone,Im actually working on a fast food order program..pdf
PDF
MongoDB Analytics
PDF
Perkenalan ReasonML
TXT
Programación de C++, Función Case
Comment faire ses mappings ElasticSearch aux petits oignons ? - LINAGORA
MongoDB Analytics: Learn Aggregation by Example - Exploratory Analytics and V...
Groovy kind of test
Groovy kind of test
Angular.js Fundamentals
Working with web_services
Joins and Other MongoDB 3.2 Aggregation Enhancements
Webinar: Applikationsentwicklung mit MongoDB : Teil 5: Reporting & Aggregation
Crash Course to SQL in PHP
Powerful Analysis with the Aggregation Pipeline
Building Glassware with the Glass Development Kit
Riak 2.0 : For Beginners, and Everyone Else
"Kto to pisał?!... A, to ja.", czyli sposoby, żeby znienawidzić siebie z prze...
NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"
Intravert Server side processing for Cassandra
project
Hello everyone,Im actually working on a fast food order program..pdf
MongoDB Analytics
Perkenalan ReasonML
Programación de C++, Función Case

More from zahid-mian (9)

PDF
MongoD Essentials
PDF
Hadoop Technologies
PPTX
Intro to modern cryptography
PDF
Hadoop M/R Pig Hive
PDF
NoSQL Databases
PDF
Statistics101: Numerical Measures
PDF
Amazon SimpleDB
PDF
C# 6 New Features
PDF
Introduction to d3js (and SVG)
MongoD Essentials
Hadoop Technologies
Intro to modern cryptography
Hadoop M/R Pig Hive
NoSQL Databases
Statistics101: Numerical Measures
Amazon SimpleDB
C# 6 New Features
Introduction to d3js (and SVG)

Recently uploaded (20)

PPTX
climate analysis of Dhaka ,Banglades.pptx
PPT
Miokarditis (Inflamasi pada Otot Jantung)
PPTX
CEE 2 REPORT G7.pptxbdbshjdgsgjgsjfiuhsd
PPTX
advance b rammar.pptxfdgdfgdfsgdfgsdgfdfgdfgsdfgdfgdfg
PDF
Clinical guidelines as a resource for EBP(1).pdf
PDF
Lecture1 pattern recognition............
PPTX
iec ppt-1 pptx icmr ppt on rehabilitation.pptx
PPTX
Business Ppt On Nestle.pptx huunnnhhgfvu
PPTX
Business Acumen Training GuidePresentation.pptx
PDF
Recruitment and Placement PPT.pdfbjfibjdfbjfobj
PPTX
oil_refinery_comprehensive_20250804084928 (1).pptx
PPTX
Database Infoormation System (DBIS).pptx
PPTX
05. PRACTICAL GUIDE TO MICROSOFT EXCEL.pptx
PDF
22.Patil - Early prediction of Alzheimer’s disease using convolutional neural...
PPT
Reliability_Chapter_ presentation 1221.5784
PPTX
Supervised vs unsupervised machine learning algorithms
PDF
Introduction to Business Data Analytics.
PPTX
Data_Analytics_and_PowerBI_Presentation.pptx
PPTX
mbdjdhjjodule 5-1 rhfhhfjtjjhafbrhfnfbbfnb
PDF
“Getting Started with Data Analytics Using R – Concepts, Tools & Case Studies”
climate analysis of Dhaka ,Banglades.pptx
Miokarditis (Inflamasi pada Otot Jantung)
CEE 2 REPORT G7.pptxbdbshjdgsgjgsjfiuhsd
advance b rammar.pptxfdgdfgdfsgdfgsdgfdfgdfgsdfgdfgdfg
Clinical guidelines as a resource for EBP(1).pdf
Lecture1 pattern recognition............
iec ppt-1 pptx icmr ppt on rehabilitation.pptx
Business Ppt On Nestle.pptx huunnnhhgfvu
Business Acumen Training GuidePresentation.pptx
Recruitment and Placement PPT.pdfbjfibjdfbjfobj
oil_refinery_comprehensive_20250804084928 (1).pptx
Database Infoormation System (DBIS).pptx
05. PRACTICAL GUIDE TO MICROSOFT EXCEL.pptx
22.Patil - Early prediction of Alzheimer’s disease using convolutional neural...
Reliability_Chapter_ presentation 1221.5784
Supervised vs unsupervised machine learning algorithms
Introduction to Business Data Analytics.
Data_Analytics_and_PowerBI_Presentation.pptx
mbdjdhjjodule 5-1 rhfhhfjtjjhafbrhfnfbbfnb
“Getting Started with Data Analytics Using R – Concepts, Tools & Case Studies”

Mongodb Aggregation Pipeline

  • 1. Zahid Mian Part of the Brown-bag Series
  • 2. Basic Aggregate functions available Count, Distinct, Group MongoDB doesn’t support SQL syntax Aggregation requires building of “pipeline” Essentially, one step/stage at a time, e.g.: Step 1: Filter Step 2: Projection Step 3: Group
  • 5. > db.restaurants.group( { ... key: { borough: 1 }, ... cond: { cuisine: "Bakery"}, ... reduce: function(cur, result) { result.count += 1 }, ... initial: { count: 0 } ... } ); [ { "borough" : "Bronx", "count" : 71 }, { "borough" : "Manhattan", "count" : 221 }, { "borough" : "Brooklyn", "count" : 173 }, { "borough" : "Queens", "count" : 204 }, { "borough" : "Staten Island", "count" : 20 }, { "borough" : "Missing", "count" : 2 } ] > key is equivalent to the group by clause cond is equivalent to the where clause reduce function is called for each document in the collection that passes the condition reduce function has two parameters: cur and result. cur stores the current document and result stores the result so far for that group In this case result.count simply adds 1 for each document initial sets the initial value for each group result
  • 6. > db.restaurants.count(); 25359 > db.restaurants.aggregate([{$group:{_id:'$cuisine', total: {$sum:1}}}]); { "_id" : "Chilean", "total" : 1 } { "_id" : "Californian", "total" : 1 } { "_id" : "Creole/Cajun", "total" : 1 } { "_id" : "Hawaiian", "total" : 3 } { "_id" : "Nuts/Confectionary", "total" : 6 } { "_id" : "Chinese/Japanese", "total" : 59 } { "_id" : "Soups", "total" : 4 } { "_id" : "Bagels/Pretzels", "total" : 168 } { "_id" : "Polynesian", "total" : 1 } { "_id" : "Delicatessen", "total" : 321 } { "_id" : "Eastern European", "total" : 65 } { "_id" : "Scandinavian", "total" : 7 } { "_id" : "Afghan", "total" : 14 } { "_id" : "Iranian", "total" : 2 } { "_id" : "Fruits/Vegetables", "total" : 7 } { "_id" : "German", "total" : 31 } { "_id" : "Creole", "total" : 24 } { "_id" : "Steak", "total" : 86 } { "_id" : "Czech", "total" : 6 } { "_id" : "Peruvian", "total" : 68 } Type "it" for more
  • 7. db.restaurants.aggregate( [ // bracket indicates an array { // first "step" or stage $group:{ // aggregate operator _id:'$cuisine', // group by cuisine property total: {$sum:1} // sum or count each “row” } } ] );
  • 8. > db.restaurants.aggregate( ... [ ... {$group:{_id:'$cuisine', total: {$sum:1}}}, … {$sort: {total:-1}} ... ] ... ); { "_id" : "American ", "total" : 6183 } { "_id" : "Chinese", "total" : 2418 } { "_id" : "Café/Coffee/Tea", "total" : 1214 } { "_id" : "Pizza", "total" : 1163 } { "_id" : "Italian", "total" : 1069 } { "_id" : "Other", "total" : 1011 } { "_id" : "Latin (Cuban, Dominican, Puerto Rican, South & Central American)", "total" : 850 } { "_id" : "Japanese", "total" : 760 } { "_id" : "Mexican", "total" : 754 } { "_id" : "Bakery", "total" : 691 } { "_id" : "Caribbean", "total" : 657 } { "_id" : "Spanish", "total" : 637 } { "_id" : "Donuts", "total" : 479 } { "_id" : "Pizza/Italian", "total" : 468 } { "_id" : "Sandwiches", "total" : 459 } { "_id" : "Hamburgers", "total" : 433 } { "_id" : "Chicken", "total" : 410 } { "_id" : "Ice Cream, Gelato,Yogurt, Ices", "total" : 348 } { "_id" : "French", "total" : 344 } { "_id" : "Delicatessen", "total" : 321 } Type "it" for more
  • 9. db.restaurants.aggregate( [ // bracket indicates an array { // first "step" or stage $group:{ // aggregate operator _id:'$cuisine', // group by cuisine property total: {$sum:1} // sum or count each “row” } }, { // second "step" or stage $sort: { // sort operator total:-1 // sort on total; -1 indicates DESC } } ] );
  • 10. > db.restaurants.aggregate( ... [ ... {$match : {borough: "Bronx"}}, ... {$group:{_id:'$cuisine', total: {$sum:1}}}, ... {$sort: {total:-1}} ... ] ... ); { "_id" : "American ", "total" : 411 } { "_id" : "Chinese", "total" : 323 } { "_id" : "Pizza", "total" : 197 } { "_id" : "Latin (Cuban, Dominican, Puerto Rican, South & Central American)", "total" : 187 } { "_id" : "Spanish", "total" : 127 } { "_id" : "Caribbean", "total" : 110 } { "_id" : "Chicken", "total" : 108 } { "_id" : "Mexican", "total" : 89 } { "_id" : "Other", "total" : 86 } { "_id" : "Hamburgers", "total" : 78 } { "_id" : "Bakery", "total" : 71 } { "_id" : "Donuts", "total" : 68 } { "_id" : "Pizza/Italian", "total" : 53 } { "_id" : "Italian", "total" : 52 } { "_id" : "Sandwiches", "total" : 49 } { "_id" : "Café/Coffee/Tea", "total" : 45 } { "_id" : "Juice, Smoothies, Fruit Salads", "total" : 35 } { "_id" : "African", "total" : 31 } { "_id" : "Ice Cream, Gelato,Yogurt, Ices", "total" : 27 } { "_id" : "Seafood", "total" : 26 } Type "it" for more
  • 11. db.restaurants.aggregate( [ // bracket indicates an array { // first "step" or stage $match : { // match operator borough: "Bronx" // where borough = "Bronx" } }, { // second "step" or stage $group:{ // aggregate operator _id:'$cuisine', // group by cuisine property total: {$sum:1} // sum or count each “row” } }, { // third "step" or stage $sort: { total:-1 // sort on total; -1 indicates DESC } } ] );
  • 13. Returns an array of all values that result from applying an expression to each document in a group > db.restaurants.aggregate( ... [ ... { ... $group: ... { ... _id: { cuisine: "$cuisine" }, ... restaurantByStreet: { $push: { name: "$name" } } ... } ... }, ... {$limit: 4}, ... {$skip: 3} ... ] ... ).pretty(); { "_id" : { "cuisine" : "Hawaiian" }, "restaurantByStreet" : [ { "name" : "Makana" }, { "name" : "General Assembly" }, { "name" : "Onomea" } ] } >
  • 17. Sort by borough ASC, cuisine DESC > db.restaurants.aggregate( ... [ ... {$group:{_id:{borough: '$borough', cuisine:'$cuisine' }, total: {$sum:1}}}, ... {$sort: {"_id.borough":1, "_id.cuisine":-1}}, // use dot notation ... {$limit: 5 } ... ] ... ); { "_id" : { "borough" : "Bronx", "cuisine" : "Thai" }, "total" : 2 } { "_id" : { "borough" : "Bronx", "cuisine" : "Tex-Mex" }, "total" : 11 } { "_id" : { "borough" : "Bronx", "cuisine" : "Steak" }, "total" : 4 } { "_id" : { "borough" : "Bronx", "cuisine" : "Spanish" }, "total" : 127 } { "_id" : { "borough" : "Bronx", "cuisine" : "Soups & Sandwiches" }, "total" : 1 } >
  • 18. Controls which values are output > db.restaurants.aggregate( ... [ ... {$limit:1}, ... {$project: {_id:0, // hide the _id value … restaurant_id:1, // show restaurant_id … "restaurant_name":"$name", // rename/alias name to restaurant_name … "grades.grade":1}} // show grades.grade ... ]).pretty(); { "grades" : [ { "grade" : "A" // part of output }, { "grade" : "B" // part of output }, { "grade" : "A" // part of output }, { "grade" : "A" // part of output } ], "restaurant_name" : "Wendy'S", // part of output; renamed "restaurant_id" : "30112340" // part of output } >
  • 19. Saves the output of a pipeline to a collection > db.restaurants.aggregate( ... [ ... {$match : {borough: "Bronx"}}, ... {$group:{_id:'$cuisine', total: {$sum:1}}}, ... {$sort: {total:-1}}, ... {$limit: 5 }, ... {$out: "top5"} // output data to a collection called top5 ... ] ... ); > db.top5.find({}); // retrieve all data from top5 { "_id" : "American ", "total" : 411 } { "_id" : "Chinese", "total" : 323 } { "_id" : "Pizza", "total" : 197 } { "_id" : "Latin (Cuban, Dominican, Puerto Rican, South & Central American)", "total" : 187 } { "_id" : "Spanish", "total" : 127 } >
  • 20. Motivation: How many A grades did a restaurant get? > db.restaurants.find({_id: ObjectId("5602b9200a67e499361c05ad")}).pretty(); { "_id" : ObjectId("5602b9200a67e499361c05ad"), "address" : { "street" : "Flatbush Avenue", "zipcode" : "11225", "building" : "469", "coord" : [ -73.961704, 40.662942 ] }, "borough" : "Brooklyn", "cuisine" : "Hamburgers", "grades" : [ // this is an array of objects { "date" : ISODate("2014-12-30T00:00:00Z"), "grade" : "A", // A grade "score" : 8 }, { "grade" : "B", // B grade "score" : 23, "date" : ISODate("2014-07-01T00:00:00Z") }, { "score" : 12, "date" : ISODate("2013-04-30T00:00:00Z"), "grade" : "A" }, { "date" : ISODate("2012-05-08T00:00:00Z"), "grade" : "A", "score" : 12 } ], "name" : "Wendy'S", "restaurant_id" : "30112340" } > Basic pipeline Stage 1: unwind grades Stage 2: match grade of “A” Stage 3: group by / sum Stage 4: project (alias)
  • 21. There is only one document for that restaurant_id, but since there were 4 elements in grades, the unwind operator created 4 documents, one for each grade Notice the result of the following is four documents with the same restaurant_id > db.restaurants.aggregate( ... [ ... {$unwind: "$grades"}, // unwind the grades array ... {$limit:4}, // limit the output to 4 documents ... {$project: {_id:0, restaurant_id:1, "grades.date":1, "grades.grade":1, "grades.score":1}} ... ]).pretty(); { "grades" : { "date" : ISODate("2014-12-30T00:00:00Z"), "grade" : "A", "score" : 8 }, "restaurant_id": "30112340" } { "grades" : { "grade" : "B", "score" : 23, "date" : ISODate("2014-07-01T00:00:00Z") }, "restaurant_id": "30112340" } { "grades" : { "score" : 12, "date" : ISODate("2013-04-30T00:00:00Z"), "grade" : "A" }, "restaurant_id": "30112340" } { "grades" : { "date" : ISODate("2012-05-08T00:00:00Z"), "grade" : "A", "score" : 12 }, "restaurant_id": "30112340" }
  • 22. > db.restaurants.aggregate( ... [ ... {$unwind: "$grades"}, ... {$project: {_id:0, restaurant_id:1, name:1, "grades.grade":1}}, ... {$match: {"grades.grade":"A"} }, // only count A grades ... {$group: {_id:{restaurant_id:'$restaurant_id', name:'$name' }, total: {$sum:1}}}, ... {$sort: {total: -1}}, ... {$limit: 5}, … // alias output to get nicer printout ... {$project: {_id:0, "rid":"$_id.restaurant_id", "rname":"$_id.name", total:1}} ... ]).pretty(); { "total" : 8, "rid" : "41382858", "rname" : "TacoVeloz" } { "total" : 7, "rid" : "41587378", "rname" : "Lobster Joint" } {"total" : 7, "rid" : "41611381", "rname" : "Burger King, Popeye'S Chicken & Biscuits"} { "total" : 7, "rid" : "41572121", "rname" : "Luke'S Pizza" } { "total" : 7, "rid" : "41578481", "rname" : "Top Hot Bagels & Grill" } >