SlideShare une entreprise Scribd logo
1  sur  26
Télécharger pour lire hors ligne
1
MongoDB
visualisation of slow operations
Kay Agahd
4 June 2013
2
idealo and MongoDB
●
idealo = Europe's leading price comparison web site
●
Germany, Austria, United Kingdom, France, Italy, Poland and Spain
●
250 million offers online (May 2013)
●
fast growing
●
different types of databases (MySQL, Oracle, MongoDB)
●
MongoDB in production since v1.6
●
sharding in production since MongoDB v1.8
●
MongoDB stores offers for back-end usage
●
30 mongoDB servers for offerStore + 3 servers for offerHistory
●
15 mongoDB servers for other purposes
●
nearly 15 TB of data all together
3
Review profiling
●
MongoDB supports profiling of “slow” operations
●
“slow” is a threshold to be set when turning profiling on (default 100 ms)
●
profiling per-database or per-instance on a running mongod
●
profiler writes collected data to a capped collection “system.profile”
4
Example of slow op entry (1/2)
{
"ts" : ISODate("2013-04-05T01:41:31.710Z"),
"op" : "getmore",
"ns" : "offerStore.offer",
"query" : {
"query" : {
"shopId" : 123,
"onlineProductIds" : {"$ne" : null},
"smallPicture" : {"$ne" : null},
"_id" : {"$gt" : 1555008076},
"lastChange" : {"$gt" : ISODate("2013-04-02T22:00:00Z")}
},
"orderby" : {
"_id" : 1
}
},
"cursorid" : NumberLong("5773493375904448215"),
"ntoreturn" : 500, 
...
5
Example of slow op entry (2/2)
...
"keyUpdates" : 0,
"numYield" : 2350,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong(8724165),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(5321722),
"w" : NumberLong(7)
}
},
"nreturned" : 500,
"responseLength" : 94656,
"millis" : 5322,
"client" : "172.16.65.202",
"user" : "pl_parser"
}
6
Inconveniences
●
each mongod needs to be handled separately
●
replSet: connect to master and every slave
●
sharding: incomplete view through router, thus replSet * n shards
●
gives only a view on a limited time span due to capped collection
●
different formats of the “query” field make querying more difficult
●
bug: ops through mongos omit the user (JIRA: SERVER-7538)
7
Example of different formats/schemata
- “query” as flat document:
{ "query" : { "shopId" : 123, 
    "onlineProductIds" : { "$ne" : null } }, 
   "user" : "pl_parser"}
- “query” embedded:
{"query" : { "query" : { "shopId" : 123,
       "onlineProductIds" : { "$ne" : null } }, 
   "orderby" : { "_id" : NumberLong(1) } }, 
 "user" : "pl_parser"}
- “query” embedded as $query:
{ "query" : { "$query" : { "shopId" : 123, 
          "onlineProductIds" : { "$ne" : null } }, 
              "$orderby" : { "_id" : NumberLong(1) },
              "$comment" : "profiling comment" }, 
 "user" : "pl_parser" }
8
idealo requirements
●
quick overview of types of slow-ops and their quantity within a time period
(“types” means op type, user, server, queried and sorted fields)
●
historical view to see how slow-ops evolve to extrapolate them
●
discovering spikes in time or in slow-op types
●
filtering by slow-op types and/or time range to drill down
9
Goals
●
faster queries
●
better adapted indexes
●
better adapted data schema
●
higher throughput by smarter workflow
10
Steps to go
●
two global steps:
●
1) collect and aggregate slow ops from all mongod's into one global
collection
●
2) GUI to query and show results
11
Step 1 of 2
●
global collection:
●
allows easy and fast querying of the whole mongoDB (shard) system
●
keeps historical data (no capped collection)
●
located on another replSet to avoid interfering with profiled mongod's
●
collector:
●
guarantee that only 1 instance is running at once (or add logic to avoid
doubled entries)
●
use tailable cursors to collect data from profiled mongod's
●
in case of failure: reconnect before data gets overwritten but avoid DoS
●
monitor it (nagios etc.)
●
profiled entries:
●
reduce size by keeping only interesting fields
●
make them easier to query (i.e. only 1 schema)
●
aggregate fields inside “query” and “orderby” to values
●
choose short field names
12
slow-op example
●
slow-op example of above becomes:
{
"_id" : ObjectId("512e43099bbcf52b9aff3602"),
"ts" : ISODate("2013-04-05T01:41:31.710Z"),
"adr" : "s233.ipx",
"op" : "getmore",
"fields" : 
["shopId","onlineProductIds","smallPicture","_id","lastChange"],
"sort" : ["_id"],
"nret" : 500,
"reslen" : 94656,
"millis" : 5322,
"user" : "pl_parser"
}
13
Step 2 of 2
●
GUI:
●
x-axis = execution time
●
y-axis = duration of slow op
●
size of point = quantity of slow-op type
●
zoomable in x or y axis
14
How to query slow ops
●
group by time component allows resolution by year, month, week etc.
●
group by server address, user, operation, queried fields and sorted fields
allows defining different slow-op types
●
filter allows to focus on time period and specific slow ops
●
use slavePreferred option
●
error handling, e.g. when the result exceeds the 16 MB maximum
15
Query example
{$match:{ts : {$gt : #, $lt : # },
   fields : {$all : ["_id","shopId","bokey"]}}},
{$group:{_id : {op : "$op",
   user : "$user",
   fields : "$fields",
   year : { $year : "$ts" },
   month : { $month : "$ts" },
   dayOfMonth : { $dayOfMonth : "$ts" },
   hour : { $hour : "$ts" }},
   count : { $sum : 1 },
millis : { $sum : "$millis" },
avgMs : { $avg : "$millis" },
minMs : { $min : "$millis" },
maxMs : { $max : "$millis" },
firstts : { $first : "$ts" }}},
{ $sort:{ firstts : 1 }}
Filter
Slow-op
Resolution
Data
16
GUI
17
Resolution by minute
18
Resolution by minute & filter
19
dygraph.js
●
general syntax:
<script type="text/javascript">
   g = new Dygraph(document.getElementById("graph"),
    "x-name,   graph1-name,   graph2-name,   ..., graphN-name\n" +
    "x-value1, graph1-value1, graph2-value1, ..., graphN-value1\n" +
    "x-value2, graph1-value2, graph2-value2, ..., graphN-value2\n" +
    ...
    "x-valueN, graph1-valueN, graph2-valueN, ..., graphN-valueN\n"
   );
 </script>
●
example for 2 slow-op types:
<script type="text/javascript">
 g = new Dygraph(document.getElementById("graph"),
 "Date,op=query;fields=[_id;shopId],n,min,max,op=query;fields=[_id],n,min,max\n" +
 "2013/03/17,  5.4, 10, 3.2,  7.8,            10.4, 123, 3.1, 20.2\n" +
 "2013/03/18, 12.4, 23, 3.4, 55.8,               0,   0,   0,    0\n" +
 "2013/03/19,    0,  0,   0,    0,            33.5,  66, 3.1, 89.3\n"
   );
</script>
20
dygraph.js Options 1/3
●
hide legend values from being drawn as graph:
<script type="text/javascript">
 g = new Dygraph(document.getElementById("graph"),
  "Date,op=query;fields=[_id;shopId],n,min,max,op=query;fields=[_id],n,min,max\n" +
  "2013/03/17, 5.4, 10, 3.2, 7.8, 10.4, 123, 3.1, 20.2\n" +
  "2013/03/18, 12.4, 23, 3.4, 55.8, 0, 0, 0, 0\n" +
  "2013/03/19, 0, 0, 0, 0, 33.5, 66, 3.1, 89.3\n",
  {//options:
    visibility:[true, false, false, false, true, false, false, false],
    showLabelsOnHighlight:false,
    hideOverlayOnMouseOut:false,
    labelsSeparateLines: true,
    drawPoints: true,
    legend: "always",
    xlabel: "Date",
    ylabel: "seconds",
    ... more options ...
  }
); 
21
dygraph.js Options 2/3
●
show custom legend on mouse over:
 highlightCallback: function(e, x, pts, row) {
  var text = "";
  var legend = new Array();
  for (var i = 0; i < pts.length; i++) {
    var rangeY = g.yAxisRange();
      if(pts[i].yval >= rangeY[0] && pts[i].yval <= rangeY[1]){//hide outside series
        var seriesProps = g.getPropertiesForSeries(pts[i].name);
        var count = g.getValue(row, seriesProps.column+1);
        var minSec = g.getValue(row, seriesProps.column+2);
        var maxSec = g.getValue(row, seriesProps.column+3);
        if(pts[i].yval != 0 && count != 0){
          legend.push([seriesProps.color, pts[i], count, minSec, maxSec]);
        }}}//end for
  legend.sort(function(a,b){return b[1].yval - a[1].yval});//sort by y-values
  for (var i = 0; i < legend.length; i++) {
    text += "<span style='color: " + legend[i][0] + ";'> " + legend[i][1].name +       
 "</span><br/><span>" + Dygraph.dateString_(legend[i][1].xval) + " count:" +        
 legend[i][2] + " minSec:" + legend[i][3] + " maxSec:" + legend[i][4] + " avgSec:" + 
 legend[i][1].yval + " </span><br/>";
  }
  document.getElementById("status").innerHTML = text; }, ... more options ...
22
dygraph.js Options 3/3
●
draw circles with surface of count:
  drawPointCallback : function(g, seriesName, ctx, cx, cy, color, pSize){
    if(lastSeries != seriesName || isNaN(currentRow) ){
lastSeries = seriesName;
currentRow = g.getLeftBoundary_() - 1;
    }
    currentRow++;
    var col = g.indexFromSetName(seriesName);
    var count = g.getValue(currentRow, col+1);
    ctx.strokeStyle = color;
    ctx.lineWidth = 0.8;
    ctx.beginPath();
    ctx.arc(cx, cy, Math.sqrt(count/Math.PI), 0, 2 * Math.PI, false); 
    ctx.closePath();
    ctx.stroke();
  }
 }//end options
);//end dygraph
23
Profiling status
24
Collector read/write status
25
Questions?
26
Thank you!

Contenu connexe

Tendances

LOFAR - finding transients in the radio spectrum
LOFAR - finding transients in the radio spectrumLOFAR - finding transients in the radio spectrum
LOFAR - finding transients in the radio spectrum
Gijs Molenaar
 

Tendances (20)

MongoDB - Sharded Cluster Tutorial
MongoDB - Sharded Cluster TutorialMongoDB - Sharded Cluster Tutorial
MongoDB - Sharded Cluster Tutorial
 
Sharded cluster tutorial
Sharded cluster tutorialSharded cluster tutorial
Sharded cluster tutorial
 
MongoDB
MongoDBMongoDB
MongoDB
 
Spark & Cassandra - DevFest Córdoba
Spark & Cassandra - DevFest CórdobaSpark & Cassandra - DevFest Córdoba
Spark & Cassandra - DevFest Córdoba
 
Mongo db improve the performance of your application codemotion2016
Mongo db improve the performance of your application codemotion2016Mongo db improve the performance of your application codemotion2016
Mongo db improve the performance of your application codemotion2016
 
Logs management
Logs managementLogs management
Logs management
 
Using MongoDB and Python
Using MongoDB and PythonUsing MongoDB and Python
Using MongoDB and Python
 
MongoDB - An Introduction
MongoDB - An IntroductionMongoDB - An Introduction
MongoDB - An Introduction
 
MySQL Without The SQL -- Oh My! PHP Detroit July 2018
MySQL Without The SQL -- Oh My! PHP Detroit July 2018MySQL Without The SQL -- Oh My! PHP Detroit July 2018
MySQL Without The SQL -- Oh My! PHP Detroit July 2018
 
Intro to cassandra
Intro to cassandraIntro to cassandra
Intro to cassandra
 
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDB
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDBUi5 con@Banglore - UI5 App with Offline Storage using PouchDB
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDB
 
MongoDB
MongoDBMongoDB
MongoDB
 
LOFAR - finding transients in the radio spectrum
LOFAR - finding transients in the radio spectrumLOFAR - finding transients in the radio spectrum
LOFAR - finding transients in the radio spectrum
 
Webinar: Getting Started with MongoDB - Back to Basics
Webinar: Getting Started with MongoDB - Back to BasicsWebinar: Getting Started with MongoDB - Back to Basics
Webinar: Getting Started with MongoDB - Back to Basics
 
Indexing In MongoDB
Indexing In MongoDBIndexing In MongoDB
Indexing In MongoDB
 
MongoDB - Ekino PHP
MongoDB - Ekino PHPMongoDB - Ekino PHP
MongoDB - Ekino PHP
 
When big data meet python @ COSCUP 2012
When big data meet python @ COSCUP 2012When big data meet python @ COSCUP 2012
When big data meet python @ COSCUP 2012
 
Introduction to MongoDB
Introduction to MongoDBIntroduction to MongoDB
Introduction to MongoDB
 
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...
 
Back to Basics Webinar 1: Introduction to NoSQL
Back to Basics Webinar 1: Introduction to NoSQLBack to Basics Webinar 1: Introduction to NoSQL
Back to Basics Webinar 1: Introduction to NoSQL
 

En vedette

A Sceptical Guide to Functional Programming
A Sceptical Guide to Functional ProgrammingA Sceptical Guide to Functional Programming
A Sceptical Guide to Functional Programming
Garth Gilmour
 
Perl Development (Sample Courseware)
Perl Development (Sample Courseware)Perl Development (Sample Courseware)
Perl Development (Sample Courseware)
Garth Gilmour
 

En vedette (6)

A Sceptical Guide to Functional Programming
A Sceptical Guide to Functional ProgrammingA Sceptical Guide to Functional Programming
A Sceptical Guide to Functional Programming
 
Usability Testing Intro
Usability Testing IntroUsability Testing Intro
Usability Testing Intro
 
Scala sydoct2011
Scala sydoct2011Scala sydoct2011
Scala sydoct2011
 
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?
 
Perl Development (Sample Courseware)
Perl Development (Sample Courseware)Perl Development (Sample Courseware)
Perl Development (Sample Courseware)
 
Raphaël JS Conf
Raphaël JS ConfRaphaël JS Conf
Raphaël JS Conf
 

Similaire à MongoDB - visualisation of slow operations

MongoDB : The Definitive Guide
MongoDB : The Definitive GuideMongoDB : The Definitive Guide
MongoDB : The Definitive Guide
Wildan Maulana
 

Similaire à MongoDB - visualisation of slow operations (20)

MongoDB 3.2 - a giant leap. What’s new?
MongoDB 3.2 - a giant leap. What’s new?MongoDB 3.2 - a giant leap. What’s new?
MongoDB 3.2 - a giant leap. What’s new?
 
Eko10 Workshop Opensource Database Auditing
Eko10  Workshop Opensource Database AuditingEko10  Workshop Opensource Database Auditing
Eko10 Workshop Opensource Database Auditing
 
Eko10 workshop - OPEN SOURCE DATABASE MONITORING
Eko10 workshop - OPEN SOURCE DATABASE MONITORINGEko10 workshop - OPEN SOURCE DATABASE MONITORING
Eko10 workshop - OPEN SOURCE DATABASE MONITORING
 
Mongodb (1)
Mongodb (1)Mongodb (1)
Mongodb (1)
 
MongoDB: Advantages of an Open Source NoSQL Database
MongoDB: Advantages of an Open Source NoSQL DatabaseMongoDB: Advantages of an Open Source NoSQL Database
MongoDB: Advantages of an Open Source NoSQL Database
 
MongoDB Workshop Universidad de Huelva
MongoDB Workshop Universidad de HuelvaMongoDB Workshop Universidad de Huelva
MongoDB Workshop Universidad de Huelva
 
mongodb tutorial
mongodb tutorialmongodb tutorial
mongodb tutorial
 
Quick overview on mongo db
Quick overview on mongo dbQuick overview on mongo db
Quick overview on mongo db
 
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...
 
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...
 
Log MongoDB slow query
Log MongoDB slow queryLog MongoDB slow query
Log MongoDB slow query
 
2019.06.27 Intro to Ceph
2019.06.27 Intro to Ceph2019.06.27 Intro to Ceph
2019.06.27 Intro to Ceph
 
Logs @ OVHcloud
Logs @ OVHcloudLogs @ OVHcloud
Logs @ OVHcloud
 
MongoDB Pros and Cons
MongoDB Pros and ConsMongoDB Pros and Cons
MongoDB Pros and Cons
 
MongoDB FabLab León
MongoDB FabLab LeónMongoDB FabLab León
MongoDB FabLab León
 
AWS Big Data Demystified #1: Big data architecture lessons learned
AWS Big Data Demystified #1: Big data architecture lessons learned AWS Big Data Demystified #1: Big data architecture lessons learned
AWS Big Data Demystified #1: Big data architecture lessons learned
 
MongoDB : The Definitive Guide
MongoDB : The Definitive GuideMongoDB : The Definitive Guide
MongoDB : The Definitive Guide
 
Big data @ Hootsuite analtyics
Big data @ Hootsuite analtyicsBig data @ Hootsuite analtyics
Big data @ Hootsuite analtyics
 
Log stage zero-cost structured logging
Log stage  zero-cost structured loggingLog stage  zero-cost structured logging
Log stage zero-cost structured logging
 
Logstage - zero-cost-tructured-logging
Logstage - zero-cost-tructured-loggingLogstage - zero-cost-tructured-logging
Logstage - zero-cost-tructured-logging
 

Dernier

Architecting Cloud Native Applications
Architecting Cloud Native ApplicationsArchitecting Cloud Native Applications
Architecting Cloud Native Applications
WSO2
 
Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Finding Java's Hidden Performance Traps @ DevoxxUK 2024Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Victor Rentea
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Safe Software
 

Dernier (20)

Corporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptxCorporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptx
 
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdfRising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
 
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWEREMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
 
Six Myths about Ontologies: The Basics of Formal Ontology
Six Myths about Ontologies: The Basics of Formal OntologySix Myths about Ontologies: The Basics of Formal Ontology
Six Myths about Ontologies: The Basics of Formal Ontology
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
 
Architecting Cloud Native Applications
Architecting Cloud Native ApplicationsArchitecting Cloud Native Applications
Architecting Cloud Native Applications
 
Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Finding Java's Hidden Performance Traps @ DevoxxUK 2024Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Finding Java's Hidden Performance Traps @ DevoxxUK 2024
 
Strategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a FresherStrategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a Fresher
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
 
Elevate Developer Efficiency & build GenAI Application with Amazon Q​
Elevate Developer Efficiency & build GenAI Application with Amazon Q​Elevate Developer Efficiency & build GenAI Application with Amazon Q​
Elevate Developer Efficiency & build GenAI Application with Amazon Q​
 
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot ModelMcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected Worker
 
Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)
 
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
 
Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...
 
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
 
Vector Search -An Introduction in Oracle Database 23ai.pptx
Vector Search -An Introduction in Oracle Database 23ai.pptxVector Search -An Introduction in Oracle Database 23ai.pptx
Vector Search -An Introduction in Oracle Database 23ai.pptx
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 

MongoDB - visualisation of slow operations

  • 1. 1 MongoDB visualisation of slow operations Kay Agahd 4 June 2013
  • 2. 2 idealo and MongoDB ● idealo = Europe's leading price comparison web site ● Germany, Austria, United Kingdom, France, Italy, Poland and Spain ● 250 million offers online (May 2013) ● fast growing ● different types of databases (MySQL, Oracle, MongoDB) ● MongoDB in production since v1.6 ● sharding in production since MongoDB v1.8 ● MongoDB stores offers for back-end usage ● 30 mongoDB servers for offerStore + 3 servers for offerHistory ● 15 mongoDB servers for other purposes ● nearly 15 TB of data all together
  • 3. 3 Review profiling ● MongoDB supports profiling of “slow” operations ● “slow” is a threshold to be set when turning profiling on (default 100 ms) ● profiling per-database or per-instance on a running mongod ● profiler writes collected data to a capped collection “system.profile”
  • 4. 4 Example of slow op entry (1/2) { "ts" : ISODate("2013­04­05T01:41:31.710Z"), "op" : "getmore", "ns" : "offerStore.offer", "query" : { “query” : { "shopId" : 123, "onlineProductIds" : {"$ne" : null}, "smallPicture" : {"$ne" : null}, "_id" : {"$gt" : 1555008076}, "lastChange" : {"$gt" : ISODate("2013­04­02T22:00:00Z")} }, “orderby” : { “_id” : 1 } }, "cursorid" : NumberLong("5773493375904448215"), "ntoreturn" : 500,  ...
  • 5. 5 Example of slow op entry (2/2) ... "keyUpdates" : 0, "numYield" : 2350, "ts" : "lockStats" : { "timeLockedMicros" : { "r" : NumberLong(8724165), "w" : NumberLong(0) }, "timeAcquiringMicros" : { "r" : NumberLong(5321722), "w" : NumberLong(7) } }, "nreturned" : 500, "responseLength" : 94656, "millis" : 5322, "client" : "172.16.65.202", "user" : "pl_parser" }
  • 6. 6 Inconveniences ● each mongod needs to be handled separately ● replSet: connect to master and every slave ● sharding: incomplete view through router, thus replSet * n shards ● gives only a view on a limited time span due to capped collection ● different formats of the “query” field make querying more difficult ● bug: ops through mongos omit the user (JIRA: SERVER-7538)
  • 7. 7 Example of different formats/schemata - “query” as flat document: { "query" : { "shopId" : 123,      "onlineProductIds" : { "$ne" : null } },     "user" : "pl_parser"} - “query” embedded: {"query" : { "query" : { "shopId" : 123,        "onlineProductIds" : { "$ne" : null } },     "orderby" : { "_id" : NumberLong(1) } },   "user" : "pl_parser"} - “query” embedded as $query: { "query" : { "$query" : { "shopId" : 123,            "onlineProductIds" : { "$ne" : null } },                "$orderby" : { "_id" : NumberLong(1) },               "$comment" : "profiling comment" },   "user" : "pl_parser" }
  • 8. 8 idealo requirements ● quick overview of types of slow-ops and their quantity within a time period (“types” means op type, user, server, queried and sorted fields) ● historical view to see how slow-ops evolve to extrapolate them ● discovering spikes in time or in slow-op types ● filtering by slow-op types and/or time range to drill down
  • 9. 9 Goals ● faster queries ● better adapted indexes ● better adapted data schema ● higher throughput by smarter workflow
  • 10. 10 Steps to go ● two global steps: ● 1) collect and aggregate slow ops from all mongod's into one global collection ● 2) GUI to query and show results
  • 11. 11 Step 1 of 2 ● global collection: ● allows easy and fast querying of the whole mongoDB (shard) system ● keeps historical data (no capped collection) ● located on another replSet to avoid interfering with profiled mongod's ● collector: ● guarantee that only 1 instance is running at once (or add logic to avoid doubled entries) ● use tailable cursors to collect data from profiled mongod's ● in case of failure: reconnect before data gets overwritten but avoid DoS ● monitor it (nagios etc.) ● profiled entries: ● reduce size by keeping only interesting fields ● make them easier to query (i.e. only 1 schema) ● aggregate fields inside “query” and “orderby” to values ● choose short field names
  • 12. 12 slow-op example ● slow-op example of above becomes: { "_id" : ObjectId("512e43099bbcf52b9aff3602"), "ts" : ISODate("2013­04­05T01:41:31.710Z"), "adr" : "s233.ipx", "op" : "getmore", "fields" :  ["shopId","onlineProductIds","smallPicture","_id",“lastChange“], "sort" : ["_id”], "nret" : 500, "reslen" : 94656, "millis" : 5322, "user" : "pl_parser" }
  • 13. 13 Step 2 of 2 ● GUI: ● x-axis = execution time ● y-axis = duration of slow op ● size of point = quantity of slow-op type ● zoomable in x or y axis
  • 14. 14 How to query slow ops ● group by time component allows resolution by year, month, week etc. ● group by server address, user, operation, queried fields and sorted fields allows defining different slow-op types ● filter allows to focus on time period and specific slow ops ● use slavePreferred option ● error handling, e.g. when the result exceeds the 16 MB maximum
  • 19. 19 dygraph.js ● general syntax: <script type="text/javascript">    g = new Dygraph(document.getElementById("graph"),     "x­name,   graph1­name,   graph2­name,   ..., graphN­namen" +      "x­value1, graph1­value1, graph2­value1, ..., graphN­value1n" +     "x­value2, graph1­value2, graph2­value2, ..., graphN­value2n" +     ...     "x­valueN, graph1­valueN, graph2­valueN, ..., graphN­valueNn"    );  </script> ● example for 2 slow-op types: <script type="text/javascript">  g = new Dygraph(document.getElementById("graph"),  "Date,op=query;fields=[_id;shopId],n,min,max,op=query;fields=[_id],n,min,maxn" +   "2013/03/17,  5.4, 10, 3.2,  7.8,            10.4, 123, 3.1, 20.2n" +   "2013/03/18, 12.4, 23, 3.4, 55.8,               0,   0,   0,    0n" +  "2013/03/19,    0,  0,   0,    0,            33.5,  66, 3.1, 89.3n"     ); </script>
  • 20. 20 dygraph.js Options 1/3 ● hide legend values from being drawn as graph: <script type="text/javascript">  g = new Dygraph(document.getElementById("graph"),   "Date,op=query;fields=[_id;shopId],n,min,max,op=query;fields=[_id],n,min,maxn" +    "2013/03/17, 5.4, 10, 3.2, 7.8, 10.4, 123, 3.1, 20.2n" +    "2013/03/18, 12.4, 23, 3.4, 55.8, 0, 0, 0, 0n" +     "2013/03/19, 0, 0, 0, 0, 33.5, 66, 3.1, 89.3n",   {//options:     visibility:[true, false, false, false, true, false, false, false],     showLabelsOnHighlight:false,     hideOverlayOnMouseOut:false,     labelsSeparateLines: true,     drawPoints: true,     legend: "always",     xlabel: "Date",     ylabel: "seconds",     ... more options ...   } ); 
  • 21. 21 dygraph.js Options 2/3 ● show custom legend on mouse over:  highlightCallback: function(e, x, pts, row) {   var text = "";   var legend = new Array();   for (var i = 0; i < pts.length; i++) {     var rangeY = g.yAxisRange();       if(pts[i].yval >= rangeY[0] && pts[i].yval <= rangeY[1]){//hide outside series         var seriesProps = g.getPropertiesForSeries(pts[i].name);         var count = g.getValue(row, seriesProps.column+1);         var minSec = g.getValue(row, seriesProps.column+2);         var maxSec = g.getValue(row, seriesProps.column+3);         if(pts[i].yval != 0 && count != 0){           legend.push([seriesProps.color, pts[i], count, minSec, maxSec]);         }}}//end for   legend.sort(function(a,b){return b[1].yval­a[1].yval});//sort by y­values   for (var i = 0; i < legend.length; i++) {     text += "<span style='color: " + legend[i][0] + ";'> " + legend[i][1].name +         "</span><br/><span>" + Dygraph.dateString_(legend[i][1].xval) + " count:" +          legend[i][2] + " minSec:" + legend[i][3] + " maxSec:" + legend[i][4] + "avgSec:" +   legend[i][1].yval + " </span><br/>";   }   document.getElementById("status").innerHTML = text; }, ... more options ...
  • 22. 22 dygraph.js Options 3/3 ● draw circles with surface of count:   drawPointCallback : function(g, seriesName, ctx, cx, cy, color, pSize){     if(lastSeries != seriesName || isNaN(currentRow) ){ lastSeries = seriesName; currentRow = g.getLeftBoundary_() ­ 1;     }     currentRow++;     var col = g.indexFromSetName(seriesName);     var count = g.getValue(currentRow, col+1);     ctx.strokeStyle = color;     ctx.lineWidth = 0.8;     ctx.beginPath();     ctx.arc(cx, cy, Math.sqrt(count/Math.PI), 0, 2 * Math.PI, false);      ctx.closePath();     ctx.stroke();   }  }//end options );//end dygraph