SlideShare a Scribd company logo
1 of 57
Download to read offline
• 
      


      


      


      



• 
      


      


      
• 

      



• 

      


      


      




      
 
     


     




 
     


     


     
 
 


 


 
 


 


 
!

 


 
 
     


     




 
     


     
 
     


     




 
     


     


     


     
 
�
�   �
• 
     ‣ 

     ‣ 

     ‣ 

• 
     ‣ 

     ‣ 
• 
     ‣ 

     ‣ 

     ‣ 

     ‣ 

     ‣ 
• 
     ‣ 

     ‣ 

     ‣ 

     ‣ 

     ‣ 
• 
     ‣ 

     ‣ 

     ‣ 

     ‣ 

     ‣ 

     ‣ 
• 
def mapper(key, value):
      for word in value.split(): yield word,1
def reducer(key, values):
      yield key,sum(values)
if __name__ == "__main__":
      import dumbo
      dumbo.run(mapper, reducer)




dumbo start wordcount.py \
-hadoop /path/to/hadoop \
-input wc_input.txt -output wc_output
• 
      ‣ 

      ‣ 

      ‣ 

      ‣ 

python wordcount.py map < wc_input.txt | sort |
     python wordcount.py red > wc_output.txt
• 


     ‣ 

     ‣ 

     ‣ 

     ‣ 

     ‣ 
• 
• 
 -----Change------!
 ActionLogger    a{ChangeP}          (Point,1371,1383)       !
 ActionLogger    a{ChangeP}          (Point,2373,2423)!

 ActionLogger    a{ChangeMedal}      (lucky_star,9,10)    !
 ActionLogger    a{ChangeMedal}      (lucky_sea_bream,0,1)!

 ActionLogger    a{ChangeG}                  !

 ActionLogger    a{ChangeSubG} (SubGold,13,16)               !

 ActionLogger    a{ChangeWakuwakuP}       (buy,0,30)!
 ActionLogger    a{ChangeWakuwakuP}       (by gacha,30,0) !

 ------Get------!
 ActionLogger     a{GetMaterial}    (syouhinnomoto,0,-1) !
 ActionLogger     a{GetMaterial}    usesyouhinnomoto !
 ActionLogger     a{GetMaterial}    (omotyanomotoPRO,1,6)!
 ActionLogger     a{GetMaterial}    (sui-tunomoto,5,4)!

 ActionLogger    a{GetInterior}      (bakery_counter,0,1)!

 ActionLogger    a{GetAvatarPart}      (190167,0,1)      !
 ActionLogger    a{GetAvatarPart}      (old_girl_09,0,1) !

 -----Trade-----!
 ActionLogger     a{Trade}              buy 3 itigoke-kis from gree.jp:xxxxx   !
• 
            
           ‣ 
           ‣ 
2010-07-26 00:00:02,446 INFO catalina-exec-483 ActionLogger –
userId a{Make} make item onsenmanjyuu!
2010-07-26 00:00:02,478 INFO catalina-exec-411 ActionLogger –
userId a{LifeCycle} Login



userId 2010-07-26 00:00:02,446 a{Make}        {onsenmanjyuu,1}!
userId 2010-07-26 00:00:02,478 a{LifeCycle}   {Login,1}!
userId 2010-07-26 00:00:02,478 a{GetMaterial} {omotyanomotoPRO,5}!
• 
      
     ‣ 
     ‣ 
     ‣ 
     ‣ 
• 


     • 

     • 
     • 

     • 
     • 

     • 
     • 

     • 
     • 
• 
     {
        "_id" : "2010-06-27+xxxxx+a{ChangeP}",
        "lastUpdate" : "2010-09-17",
        "date" : "2010-06-27",
        "userId" : "xxxxx",
        "actionType" : "a{ChangeP}",
        "actionDetail" : { "Point" : 600 }
     }
     {
        "_id" : "2010-06-27+xxxxx+a{LifeCycle}",
        "lastUpdate" : "2010-09-17",
        "date" : "2010-06-27",
        "userId" : "xxxxx",
        "actionType" : "a{LifeCycle}",
        "actionDetail" : { "Login" : 3 }
     }
• 
     { "_id" : "2010-08-31+group+a{PutOn}",
       "date" : "2010-08-31",
       "lastUpdate" : "2010-09-21",
       "actionType" : "a{PutOn}",
       "actionDetail" : { "a{PutOn}" : 52050 }
     }
     {...
      "actionType" : "a{Make}",
      "actionDetail" : {
                         "syurijyou" : 11,
                         "aisukuri-mu" : 378,
                         "kinnokarakuridokei" : 103,
                         "puramoderu" : 22,
                         "guremurinno_n" : 164,
                         "kyodaipenginno_n" : 76,
                         "patinko" : 67,
                         "wakizasi" : 250,
                         "dendendaiko" : 13651,
                         ... (over 100 items)
                        }
     }
• 
     ‣ 


     ‣ 


     ‣ 

     ‣ 

     ‣ 

     ‣ 
• 
     ‣ 

     ‣ 

     ‣ 


     ‣ 

     ‣ 

     ‣ 
• 
MySQL:   select * from things where x=3 and y="foo"!
MongoDB: db.things.find( { x : 3, y : "foo" } );!


MySQL: select z from things where x=3!
MongoDB: db.things.find( { x : 3 }, { z : 1 } );


db.collection.find({ "field" : { $gt: value } } );
//       : field > value

db.collection.find({ "field" : { $lt: value } } );
//       : field < value

db.collection.find({"field": {$gt: value1, $lt: value2}});
 // value1 < field < value2
MySQL:   select * from things where x in (b,a,c)!
MongoDB: db.collection.find( { "field" : { $in : array } } ); !

     db.things.find({j:{$in: [2,4,6]}});!



db.customers.find( { name : /acme.*corp/i } ); !


db.myCollection.find().sort( { ts : -1 } ); // ts             !



>   m = function() { emit(this.user_id, 1); }
>   r = function(k,vals) { return 1; }
>   res = db.events.mapReduce(m, r, { query : {type:'sale'} });
>   db[res.result].find().limit(2)
{   "_id" : 8321073716060 , "value" : 1 }
{   "_id" : 7921232311289 , "value" : 1 }
• 
{
   "_id" : "2010-06-27+xxxxx+a{ChangeP}",
   "lastUpdate" : "2010-09-17",
   "date" : "2010-06-27",
   "userId" : "xxxxx",
   "actionType" : "a{ChangeP}",
   "actionDetail" : { "Point" : 600 }
}
{
   "_id" : "2010-06-27+xxxxx+a{LifeCycle}",
   "lastUpdate" : "2010-09-17",
   "date" : "2010-06-27",
   "userId" : "xxxxx",
   "actionType" : "a{LifeCycle}",
   "actionDetail" : { "Login" : 3 }
}
• 
     ‣ 

• 
     ‣ 

     ‣ 

     ‣ 

     ‣ 
• 
      




         • 
         • 
         • 


         • 
         • 
         • 


         • 
         • 
         • 
• 

{
 "_id" : "2010-06-28+xxxx+Charge",
 "lastUpdate" : "2010-09-20",
 "userId" : "xxxx",
 "date" : "2010-06-28",
 "actionType" : "Charge",
 "totalCharge" : 1210,
 "boughtItem" : { "          EX 5 " : 1,
                  "           5 " : 1,
                  "         5 " : 1,
                  "          " : 1,
                  "     " : 2 }
}
• 
     ‣ 

     ‣ 

• 
     ‣ 

     ‣ 

     ‣ 
• 
• 
      
• 
      
• 
     {
          "_id" : "2010-07-11+xxxxx+Registration",
          "lastUpdate" : "2010-09-25",
          "actionType" : "Registration",
          "userId" : "xxxxx",
          "date" : "2010-07-11",
          "firstCharge" : "2010-07-12",
          "lastCharge" : "2010-09-02",
          "lastLogin" : "2010-09-02",
          "firstChargeTerm" : 1,
          "playTerm" : 50,
          "totalMonthCharge" : 1000,
          "totalMonthChargeDetail" : {
              "1th" : 74.3,
              "2th" : 17.1,
              "3th" : 8.6,                              i.e.
              "4th" : 0
          },
          "totalCumlativeCharge" : 10000,
          "totalCumlativeChargeDetail" : {
              "1th" : 2,
              "2th" : 0.5,
              "3th" : 0.2,
              "4th" : 0,
              "5th" : 0.1,
              "6th" : 27.5,
              "7th" : 1.2,
              "8th" : 49,
              "9th" : 19.5                      2.7%
          }
     }
• 
topMonthCharge = function(n){
 return db.user_registration.find({},{
   totalMonthCharge:true,
   totalMonthChargeDetail:true,
   userId:true
 }).sort({totalMonthCharge:-1}).limit(n);
}

> topMonthCharge(20)
{
   "_id" : "2010-07-10+9999+Registration",   Top20
   "userId" : "9999",
   "totalMonthCharge" : 10000,
   "totalMonthChargeDetail" : { "5th" : 13.7, "4th" : 27.6,
"3th" : 21, "2th" : 16.2, "1th" : 21.5 }
}
…
findUserCharge = function(x){
 return db.user_charge.find({userId:x},{
   userId:true,
   totalCharge:true,
   boughtItem:true}).sort({date:-1})
}
> findUserCharge("9999")
{
     "_id" : "2010-09-08+9223458+Charge",
     "totalCharge" : 2000,
     "userId" : "9999",
     "boughtItem" : {                       Top
         "        110 " : 2
     }
}
{
     "_id" : "2010-09-07+9223458+Charge",
     "totalCharge" : 5000,
     "userId" : "9999",
     "boughtItem" : {
         "        350 " : 1,
         "        110 " : 2
     }
}
…
• 
• 
• 
• 
• 
• 
db.user_error!
 db.user_access!    (           )!   db.user_trace!
(from          )!                    (from       )!




                    db.user_attr!
                    (          )!




 db.user_status!                     db.user_charge!
(from Cassandra)!                     (from MySQL)!
 
• 
     ‣ 


     ‣ 


     ‣ 

     ‣ 
• 
     ‣ 


     ‣ 


     ‣ 



     ‣ 
HadoopとMongoDBを活用したソーシャルアプリのログ解析

More Related Content

What's hot

Scaling MongoDB; Sharding Into and Beyond the Multi-Terabyte Range
Scaling MongoDB; Sharding Into and Beyond the Multi-Terabyte RangeScaling MongoDB; Sharding Into and Beyond the Multi-Terabyte Range
Scaling MongoDB; Sharding Into and Beyond the Multi-Terabyte Range
MongoDB
 
Юрий Буянов «Squeryl — ORM с человеческим лицом»
Юрий Буянов «Squeryl — ORM с человеческим лицом»Юрий Буянов «Squeryl — ORM с человеческим лицом»
Юрий Буянов «Squeryl — ORM с человеческим лицом»
e-Legion
 
jQuery%20on%20Rails%20Presentation
jQuery%20on%20Rails%20PresentationjQuery%20on%20Rails%20Presentation
jQuery%20on%20Rails%20Presentation
guestcf600a
 

What's hot (11)

SQLAlchemy Seminar
SQLAlchemy SeminarSQLAlchemy Seminar
SQLAlchemy Seminar
 
Scaling MongoDB; Sharding Into and Beyond the Multi-Terabyte Range
Scaling MongoDB; Sharding Into and Beyond the Multi-Terabyte RangeScaling MongoDB; Sharding Into and Beyond the Multi-Terabyte Range
Scaling MongoDB; Sharding Into and Beyond the Multi-Terabyte Range
 
PHP Loves MongoDB - Dublin MUG (by Hannes)
PHP Loves MongoDB - Dublin MUG (by Hannes)PHP Loves MongoDB - Dublin MUG (by Hannes)
PHP Loves MongoDB - Dublin MUG (by Hannes)
 
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridas
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridasFrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridas
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridas
 
Юрий Буянов «Squeryl — ORM с человеческим лицом»
Юрий Буянов «Squeryl — ORM с человеческим лицом»Юрий Буянов «Squeryl — ORM с человеческим лицом»
Юрий Буянов «Squeryl — ORM с человеческим лицом»
 
Solr & Lucene @ Etsy by Gregg Donovan
Solr & Lucene @ Etsy by Gregg DonovanSolr & Lucene @ Etsy by Gregg Donovan
Solr & Lucene @ Etsy by Gregg Donovan
 
前端MVC之BackboneJS
前端MVC之BackboneJS前端MVC之BackboneJS
前端MVC之BackboneJS
 
Letswift18 워크숍#1 스위프트 클린코드와 코드리뷰
Letswift18 워크숍#1 스위프트 클린코드와 코드리뷰Letswift18 워크숍#1 스위프트 클린코드와 코드리뷰
Letswift18 워크숍#1 스위프트 클린코드와 코드리뷰
 
JSF Mashups in Action
JSF Mashups in ActionJSF Mashups in Action
JSF Mashups in Action
 
I Love Ruby
I Love RubyI Love Ruby
I Love Ruby
 
jQuery%20on%20Rails%20Presentation
jQuery%20on%20Rails%20PresentationjQuery%20on%20Rails%20Presentation
jQuery%20on%20Rails%20Presentation
 

Viewers also liked

MongoDBとAjaxで作る解析フロントエンド&GraphDBを用いたソーシャルデータ解析
MongoDBとAjaxで作る解析フロントエンド&GraphDBを用いたソーシャルデータ解析MongoDBとAjaxで作る解析フロントエンド&GraphDBを用いたソーシャルデータ解析
MongoDBとAjaxで作る解析フロントエンド&GraphDBを用いたソーシャルデータ解析
Takahiro Inoue
 
A21 列指向DB HP Vertica ~その圧倒的な高速検索の謎を解き明かす~ byKeizo Aizawa
A21 列指向DB HP Vertica ~その圧倒的な高速検索の謎を解き明かす~ byKeizo AizawaA21 列指向DB HP Vertica ~その圧倒的な高速検索の謎を解き明かす~ byKeizo Aizawa
A21 列指向DB HP Vertica ~その圧倒的な高速検索の謎を解き明かす~ byKeizo Aizawa
Insight Technology, Inc.
 
MongoDBを用いたソーシャルアプリのログ解析 〜解析基盤構築からフロントUIまで、MongoDBを最大限に活用する〜
MongoDBを用いたソーシャルアプリのログ解析 〜解析基盤構築からフロントUIまで、MongoDBを最大限に活用する〜MongoDBを用いたソーシャルアプリのログ解析 〜解析基盤構築からフロントUIまで、MongoDBを最大限に活用する〜
MongoDBを用いたソーシャルアプリのログ解析 〜解析基盤構築からフロントUIまで、MongoDBを最大限に活用する〜
Takahiro Inoue
 
「GraphDB徹底入門」〜構造や仕組み理解から使いどころ・種々のGraphDBの比較まで幅広く〜
「GraphDB徹底入門」〜構造や仕組み理解から使いどころ・種々のGraphDBの比較まで幅広く〜「GraphDB徹底入門」〜構造や仕組み理解から使いどころ・種々のGraphDBの比較まで幅広く〜
「GraphDB徹底入門」〜構造や仕組み理解から使いどころ・種々のGraphDBの比較まで幅広く〜
Takahiro Inoue
 

Viewers also liked (20)

MongoDBとAjaxで作る解析フロントエンド&GraphDBを用いたソーシャルデータ解析
MongoDBとAjaxで作る解析フロントエンド&GraphDBを用いたソーシャルデータ解析MongoDBとAjaxで作る解析フロントエンド&GraphDBを用いたソーシャルデータ解析
MongoDBとAjaxで作る解析フロントエンド&GraphDBを用いたソーシャルデータ解析
 
A21 列指向DB HP Vertica ~その圧倒的な高速検索の謎を解き明かす~ byKeizo Aizawa
A21 列指向DB HP Vertica ~その圧倒的な高速検索の謎を解き明かす~ byKeizo AizawaA21 列指向DB HP Vertica ~その圧倒的な高速検索の謎を解き明かす~ byKeizo Aizawa
A21 列指向DB HP Vertica ~その圧倒的な高速検索の謎を解き明かす~ byKeizo Aizawa
 
ソーシャルゲームログ解析基盤のHadoop活用事例
ソーシャルゲームログ解析基盤のHadoop活用事例ソーシャルゲームログ解析基盤のHadoop活用事例
ソーシャルゲームログ解析基盤のHadoop活用事例
 
SQLまで使える高機能NoSQLであるCouchbase Serverの勉強会資料
SQLまで使える高機能NoSQLであるCouchbase Serverの勉強会資料SQLまで使える高機能NoSQLであるCouchbase Serverの勉強会資料
SQLまで使える高機能NoSQLであるCouchbase Serverの勉強会資料
 
Javaでmongo db
Javaでmongo dbJavaでmongo db
Javaでmongo db
 
MongoDBを用いたソーシャルアプリのログ解析 〜解析基盤構築からフロントUIまで、MongoDBを最大限に活用する〜
MongoDBを用いたソーシャルアプリのログ解析 〜解析基盤構築からフロントUIまで、MongoDBを最大限に活用する〜MongoDBを用いたソーシャルアプリのログ解析 〜解析基盤構築からフロントUIまで、MongoDBを最大限に活用する〜
MongoDBを用いたソーシャルアプリのログ解析 〜解析基盤構築からフロントUIまで、MongoDBを最大限に活用する〜
 
Awsでつくるapache kafkaといろんな悩み
Awsでつくるapache kafkaといろんな悩みAwsでつくるapache kafkaといろんな悩み
Awsでつくるapache kafkaといろんな悩み
 
Rancher/Kubernetes入門ハンズオン資料~第2回さくらとコンテナの夕べ #さくらの夕べ 番外編
 Rancher/Kubernetes入門ハンズオン資料~第2回さくらとコンテナの夕べ #さくらの夕べ 番外編 Rancher/Kubernetes入門ハンズオン資料~第2回さくらとコンテナの夕べ #さくらの夕べ 番外編
Rancher/Kubernetes入門ハンズオン資料~第2回さくらとコンテナの夕べ #さくらの夕べ 番外編
 
「GraphDB徹底入門」〜構造や仕組み理解から使いどころ・種々のGraphDBの比較まで幅広く〜
「GraphDB徹底入門」〜構造や仕組み理解から使いどころ・種々のGraphDBの比較まで幅広く〜「GraphDB徹底入門」〜構造や仕組み理解から使いどころ・種々のGraphDBの比較まで幅広く〜
「GraphDB徹底入門」〜構造や仕組み理解から使いどころ・種々のGraphDBの比較まで幅広く〜
 
分散処理基盤ApacheHadoop入門とHadoopエコシステムの最新技術動向(OSC2015 Kansai発表資料)
分散処理基盤ApacheHadoop入門とHadoopエコシステムの最新技術動向(OSC2015 Kansai発表資料)分散処理基盤ApacheHadoop入門とHadoopエコシステムの最新技術動向(OSC2015 Kansai発表資料)
分散処理基盤ApacheHadoop入門とHadoopエコシステムの最新技術動向(OSC2015 Kansai発表資料)
 
Mongo DBを半年運用してみた
Mongo DBを半年運用してみたMongo DBを半年運用してみた
Mongo DBを半年運用してみた
 
がっつりMongoDB事例紹介
がっつりMongoDB事例紹介がっつりMongoDB事例紹介
がっつりMongoDB事例紹介
 
AWS Blackbelt 2015シリーズ Amazon Storage Service (S3)
AWS Blackbelt 2015シリーズ Amazon Storage Service (S3)AWS Blackbelt 2015シリーズ Amazon Storage Service (S3)
AWS Blackbelt 2015シリーズ Amazon Storage Service (S3)
 
Amazon S3を中心とするデータ分析のベストプラクティス
Amazon S3を中心とするデータ分析のベストプラクティスAmazon S3を中心とするデータ分析のベストプラクティス
Amazon S3を中心とするデータ分析のベストプラクティス
 
Nosqlの基礎知識(2013年7月講義資料)
Nosqlの基礎知識(2013年7月講義資料)Nosqlの基礎知識(2013年7月講義資料)
Nosqlの基礎知識(2013年7月講義資料)
 
ログ管理のベストプラクティス
ログ管理のベストプラクティスログ管理のベストプラクティス
ログ管理のベストプラクティス
 
初心者向けMongoDBのキホン!
初心者向けMongoDBのキホン!初心者向けMongoDBのキホン!
初心者向けMongoDBのキホン!
 
Cassandraとh baseの比較して入門するno sql
Cassandraとh baseの比較して入門するno sqlCassandraとh baseの比較して入門するno sql
Cassandraとh baseの比較して入門するno sql
 
最新業界事情から見るデータサイエンティストの「実像」
最新業界事情から見るデータサイエンティストの「実像」最新業界事情から見るデータサイエンティストの「実像」
最新業界事情から見るデータサイエンティストの「実像」
 
何故DeNAがverticaを選んだか?
何故DeNAがverticaを選んだか?何故DeNAがverticaを選んだか?
何故DeNAがverticaを選んだか?
 

Similar to HadoopとMongoDBを活用したソーシャルアプリのログ解析

Mongo db washington dc 2014
Mongo db washington dc 2014Mongo db washington dc 2014
Mongo db washington dc 2014
ikanow
 
MongoDB Performance Tuning
MongoDB Performance TuningMongoDB Performance Tuning
MongoDB Performance Tuning
MongoDB
 

Similar to HadoopとMongoDBを活用したソーシャルアプリのログ解析 (20)

コードで学ぶドメイン駆動設計入門
コードで学ぶドメイン駆動設計入門コードで学ぶドメイン駆動設計入門
コードで学ぶドメイン駆動設計入門
 
Tabledown
TabledownTabledown
Tabledown
 
MongoDB + node.js で作るソーシャルゲーム
MongoDB + node.js で作るソーシャルゲームMongoDB + node.js で作るソーシャルゲーム
MongoDB + node.js で作るソーシャルゲーム
 
Interactively Search and Visualize Your Data: Presented by Romain Rigaux, Clo...
Interactively Search and Visualize Your Data: Presented by Romain Rigaux, Clo...Interactively Search and Visualize Your Data: Presented by Romain Rigaux, Clo...
Interactively Search and Visualize Your Data: Presented by Romain Rigaux, Clo...
 
Interactively Search and Visualize Your Big Data
Interactively Search and Visualize Your Big DataInteractively Search and Visualize Your Big Data
Interactively Search and Visualize Your Big Data
 
Building Your First MongoDB Application
Building Your First MongoDB ApplicationBuilding Your First MongoDB Application
Building Your First MongoDB Application
 
Ensuring High Availability for Real-time Analytics featuring Boxed Ice / Serv...
Ensuring High Availability for Real-time Analytics featuring Boxed Ice / Serv...Ensuring High Availability for Real-time Analytics featuring Boxed Ice / Serv...
Ensuring High Availability for Real-time Analytics featuring Boxed Ice / Serv...
 
PostgreSQLからMongoDBへ
PostgreSQLからMongoDBへPostgreSQLからMongoDBへ
PostgreSQLからMongoDBへ
 
Mcs011 solved assignment by divya singh
Mcs011 solved assignment by divya singhMcs011 solved assignment by divya singh
Mcs011 solved assignment by divya singh
 
A Century Of Weather Data - Midwest.io
A Century Of Weather Data - Midwest.ioA Century Of Weather Data - Midwest.io
A Century Of Weather Data - Midwest.io
 
Feed Normalization with Ember Data 1.0
Feed Normalization with Ember Data 1.0Feed Normalization with Ember Data 1.0
Feed Normalization with Ember Data 1.0
 
Mongo db washington dc 2014
Mongo db washington dc 2014Mongo db washington dc 2014
Mongo db washington dc 2014
 
JWT - To authentication and beyond!
JWT - To authentication and beyond!JWT - To authentication and beyond!
JWT - To authentication and beyond!
 
MongoDB Performance Tuning
MongoDB Performance TuningMongoDB Performance Tuning
MongoDB Performance Tuning
 
"Writing Maintainable JavaScript". Jon Bretman, Badoo
"Writing Maintainable JavaScript". Jon Bretman, Badoo"Writing Maintainable JavaScript". Jon Bretman, Badoo
"Writing Maintainable JavaScript". Jon Bretman, Badoo
 
Programming Contest Hacks
Programming Contest HacksProgramming Contest Hacks
Programming Contest Hacks
 
Mongo scaling
Mongo scalingMongo scaling
Mongo scaling
 
MongoDB In Production At Sailthru
MongoDB In Production At SailthruMongoDB In Production At Sailthru
MongoDB In Production At Sailthru
 
MongoDB - Monitoring and queueing
MongoDB - Monitoring and queueingMongoDB - Monitoring and queueing
MongoDB - Monitoring and queueing
 
MongoDB - Monitoring & queueing
MongoDB - Monitoring & queueingMongoDB - Monitoring & queueing
MongoDB - Monitoring & queueing
 

More from Takahiro Inoue

Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界
Takahiro Inoue
 
20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション
Takahiro Inoue
 
トレジャーデータ流,データ分析の始め方
トレジャーデータ流,データ分析の始め方トレジャーデータ流,データ分析の始め方
トレジャーデータ流,データ分析の始め方
Takahiro Inoue
 
オンラインゲームソリューション@トレジャーデータ
オンラインゲームソリューション@トレジャーデータオンラインゲームソリューション@トレジャーデータ
オンラインゲームソリューション@トレジャーデータ
Takahiro Inoue
 
事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612
Takahiro Inoue
 
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
Takahiro Inoue
 
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
Takahiro Inoue
 
Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!
Takahiro Inoue
 
Hadoop and the Data Scientist
Hadoop and the Data ScientistHadoop and the Data Scientist
Hadoop and the Data Scientist
Takahiro Inoue
 
MongoDB: Intro & Application for Big Data
MongoDB: Intro & Application  for Big DataMongoDB: Intro & Application  for Big Data
MongoDB: Intro & Application for Big Data
Takahiro Inoue
 
An Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB PluginsAn Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB Plugins
Takahiro Inoue
 
An Introduction to Tinkerpop
An Introduction to TinkerpopAn Introduction to Tinkerpop
An Introduction to Tinkerpop
Takahiro Inoue
 
An Introduction to Neo4j
An Introduction to Neo4jAn Introduction to Neo4j
An Introduction to Neo4j
Takahiro Inoue
 
The Definition of GraphDB
The Definition of GraphDBThe Definition of GraphDB
The Definition of GraphDB
Takahiro Inoue
 

More from Takahiro Inoue (20)

Treasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC DemoTreasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC Demo
 
トレジャーデータとtableau実現する自動レポーティング
トレジャーデータとtableau実現する自動レポーティングトレジャーデータとtableau実現する自動レポーティング
トレジャーデータとtableau実現する自動レポーティング
 
Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界
 
トレジャーデータのバッチクエリとアドホッククエリを理解する
トレジャーデータのバッチクエリとアドホッククエリを理解するトレジャーデータのバッチクエリとアドホッククエリを理解する
トレジャーデータのバッチクエリとアドホッククエリを理解する
 
20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション
 
トレジャーデータ流,データ分析の始め方
トレジャーデータ流,データ分析の始め方トレジャーデータ流,データ分析の始め方
トレジャーデータ流,データ分析の始め方
 
オンラインゲームソリューション@トレジャーデータ
オンラインゲームソリューション@トレジャーデータオンラインゲームソリューション@トレジャーデータ
オンラインゲームソリューション@トレジャーデータ
 
事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612
 
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
 
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
 
Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!
 
Hadoop and the Data Scientist
Hadoop and the Data ScientistHadoop and the Data Scientist
Hadoop and the Data Scientist
 
MongoDB: Intro & Application for Big Data
MongoDB: Intro & Application  for Big DataMongoDB: Intro & Application  for Big Data
MongoDB: Intro & Application for Big Data
 
An Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB PluginsAn Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB Plugins
 
An Introduction to Tinkerpop
An Introduction to TinkerpopAn Introduction to Tinkerpop
An Introduction to Tinkerpop
 
An Introduction to Neo4j
An Introduction to Neo4jAn Introduction to Neo4j
An Introduction to Neo4j
 
The Definition of GraphDB
The Definition of GraphDBThe Definition of GraphDB
The Definition of GraphDB
 
Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)
 
Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)
 
Advanced MongoDB #1
Advanced MongoDB #1Advanced MongoDB #1
Advanced MongoDB #1
 

Recently uploaded

Why Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire businessWhy Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire business
panagenda
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
?#DUbAI#??##{{(☎️+971_581248768%)**%*]'#abortion pills for sale in dubai@
 
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Victor Rentea
 

Recently uploaded (20)

Why Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire businessWhy Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire business
 
CNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In PakistanCNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In Pakistan
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processors
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024
 
Exploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with MilvusExploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with Milvus
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
 
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
 
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWEREMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
 
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
 
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of Terraform
 
Cyberprint. Dark Pink Apt Group [EN].pdf
Cyberprint. Dark Pink Apt Group [EN].pdfCyberprint. Dark Pink Apt Group [EN].pdf
Cyberprint. Dark Pink Apt Group [EN].pdf
 
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...
 

HadoopとMongoDBを活用したソーシャルアプリのログ解析

  • 1.
  • 2. •          •       
  • 3. •    •         
  • 4.            
  • 5.
  • 9.          
  • 10.              
  • 11.
  • 12. � �
  • 13.
  • 14.
  • 15.
  • 16. •  ‣  ‣  ‣  •  ‣  ‣ 
  • 17. •  ‣  ‣  ‣  ‣  ‣ 
  • 18. •  ‣  ‣  ‣  ‣  ‣ 
  • 19. •  ‣  ‣  ‣  ‣  ‣  ‣ 
  • 20. •  def mapper(key, value):! for word in value.split(): yield word,1! def reducer(key, values):! yield key,sum(values)! if __name__ == "__main__":! import dumbo! dumbo.run(mapper, reducer) dumbo start wordcount.py ! -hadoop /path/to/hadoop ! -input wc_input.txt -output wc_output
  • 21. •  ‣  ‣  ‣  ‣  python wordcount.py map < wc_input.txt | sort | ! python wordcount.py red > wc_output.txt
  • 22.
  • 23. •  ‣  ‣  ‣  ‣  ‣ 
  • 25. •  -----Change------! ActionLogger a{ChangeP} (Point,1371,1383) ! ActionLogger a{ChangeP} (Point,2373,2423)! ActionLogger a{ChangeMedal} (lucky_star,9,10) ! ActionLogger a{ChangeMedal} (lucky_sea_bream,0,1)! ActionLogger a{ChangeG} ! ActionLogger a{ChangeSubG} (SubGold,13,16) ! ActionLogger a{ChangeWakuwakuP} (buy,0,30)! ActionLogger a{ChangeWakuwakuP} (by gacha,30,0) ! ------Get------! ActionLogger a{GetMaterial} (syouhinnomoto,0,-1) ! ActionLogger a{GetMaterial} usesyouhinnomoto ! ActionLogger a{GetMaterial} (omotyanomotoPRO,1,6)! ActionLogger a{GetMaterial} (sui-tunomoto,5,4)! ActionLogger a{GetInterior} (bakery_counter,0,1)! ActionLogger a{GetAvatarPart} (190167,0,1) ! ActionLogger a{GetAvatarPart} (old_girl_09,0,1) ! -----Trade-----! ActionLogger a{Trade} buy 3 itigoke-kis from gree.jp:xxxxx !
  • 26. •    ‣  ‣  2010-07-26 00:00:02,446 INFO catalina-exec-483 ActionLogger – userId a{Make} make item onsenmanjyuu! 2010-07-26 00:00:02,478 INFO catalina-exec-411 ActionLogger – userId a{LifeCycle} Login userId 2010-07-26 00:00:02,446 a{Make} {onsenmanjyuu,1}! userId 2010-07-26 00:00:02,478 a{LifeCycle} {Login,1}! userId 2010-07-26 00:00:02,478 a{GetMaterial} {omotyanomotoPRO,5}!
  • 27. •    ‣  ‣  ‣  ‣ 
  • 28. •  •  •  •  •  •  •  •  •  • 
  • 29. •  { ! "_id" : "2010-06-27+xxxxx+a{ChangeP}",! "lastUpdate" : "2010-09-17",! "date" : "2010-06-27" ! "userId" : “xxxxx",! "actionType" : "a{ChangeP}",! "actionDetail" : { "Point" : 600 },! }! { ! "_id" : "2010-06-27+xxxxx+a{LifeCycle}", ! "lastUpdate" : "2010-09-17",! "date" : "2010-06-27" ! "userId" : ”xxxxx",! "actionType" : "a{LifeCycle}",! "actionDetail" : { ”Login" : 3 }! }!
  • 30. •  { "_id" : "2010-08-31+group+a{PutOn}", ! "date" : "2010-08-31", ! "lastUpdate" : "2010-09-21",! "actionType" : "a{PutOn}",! "actionDetail" : { "a{PutOn}" : 52050 } ! }! {...! "actionType" : "a{Make}",! "actionDetail" : { ! ”syurijyou” : 11,! ”aisukuri-mu” : 378,! ”kinnokarakuridokei” : 103,! ”puramoderu” : 22,! ”guremurinno_n” : 164,! ”kyodaipenginno_n” : 76,! ”patinko” : 67,! “wakizasi” : 250,! “dendendaiko” : 13651,! ... (over 100 items)! }! }!
  • 31.
  • 32.
  • 33. •  ‣  ‣  ‣  ‣  ‣  ‣ 
  • 34. •  ‣  ‣  ‣  ‣  ‣  ‣ 
  • 35. •  MySQL: select * from things where x=3 and y="foo"! MongoDB: db.things.find( { x : 3, y : "foo" } );! MySQL: select z from things where x=3! MongoDB: db.things.find( { x : 3 }, { z : 1 } ); db.collection.find({ "field" : { $gt: value } } ); ! // : field > value ! db.collection.find({ "field" : { $lt: value } } ); ! // : field < value ! db.collection.find({"field”: {$gt: value1, $lt: value2}});! // value1 <= field <= value2
  • 36. MySQL: select * from things where x in (b,a,c)! MongoDB: db.collection.find( { "field" : { $in : array } } ); ! db.things.find({j:{$in: [2,4,6]}});! db.customers.find( { name : /acme.*corp/i } ); ! db.myCollection.find().sort( { ts : -1 } ); // ts ! > m = function() { emit(this.user_id, 1); } ! > r = function(k,vals) { return 1; } ! > res = db.events.mapreduce(m, r, { query : {type:'sale'} }); ! > db[res.result].find().limit(2) ! { "_id" : 8321073716060 , "value" : 1 } ! { "_id" : 7921232311289 , "value" : 1 } !
  • 37. •  { ! "_id" : "2010-06-27+xxxxx+a{ChangeP}",! "lastUpdate" : "2010-09-17",! "date" : "2010-06-27" ! "userId" : “xxxxx",! "actionType" : "a{ChangeP}",! "actionDetail" : { "Point" : 600 },! }! { ! "_id" : "2010-06-27+xxxxx+a{LifeCycle}", ! "lastUpdate" : "2010-09-17",! "date" : "2010-06-27" ! "userId" : ”xxxxx",! "actionType" : "a{LifeCycle}",! "actionDetail" : { ”Login" : 3 }! }!
  • 38.
  • 39.
  • 40. •  ‣  •  ‣  ‣  ‣  ‣ 
  • 41. •    •  •  •  •  •  •  •  •  • 
  • 42. •  {! "_id" : "2010-06-28+xxxx+Charge",! "lastUpdate" : "2010-09-20",! "userId" : ”xxxx",! "date" : "2010-06-28",! "actionType" : "Charge",! "totalCharge" : 1210,! "boughtItem" : { " EX 5 " : 1,! " 5 " : 1,! " 5 " : 1,! " " : 1,! " " : 2 }! }!
  • 43.
  • 44. •  ‣  ‣  •  ‣  ‣  ‣ 
  • 46. • 
  • 47. • 
  • 48. •  {! "_id" : "2010-07-11+xxxxx+Registration",! "lastUpdate" : "2010-09-25",! "actionType" : "Registration",! "userId" : ”xxxxx",! "date" : "2010-07-11",! "firstCharge" : "2010-07-12",! "lastCharge" : "2010-09-02",! "lastLogin" : "2010-09-02",! "firstChargeTerm" : 1,! "playTerm" : 50,! "totalMonthCharge" : 1000,! "totalMonthChargeDetail" : {! "1th" : 74.3! "2th" : 17.1,! "3th" : 8.6,! i.e. "4th" : 0,! },! "totalCumlativeCharge" : 10000,! "totalCumlativeChargeDetail" : {! "1th" : 2,! "2th" : 0.5,! "3th" : 0.2,! "4th" : 0,! "5th" : 0.1,! "6th" : 27.5,! "7th" : 1.2,! "8th" : 49! "9th" : 19.5,! 2.7% }! }!
  • 49. •  topMonthCharge = function(n){! return db.user_registration.find({},{! totalMonthCharge:true,! totalMonthChargeDetail:true,! userId:true! }).sort({totalMonthCharge:-1}).limit(n);! }! > topMonthCharge(20) ! { ! "_id" : "2010-07-10+9999+Registration",! Top20 "userId" : ”9999”,! "totalMonthCharge" : 10000,! "totalMonthChargeDetail" : { "5th" : 13.7, "4th" : 27.6, "3th" : 21, "2th" : 16.2, "1th" : 21.5 }! }! …!
  • 50. findUser = function(x){ ! return db.user_charge.find({userId:x},{! userId:true,! totalCharge:true,! boughtItem:true}).sort({date:-1})! }! > findUserCharge("9999")! {! "_id" : "2010-09-08+9223458+Charge",! "totalCharge" : 2000,! "userId" : ”9999",! "boughtItem" : {! Top " 110 " : 2! }! }! {! "_id" : "2010-09-07+9223458+Charge",! "totalCharge" : 5000,! "userId" : ”9999",! "boughtItem" : {! " 350 " : 1,! " 110 " : 2! }! }! …!
  • 51.
  • 53. db.user_error! db.user_access! ( )! db.user_trace! (from )! (from )! db.user_attr! ( )! db.user_status! db.user_charge! (from Cassandra)! (from MySQL)!
  • 54.
  • 55. •  ‣  ‣  ‣  ‣ 
  • 56. •  ‣  ‣  ‣  ‣