SlideShare une entreprise Scribd logo
1  sur  31
Télécharger pour lire hors ligne
dplyr
@romain_francois
• Use R since 2002
• #rcatladies
• R Enthusiast
• R/C++ hero
• Performance
• dplyr
• Occasional comedy
%>% from magrittr
enjoy(cool(bake(shape(beat(append(bowl(rep("flour",
2), "yeast", "water", "milk", "oil"), "flour", until
= "soft"), duration = "3mins"), as = "balls", style =
"slightly-flat"), degrees = 200, duration =
"15mins"), duration = "5mins"))
bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>%
append("flour", until = "soft") %>%

beat(duration = "3mins") %>%

shape(as = "balls", style = "slightly-flat") %>%

bake(degrees = 200, duration = "15mins") %>%

cool(buns, duration = "5mins") %>%
enjoy()
nycflights13
> flights
Source: local data frame [336,776 x 16]
year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight
1 2013 1 1 517 2 830 11 UA N14228 1545
2 2013 1 1 533 4 850 20 UA N24211 1714
.. ... ... ... ... ... ... ... ... ... ...
Variables not shown: origin (chr), dest (chr), air_time (dbl), distance (dbl),
hour (dbl), minute (dbl)
nycflights13
> glimpse(flights)
Observations: 336,776
Variables: 16
$ year (int) 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 201...
$ month (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
$ day (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
$ dep_time (int) 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 55...
$ dep_delay (dbl) 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1, ...
$ arr_time (int) 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849, 8...
$ arr_delay (dbl) 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -14,...
$ carrier (chr) "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "AA...
$ tailnum (chr) "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N39463...
$ flight (int) 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49,...
$ origin (chr) "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA", "...
$ dest (chr) "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD", "...
$ air_time (dbl) 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158...
$ distance (dbl) 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 10...
$ hour (dbl) 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, ...
$ minute (dbl) 17, 33, 42, 44, 54, 54, 55, 57, 57, 58, 58, 58, 58, 58, 5...
filter: A subset of the rows of the data frame
flights %>%
filter( dep_delay < 10 )
flights %>%
filter( arr_delay < dep_delay )
slice: filter rows by position
flights %>%
slice( 1:10 )
arrange: reorder a data frame
flights %>%
filter( hour < 8 ) %>%
arrange( year, month, day )
select: select certain columns from the data frame
select(flights, year, month, day)
select(flights, year:day)
select(flights, -(year:day))
mutate: modify or create columns based on others
flights %>%
mutate(
gain = arr_delay - dep_delay,
speed = distance / air_time * 60
) %>%
filter( gain > 0 ) %>%
arrange( desc(speed) ) %>%
select( year, month, day, dest, gain, speed )
summarise: collapse a data frame into one row …
flights %>%
summarise(delay = mean(dep_delay, na.rm = TRUE))
flights %>%
filter( dep_delay > 0 ) %>%
summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
group_by: Group observations by one or more variables
flights %>%
group_by( tailnum ) %>%
summarise(
count = n(),
dist = mean(distance, na.rm = TRUE),
delay = mean(arr_delay, na.rm = TRUE)
) %>%
filter( is.finite(delay) ) %>%
arrange( desc(count) )
bind_rows
bind_rows( , )
color num
green 1
yellow 2
red 3
blue 4
pink 5
color num
green 1
yellow 2
color num
red 3
blue 4
pink 5
joins
a <- data_frame(
color = c("green", "yellow", "red"),
num = 1:3
)
b <- data_frame(
color = c("green", "yellow", "pink"),
size = c("S", "M", "L")
)
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
inner_join
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
inner_join( , )
color num size
green 1 S
yellow 2 M
left_join
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
left_join( , )
color num size
green 1 S
yellow 2 M
red 3
right_join
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
right_join( , )
color num size
green 1 S
yellow 2 M
pink L
full_join
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
full_join( , )
color num size
green 1 S
yellow 2 M
red 3
pink L
data_frame: Just like data.frame, but better
> data_frame( x = 1:5, y = letters[1:5] ) %>% glimpse
Observations: 5
Variables: 2
$ x (int) 1, 2, 3, 4, 5
$ y (chr) "a", "b", "c", "d", "e"
> data_frame( x = 1:5, y = letters[1:5] , z = x + 1) %>% glimpse
Observations: 5
Variables: 3
$ x (int) 1, 2, 3, 4, 5
$ y (chr) "a", "b", "c", "d", "e"
$ z (dbl) 2, 3, 4, 5, 6
frame_data aka tibble
> frame_data(
+ ~colA, ~colB,
+ "a", 1,
+ "b", 2
+ )
Source: local data frame [2 x 2]
colA colB
(chr) (dbl)
1 a 1
2 b 2
_
g <- c("origin", "dest")
v <- "dep_delay"
flights %>%
group_by( g ) %>%
summarise( result = mean(v, na.rm = TRUE) )
🙀
🙀
g <- c("origin", "dest")
v <- "dep_delay"
flights %>%
group_by_( .dots = g ) %>%
summarise_( .dots =
interp(~ mean(var, na.rm = TRUE), var = as.name(v))
)
Future
• Performance improvements (parallel C++)
• Alternative back ends
• Different type of groupings (e.g. bootstrap)
As soon as we get a hoverboard ...
dplyr
Romain François
@romain_francois
romain@r-enthusiasts.com

Contenu connexe

Tendances

Final_project_report.pdf
Final_project_report.pdfFinal_project_report.pdf
Final_project_report.pdfKarinaShakya4
 
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...Altinity Ltd
 
[Pgday.Seoul 2017] 6. GIN vs GiST 인덱스 이야기 - 박진우
[Pgday.Seoul 2017] 6. GIN vs GiST 인덱스 이야기 - 박진우[Pgday.Seoul 2017] 6. GIN vs GiST 인덱스 이야기 - 박진우
[Pgday.Seoul 2017] 6. GIN vs GiST 인덱스 이야기 - 박진우PgDay.Seoul
 
Interesting Facts About Javascript
Interesting Facts About JavascriptInteresting Facts About Javascript
Interesting Facts About JavascriptManish Jangir
 
How to Build a Telegraf Plugin by Noah Crowley
How to Build a Telegraf Plugin by Noah CrowleyHow to Build a Telegraf Plugin by Noah Crowley
How to Build a Telegraf Plugin by Noah CrowleyInfluxData
 
Bucket your partitions wisely - Cassandra summit 2016
Bucket your partitions wisely - Cassandra summit 2016Bucket your partitions wisely - Cassandra summit 2016
Bucket your partitions wisely - Cassandra summit 2016Markus Höfer
 
MongoDB Aggregation Framework
MongoDB Aggregation FrameworkMongoDB Aggregation Framework
MongoDB Aggregation FrameworkCaserta
 
Java web Project - travel management system
Java web Project - travel management systemJava web Project - travel management system
Java web Project - travel management systemWhasunKim
 
NoSQL 위에서 MMORPG 개발하기
NoSQL 위에서 MMORPG 개발하기NoSQL 위에서 MMORPG 개발하기
NoSQL 위에서 MMORPG 개발하기Hoyoung Choi
 
Javascript in Web development
Javascript in Web developmentJavascript in Web development
Javascript in Web developmentKiran Prajapati
 
Intel vmcs-shadowing-paper
Intel vmcs-shadowing-paperIntel vmcs-shadowing-paper
Intel vmcs-shadowing-paperAhmed Sallam
 
multi-thread 어플리케이션에 대해 모든 개발자가 알아 두지 않으면 안 되는 것
multi-thread 어플리케이션에 대해 모든 개발자가 알아 두지 않으면 안 되는 것multi-thread 어플리케이션에 대해 모든 개발자가 알아 두지 않으면 안 되는 것
multi-thread 어플리케이션에 대해 모든 개발자가 알아 두지 않으면 안 되는 것흥배 최
 
Cassandra concepts, patterns and anti-patterns
Cassandra concepts, patterns and anti-patternsCassandra concepts, patterns and anti-patterns
Cassandra concepts, patterns and anti-patternsDave Gardner
 
Node.js Express
Node.js  ExpressNode.js  Express
Node.js ExpressEyal Vardi
 

Tendances (20)

Final_project_report.pdf
Final_project_report.pdfFinal_project_report.pdf
Final_project_report.pdf
 
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...
ClickHouse and the Magic of Materialized Views, By Robert Hodges and Altinity...
 
[Pgday.Seoul 2017] 6. GIN vs GiST 인덱스 이야기 - 박진우
[Pgday.Seoul 2017] 6. GIN vs GiST 인덱스 이야기 - 박진우[Pgday.Seoul 2017] 6. GIN vs GiST 인덱스 이야기 - 박진우
[Pgday.Seoul 2017] 6. GIN vs GiST 인덱스 이야기 - 박진우
 
An Introduction to Redux
An Introduction to ReduxAn Introduction to Redux
An Introduction to Redux
 
Interesting Facts About Javascript
Interesting Facts About JavascriptInteresting Facts About Javascript
Interesting Facts About Javascript
 
How to Build a Telegraf Plugin by Noah Crowley
How to Build a Telegraf Plugin by Noah CrowleyHow to Build a Telegraf Plugin by Noah Crowley
How to Build a Telegraf Plugin by Noah Crowley
 
Bucket your partitions wisely - Cassandra summit 2016
Bucket your partitions wisely - Cassandra summit 2016Bucket your partitions wisely - Cassandra summit 2016
Bucket your partitions wisely - Cassandra summit 2016
 
An Intro into webpack
An Intro into webpackAn Intro into webpack
An Intro into webpack
 
MongoDB Aggregation Framework
MongoDB Aggregation FrameworkMongoDB Aggregation Framework
MongoDB Aggregation Framework
 
Java web Project - travel management system
Java web Project - travel management systemJava web Project - travel management system
Java web Project - travel management system
 
Ndc12 2
Ndc12 2Ndc12 2
Ndc12 2
 
NoSQL 위에서 MMORPG 개발하기
NoSQL 위에서 MMORPG 개발하기NoSQL 위에서 MMORPG 개발하기
NoSQL 위에서 MMORPG 개발하기
 
Javascript in Web development
Javascript in Web developmentJavascript in Web development
Javascript in Web development
 
LMAX Architecture
LMAX ArchitectureLMAX Architecture
LMAX Architecture
 
Intel vmcs-shadowing-paper
Intel vmcs-shadowing-paperIntel vmcs-shadowing-paper
Intel vmcs-shadowing-paper
 
Count min sketch
Count min sketchCount min sketch
Count min sketch
 
multi-thread 어플리케이션에 대해 모든 개발자가 알아 두지 않으면 안 되는 것
multi-thread 어플리케이션에 대해 모든 개발자가 알아 두지 않으면 안 되는 것multi-thread 어플리케이션에 대해 모든 개발자가 알아 두지 않으면 안 되는 것
multi-thread 어플리케이션에 대해 모든 개발자가 알아 두지 않으면 안 되는 것
 
Cassandra concepts, patterns and anti-patterns
Cassandra concepts, patterns and anti-patternsCassandra concepts, patterns and anti-patterns
Cassandra concepts, patterns and anti-patterns
 
Apache Velocity
Apache VelocityApache Velocity
Apache Velocity
 
Node.js Express
Node.js  ExpressNode.js  Express
Node.js Express
 

En vedette

Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)Ram Narasimhan
 
Rデータ処理入門
Rデータ処理入門Rデータ処理入門
Rデータ処理入門Hiroki K
 
Introduction to R Short course Fall 2016
Introduction to R Short course Fall 2016Introduction to R Short course Fall 2016
Introduction to R Short course Fall 2016Spencer Fox
 
Data manipulation with dplyr
Data manipulation with dplyrData manipulation with dplyr
Data manipulation with dplyrRomain Francois
 
dplyrとは何だったのか
dplyrとは何だったのかdplyrとは何だったのか
dplyrとは何だったのかyutannihilation
 
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」Nagi Teramo
 
木と電話と選挙(causalTree)
木と電話と選挙(causalTree)木と電話と選挙(causalTree)
木と電話と選挙(causalTree)Shota Yasui
 
R入門(dplyrでデータ加工)-TokyoR42
R入門(dplyrでデータ加工)-TokyoR42R入門(dplyrでデータ加工)-TokyoR42
R入門(dplyrでデータ加工)-TokyoR42Atsushi Hayakawa
 

En vedette (10)

Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)
 
Rデータ処理入門
Rデータ処理入門Rデータ処理入門
Rデータ処理入門
 
Tokyor36
Tokyor36Tokyor36
Tokyor36
 
Introduction to R Short course Fall 2016
Introduction to R Short course Fall 2016Introduction to R Short course Fall 2016
Introduction to R Short course Fall 2016
 
Data manipulation with dplyr
Data manipulation with dplyrData manipulation with dplyr
Data manipulation with dplyr
 
dplyrとは何だったのか
dplyrとは何だったのかdplyrとは何だったのか
dplyrとは何だったのか
 
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」
 
木と電話と選挙(causalTree)
木と電話と選挙(causalTree)木と電話と選挙(causalTree)
木と電話と選挙(causalTree)
 
R入門(dplyrでデータ加工)-TokyoR42
R入門(dplyrでデータ加工)-TokyoR42R入門(dplyrでデータ加工)-TokyoR42
R入門(dplyrでデータ加工)-TokyoR42
 
Tidyverseとは
TidyverseとはTidyverseとは
Tidyverseとは
 

Similaire à dplyr

dplyr and torrents from cpasbien
dplyr and torrents from cpasbiendplyr and torrents from cpasbien
dplyr and torrents from cpasbienRomain Francois
 
Τα Πολύ Βασικά για την Python
Τα Πολύ Βασικά για την PythonΤα Πολύ Βασικά για την Python
Τα Πολύ Βασικά για την PythonMoses Boudourides
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1Ke Wei Louis
 
sopa de pollo para el alma latina
sopa de pollo para el alma latinasopa de pollo para el alma latina
sopa de pollo para el alma latinaJOSE GARCIA PERALTA
 
Sopa de pollo para el alma Latina
Sopa de pollo para el alma LatinaSopa de pollo para el alma Latina
Sopa de pollo para el alma LatinaYonny Granda M
 
r studio presentation.pptx
r studio presentation.pptxr studio presentation.pptx
r studio presentation.pptxDevikaRaj14
 
r studio presentation.pptx
r studio presentation.pptxr studio presentation.pptx
r studio presentation.pptxDevikaRaj14
 
Evergreen trails master plan community meeting 1 boards
Evergreen trails master plan community meeting 1 boardsEvergreen trails master plan community meeting 1 boards
Evergreen trails master plan community meeting 1 boardsOV Consulting
 
Oceans 2019 tutorial-geophysical-nav_7-updated
Oceans 2019 tutorial-geophysical-nav_7-updatedOceans 2019 tutorial-geophysical-nav_7-updated
Oceans 2019 tutorial-geophysical-nav_7-updatedFrancisco Curado-Teixeira
 
Al Fazl International Weekly26 June 2015
Al Fazl International  Weekly26 June 2015Al Fazl International  Weekly26 June 2015
Al Fazl International Weekly26 June 2015muzaffertahir9
 
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)Dimitris Psounis
 
Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Pythonpugpe
 
Global Change, Species Diversity, and the Future of Marine Ecosystems
Global Change, Species Diversity, and the Future of Marine EcosystemsGlobal Change, Species Diversity, and the Future of Marine Ecosystems
Global Change, Species Diversity, and the Future of Marine EcosystemsJarrett Byrnes
 
Python 101++: Let's Get Down to Business!
Python 101++: Let's Get Down to Business!Python 101++: Let's Get Down to Business!
Python 101++: Let's Get Down to Business!Paige Bailey
 

Similaire à dplyr (20)

dplyr and torrents from cpasbien
dplyr and torrents from cpasbiendplyr and torrents from cpasbien
dplyr and torrents from cpasbien
 
dplyr
dplyrdplyr
dplyr
 
Τα Πολύ Βασικά για την Python
Τα Πολύ Βασικά για την PythonΤα Πολύ Βασικά για την Python
Τα Πολύ Βασικά για την Python
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1
 
sopa de pollo para el alma latina
sopa de pollo para el alma latinasopa de pollo para el alma latina
sopa de pollo para el alma latina
 
Elixir
ElixirElixir
Elixir
 
Sopa de pollo para el alma Latina
Sopa de pollo para el alma LatinaSopa de pollo para el alma Latina
Sopa de pollo para el alma Latina
 
R programming language
R programming languageR programming language
R programming language
 
Basics
BasicsBasics
Basics
 
r studio presentation.pptx
r studio presentation.pptxr studio presentation.pptx
r studio presentation.pptx
 
r studio presentation.pptx
r studio presentation.pptxr studio presentation.pptx
r studio presentation.pptx
 
Evergreen trails master plan community meeting 1 boards
Evergreen trails master plan community meeting 1 boardsEvergreen trails master plan community meeting 1 boards
Evergreen trails master plan community meeting 1 boards
 
dplyr use case
dplyr use casedplyr use case
dplyr use case
 
Oceans 2019 tutorial-geophysical-nav_7-updated
Oceans 2019 tutorial-geophysical-nav_7-updatedOceans 2019 tutorial-geophysical-nav_7-updated
Oceans 2019 tutorial-geophysical-nav_7-updated
 
Al Fazl International Weekly26 June 2015
Al Fazl International  Weekly26 June 2015Al Fazl International  Weekly26 June 2015
Al Fazl International Weekly26 June 2015
 
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)
 
Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Python
 
Global Change, Species Diversity, and the Future of Marine Ecosystems
Global Change, Species Diversity, and the Future of Marine EcosystemsGlobal Change, Species Diversity, and the Future of Marine Ecosystems
Global Change, Species Diversity, and the Future of Marine Ecosystems
 
Encuesta de valores de los mexicanos...
Encuesta de valores de los mexicanos...Encuesta de valores de los mexicanos...
Encuesta de valores de los mexicanos...
 
Python 101++: Let's Get Down to Business!
Python 101++: Let's Get Down to Business!Python 101++: Let's Get Down to Business!
Python 101++: Let's Get Down to Business!
 

Plus de Romain Francois (18)

R/C++
R/C++R/C++
R/C++
 
user2015 keynote talk
user2015 keynote talkuser2015 keynote talk
user2015 keynote talk
 
SevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrSevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittr
 
R/C++ talk at earl 2014
R/C++ talk at earl 2014R/C++ talk at earl 2014
R/C++ talk at earl 2014
 
Rcpp11 genentech
Rcpp11 genentechRcpp11 genentech
Rcpp11 genentech
 
Rcpp11 useR2014
Rcpp11 useR2014Rcpp11 useR2014
Rcpp11 useR2014
 
Rcpp11
Rcpp11Rcpp11
Rcpp11
 
R and C++
R and C++R and C++
R and C++
 
R and cpp
R and cppR and cpp
R and cpp
 
Rcpp attributes
Rcpp attributesRcpp attributes
Rcpp attributes
 
Rcpp is-ready
Rcpp is-readyRcpp is-ready
Rcpp is-ready
 
Rcpp
RcppRcpp
Rcpp
 
Integrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBufIntegrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBuf
 
Object Oriented Design(s) in R
Object Oriented Design(s) in RObject Oriented Design(s) in R
Object Oriented Design(s) in R
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 
RProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for RRProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for R
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 

Dernier

SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024Lorenzo Miniero
 
"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii SoldatenkoFwdays
 
Search Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfSearch Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfRankYa
 
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebUiPathCommunity
 
Powerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time ClashPowerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time Clashcharlottematthew16
 
Story boards and shot lists for my a level piece
Story boards and shot lists for my a level pieceStory boards and shot lists for my a level piece
Story boards and shot lists for my a level piececharlottematthew16
 
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxNavinnSomaal
 
Scanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsScanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsRizwan Syed
 
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Commit University
 
Streamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupStreamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupFlorian Wilhelm
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsMiki Katsuragi
 
Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Mattias Andersson
 
CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):comworks
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Patryk Bandurski
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationSafe Software
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...Fwdays
 
Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Enterprise Knowledge
 
The Future of Software Development - Devin AI Innovative Approach.pdf
The Future of Software Development - Devin AI Innovative Approach.pdfThe Future of Software Development - Devin AI Innovative Approach.pdf
The Future of Software Development - Devin AI Innovative Approach.pdfSeasiaInfotech2
 
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Mark Simos
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
 

Dernier (20)

SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024
 
"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko
 
Search Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdfSearch Engine Optimization SEO PDF for 2024.pdf
Search Engine Optimization SEO PDF for 2024.pdf
 
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio Web
 
Powerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time ClashPowerpoint exploring the locations used in television show Time Clash
Powerpoint exploring the locations used in television show Time Clash
 
Story boards and shot lists for my a level piece
Story boards and shot lists for my a level pieceStory boards and shot lists for my a level piece
Story boards and shot lists for my a level piece
 
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptx
 
Scanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsScanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL Certs
 
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!
 
Streamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupStreamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project Setup
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering Tips
 
Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?
 
CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
 
Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024
 
The Future of Software Development - Devin AI Innovative Approach.pdf
The Future of Software Development - Devin AI Innovative Approach.pdfThe Future of Software Development - Devin AI Innovative Approach.pdf
The Future of Software Development - Devin AI Innovative Approach.pdf
 
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
 

dplyr

  • 2. • Use R since 2002 • #rcatladies • R Enthusiast • R/C++ hero • Performance • dplyr • Occasional comedy
  • 3.
  • 4.
  • 6. enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), "yeast", "water", "milk", "oil"), "flour", until = "soft"), duration = "3mins"), as = "balls", style = "slightly-flat"), degrees = 200, duration = "15mins"), duration = "5mins")) bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% append("flour", until = "soft") %>%
 beat(duration = "3mins") %>%
 shape(as = "balls", style = "slightly-flat") %>%
 bake(degrees = 200, duration = "15mins") %>%
 cool(buns, duration = "5mins") %>% enjoy()
  • 7. nycflights13 > flights Source: local data frame [336,776 x 16] year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight 1 2013 1 1 517 2 830 11 UA N14228 1545 2 2013 1 1 533 4 850 20 UA N24211 1714 .. ... ... ... ... ... ... ... ... ... ... Variables not shown: origin (chr), dest (chr), air_time (dbl), distance (dbl), hour (dbl), minute (dbl)
  • 8. nycflights13 > glimpse(flights) Observations: 336,776 Variables: 16 $ year (int) 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 201... $ month (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... $ day (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... $ dep_time (int) 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 55... $ dep_delay (dbl) 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1, ... $ arr_time (int) 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849, 8... $ arr_delay (dbl) 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -14,... $ carrier (chr) "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "AA... $ tailnum (chr) "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N39463... $ flight (int) 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49,... $ origin (chr) "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA", "... $ dest (chr) "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD", "... $ air_time (dbl) 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158... $ distance (dbl) 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 10... $ hour (dbl) 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, ... $ minute (dbl) 17, 33, 42, 44, 54, 54, 55, 57, 57, 58, 58, 58, 58, 58, 5...
  • 9. filter: A subset of the rows of the data frame flights %>% filter( dep_delay < 10 ) flights %>% filter( arr_delay < dep_delay )
  • 10. slice: filter rows by position flights %>% slice( 1:10 )
  • 11. arrange: reorder a data frame flights %>% filter( hour < 8 ) %>% arrange( year, month, day )
  • 12. select: select certain columns from the data frame select(flights, year, month, day) select(flights, year:day) select(flights, -(year:day))
  • 13. mutate: modify or create columns based on others flights %>% mutate( gain = arr_delay - dep_delay, speed = distance / air_time * 60 ) %>% filter( gain > 0 ) %>% arrange( desc(speed) ) %>% select( year, month, day, dest, gain, speed )
  • 14. summarise: collapse a data frame into one row … flights %>% summarise(delay = mean(dep_delay, na.rm = TRUE)) flights %>% filter( dep_delay > 0 ) %>% summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
  • 15. group_by: Group observations by one or more variables flights %>% group_by( tailnum ) %>% summarise( count = n(), dist = mean(distance, na.rm = TRUE), delay = mean(arr_delay, na.rm = TRUE) ) %>% filter( is.finite(delay) ) %>% arrange( desc(count) )
  • 16. bind_rows bind_rows( , ) color num green 1 yellow 2 red 3 blue 4 pink 5 color num green 1 yellow 2 color num red 3 blue 4 pink 5
  • 17. joins a <- data_frame( color = c("green", "yellow", "red"), num = 1:3 ) b <- data_frame( color = c("green", "yellow", "pink"), size = c("S", "M", "L") ) color num green 1 yellow 2 red 3 color size green S yellow M pink L
  • 18. inner_join color num green 1 yellow 2 red 3 color size green S yellow M pink L inner_join( , ) color num size green 1 S yellow 2 M
  • 19. left_join color num green 1 yellow 2 red 3 color size green S yellow M pink L left_join( , ) color num size green 1 S yellow 2 M red 3
  • 20. right_join color num green 1 yellow 2 red 3 color size green S yellow M pink L right_join( , ) color num size green 1 S yellow 2 M pink L
  • 21. full_join color num green 1 yellow 2 red 3 color size green S yellow M pink L full_join( , ) color num size green 1 S yellow 2 M red 3 pink L
  • 22. data_frame: Just like data.frame, but better > data_frame( x = 1:5, y = letters[1:5] ) %>% glimpse Observations: 5 Variables: 2 $ x (int) 1, 2, 3, 4, 5 $ y (chr) "a", "b", "c", "d", "e" > data_frame( x = 1:5, y = letters[1:5] , z = x + 1) %>% glimpse Observations: 5 Variables: 3 $ x (int) 1, 2, 3, 4, 5 $ y (chr) "a", "b", "c", "d", "e" $ z (dbl) 2, 3, 4, 5, 6
  • 23. frame_data aka tibble > frame_data( + ~colA, ~colB, + "a", 1, + "b", 2 + ) Source: local data frame [2 x 2] colA colB (chr) (dbl) 1 a 1 2 b 2
  • 24. _
  • 25. g <- c("origin", "dest") v <- "dep_delay" flights %>% group_by( g ) %>% summarise( result = mean(v, na.rm = TRUE) ) 🙀 🙀
  • 26. g <- c("origin", "dest") v <- "dep_delay" flights %>% group_by_( .dots = g ) %>% summarise_( .dots = interp(~ mean(var, na.rm = TRUE), var = as.name(v)) )
  • 27.
  • 28.
  • 29.
  • 30. Future • Performance improvements (parallel C++) • Alternative back ends • Different type of groupings (e.g. bootstrap) As soon as we get hoverboard ...