SlideShare a Scribd company logo
1 of 31
Download to read offline
dplyr 
@romain_francois
• Use R since 2002 
• R Enthusiast 
• R/C++ hero 
• Performance 
• dplyr 
• Occasional comedy
Nov 26th
dplyr 
dplyr 
dplyr dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr #rcatladies 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr dplyr
verb 
function that takes a 
data frame as its first 
argument
Examples of R verbs 
head, tail, … 
verb subject … 
> head( iris, n = 4 ) 
Sepal.Length Sepal.Width Petal.Length Petal.Width Species 
1 5.1 3.5 1.4 0.2 setosa 
2 4.9 3.0 1.4 0.2 setosa 
3 4.7 3.2 1.3 0.2 setosa 
4 4.6 3.1 1.5 0.2 setosa
%>% 
from magrittr
enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), 
"yeast", "water", "milk", "oil"), "flour", until = 
"soft"), duration = "3mins"), as = "balls", style = 
"slightly-flat"), degrees = 200, duration = "15mins"), 
duration = "5mins")) 
bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% 
append("flour", until = "soft") %>% 
beat(duration = "3mins") %>% 
shape(as = "balls", style = "slightly-flat") %>% 
bake(degrees = 200, duration = "15mins") %>% 
cool(buns, duration = "5mins") %>% 
enjoy()
enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), 
"yeast", "water", "milk", "oil"), "flour", until = 
"soft"), duration = "3mins"), as = "balls", style = 
"slightly-flat"), degrees = 200, duration = "15mins"), 
duration = "5mins")) 
bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% 
append("flour", until = "soft") %>% 
beat(duration = "3mins") %>% 
shape(as = "balls", style = "slightly-flat") %>% 
bake(degrees = 200, duration = "15mins") %>% 
cool(buns, duration = "5mins") %>% 
enjoy()
nycflights13: Data about flights departing NYC in 2013
> library("nycflights13") 
> flights 
Source: local data frame [336,776 x 16] 
year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight 
1 2013 1 1 517 2 830 11 UA N14228 1545 
2 2013 1 1 533 4 850 20 UA N24211 1714 
3 2013 1 1 542 2 923 33 AA N619AA 1141 
4 2013 1 1 544 -1 1004 -18 B6 N804JB 725 
5 2013 1 1 554 -6 812 -25 DL N668DN 461 
6 2013 1 1 554 -4 740 12 UA N39463 1696 
7 2013 1 1 555 -5 913 19 B6 N516JB 507 
8 2013 1 1 557 -3 709 -14 EV N829AS 5708 
9 2013 1 1 557 -3 838 -8 B6 N593JB 79 
10 2013 1 1 558 -2 753 8 AA N3ALAA 301 
.. ... ... ... ... ... ... ... ... ... ... 
Variables not shown: origin (chr), dest (chr), air_time (dbl), distance (dbl), 
hour (dbl), minute (dbl)
tbl_df 
A data frame that does print all of itself by default 
> data <- tbl_df(mtcars) 
> data 
Source: local data frame [32 x 11] 
mpg cyl disp hp drat wt qsec vs am gear carb 
1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 
2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 
3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 
4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 
5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 
6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 
7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 
8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 
9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 
10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 
.. ... ... ... ... ... ... ... .. .. ... ...
filter 
A subset of the rows of the data frame
filter(flights, month == 1, day == 1) 
flights %>% 
filter( dep_delay < 10 ) 
flights %>% 
filter( arr_delay < dep_delay ) 
flights %>% 
filter( hour < 12, arr_delay <= 0 )
arrange reorder a data frame
flights %>% 
filter( hour < 8 ) %>% 
arrange( year, month, day ) 
flights %>% 
arrange( desc(dep_delay) )
select 
select certain columns from the data frame
# Select columns by name 
select(flights, year, month, day) 
# Select all columns between year and day 
select(flights, year:day) 
# Select all columns except those from year to 
# day (inclusive) 
select(flights, -(year:day))
mutate 
modify or create columns based on others
d <- flights %>% 
mutate( 
gain = arr_delay - dep_delay, 
speed = distance / air_time * 60 
) %>% 
filter( gain > 0 ) %>% 
arrange( desc(speed) ) 
d %>% 
select( year, month, day, dest, gain, speed )
summarise 
collapse a data frame into one row …
summarise(flights, 
delay = mean(dep_delay, na.rm = TRUE)) 
flights %>% 
filter( dep_delay > 0 ) %>% 
summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
group_by 
Group observations by one or more variables
flights %>% 
group_by( tailnum ) %>% 
summarise( 
count = n(), 
dist = mean(distance, na.rm = TRUE), 
delay = mean(arr_delay, na.rm = TRUE) 
) %>% 
filter( is.finite(delay) ) %>% 
arrange( desc(count) )
flights %>% 
group_by(dest) %>% 
summarise( 
planes = n_distinct(tailnum), 
flights = n() 
) %>% 
arrange( desc(flights) )
joins 
joining two data frames
inner_join 
all rows from x where there are matching 
values in y, and all columns from x and y. If there are multiple matches 
between x and y, all combination of the matches are returned. 
destinations <- flights %>% 
group_by(dest) %>% 
summarise( 
planes = n_distinct(tailnum), 
flights = n() 
) %>% 
arrange( desc(flights) ) %>% 
rename( faa = dest ) 
inner_join( destinations, airports, by = "faa")
inner_join 
all rows from x where there are matching 
values in y, and all columns from x and y. If there are multiple matches 
between x and y, all combination of the matches are returned. 
destinations <- flights %>% 
group_by(dest) %>% 
summarise( 
planes = n_distinct(tailnum), 
flights = n() 
) %>% 
arrange( desc(flights) ) 
inner_join( destinations, airports, 
by = c( "dest" = "faa" ) )
other joins 
See ?join 
• left_join, right_join 
• inner_join, outer_join 
• semi_join 
• anti_join
dplyr %>% summary 
• Simple verbs: filter, mutate, select, summarise, 
arrange 
• Grouping with group_by 
• Joins with *_join 
• Convenient with %>% 
• F✈️ST
dplyr 
Romain François 
@romain_francois 
romain@r-enthusiasts.com

More Related Content

What's hot

Bytes in the Machine: Inside the CPython interpreter
Bytes in the Machine: Inside the CPython interpreterBytes in the Machine: Inside the CPython interpreter
Bytes in the Machine: Inside the CPython interpreterakaptur
 
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...akaptur
 
Exploring slides
Exploring slidesExploring slides
Exploring slidesakaptur
 
Are we ready to Go?
Are we ready to Go?Are we ready to Go?
Are we ready to Go?Adam Dudczak
 
2016 gunma.web games-and-asm.js
2016 gunma.web games-and-asm.js2016 gunma.web games-and-asm.js
2016 gunma.web games-and-asm.jsNoritada Shimizu
 
Diving into byte code optimization in python
Diving into byte code optimization in python Diving into byte code optimization in python
Diving into byte code optimization in python Chetan Giridhar
 
โปรแกรมย่อยและฟังชันก์มาตรฐาน
โปรแกรมย่อยและฟังชันก์มาตรฐานโปรแกรมย่อยและฟังชันก์มาตรฐาน
โปรแกรมย่อยและฟังชันก์มาตรฐานknang
 
Playing 44CON CTF for fun and profit
Playing 44CON CTF for fun and profitPlaying 44CON CTF for fun and profit
Playing 44CON CTF for fun and profit44CON
 
C c++-meetup-1nov2017-autofdo
C c++-meetup-1nov2017-autofdoC c++-meetup-1nov2017-autofdo
C c++-meetup-1nov2017-autofdoKim Phillips
 
Python opcodes
Python opcodesPython opcodes
Python opcodesalexgolec
 
Modern c++ Memory Management
Modern c++ Memory ManagementModern c++ Memory Management
Modern c++ Memory ManagementAlan Uthoff
 
TCO in Python via bytecode manipulation.
TCO in Python via bytecode manipulation.TCO in Python via bytecode manipulation.
TCO in Python via bytecode manipulation.lnikolaeva
 
Data Structure - 2nd Study
Data Structure - 2nd StudyData Structure - 2nd Study
Data Structure - 2nd StudyChris Ohk
 
Implementing Software Machines in Go and C
Implementing Software Machines in Go and CImplementing Software Machines in Go and C
Implementing Software Machines in Go and CEleanor McHugh
 
How to stand on the shoulders of giants
How to stand on the shoulders of giantsHow to stand on the shoulders of giants
How to stand on the shoulders of giantsIan Barber
 

What's hot (20)

Bytes in the Machine: Inside the CPython interpreter
Bytes in the Machine: Inside the CPython interpreterBytes in the Machine: Inside the CPython interpreter
Bytes in the Machine: Inside the CPython interpreter
 
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
 
Exploring slides
Exploring slidesExploring slides
Exploring slides
 
Are we ready to Go?
Are we ready to Go?Are we ready to Go?
Are we ready to Go?
 
2016 gunma.web games-and-asm.js
2016 gunma.web games-and-asm.js2016 gunma.web games-and-asm.js
2016 gunma.web games-and-asm.js
 
Diving into byte code optimization in python
Diving into byte code optimization in python Diving into byte code optimization in python
Diving into byte code optimization in python
 
Groovy
GroovyGroovy
Groovy
 
Go a crash course
Go   a crash courseGo   a crash course
Go a crash course
 
โปรแกรมย่อยและฟังชันก์มาตรฐาน
โปรแกรมย่อยและฟังชันก์มาตรฐานโปรแกรมย่อยและฟังชันก์มาตรฐาน
โปรแกรมย่อยและฟังชันก์มาตรฐาน
 
20151224-games
20151224-games20151224-games
20151224-games
 
Playing 44CON CTF for fun and profit
Playing 44CON CTF for fun and profitPlaying 44CON CTF for fun and profit
Playing 44CON CTF for fun and profit
 
C c++-meetup-1nov2017-autofdo
C c++-meetup-1nov2017-autofdoC c++-meetup-1nov2017-autofdo
C c++-meetup-1nov2017-autofdo
 
Python opcodes
Python opcodesPython opcodes
Python opcodes
 
Modern c++ Memory Management
Modern c++ Memory ManagementModern c++ Memory Management
Modern c++ Memory Management
 
TCO in Python via bytecode manipulation.
TCO in Python via bytecode manipulation.TCO in Python via bytecode manipulation.
TCO in Python via bytecode manipulation.
 
Groovy
GroovyGroovy
Groovy
 
Data Structure - 2nd Study
Data Structure - 2nd StudyData Structure - 2nd Study
Data Structure - 2nd Study
 
Implementing Software Machines in Go and C
Implementing Software Machines in Go and CImplementing Software Machines in Go and C
Implementing Software Machines in Go and C
 
How to stand on the shoulders of giants
How to stand on the shoulders of giantsHow to stand on the shoulders of giants
How to stand on the shoulders of giants
 
Let's golang
Let's golangLet's golang
Let's golang
 

Viewers also liked

Object Oriented Design(s) in R
Object Oriented Design(s) in RObject Oriented Design(s) in R
Object Oriented Design(s) in RRomain Francois
 
dplyr and torrents from cpasbien
dplyr and torrents from cpasbiendplyr and torrents from cpasbien
dplyr and torrents from cpasbienRomain Francois
 
Data manipulation with dplyr
Data manipulation with dplyrData manipulation with dplyr
Data manipulation with dplyrRomain Francois
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Romain Francois
 
Integrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBufIntegrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBufRomain Francois
 

Viewers also liked (7)

Rcpp
RcppRcpp
Rcpp
 
Object Oriented Design(s) in R
Object Oriented Design(s) in RObject Oriented Design(s) in R
Object Oriented Design(s) in R
 
R/C++
R/C++R/C++
R/C++
 
dplyr and torrents from cpasbien
dplyr and torrents from cpasbiendplyr and torrents from cpasbien
dplyr and torrents from cpasbien
 
Data manipulation with dplyr
Data manipulation with dplyrData manipulation with dplyr
Data manipulation with dplyr
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 
Integrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBufIntegrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBuf
 

Similar to dplyr

SevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrSevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrRomain Francois
 
Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Pythonpugpe
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwnARUN DN
 
MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709Min-hyung Kim
 
Table of Useful R commands.
Table of Useful R commands.Table of Useful R commands.
Table of Useful R commands.Dr. Volkan OBAN
 
Megadata With Python and Hadoop
Megadata With Python and HadoopMegadata With Python and Hadoop
Megadata With Python and Hadoopryancox
 
Easy HTML Tables in RStudio with Tabyl and kableExtra
Easy HTML Tables in RStudio with Tabyl and kableExtraEasy HTML Tables in RStudio with Tabyl and kableExtra
Easy HTML Tables in RStudio with Tabyl and kableExtraBarry DeCicco
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1Ke Wei Louis
 
Let’s Talk About Ruby
Let’s Talk About RubyLet’s Talk About Ruby
Let’s Talk About RubyIan Bishop
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisUniversity of Illinois,Chicago
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisUniversity of Illinois,Chicago
 
Ruby Language - A quick tour
Ruby Language - A quick tourRuby Language - A quick tour
Ruby Language - A quick touraztack
 
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...InfluxData
 

Similar to dplyr (20)

SevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrSevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittr
 
R programming language
R programming languageR programming language
R programming language
 
dplyr use case
dplyr use casedplyr use case
dplyr use case
 
Bash tricks
Bash tricksBash tricks
Bash tricks
 
Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Python
 
Python 1
Python 1Python 1
Python 1
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwn
 
MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709
 
Table of Useful R commands.
Table of Useful R commands.Table of Useful R commands.
Table of Useful R commands.
 
Megadata With Python and Hadoop
Megadata With Python and HadoopMegadata With Python and Hadoop
Megadata With Python and Hadoop
 
Easy HTML Tables in RStudio with Tabyl and kableExtra
Easy HTML Tables in RStudio with Tabyl and kableExtraEasy HTML Tables in RStudio with Tabyl and kableExtra
Easy HTML Tables in RStudio with Tabyl and kableExtra
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1
 
dplyr-tutorial.pdf
dplyr-tutorial.pdfdplyr-tutorial.pdf
dplyr-tutorial.pdf
 
Let’s Talk About Ruby
Let’s Talk About RubyLet’s Talk About Ruby
Let’s Talk About Ruby
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency Analysis
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency Analysis
 
Ruby Language - A quick tour
Ruby Language - A quick tourRuby Language - A quick tour
Ruby Language - A quick tour
 
R code for data manipulation
R code for data manipulationR code for data manipulation
R code for data manipulation
 
R code for data manipulation
R code for data manipulationR code for data manipulation
R code for data manipulation
 
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
 

More from Romain Francois (9)

dplyr
dplyrdplyr
dplyr
 
Rcpp11
Rcpp11Rcpp11
Rcpp11
 
R and C++
R and C++R and C++
R and C++
 
R and cpp
R and cppR and cpp
R and cpp
 
Rcpp attributes
Rcpp attributesRcpp attributes
Rcpp attributes
 
Rcpp is-ready
Rcpp is-readyRcpp is-ready
Rcpp is-ready
 
RProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for RRProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for R
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 

Recently uploaded

Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Drew Madelung
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAndrey Devyatkin
 
MS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsMS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsNanddeep Nachan
 
presentation ICT roal in 21st century education
presentation ICT roal in 21st century educationpresentation ICT roal in 21st century education
presentation ICT roal in 21st century educationjfdjdjcjdnsjd
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024The Digital Insurer
 
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...Jeffrey Haguewood
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MIND CTI
 
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodPolkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodJuan lago vázquez
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyKhushali Kathiriya
 
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Zilliz
 
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWEREMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWERMadyBayot
 
GenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdfGenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdflior mazor
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonAnna Loughnan Colquhoun
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerThousandEyes
 
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProduct Anonymous
 
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot ModelNavi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot ModelDeepika Singh
 
Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024The Digital Insurer
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc
 
Automating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps ScriptAutomating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps Scriptwesley chun
 
Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)wesley chun
 

Recently uploaded (20)

Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of Terraform
 
MS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsMS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectors
 
presentation ICT roal in 21st century education
presentation ICT roal in 21st century educationpresentation ICT roal in 21st century education
presentation ICT roal in 21st century education
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024
 
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodPolkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
 
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWEREMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
 
GenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdfGenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdf
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt Robison
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected Worker
 
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
 
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot ModelNavi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
 
Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
 
Automating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps ScriptAutomating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps Script
 
Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)
 

dplyr

  • 2. • Use R since 2002 • R Enthusiast • R/C++ hero • Performance • dplyr • Occasional comedy
  • 4. dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr #rcatladies dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr
  • 5. verb function that takes a data frame as its first argument
  • 6. Examples of R verbs head, tail, … verb subject … > head( iris, n = 4 ) Sepal.Length Sepal.Width Petal.Length Petal.Width Species 1 5.1 3.5 1.4 0.2 setosa 2 4.9 3.0 1.4 0.2 setosa 3 4.7 3.2 1.3 0.2 setosa 4 4.6 3.1 1.5 0.2 setosa
  • 8. enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), "yeast", "water", "milk", "oil"), "flour", until = "soft"), duration = "3mins"), as = "balls", style = "slightly-flat"), degrees = 200, duration = "15mins"), duration = "5mins")) bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% append("flour", until = "soft") %>% beat(duration = "3mins") %>% shape(as = "balls", style = "slightly-flat") %>% bake(degrees = 200, duration = "15mins") %>% cool(buns, duration = "5mins") %>% enjoy()
  • 9. enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), "yeast", "water", "milk", "oil"), "flour", until = "soft"), duration = "3mins"), as = "balls", style = "slightly-flat"), degrees = 200, duration = "15mins"), duration = "5mins")) bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% append("flour", until = "soft") %>% beat(duration = "3mins") %>% shape(as = "balls", style = "slightly-flat") %>% bake(degrees = 200, duration = "15mins") %>% cool(buns, duration = "5mins") %>% enjoy()
  • 10. nycflights13: Data about flights departing NYC in 2013
  • 11. > library("nycflights13") > flights Source: local data frame [336,776 x 16] year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight 1 2013 1 1 517 2 830 11 UA N14228 1545 2 2013 1 1 533 4 850 20 UA N24211 1714 3 2013 1 1 542 2 923 33 AA N619AA 1141 4 2013 1 1 544 -1 1004 -18 B6 N804JB 725 5 2013 1 1 554 -6 812 -25 DL N668DN 461 6 2013 1 1 554 -4 740 12 UA N39463 1696 7 2013 1 1 555 -5 913 19 B6 N516JB 507 8 2013 1 1 557 -3 709 -14 EV N829AS 5708 9 2013 1 1 557 -3 838 -8 B6 N593JB 79 10 2013 1 1 558 -2 753 8 AA N3ALAA 301 .. ... ... ... ... ... ... ... ... ... ... Variables not shown: origin (chr), dest (chr), air_time (dbl), distance (dbl), hour (dbl), minute (dbl)
  • 12. tbl_df A data frame that does print all of itself by default > data <- tbl_df(mtcars) > data Source: local data frame [32 x 11] mpg cyl disp hp drat wt qsec vs am gear carb 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 .. ... ... ... ... ... ... ... .. .. ... ...
  • 13. filter A subset of the rows of the data frame
  • 14. filter(flights, month == 1, day == 1) flights %>% filter( dep_delay < 10 ) flights %>% filter( arr_delay < dep_delay ) flights %>% filter( hour < 12, arr_delay <= 0 )
  • 15. arrange reorder a data frame
  • 16. flights %>% filter( hour < 8 ) %>% arrange( year, month, day ) flights %>% arrange( desc(dep_delay) )
  • 17. select select certain columns from the data frame
  • 18. # Select columns by name select(flights, year, month, day) # Select all columns between year and day select(flights, year:day) # Select all columns except those from year to # day (inclusive) select(flights, -(year:day))
  • 19. mutate modify or create columns based on others
  • 20. d <- flights %>% mutate( gain = arr_delay - dep_delay, speed = distance / air_time * 60 ) %>% filter( gain > 0 ) %>% arrange( desc(speed) ) d %>% select( year, month, day, dest, gain, speed )
  • 21. summarise collapse a data frame into one row …
  • 22. summarise(flights, delay = mean(dep_delay, na.rm = TRUE)) flights %>% filter( dep_delay > 0 ) %>% summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
  • 23. group_by Group observations by one or more variables
  • 24. flights %>% group_by( tailnum ) %>% summarise( count = n(), dist = mean(distance, na.rm = TRUE), delay = mean(arr_delay, na.rm = TRUE) ) %>% filter( is.finite(delay) ) %>% arrange( desc(count) )
  • 25. flights %>% group_by(dest) %>% summarise( planes = n_distinct(tailnum), flights = n() ) %>% arrange( desc(flights) )
  • 26. joins joining two data frames
  • 27. inner_join all rows from x where there are matching values in y, and all columns from x and y. If there are multiple matches between x and y, all combination of the matches are returned. destinations <- flights %>% group_by(dest) %>% summarise( planes = n_distinct(tailnum), flights = n() ) %>% arrange( desc(flights) ) %>% rename( faa = dest ) inner_join( destinations, airports, by = "faa")
  • 28. inner_join all rows from x where there are matching values in y, and all columns from x and y. If there are multiple matches between x and y, all combination of the matches are returned. destinations <- flights %>% group_by(dest) %>% summarise( planes = n_distinct(tailnum), flights = n() ) %>% arrange( desc(flights) ) inner_join( destinations, airports, by = c( "dest" = "faa" ) )
  • 29. other joins See ?join • left_join, right_join • inner_join, outer_join • semi_join • anti_join
  • 30. dplyr %>% summary • Simple verbs: filter, mutate, select, summarise, arrange • Grouping with group_by • Joins with *_join • Convenient with %>% • F✈️ST
  • 31. dplyr Romain François @romain_francois romain@r-enthusiasts.com