SlideShare une entreprise Scribd logo
1  sur  13
Télécharger pour lire hors ligne
Introduction to R for Data Science
Lecturers
dipl. ing Branko Kovač
Data Analyst at CUBE/Data Science Mentor
at Springboard
Data Science zajednica Srbije
branko.kovac@gmail.com
dr Goran S. Milovanović
Data Scientist at DiploFoundation
Data Science zajednica Srbije
goran.s.milovanovic@gmail.com
goranm@diplomacy.edu
Strings in R
• {base} for strings
• {stringr} for strings
• {stringi} for strings
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# Processing strings in R
library(stringr)
# strings in R are character vectors
stringA <- "Hello world"
stringB <- "Sun shines!"
stringA
stringB
is.character(stringA) # TRUE
as.character(200*5)
as.numeric("1000")
as.double("3.14")
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# Using " and '
# either:
stringA <- "Hello 'World'"
stringA
# or
stringA <- 'Hello "World"'
stringA # prints: "Hello \"World\"" - what is
# this: \ ?
print(stringA)
# try:
writeLines(stringA)
print(stringA)
# Escaping in R: use \, the R escape
# character
stringA <- 'Hello "World"'
stringA
print(stringA)
writeLines(stringA)
# Escaping escaping
writeLines("\\") # nice
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# String Concatenation in R
stringC <- c(stringA,stringB) # a character
# vector of length == 2
length(stringC)
stringC <- paste(stringA,stringB,
sep=",") # length == 1, base
# function
writeLines(stringC)
# sep w. collapse (paste args)
stringC <- c(stringA,stringB)
stringC <- paste(stringC,collapse="__")
writeLines(stringC)
# paste0 is paste w. sep="", faster than
# paste(), base function
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
strA <- "One"
strB <- "Two"
strC <- "Three"
paste0(strA,strB, strC)
# the collapse argument is used in paste0 as well
strD <- c(strA,strB,strC)
paste0(strD,collapse="-")
# stringr concatenation, also has sep and collapse
# as args
str_c(strA,strB,strC)
str_c(strA,strB,strC,sep="...")
str_c(strD,collapse="...")
# both paste {base} and str_c {stringr} are
# vectorized
paste("Prefix-",strD, sep="-")
str_c("Prefix-",strD,sep="-")
Strings in R
• Concatenation
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
stringA <- "The quick brown fox jumps over the lazy dog";
splitA <- strsplit(stringA," ") # is.list(splitA) == T
splitA <- unlist(strsplit(stringA," "))
# "The quick brown" from "The quick brown fox jumps over the lazy dog"
splitA <- paste(unlist(strsplit(stringA," "))[1:3],collapse=" ")
# or
splitA <- paste(strsplit(stringA," ")[[1]][1:3],collapse=" ")
# advice: use
splitA <- strsplit(stringA," ",fixed=T) # fixed=T says: match the split argument exactly,
# otherwise, split is a regular expression; default is: fixed = FALSE
# string split w. {stringr}
is.list(str_split(stringA," "))
# this is interesting:
str_split(stringA," ", n=3)
# "The quick brown" from "The quick brown fox jumps over the lazy dog"
paste0(str_split(stringA," ", n=4)[[1]][1:3],collapse=" ")
Strings in R
• Splitting
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# default: str_split(string,pattern,n = Inf), where pattern is regex
str_split(stringA,boundary("word"))
# very useful:
stringA1 <- "The quick brown fox jumps over the lazy dog"
str_split(stringA1,boundary("word"))
stringA1 <- "Above all, don't lie to yourself.
The man who lies to himself and listens to his own lie comes to a point that he cannot distinguish the
truth within him, or around him, and so loses all respect for himself and for others.
And having no respect he ceases to love."
str_split(stringA1,boundary("word"))
str_split(stringA1,boundary("word",skip_word_none= F)) # including punctuation and special
str_split(stringA1,boundary("line_break"))
writeLines(str_split(stringA1,boundary("line_break"))[[1]])
Strings in R
• Splitting
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
stringA <- c("Belgrade","Zagreb","Ljubljana")# {stringr}
str_sub(stringA,1, 2)
# counting backwards
str_sub(stringA, -3, -1)
# {base}
substr(stringA,1, 3)
# play:
substr(stringA,c(1,2,3),c(2,3,4))
# nope:
substr(stringA, -2, -1) # {base}
Strings in R
• Subsetting strings
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# Replacing characters in strings
stringB <- stringA # just a copy of stringA
str_sub(stringB,1,2)<- "00"
stringB
# {base}
stringB <- stringA # just a copy of stringA
substr(stringB,1,3)<- "WowWow" # check the
# result!
stringB
substr(stringB,1,4)<- "WoWWow" # check the
# result!
stringB
substr(stringB,1,6)<- "WowWow" # check the
# result!
stringB
Strings in R
• Subsetting strings
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# UPPER CASE to lower case and vice versa
# in R
stringA <- "ABRACADABRA"
# {base}
tolower(stringA)
stringA <- tolower(stringA)
toupper(stringA)
stringA <- toupper(stringA)
# {stringr}
str_to_lower(stringA)
stringB <- str_to_lower(stringA)
str_to_upper(stringA)
# capitalize first letter
str_to_title(stringB)
• Transforming strings
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# Remove whitespace
stringA <- c(" Remove whitespace ");
str_trim(stringA)
# remove leading whitespace
str_trim(stringA,side="left")
# remove trailing whitespace
str_trim(stringA,side="right")
# remove all whitespace?
stringA <- c(" Remove whitespace ") # how about this one?
# there are different ways to do it. Try:
gsub(" ", "", stringA,fixed=T) # (!(fixed==T)), the first (pattern) argument is regex
# in general:
stringA <- "The quick brown fox jumps over the lazy dog The quick brown"
gsub("The quick brown","The slow red", stringA,fixed=T)
Strings in R
• More transforming
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# Searching for something in a string
# Does a string encompass a substring?
grepl("The quick brown",stringA,fixed = T)
grepl("The fast red", stringA, fixed = T)
stringB <- "Uraaaaaaaa"
grep("The quick brown",c(stringA,stringB),fixed = T)
# where?
stringA <- "The quick brown fox jumps over the lazy dog The quick brown"
w <- gregexpr("The quick brown",stringA)
str(w)
b1 <- w[[1]][1] # first match starts at
b2 <- w[[1]][2] # second match starts at
# now, match.length is an attribute of w[[1]], not w itself:
e1 <- attr(w[[1]],"match.length",exact= T)[1]
e2 <- attr(w[[1]],"match.length",exact= T)[2]
Strings in R
• Search
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# first match extraction:
str_sub(stringA,b1,b1+e1-1)
# second match extraction:
str_sub(stringA,b2,b2+e2-1)
# Ok, but easier and more convenient with {stringr}
str_detect(stringA,"The quick brown") # T or F
str_locate(stringA,"The quick brown") # first match
str_locate_all(stringA,"The quick brown") # all matches
# term frequency, as we know, is very important in text-mining:
term1 <- str_locate_all(stringA,"The quick brown")[[1]] # all matches for term1 ie. "The quick
# brown"
dim(term1)[1] # how many matches = how many rows in the str_locate_all output matrix...
Strings in R
• Search
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# Sorting strings in R
letters
str_sort(letters,locale="en")# locale = en
str_sort(letters,locale="haw")# locale = Hawaiian
# backwards
str_sort(letters,decreasing= T)
# handy:
stringA <- c("New York","Paris",NA, "Moscow","Tokyo")
str_sort(stringA,na_last=T)
# [1] "Moscow" "New York" "Paris" "Tokyo" NA
str_sort(stringA,na_last=F)
# [1] NA "Moscow" "New York" "Paris" "Tokyo"
# {base}
sort(stringA)
sort(stringA,decreasing=T)
Strings in R
• Sorting strings
Intro to R for Data Science
Session 5: Structuring Data: Strings in R
# Introduction to R for Data Science
# SESSION 5 :: 26 May, 2016
# Take home message on encodings
# 1. Most of the time, you simply need to know the source encoding
# 2. All of the time *** convert everything to UTF-8 *** - as soon as possible
# 3. Most {base}, and all {stringr} and {stringi} functions that process strings in R
# will convert their output to UTF-8 automatically
# Working inside R only, running an English locale, will never cause you any trouble
# However, in Data Science you will probably need to do a lot of web-scraping for a living
# - and that's where the fan starts.
# God bless iconv() - but don't get too excited, it does not avoid all problems
# Next session: Thursday, June 2, 2016 :: Linear Regression w. R
Strings in R
• Encodings…
Introduction to R for Data Science :: Session 5 [Data Structuring: Strings in R]

Contenu connexe

Tendances

Wireless sensor network Apriori an N-RMP
Wireless sensor network Apriori an N-RMP Wireless sensor network Apriori an N-RMP
Wireless sensor network Apriori an N-RMP
Amrit Khandelwal
 
Managing large datasets in R – ff examples and concepts
Managing large datasets in R – ff examples and conceptsManaging large datasets in R – ff examples and concepts
Managing large datasets in R – ff examples and concepts
Ajay Ohri
 

Tendances (20)

Introduction to Data Mining with R and Data Import/Export in R
Introduction to Data Mining with R and Data Import/Export in RIntroduction to Data Mining with R and Data Import/Export in R
Introduction to Data Mining with R and Data Import/Export in R
 
RDataMining slides-text-mining-with-r
RDataMining slides-text-mining-with-rRDataMining slides-text-mining-with-r
RDataMining slides-text-mining-with-r
 
Introduction to source{d} Engine and source{d} Lookout
Introduction to source{d} Engine and source{d} Lookout Introduction to source{d} Engine and source{d} Lookout
Introduction to source{d} Engine and source{d} Lookout
 
R language
R languageR language
R language
 
Text mining and social network analysis of twitter data part 1
Text mining and social network analysis of twitter data part 1Text mining and social network analysis of twitter data part 1
Text mining and social network analysis of twitter data part 1
 
my$talk=qr{((?:ir)?reg(?:ular )?exp(?:ressions?)?)}i;
my$talk=qr{((?:ir)?reg(?:ular )?exp(?:ressions?)?)}i;my$talk=qr{((?:ir)?reg(?:ular )?exp(?:ressions?)?)}i;
my$talk=qr{((?:ir)?reg(?:ular )?exp(?:ressions?)?)}i;
 
2. R-basics, Vectors, Arrays, Matrices, Factors
2. R-basics, Vectors, Arrays, Matrices, Factors2. R-basics, Vectors, Arrays, Matrices, Factors
2. R-basics, Vectors, Arrays, Matrices, Factors
 
Wireless sensor network Apriori an N-RMP
Wireless sensor network Apriori an N-RMP Wireless sensor network Apriori an N-RMP
Wireless sensor network Apriori an N-RMP
 
Python pandas Library
Python pandas LibraryPython pandas Library
Python pandas Library
 
Text analytics in Python and R with examples from Tobacco Control
Text analytics in Python and R with examples from Tobacco ControlText analytics in Python and R with examples from Tobacco Control
Text analytics in Python and R with examples from Tobacco Control
 
Future features for openCypher: Schema, Constraints, Subqueries, Configurable...
Future features for openCypher: Schema, Constraints, Subqueries, Configurable...Future features for openCypher: Schema, Constraints, Subqueries, Configurable...
Future features for openCypher: Schema, Constraints, Subqueries, Configurable...
 
Introduction to R Programming
Introduction to R ProgrammingIntroduction to R Programming
Introduction to R Programming
 
Hybrid acquisition of temporal scopes for rdf data
Hybrid acquisition of temporal scopes for rdf dataHybrid acquisition of temporal scopes for rdf data
Hybrid acquisition of temporal scopes for rdf data
 
Training in Analytics, R and Social Media Analytics
Training in Analytics, R and Social Media AnalyticsTraining in Analytics, R and Social Media Analytics
Training in Analytics, R and Social Media Analytics
 
Natural Language Processing in R (rNLP)
Natural Language Processing in R (rNLP)Natural Language Processing in R (rNLP)
Natural Language Processing in R (rNLP)
 
defense
defensedefense
defense
 
Stack Algorithm
Stack AlgorithmStack Algorithm
Stack Algorithm
 
Managing large datasets in R – ff examples and concepts
Managing large datasets in R – ff examples and conceptsManaging large datasets in R – ff examples and concepts
Managing large datasets in R – ff examples and concepts
 
Merge Multiple CSV in single data frame using R
Merge Multiple CSV in single data frame using RMerge Multiple CSV in single data frame using R
Merge Multiple CSV in single data frame using R
 
January 2016 Meetup: Speeding up (big) data manipulation with data.table package
January 2016 Meetup: Speeding up (big) data manipulation with data.table packageJanuary 2016 Meetup: Speeding up (big) data manipulation with data.table package
January 2016 Meetup: Speeding up (big) data manipulation with data.table package
 

En vedette

15 03 16_data sciences pour l'actuariat_f. soulie fogelman
15 03 16_data sciences pour l'actuariat_f. soulie fogelman15 03 16_data sciences pour l'actuariat_f. soulie fogelman
15 03 16_data sciences pour l'actuariat_f. soulie fogelman
Arthur Charpentier
 

En vedette (12)

Accessing Databases from R
Accessing Databases from RAccessing Databases from R
Accessing Databases from R
 
Slides erm-cea-ia
Slides erm-cea-iaSlides erm-cea-ia
Slides erm-cea-ia
 
IA-advanced-R
IA-advanced-RIA-advanced-R
IA-advanced-R
 
Slides ads ia
Slides ads iaSlides ads ia
Slides ads ia
 
Classification
ClassificationClassification
Classification
 
Slides lln-risques
Slides lln-risquesSlides lln-risques
Slides lln-risques
 
15 03 16_data sciences pour l'actuariat_f. soulie fogelman
15 03 16_data sciences pour l'actuariat_f. soulie fogelman15 03 16_data sciences pour l'actuariat_f. soulie fogelman
15 03 16_data sciences pour l'actuariat_f. soulie fogelman
 
Slides barcelona Machine Learning
Slides barcelona Machine LearningSlides barcelona Machine Learning
Slides barcelona Machine Learning
 
Graduate Econometrics Course, part 4, 2017
Graduate Econometrics Course, part 4, 2017Graduate Econometrics Course, part 4, 2017
Graduate Econometrics Course, part 4, 2017
 
Econometrics, PhD Course, #1 Nonlinearities
Econometrics, PhD Course, #1 NonlinearitiesEconometrics, PhD Course, #1 Nonlinearities
Econometrics, PhD Course, #1 Nonlinearities
 
Slides econometrics-2017-graduate-2
Slides econometrics-2017-graduate-2Slides econometrics-2017-graduate-2
Slides econometrics-2017-graduate-2
 
Econometrics 2017-graduate-3
Econometrics 2017-graduate-3Econometrics 2017-graduate-3
Econometrics 2017-graduate-3
 

Similaire à Introduction to R for Data Science :: Session 5 [Data Structuring: Strings in R]

Python advanced 2. regular expression in python
Python advanced 2. regular expression in pythonPython advanced 2. regular expression in python
Python advanced 2. regular expression in python
John(Qiang) Zhang
 
Bioinformatica 10-11-2011-t5-database searching
Bioinformatica 10-11-2011-t5-database searchingBioinformatica 10-11-2011-t5-database searching
Bioinformatica 10-11-2011-t5-database searching
Prof. Wim Van Criekinge
 
R is a very flexible and powerful programming language, as well as a.pdf
R is a very flexible and powerful programming language, as well as a.pdfR is a very flexible and powerful programming language, as well as a.pdf
R is a very flexible and powerful programming language, as well as a.pdf
annikasarees
 
Learning notes of r for python programmer (Temp1)
Learning notes of r for python programmer (Temp1)Learning notes of r for python programmer (Temp1)
Learning notes of r for python programmer (Temp1)
Chia-Chi Chang
 
Ejercicios de estilo en la programación
Ejercicios de estilo en la programaciónEjercicios de estilo en la programación
Ejercicios de estilo en la programación
Software Guru
 

Similaire à Introduction to R for Data Science :: Session 5 [Data Structuring: Strings in R] (20)

Python advanced 2. regular expression in python
Python advanced 2. regular expression in pythonPython advanced 2. regular expression in python
Python advanced 2. regular expression in python
 
Bioinformatics t5-databasesearching v2014
Bioinformatics t5-databasesearching v2014Bioinformatics t5-databasesearching v2014
Bioinformatics t5-databasesearching v2014
 
Bioinformatica 10-11-2011-t5-database searching
Bioinformatica 10-11-2011-t5-database searchingBioinformatica 10-11-2011-t5-database searching
Bioinformatica 10-11-2011-t5-database searching
 
Bioinformatics t5-database searching-v2013_wim_vancriekinge
Bioinformatics t5-database searching-v2013_wim_vancriekingeBioinformatics t5-database searching-v2013_wim_vancriekinge
Bioinformatics t5-database searching-v2013_wim_vancriekinge
 
2016 bioinformatics i_database_searching_wimvancriekinge
2016 bioinformatics i_database_searching_wimvancriekinge2016 bioinformatics i_database_searching_wimvancriekinge
2016 bioinformatics i_database_searching_wimvancriekinge
 
blast and fasta
 blast and fasta blast and fasta
blast and fasta
 
Module 3 - Regular Expressions, Dictionaries.pdf
Module 3 - Regular  Expressions,  Dictionaries.pdfModule 3 - Regular  Expressions,  Dictionaries.pdf
Module 3 - Regular Expressions, Dictionaries.pdf
 
R is a very flexible and powerful programming language, as well as a.pdf
R is a very flexible and powerful programming language, as well as a.pdfR is a very flexible and powerful programming language, as well as a.pdf
R is a very flexible and powerful programming language, as well as a.pdf
 
Learning notes of r for python programmer (Temp1)
Learning notes of r for python programmer (Temp1)Learning notes of r for python programmer (Temp1)
Learning notes of r for python programmer (Temp1)
 
22 spam
22 spam22 spam
22 spam
 
P3 2017 python_regexes
P3 2017 python_regexesP3 2017 python_regexes
P3 2017 python_regexes
 
Ggplot2 work
Ggplot2 workGgplot2 work
Ggplot2 work
 
Day 1b R structures objects.pptx
Day 1b   R structures   objects.pptxDay 1b   R structures   objects.pptx
Day 1b R structures objects.pptx
 
Regular expressions
Regular expressionsRegular expressions
Regular expressions
 
R Programming: Export/Output Data In R
R Programming: Export/Output Data In RR Programming: Export/Output Data In R
R Programming: Export/Output Data In R
 
Strings in python
Strings in pythonStrings in python
Strings in python
 
stringsinpython-181122100212.pdf
stringsinpython-181122100212.pdfstringsinpython-181122100212.pdf
stringsinpython-181122100212.pdf
 
Sequence comparison techniques
Sequence comparison techniquesSequence comparison techniques
Sequence comparison techniques
 
Rbootcamp Day 5
Rbootcamp Day 5Rbootcamp Day 5
Rbootcamp Day 5
 
Ejercicios de estilo en la programación
Ejercicios de estilo en la programaciónEjercicios de estilo en la programación
Ejercicios de estilo en la programación
 

Plus de Goran S. Milovanovic

Geneva Social Media Index - Report 2015 full report
Geneva Social Media Index - Report 2015 full reportGeneva Social Media Index - Report 2015 full report
Geneva Social Media Index - Report 2015 full report
Goran S. Milovanovic
 
Milovanović, G.S., Krstić, M. & Filipović, O. (2015). Kršenje homogenosti pre...
Milovanović, G.S., Krstić, M. & Filipović, O. (2015). Kršenje homogenosti pre...Milovanović, G.S., Krstić, M. & Filipović, O. (2015). Kršenje homogenosti pre...
Milovanović, G.S., Krstić, M. & Filipović, O. (2015). Kršenje homogenosti pre...
Goran S. Milovanovic
 
247113920-Cognitive-technologies-mapping-the-Internet-governance-debate
247113920-Cognitive-technologies-mapping-the-Internet-governance-debate247113920-Cognitive-technologies-mapping-the-Internet-governance-debate
247113920-Cognitive-technologies-mapping-the-Internet-governance-debate
Goran S. Milovanovic
 

Plus de Goran S. Milovanovic (20)

Uvod u R za Data Science :: Sesija 1 [Intro to R for Data Science :: Session 1]
Uvod u R za Data Science :: Sesija 1 [Intro to R for Data Science :: Session 1]Uvod u R za Data Science :: Sesija 1 [Intro to R for Data Science :: Session 1]
Uvod u R za Data Science :: Sesija 1 [Intro to R for Data Science :: Session 1]
 
Geneva Social Media Index - Report 2015 full report
Geneva Social Media Index - Report 2015 full reportGeneva Social Media Index - Report 2015 full report
Geneva Social Media Index - Report 2015 full report
 
Milovanović, G.S., Krstić, M. & Filipović, O. (2015). Kršenje homogenosti pre...
Milovanović, G.S., Krstić, M. & Filipović, O. (2015). Kršenje homogenosti pre...Milovanović, G.S., Krstić, M. & Filipović, O. (2015). Kršenje homogenosti pre...
Milovanović, G.S., Krstić, M. & Filipović, O. (2015). Kršenje homogenosti pre...
 
247113920-Cognitive-technologies-mapping-the-Internet-governance-debate
247113920-Cognitive-technologies-mapping-the-Internet-governance-debate247113920-Cognitive-technologies-mapping-the-Internet-governance-debate
247113920-Cognitive-technologies-mapping-the-Internet-governance-debate
 
Učenje i viši kognitivni procesi 10. Simboličke funkcije, VI Deo: Rešavanje p...
Učenje i viši kognitivni procesi 10. Simboličke funkcije, VI Deo: Rešavanje p...Učenje i viši kognitivni procesi 10. Simboličke funkcije, VI Deo: Rešavanje p...
Učenje i viši kognitivni procesi 10. Simboličke funkcije, VI Deo: Rešavanje p...
 
Učenje i viši kognitivni procesi 9. Simboličke funkcije, V Deo: Rezonovanje u...
Učenje i viši kognitivni procesi 9. Simboličke funkcije, V Deo: Rezonovanje u...Učenje i viši kognitivni procesi 9. Simboličke funkcije, V Deo: Rezonovanje u...
Učenje i viši kognitivni procesi 9. Simboličke funkcije, V Deo: Rezonovanje u...
 
Učenje i viši kognitivni procesi 9. Simboličke funkcije, V Deo: Suđenje, heur...
Učenje i viši kognitivni procesi 9. Simboličke funkcije, V Deo: Suđenje, heur...Učenje i viši kognitivni procesi 9. Simboličke funkcije, V Deo: Suđenje, heur...
Učenje i viši kognitivni procesi 9. Simboličke funkcije, V Deo: Suđenje, heur...
 
Učenje i viši kognitivni procesi 8. Simboličke funkcije, IV Deo: Analogija i ...
Učenje i viši kognitivni procesi 8. Simboličke funkcije, IV Deo: Analogija i ...Učenje i viši kognitivni procesi 8. Simboličke funkcije, IV Deo: Analogija i ...
Učenje i viši kognitivni procesi 8. Simboličke funkcije, IV Deo: Analogija i ...
 
Učenje i viši kognitivni procesi 9. Simboličke funkcije, III Deo: Kauzalnost,...
Učenje i viši kognitivni procesi 9. Simboličke funkcije, III Deo: Kauzalnost,...Učenje i viši kognitivni procesi 9. Simboličke funkcije, III Deo: Kauzalnost,...
Učenje i viši kognitivni procesi 9. Simboličke funkcije, III Deo: Kauzalnost,...
 
Učenje i viši kognitivni procesi 8. Simboličke funkcije, II Deo: Distribuiran...
Učenje i viši kognitivni procesi 8. Simboličke funkcije, II Deo: Distribuiran...Učenje i viši kognitivni procesi 8. Simboličke funkcije, II Deo: Distribuiran...
Učenje i viši kognitivni procesi 8. Simboličke funkcije, II Deo: Distribuiran...
 
Učenje i viši kognitivni procesi 8. Simboličke funkcije, II Deo: Konekcioniza...
Učenje i viši kognitivni procesi 8. Simboličke funkcije, II Deo: Konekcioniza...Učenje i viši kognitivni procesi 8. Simboličke funkcije, II Deo: Konekcioniza...
Učenje i viši kognitivni procesi 8. Simboličke funkcije, II Deo: Konekcioniza...
 
Učenje i viši kognitivni procesi 7a. Simboličke funkcije, I Deo: Učenje kateg...
Učenje i viši kognitivni procesi 7a. Simboličke funkcije, I Deo: Učenje kateg...Učenje i viši kognitivni procesi 7a. Simboličke funkcije, I Deo: Učenje kateg...
Učenje i viši kognitivni procesi 7a. Simboličke funkcije, I Deo: Učenje kateg...
 
Učenje i viši kognitivni procesi 7. Simboličke funkcije, I Deo: Koncepti, kat...
Učenje i viši kognitivni procesi 7. Simboličke funkcije, I Deo: Koncepti, kat...Učenje i viši kognitivni procesi 7. Simboličke funkcije, I Deo: Koncepti, kat...
Učenje i viši kognitivni procesi 7. Simboličke funkcije, I Deo: Koncepti, kat...
 
Učenje i viši kognitivni procesi 7. Učenje, IV Deo: Neasocijativno učenje, ef...
Učenje i viši kognitivni procesi 7. Učenje, IV Deo: Neasocijativno učenje, ef...Učenje i viši kognitivni procesi 7. Učenje, IV Deo: Neasocijativno učenje, ef...
Učenje i viši kognitivni procesi 7. Učenje, IV Deo: Neasocijativno učenje, ef...
 
Učenje i viši kognitivni procesi 6. Učenje, III Deo: Hernstejnov zakon slagan...
Učenje i viši kognitivni procesi 6. Učenje, III Deo: Hernstejnov zakon slagan...Učenje i viši kognitivni procesi 6. Učenje, III Deo: Hernstejnov zakon slagan...
Učenje i viši kognitivni procesi 6. Učenje, III Deo: Hernstejnov zakon slagan...
 
Učenje i viši kognitivni procesi 6. Učenje, III Deo: Instrumentalno učenje
Učenje i viši kognitivni procesi 6. Učenje, III Deo: Instrumentalno učenjeUčenje i viši kognitivni procesi 6. Učenje, III Deo: Instrumentalno učenje
Učenje i viši kognitivni procesi 6. Učenje, III Deo: Instrumentalno učenje
 
Učenje i viši kognitivni procesi 5. Učenje, II Deo: Blokiranje, osenčavanje, ...
Učenje i viši kognitivni procesi 5. Učenje, II Deo: Blokiranje, osenčavanje, ...Učenje i viši kognitivni procesi 5. Učenje, II Deo: Blokiranje, osenčavanje, ...
Učenje i viši kognitivni procesi 5. Učenje, II Deo: Blokiranje, osenčavanje, ...
 
Učenje i viši kognitivni procesi 5. Učenje, II Deo: klasično uslovljavanje i ...
Učenje i viši kognitivni procesi 5. Učenje, II Deo: klasično uslovljavanje i ...Učenje i viši kognitivni procesi 5. Učenje, II Deo: klasično uslovljavanje i ...
Učenje i viši kognitivni procesi 5. Učenje, II Deo: klasično uslovljavanje i ...
 
Učenje i viši kognitivni procesi 5. Učenje, I Deo
Učenje i viši kognitivni procesi 5. Učenje, I DeoUčenje i viši kognitivni procesi 5. Učenje, I Deo
Učenje i viši kognitivni procesi 5. Učenje, I Deo
 
Učenje i viši kognitivni procesi 4a. Debata o racionalnosti, nastavak
Učenje i viši kognitivni procesi 4a. Debata o racionalnosti, nastavakUčenje i viši kognitivni procesi 4a. Debata o racionalnosti, nastavak
Učenje i viši kognitivni procesi 4a. Debata o racionalnosti, nastavak
 

Dernier

Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
ZurliaSoop
 
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in DelhiRussian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
kauryashika82
 
Seal of Good Local Governance (SGLG) 2024Final.pptx
Seal of Good Local Governance (SGLG) 2024Final.pptxSeal of Good Local Governance (SGLG) 2024Final.pptx
Seal of Good Local Governance (SGLG) 2024Final.pptx
negromaestrong
 

Dernier (20)

Kodo Millet PPT made by Ghanshyam bairwa college of Agriculture kumher bhara...
Kodo Millet  PPT made by Ghanshyam bairwa college of Agriculture kumher bhara...Kodo Millet  PPT made by Ghanshyam bairwa college of Agriculture kumher bhara...
Kodo Millet PPT made by Ghanshyam bairwa college of Agriculture kumher bhara...
 
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
 
Unit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptxUnit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptx
 
Magic bus Group work1and 2 (Team 3).pptx
Magic bus Group work1and 2 (Team 3).pptxMagic bus Group work1and 2 (Team 3).pptx
Magic bus Group work1and 2 (Team 3).pptx
 
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in DelhiRussian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
 
ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.
 
Application orientated numerical on hev.ppt
Application orientated numerical on hev.pptApplication orientated numerical on hev.ppt
Application orientated numerical on hev.ppt
 
This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.
 
SOC 101 Demonstration of Learning Presentation
SOC 101 Demonstration of Learning PresentationSOC 101 Demonstration of Learning Presentation
SOC 101 Demonstration of Learning Presentation
 
Sociology 101 Demonstration of Learning Exhibit
Sociology 101 Demonstration of Learning ExhibitSociology 101 Demonstration of Learning Exhibit
Sociology 101 Demonstration of Learning Exhibit
 
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
 
Seal of Good Local Governance (SGLG) 2024Final.pptx
Seal of Good Local Governance (SGLG) 2024Final.pptxSeal of Good Local Governance (SGLG) 2024Final.pptx
Seal of Good Local Governance (SGLG) 2024Final.pptx
 
Holdier Curriculum Vitae (April 2024).pdf
Holdier Curriculum Vitae (April 2024).pdfHoldier Curriculum Vitae (April 2024).pdf
Holdier Curriculum Vitae (April 2024).pdf
 
Grant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy ConsultingGrant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy Consulting
 
PROCESS RECORDING FORMAT.docx
PROCESS      RECORDING        FORMAT.docxPROCESS      RECORDING        FORMAT.docx
PROCESS RECORDING FORMAT.docx
 
UGC NET Paper 1 Mathematical Reasoning & Aptitude.pdf
UGC NET Paper 1 Mathematical Reasoning & Aptitude.pdfUGC NET Paper 1 Mathematical Reasoning & Aptitude.pdf
UGC NET Paper 1 Mathematical Reasoning & Aptitude.pdf
 
Unit-V; Pricing (Pharma Marketing Management).pptx
Unit-V; Pricing (Pharma Marketing Management).pptxUnit-V; Pricing (Pharma Marketing Management).pptx
Unit-V; Pricing (Pharma Marketing Management).pptx
 
Micro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdfMicro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdf
 
Asian American Pacific Islander Month DDSD 2024.pptx
Asian American Pacific Islander Month DDSD 2024.pptxAsian American Pacific Islander Month DDSD 2024.pptx
Asian American Pacific Islander Month DDSD 2024.pptx
 
Python Notes for mca i year students osmania university.docx
Python Notes for mca i year students osmania university.docxPython Notes for mca i year students osmania university.docx
Python Notes for mca i year students osmania university.docx
 

Introduction to R for Data Science :: Session 5 [Data Structuring: Strings in R]

  • 1. Introduction to R for Data Science Lecturers dipl. ing Branko Kovač Data Analyst at CUBE/Data Science Mentor at Springboard Data Science zajednica Srbije branko.kovac@gmail.com dr Goran S. Milovanović Data Scientist at DiploFoundation Data Science zajednica Srbije goran.s.milovanovic@gmail.com goranm@diplomacy.edu
  • 2. Strings in R • {base} for strings • {stringr} for strings • {stringi} for strings Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # Processing strings in R library(stringr) # strings in R are character vectors stringA <- "Hello world" stringB <- "Sun shines!" stringA stringB is.character(stringA) # TRUE as.character(200*5) as.numeric("1000") as.double("3.14") # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # Using " and ' # either: stringA <- "Hello 'World'" stringA # or stringA <- 'Hello "World"' stringA # prints: "Hello \"World\"" - what is this: \ ? print(stringA) # try: writeLines(stringA) print(stringA) # Escaping in R: use \, the R escape character stringA <- 'Hello "World"' stringA print(stringA) writeLines(stringA) # Escaping escaping writeLines("\\") # nice
  • 3. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # String Concatenation in R stringC <- c(stringA,stringB) # a character vector of length == 2 length(stringC) stringC <- paste(stringA,stringB, sep=",") # length == 1, base function writeLines(stringC) # sep w. collapse (paste args) stringC <- c(stringA,stringB) stringC <- paste(stringC,collapse="__") writeLines(stringC) # paste0 is paste w. sep="", faster than paste(), base function # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 strA <- "One" strB <- "Two" strC <- "Three" paste0(strA,strB, strC) # the collapse argument is used in paste0 as well strD <- c(strA,strB,strC) paste0(strD,collapse="-") # stringr concatenation, also has sep and collapse as args str_c(strA,strB,strC) str_c(strA,strB,strC,sep="...") str_c(strD,collapse="...") # both paste {base} and str_c {stringr} are vectorized paste("Prefix-",strD, sep="-") str_c("Prefix-",strD,sep="-") Strings in R • Concatenation
  • 4. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 stringA <- "The quick brown fox jumps over the lazy dog"; splitA <- strsplit(stringA," ") # is.list(splitA) == T splitA <- unlist(strsplit(stringA," ")) # "The quick brown" from "The quick brown fox jumps over the lazy dog" splitA <- paste(unlist(strsplit(stringA," "))[1:3],collapse=" ") # or splitA <- paste(strsplit(stringA," ")[[1]][1:3],collapse=" ") # advice: use splitA <- strsplit(stringA," ",fixed=T) # fixed=T says: match the split argument exactly, # otherwise, split is a regular expression; default is: fixed = FALSE # string split w. {stringr} is.list(str_split(stringA," ")) # this is interesting: str_split(stringA," ", n=3) # "The quick brown" from "The quick brown fox jumps over the lazy dog" paste0(str_split(stringA," ", n=4)[[1]][1:3],collapse=" ") Strings in R • Splitting
  • 5. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # default: str_split(string, pattern, n = Inf), where pattern is regex str_split(stringA,boundary("word")) # very useful: stringA1 <- "The quick brown fox jumps over the lazy dog" str_split(stringA1,boundary("word")) stringA1 <- "Above all, don't lie to yourself. The man who lies to himself and listens to his own lie comes to a point that he cannot distinguish the truth within him, or around him, and so loses all respect for himself and for others. And having no respect he ceases to love." str_split(stringA1,boundary("word")) str_split(stringA1,boundary("word",skip_word_none = F)) # including punctuation and special str_split(stringA1,boundary("line_break")) writeLines(str_split(stringA1,boundary("line_break"))[[1]]) Strings in R • Splitting
  • 6. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 stringA <- c("Belgrade","Zagreb","Ljubljana") # {stringr} str_sub(stringA,1, 2) # counting backwards str_sub(stringA, -3, -1) # {base} substr(stringA,1, 3) # play: substr(stringA,c(1,2,3),c(2,3,4)) # nope: substr(stringA, -2, -1) # {base} Strings in R • Subsetting strings
  • 7. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # Replacing characters in strings stringB <- stringA # just a copy of stringA str_sub(stringB,1,2)<- "00" stringB # {base} stringB <- stringA # just a copy of stringA substr(stringB,1,3)<- "WowWow" # check the result! stringB substr(stringB,1,4)<- "WoWWow" # check the result! stringB substr(stringB,1,6)<- "WowWow" # check the result! stringB Strings in R • Subsetting strings # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # UPPER CASE to lower case and vice versa in R stringA <- "ABRACADABRA" # {base} tolower(stringA) stringA <- tolower(stringA) toupper(stringA) stringA <- toupper(stringA) # {stringr} str_to_lower(stringA) stringB <- str_to_lower(stringA) str_to_upper(stringA) # capitalize first letter str_to_title(stringB) • Transforming strings
  • 8. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # Remove whitespace stringA <- c(" Remove whitespace "); str_trim(stringA) # remove leading whitespace str_trim(stringA,side="left") # remove trailing whitespace str_trim(stringA,side="right") # remove all whitespace? stringA <- c(" Remove whitespace ") # how about this one? # there are different ways to do it. Try: gsub(" ", "", stringA,fixed=T) # (!(fixed==T)), the first (pattern) argument is regex # in general: stringA <- "The quick brown fox jumps over the lazy dog The quick brown" gsub("The quick brown","The slow red", stringA,fixed=T) Strings in R • More transforming
  • 9. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # Searching for something in a string # Does a string encompass a substring? grepl("The quick brown",stringA,fixed = T) grepl("The fast red", stringA, fixed = T) stringB <- "Uraaaaaaaa" grep("The quick brown",c(stringA,stringB),fixed = T) # where? stringA <- "The quick brown fox jumps over the lazy dog The quick brown" w <- gregexpr("The quick brown",stringA) str(w) b1 <- w[[1]][1] # first match starts at b2 <- w[[1]][2] # second match starts at # now, match.length is an attribute of w[[1]], not w itself: e1 <- attr(w[[1]],"match.length",exact = T)[1] e2 <- attr(w[[1]],"match.length",exact = T)[2] Strings in R • Search
  • 10. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # first match extraction: str_sub(stringA,b1,b1+e1-1) # second match extraction: str_sub(stringA,b2,b2+e2-1) # Ok, but easier and more convenient with {stringr} str_detect(stringA,"The quick brown") # T or F str_locate(stringA,"The quick brown") # first match str_locate_all(stringA,"The quick brown") # all matches # term frequency, as we know, is very important in text-mining: term1 <- str_locate_all(stringA,"The quick brown")[[1]] # all matches for term1 ie. "The quick brown" dim(term1)[1] # how many matches = how many rows in the str_locate_all output matrix... Strings in R • Search
  • 11. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # Sorting strings in R letters str_sort(letters,locale="en") # locale = en str_sort(letters,locale="haw") # locale = Hawaiian # backwards str_sort(letters,decreasing = T) # handy: stringA <- c("New York","Paris",NA, "Moscow","Tokyo") str_sort(stringA,na_last=T) # [1] "Moscow" "New York" "Paris" "Tokyo" NA str_sort(stringA,na_last=F) # [1] NA "Moscow" "New York" "Paris" "Tokyo" # {base} sort(stringA) sort(stringA,decreasing=T) Strings in R • Sorting strings
  • 12. Intro to R for Data Science Session 5: Structuring Data: Strings in R # Introduction to R for Data Science # SESSION 5 :: 26 May, 2016 # Take home message on encodings # 1. Most of the time, you simply need to know the source encoding # 2. All of the time *** convert everything to UTF-8 *** - as soon as possible # 3. Most {base}, and all {stringr} and {stringi} functions that process strings in R # will convert their output to UTF-8 automatically # Working inside R only, running an English locale, will never cause you any trouble # However, in Data Science you will probably need to do a lot of web-scraping for a living # - and that's where the fun starts. # God bless iconv() - but don't get too excited, it does not avoid all problems # Next session: Thursday, June 2, 2016 :: Linear Regression w. R Strings in R • Encodings…