Contenu connexe
Similaire à R de Hadoop (Oracle R Advanced Analytics for Hadoopご説明資料) (20)
R de Hadoop (Oracle R Advanced Analytics for Hadoopご説明資料)
- 1. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
> ore.connect
function (user = "", sid = "", host = "localhost", password = "",
port = 1521, service_name = NULL, conn_string = NULL, all = FALSE,
type = c("ORACLE", "HIVE"), tzone = Sys.getenv("TZ"))
{
type <- match.arg(type)
if (ore.is.connected())
ore.disconnect()
.ore.QueryEnvInit()
switch(type, ORACLE = .ore.oracleQuerySetup(), HIVE = .ore.hiveQuerySetup())
.ore.QueryEnv$connect(user, sid, host, password, port, service_name,
conn_string, tzone)
if (all) {
ore.sync()
ore.attach()
}
invisible()
}
<environment: namespace:OREbase>
R de Hadoop
Hadoop R
~
ehn]->b @ m -@EO
&Nrhr ,J d b<Kk e^, hf'
- 15. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
• 9X OV Egm^e
• B4 –
IGH ~
~ •
– Hbgnq H@YHE>N=NUYL=PD IGH
– Sbg]hpl eb[Kk]>e lHh ]^k,]ee fdeYkm,]ee L=PD
– @> ~ &Hbgnq-Sbg]hpl' IGH KN= HAYDKIA-eb[
• Cc 2VK K KMU - – &KN@'
Intel Math Kernel Library (MKL)
ORD MKL
15
R> Sys.BlasLapack()
$vendor
[1] "Intel Math Kernel Library (Intel MKL)"
$nthreads
[1] -1
R> Sys.BlasLapack()
$vendor
[1] "R internal BLAS and LAPACK"
$nthreads
[1] 1
KN==D
IGH ~
- 16. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. | 16
Vhk e^<[b ] m ebm^ uW" B
Oracle Distribution of R version 3.2.0 (--) -- "Full of Ingredients"
Copyright (C) The R Foundation for Statistical Computing
Platform: x86_64-unknown-linux-gnu (64-bit)
N
–
%eb ^gl^&'% %eb ^g ^&'%
N
% hgmkb[nmhkl&'%
N N ~
% bm mbhg&'%
%]^fh&'% –
%a^ei&'%
%a^ei,lm km&'% DPIH
%j&'% N
You are using Oracle's distribution of R. Please contact
Oracle Support for any problems you encounter with this distribution.
Sys.BlasLapack()
$vendor
[1] "Intel Math Kernel Library (Intel MKL)"
$nthreads
[1] -1
- 27. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
• KN
• 42
– i i vp
ORAAH
R
¥KMVO B 5X O¥ ¥S O B5 n ¥O M XXOM hk^ - @> ~
¥KMVO KNO¥ P ¥ KN ¥MR M XXOM a]_l,qq* hk a 46Cu 42 ~ ~
C ¥MR M XXOM a]_l,qq* hk a @>-D@BO ~
28
R
46C a]_l,qq * hk a
S O ¥O M XXOM hk^ -
C K¥U K¥U M XXOM li kd,qq *hk a,fe
HDFS
Spark Hive
R
¥KMVO B 5X O ¥S O Kk e^ =]o g ^]
=g ermb l @> & '
,
Kk e^ Hh ]^k _hk D ]hhi D ]hhi
Kk e^ @> ~
KN==D
- 28. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
ore.frame
• S O 42 i ¥O P¥KWO
– ~ ~ DERA ~
– ] m ,_k f^
• ¥O P¥KWO ] m ,_k f^ ln[ e ll
29
B5 ( ( i i 1 ¥KMVO B 5X O¥ ¥S O i B t B
R 0 N M ¥KMVO M W MN 5,/(,(J'( N M ( ( O,..)' K K P R W 39 6:116
B5 ( , E O¥ 7_SNO B O¥K ¥ KXN 6_XM S X C_ ¥ ON Lc ¥KMVO B 5X O¥ ¥S O
https://docs.oracle.com/cd/E67822_01/OREUG/GUID-C3F098F0-EE9F-45CB-B9F5-C8D4AF963D90.htm#OREUG566
Client R
ore.frame DBSQL
> class(TITANIC01)
[1] "ore.frame"
attr(,"package")
[1] "OREbase"
> colnames(TITANIC01)
[1] "Class" "Sex" "Age" "Survived" "Freq"
> mean(TITANIC01$Freq)
[1] 68.78125
B5 i CA
y r r
- 29. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
Oracle
•
30
> ore.create(iris,table="iris_hive2")
Warning message:
In ore.create(iris, table = "iris_hive2") :
column names modified by "ore.make.names" function
> colnames(iris_hive2)
[1] "sepal_length" "sepal_width" "petal_length" "petal_width" "species"
> class(iris_hive2$species)
[1] "ore.factor"
attr(,"package")
[1] "OREbase"
¥O MRK¥KM O¥ & '
¥O NK O
¥O NK O SWO
¥O NSPP SWO
¥O PKM ¥ ~
¥O P¥KWO ~ ~
¥O SX OQO¥
¥O VS
¥O V QSMKV
¥O WK ¥Sb
¥O X_WO¥SM
¥O LTOM
¥O OM ¥
> test.mat <- matrix(1:16,nrow=4,ncol=4)
> test.mat
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
> test.mat.hive <- ore.push(test.mat,table="hive_mat")
> test.mat.hive
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
> class(test.mat.hive)
[1] "ore.vecmatrix"
attr(,"package")
[1] "OREbase"
hive
Matrix
- 31. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
ORCH
• B11 ~ B3 ~
– KN D ~ Kk e^ N =]o g ^] =g ermb l _hk D ]hhi&KN==D'
• @^[n ~
– –
32
library(ORCH)>
n B B3 i i
KN v
> library(ORCH)
OREbase
OREcommon
: ‘OREbase’
The following objects are masked from ‘package:base’:
cbind, data.frame, eval, interaction, order, paste, pmax, pmin,
rbind, table
OREstats
MASS
ORCHcore
rJava
Oracle R Connector for Hadoop 2.5.1 (rev. 307)
Info: using native C base64 encoding implementation
Info: Hadoop distribution is Cloudera's CDH v5.5.1
Info: using auto-detected ORCH HAL v4.2
Info: HDFS workdir is set to "/user/oracle"
Warning: mapReduce checks are skipped due to "ORCH_MAPRED_CHECK"=FALSE
Warning: HDFS checks are skipped due to "ORCH_HDFS_CHECK"=FALSE
Info: Hadoop 2.6.0-cdh5.5.1 is up
Info: Sqoop 1.4.6-cdh5.5.1 is up
Info: OLH 3.5.0 is up
Info: Hive 1.1.0-cdh5.5.1-standalone is up
Info: loaded ORCH core Java library "orch-core-2.5.1-mr2.jar"
ORCHstats
orch.dbg.on("all")
orch.debug(T)
>
>
- 44. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. | 45
> hdfs.mkdir("oratest")
[1] "/user/oracle/oratest"
attr(,"orch.isdir")
[1] TRUE
attr(,"orch.dfs.path")
[1] TRUE
> hdfs.cd("oratest")
[1] "/user/oracle/oratest"
attr(,"orch.isdir")
[1] TRUE
attr(,"orch.dfs.path")
[1] TRUE
> hdfs.ls()
NULL
> hdfs.put(iris,dfs.name="iris.hdfs")
[1] "/user/oracle/oratest/iris.hdfs"
attr(,"orch.dfs.id")
[1] TRUE
> hdfs.ls()
[1] "iris.hdfs"
> hdfs.head("iris.hdfs",5)
[1] "5.1,3.5,1.4,0.2,setosa" "4.9,3,1.4,0.2,setosa" "4.7,3.2,1.3,0.2,setosa"
[4] "4.6,3.1,1.5,0.2,setosa" "5,3.6,1.4,0.2,setosa"
• hk m^lm
• hk m^lm
•
• bkbl bkbl,a]_l D@BO ~
•
• bkbl,a]_l 3
- 52. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
) Hive R
¥O MRK¥KM O¥
g a k* mhehp^k* mhnii^k* l^_he]* ln[* ln[lmk*
ln[lmkbg
¥O P¥KWO
lahp* mm a* V* "* "9+* VV* VV9+* a^ ]* m be* e^g ma*
gkhp* g he* JNKS* J KH* ]bf* g f^l* g f^l9+*
heg f^l* heg f^l9+* l,eblm* ngeblm* lnff kr* k[bg]*
[bg]* ] m ,_k f^* l,] m ,_k f^* l,^go* ^o e* )* +*
(* X* ##* #-#* -* hfi k^* Hh b * * qhk* bl,g *
bl,_bgbm^* bl,bg_bgbm^* bl,g g* [l* lb g* ljkm* ^bebg * _ehhk*
mkng * eh * eh /.* eh 0* eh /i* eh [* hl* lbg* m g*
^qi* ^qif/* hl* lbg* m g* khng]* Onff kr*
khpOnfl* heOnfl* khpI^ gl* heI^ gl* ngbjn^* [r*
f^k ^
KNAOm ml8 _bo^gnf* k^ m^* jn gmbe^* l]* o k&hger
_hk o^ mhkl'* f^]b g* EMN
¥O OM ¥
lahp* e^g ma* * bl,o^ mhk* l,o^ mhk* l, a k m^k* l,gnf^kb *
l,bgm^ ^k* l,eh b e* V * V9+ * E* hfi k^* hk^,k^ h]^* bl,g *
#bg# * ngbjn^* lhkm* m [e^* i lm^* m iier* [r* a^ ]* m be
¥O V QSMKV
9* * ::* 9:* :* * qhk* b_^el^* g]* hk
¥O X_WLO¥
)* +* (* X* ##* #-#* -* bl,_bgbm^* bl,bg_bgbm^* bl,g g* [l* lb g*
ljkm* ^bebg * _ehhk* mkng * eh * eh /.* eh 0* eh /i* eh [* hl* lbg
m g* ^qi* ^qif/* hl* lbg* m g* s ilf ee* khng]* Onff kr*
lnff kr* f^ g
i e
hk^, k^ m^* hk^,]khi* hk^,inla* hk^,inee* hk^, ^m
e
bl,hk^,_k f^* bl,hk^,o^ mhk* bl,hk^,eh b e* bl,hk^,bgm^ ^k*
bl,hk^,gnf^kb * bl,hk^, a k m^k* bl,hk^* l,hk^,_k f^*
l,hk^,o^ mhk* l,hk^,eh b e* l,hk^,bgm^ ^k* l,hk^,gnf^kb *
l,hk^, a k m^k* l,hk^
- 53. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. | 54
> ore.connect(host="localhost",port=10000,user="hive",schema="default",type="HIVE")
> ore.sync()
> ore.attach()
> ore.ls()
[1] "cmnt" "cust" "datx2_hive"
[4] "datx2_tohive" "datx_hive2"
> ore.create(cbind(iris,id=1:150) ,table="iris_hive")
The following object is masked _by_ .GlobalEnv:
iris_hive
Warning message:
In ore.create(cbind(iris, id = 1:150), table = "iris_hive") :
column names modified by "ore.make.names" function
> nrow(iris_hive)
[1] 150
> colnames(iris_hive)
[1] "sepal_length" "sepal_width" "petal_length" "petal_width" "species"
[6] "id"
> row.names(iris_hive) <- iris_hive$id
> summary(iris_hive$sepal_length)
Min. 1st Qu. Median Mean 3rd Qu. Max.
4.300 5.100 5.800 5.843 6.400 7.900
> iris_hive$newcol <- 150:1
> head(iris_hive,1)
sepal_length sepal_width petal_length petal_width species id newcol
1 5.1 3.5 1.4 0.2 setosa 1 150
• Dbo^
• ~
• ~
• Dbo^
• E@ khp,g f^l
• l^i eYe^g ma lnffkr
• bkblYabo^ ] m ,_k f^ g^p he
• bkbl E@ bkblYabo^
~
• gkhp
• heg f^l
- 55. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
Join the two tables by one common variable
> joined <- merge(tab_input,tab_input2,by="value")
JOIN
HIVE
Oracle R Advanced Analytics
for Hadoop Client Packages
HIVE
Transparency Engine
The new table is temporary, so it’s not pushed to HIVE
> ore.ls()
[1] "tab_input" "tab_input2"
But, it’s part of the local R objects
> ls()
[1] "joined"
> names(joined)
[1] "value" "v1.x" "v2.x" "v3.x" "v4.x" "v5.x" "v6.x" "v7.x" "v8.x" "v9.x"
[11] "v10.x" "v11.x" "v12.x" "v13.x" "v14.x" "v15.x" "v16.x" "v17.x" "v18.x" "v19.x"
[21] "v20.x" "v21.x" "v1.y" "v2.y" "v3.y" "v4.y" "v5.y" "v6.y" "v7.y" "v8.y"
[31] "v9.y" "v10.y" "v11.y" "v12.y" "v13.y" "v14.y" "v15.y" "v16.y" "v17.y" "v18.y"
[41] "v19.y" "v20.y" "v21.y"
4
/user/oracle/tab_input
HDFS Storage
HDFS Storage
3
HIVE
Thrift
Server
1HQL
Metastore
Metastore
Metastore
Metastore
2
56
Oracle Distribution of R version 3.2.0 (--) -- "Full of Ingredients"
- 66. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. | 67
> spark.connect(master="yarn-client",memory="1G",dfs.namenode="bigdatalite.localdomain")
> spark.connected()
[1] TRUE
> spark.session()
Reference class object of class ".orch.SparkSession"
Field ".jRef":
[1] "Java-Object{com.oracle.orch.spark.Session@4a901445}"
Field ".jSig":
[1] "Lcom/oracle/orch/spark/Session;"
Field ".stop":
[1] TRUE
Field ".error":
[1] FALSE
Field ".active":
[1] TRUE
> kyp.hdfs <- hdfs.put(kyphosis)
> kyp.rdd <- hdfs.toRDD(kyp.hdfs)
> kyp.rdd
[1] "/tmp/orch66e040bd0859"
attr(,"orch.dfs.id")
[1] TRUE
attr(,"orch.spark.rdd")
[1] "Java-Object{com.oracle.orch.spark.TextRDD@784d9bc}"
> kyp.fit <- orch.glm2(Kyphosis ~ Age + Number + Start, dfs.dat = kyp.hdfs)
• Oi kd
•
•
• driahlbl a]_l ~ ]_l,b]
dri,a]_l
• dri,a]_l Oi kd ~ N@@
dri,k]]
• dri,k]]
• hk a, ef0 ef & N@@
'
- 73. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. | 74
> orch.connect("oreuser","ora12c","shiva",port=1521, driver ="olh")
Connecting ORCH to RDBMS via [olh]
Host: shiva
Port: 1521
SID: ora12c
User: oreuser
Enter password for [oreuser]: ********
Connected to database "shiva:ora12c".
> hdfs.pull(cars.dfs,db.name="CARS_OLH")
Enter password for [oreuser]: ********
Enter password for [oreuser]: ********
> orch.connect("oreuser","ora12c","shiva",port=1521, driver ="sqoop")
Connecting ORCH to RDBMS via [sqoop]
Host: shiva
Port: 1521
SID: ora12c
User: oreuser
Enter password for [oreuser]:******
Connected to database "shiva:ora12c".
> cars.dfs2 <- hdfs.push("CARS_TABLE")
>
> hdfs.toHive(cars.dfs2,table="cars_hive")
• @> KHD
• a]_l,inee D@BO ~
~ ~
• Ojhhi @>
• a]_l,inla =NOYP=>HA D@BO
~
• D@BO kl,]_l0 klYabo^
DERA ~
- 81. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
dfs <- hdfs.put(iris, key="Species")
res <- NULL
dfs.res <- hadoop.run(
  dfs,
  mapper = function(key, vals) {
    keyval(key, vals)
  },
  reducer = function(key, vals) {
    dat <- do.call(rbind.data.frame, vals)
    orch.dlogv(colnames(dat))
    mod = lm(Petal.Length ~ Sepal.Length + Petal.Width, data=dat)
    fname <- paste("fit-", key, ".png", sep="")
    png(fname)
    par(mfrow=c(2,2), cex=0.6, mar=c(6,6,6,4), mex=0.8)
    plot(mod, id.n=1, cex.caption=0.8, which=1:4)
    dev.off()
    hdfs.fdir <- "/user/pngfiles"
    hdfs.fname <- paste(hdfs.fdir, "/", fname, sep="")
    system(paste("hadoop fs -copyFromLocal", fname, hdfs.fdir))
    pred <- predict(mod, dat)
    keyval(NULL, orch.pack(pred, hdfs.fname))
  }
)
Oracle RAAH
Client Packages
Map/Reduce Call
res <- hdfs.get(dfs.res)
finalres = list()
for (i in 1:nrow(res))
{ finalres[[i]] <-
orch.unpack(res[i,])}
/user/oracle/iris
Mapper(s)
Reducer(s)
R Result Object
Stored in HDFS
2
5
lm
Map-Reduce R
1
4
YARN: Hadoop
Map Reduce Job
82
Oracle Distribution of R version 3.2.0 (--) -- "Full of Ingredients"
- 82. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
hadoop.run
q
• D ]hhi Omk^ fbg
83
sh -c /usr/lib/hadoop/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar
-libjars /u01/app/oracle/product/12.1.0.2/dbhome_1/R/library/ORCHcore/java/orch-core-2.5.1-mr2.jar
-D stream.io.identifier.resolver.class=com.oracle.orch.streaming.io.NoSplitIdentifierResolver
-D mapred.job.name=ORCH_20160414091432-1
-input /tmp/orch66c568b00bbe
-output /tmp/orch66c58105bce
-mapper "/usr/bin/env R --vanilla --quiet --slave --no-save --no-restore -e
\"source('orch66c57801bd90.R')\" --args -m"
-reducer "/usr/bin/env R --vanilla --quiet --slave --no-save --no-restore -e
\"source('orch66c57801bd90.R')\" --args -r"
-file /tmp/orch66c57801bd90.R
-file /u01/app/oracle/product/12.1.0.2/dbhome_1/R/library/ORCHcore/libs/ORCHcore.so
-cmdenv R_HOME= -cmdenv R_PROFILE_USER= -cmdenv HADOOP_HOME= 2>&1; echo "?$?"
Hadoop Streaming
- 86. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
• K BON_MO
– &hk a, ho* hk a, hk'
– &hk a,ikbg hfi* hk a,ik^]b m'
– G+f^ gl &hk a,df^ gl*
hk a,ik^]b m'
– &hk a,ef* hk a,ik^]b m'
– &hk a, ef'
– &Hhp N gd I mkbq
B mhkbs mbhg'
&hk a,ef_'
– &hk a,gf_'
• C K¥U
– ~ ~ &hk a,g^nk e'
– &hk a, ef0'
– C K¥U VSL
• &hk a,fe,]m'
• &hk a,fe,e^ g^k'
• H llh &hk a,fe,e llh'
• Nb] ^ &hk a,fe,kb] ^'
• &hk a,fe,k g]hf,_hk^lm'
• ORI&hk a,fe,lof'
• df^ gl&hk a,fe,df^ gl'
• &hk a,fe,eh blmb '
• &hk a,fe,i '
87
C K¥U
2.6
NEW
- 87. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
Hadoop
orch.princomp
88
> USARRESTS <- hdfs.put(USArrests)
> arrestsModel <- orch.princomp(USARRESTS, cor = TRUE)
> arrestsModel
Call:
princomp(cor = cor, covmat = list(cov = cov, center = center,
n.obs = n.obs))
Standard deviations:
Comp.1 Comp.2 Comp.3 Comp.4
1.5748783 0.9948694 0.5971291 0.4164494
4 variables and 50 observations.
> res <- orch.predict(arrestsModel, USARRESTS)
> hdfs.head(res,3)
[1] "-0.985565884503144,1.13339237770997,0.444268787550733,0.156267144919712"
[2] "-1.95013775033503,1.07321325616849,-2.04000333289159,-0.438583439947189"
[3] "-1.76316353972298,-0.745956780637291,-0.0547808243262842,-0.834652924308098"
• a]_l,inm QO=kk^lml ~
D@BO –
• hk a,ikbg hfi
D ]hhi
•
• ik^]b m
• 1
- 88. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
Mappers
Mappers
Mappers
Mappers
Invoke ORAAH custom parallel distributed model (Linear Regression)
HDFS Hadoop
> ontime <- hdfs.attach("/user/oracle/ontime_s")
7 O S 5' aRW( "8 <A8N l HI8C:< <E <A8N
SUb(SPc6 cX T BP__Tab 6 .
TSdRTab 6 ,#
Oracle R Advanced Analytics
for Hadoop Client Packages
Machine Learning
algorithms module
7 bd Pah" O S#
Call:
ore.lm(formula = ARR_DELAY ~ DISTANCE + DEP_DELAY, data = ONTIME_S)
Residuals:
Min 1Q Median 3Q Max
-1462.45 -6.97 -1.36 5.07 925.08
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.254e-01 5.197e-02 4.336 1.45e-05 ***
DISTANCE -1.218e-03 5.803e-05 -20.979 < 2e-16 ***
DEP_DELAY 9.625e-01 1.151e-03 836.289 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.73 on 215144 degrees of freedom
(4785 observations deleted due to missingness)
Multiple R-squared: 0.7647, Adjusted R-squared: 0.7647
F-statistic: 3.497e+05 on 2 and 215144 DF, p-value: < 2.2e-16
2
/user/oracle/ontime_s
YARN: Hadoop
Map Reduce Job
1
4
Custom Java
Algorithm ReducersCustom Java
Algorithm Reducers
3
89
Oracle Distribution of R version 3.2.0 (--) -- "Full of Ingredients"
- 89. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
R Hadoop
• N D ]hhi ~
ik^]b m –
– ikbg hfi* df^ gl
90
> irisModel <- princomp(~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
data = iris)
> iris.hdfs.put <- hdfs.put(iris)
> iris.hdfs.pred <- orch.predict(irisModel, iris.hdfs.put)
> hdfs.head(iris.hdfs.pred,5)
[1] "-2.68412562596953,-0.319397246585102,0.0279148275894149,0.0022624370713179,setosa"
[2] "-2.71414168729432,0.17700122506478,0.210464272378243,0.0990265503235873,setosa"
[3] "-2.88899056905929,0.144949426085557,-0.0179002563208903,0.019968389709029,setosa"
[4] "-2.74534285564141,0.318298979251915,-0.0315593736056814,-0.0755758166136827,setosa"
[5] "-2.72871653655453,-0.326754512934921,-0.0900792405512,-0.0612585925856928,setosa"
• N ikbg hfi
• &D@BO ~ –'
• N ikbg hfi D@BO ~ ik^]b m
- 90. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
(LMF by mahout als)
• z
91
> u <- sample(1:100, 300, replace=TRUE)
> i <- sample(1:10, 300, replace=TRUE)
> ui <- unique(cbind(u,i))
> r <- sample(1:5, nrow(ui), replace=TRUE)
> input <- cbind(ui,r)
> inputFile <- ORCHcore:::.orch.tmpfile()
> write.table(input, file=inputFile, sep=",", col.names=FALSE, row.names=FALSE)
> fit <- orch.lmf(inputFile, method="mahout-als", rank=3, iterations=5)
> fit
Input HDFS Directory : /tmp/orch6f9c5c0fbbf7
Model HDFS Directory : /user/oracle/orch6f9c7790aff3
> class(fit)
[1] "orch.mahout.lmf.als" "orch.lmf"
> orch.recommend(fit, n=2, maxRating=5)
[1] "/user/oracle/orch6f9c2ddd6da4"
attr(,"orch.dfs.path")
[1] TRUE
• ~
•
• pkbm^,m [e^ ~
• hk a,ef_ I ahnm-=HO HIB
~
• hk a,k^ hffg^] ~
- 91. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
( )
• Number of obs 155671
Number of columns 46 columns
Missing values yes
# hidden Elapsed time Elapsed time
neurons (sec) nnet (sec) orch.neural
10 934.176 44.181
20 1861.812 44.969
30 2634.434 35.196
40 3674.379 39.217
50 4400.551 49.527
• Hardware spec: Single BDA node
MemTotal: 49GB
CPUs: 24 (3058MHz each)
• XXO
–
• ¥MR &Kk e^ N =]o g ^] =ge rmb l _hk D ]hhi'
– i v( ,72 h i
e ¥MR XO_¥KV i
– ~ I iN^]n ^
0
500
1000
1500
2000
2500
3000
3500
4000
4500
5000
10 20 30 40 50
()
Hidden Layer Node
nnet
orch.neural
N
Oracle R Advanced
Analytics for Hadoop
Spark
- 98. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. | 100
> spark.connect( master="yarn-client", memory="1G",dfs.namenode="bigdatalite" )
> kyp.mod <- orch.ml.kmeans(formula = ~ Number + Age, data =kyp.dfs)
OBX Model Matrix: created MLlib Vector RDD (81 rows) 0.014 sec
OBX Machine Learning: MLlib K-means elapsed time 4.284 sec
> kyp.mod
$formula
[1] "~Number + Age"
$predictMetadata
[1] "Java-Object{com.oracle.obx.csv.CSVPredictMetadata@5cf1bbd3}"
$predictor
[1] "Java-Object{com.oracle.obx.ml.MLKMeans@204a02a4}"
$technique
[1] "MLlib K-means"
attr(,"class")
[1] "orch.ml.kmeans"
> kyp.pred <- predict(kyp.mod, newdata=kyp.dfs, supplemental = c("Kyphosis", "Age"))
OBX Model Matrix: created predict RDD (81 rows) 0.030 sec
> hdfs.write(kyp.pred,outPath="kyp_pred_km")
> hdfs.attach("kyp_pred_km")
Info: sampling data types of hdfs:"/user/oracle/kyp_pred_km"
Data class : data.frame
Column types: character, numeric, integer
Column count: 3
Has key : no
Pristine : yes
Key delim :
Value delim : ,
[1] "/user/oracle/kyp_pred_km"
attr(,"orch.dfs.id")
[1] TRUE
• li kd, hgg^ m
• hk a,fe,df^ gl
•
• ik^]b m
• Oi kd D@BO pkbm^
• D@BO KN D ~
- 99. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
Invoke ORAAH custom interface to Spark MLlib algorithms within R
Lasso by Spark MLlib (orch.ml.lasso)
> # Connects to Spark
> spark.connect("yarn-client", memory="24g")
> # Attaches the HDFS file for use within R
> # Can use either HDFS input data or HIVE tables
> data <- hdfs.attach("/user/oracle/kyphosis")
Building a LASSO model with Spark MLlib from a one line of code in R
> model <- orch.ml.lasso(formula = Kyphosis ~ Number + Age, data = data)
OBX Model Matrix: processed 1 factor variables 0.155 sec
OBX Model Matrix: created MLlib LabeledPoint RDD (81 rows) 0.015 sec
OBX Machine Learning: MLlib Lasso elapsed time 3.582 sec
Scoring a LASSO model with Spark MLlib from a one line of code in R
> pred <- predict(model, newdata = data, supplemental = c("Kyphosis", "Age"))
OBX Model Matrix: created predict RDD (81 rows) 0.007 sec
Oracle Distribution of R version 3.2.0 (--) -- "Full of Ingredients" YARN: Apache
Spark Job
1
4
2
Spark MLlib Algorithm
distributed in-Memory ComputationSpark MLlib Algorithm
distributed in-Memory Computation
/user/oracle/kyphosis
3
Oracle R Advanced Analytics
for Hadoop Client Packages
Spark-Based Machine
Learning algorithms
module
101
- 100. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
Spark MLlib
Spark MLlib
¥MR WV N Oi kd Ieb[
¥MR WV VSXOK¥ Oi kd Ieb[
¥MR WV VK Oi kd Ieb[ H llh
¥MR WV ¥SNQO Oi kd Ieb[
¥MR WV V QS SM Oi kd Ieb[
¥MR WV MK Oi kd Ieb[
¥MR WV ¥KXN W P ¥O Oi kd Ieb[
¥MR WV W Oi kd Ieb[ ~
¥MR WV UWOKX Oi kd Ieb[ G+I^ gl
¥MR W NOV WK ¥Sb Oi kd fh]^e,f mkbq
¥MR V KN W NOV D@BO Oi kd Oi kd Ieb[ ~
¥MR K O W NOV Oi kd D@BO Oi kd Ieb[
¥MR ¥O K¥O W NOV WK ¥Sb Oi kd fh]^e,f kmbq
RNP a¥S O Oi kd fh]^e,f mkbq D@BO –
Oi kd ¥MR ¥O K¥O D@BO Oi kd ~
¥MR _X ¥O K¥O Oi kd fh]^e,f mkbq
102
- 110. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
HDFS ( )
i RNP VO OV 46C i VO OV
RNP KW VO 46C i
RNP MVOKX_ 9X _ 46C i 1 i
RNP ROKN 46C u e i f
RNP KSV 46C u e i f
113
i i RNP _ B B 46C i
RNP QO 46C i B i
RNP B4K K B i B4K K 46C
RNP P¥ WB4K K B4K K 46C B
RNP _ V KN B i p 46C i
RNP N aXV KN 46C i B i i
RNP _ R i i i 46C i
RNP _VV 46C i i i i
RNP S O 46C S O i
RNP P¥ W S O S O 46C i
RNP B44 46C C K¥U B44 i i
- 111. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
ORAAH + ORE /Hive
Hive Oracle DB
6_XM S X 4O M¥S S X
a]_l,_khfDbo^ KN D Dbo^ D@BO
a]_l,mhDbo^ D@BO hk^,_k f^ Dbo^
hk^, k^ m^ ] m ,_k f^ hk^,_k f^ ~ ~
hk^,]khi ~ ~ ~
hk^, ^m hk^,_k f^
hk^,inee Dbo^ N ~ ~
hk^,inla N Dbo^ ~ ~
hk^,k^ h]^ hk^,o^ mhk
114
- 113. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
6_XM S X 4O M¥S S X
¥MR M ¥ L^ klhg
¥MR M
¥MR QO HVO OV fh]^e,f mkbq qe^o – ~ , ^mTe^o^el
¥MR QVW D@BO ~
¥MR QVW)
¥MR UWOKX D@BO ~ G
¥MR VW
m ee+ g]+ldbggr MN &POMN' Kk e^ N Agm^kikbl^ hk^,ef
~
¥MR VWP
c^eer_bla I ahnm em^kg mbg e^ lm ljn k^l pbma p^b am^] k^ ne kbs mbhg &=HO+SN'
¥MR XO_¥KV ~ ~ ~ ~
¥MR XWP
c^eer_bla
~ N JIB ~ ~ –
¥MR XWP 6KVQ N JIB ~ ~ ~ ~
¥MR ¥SXM W
¥MR ¥OM WWOXN hk a,f ahnm,ef_, le – ~ ~ g
¥MR KW VO
¥MR MKVO ~
116
- 116. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. | 119
hk a,][ ,e lm^kk ~ ~
hk a,][ ,h__ ~
hk a,][ ,hg ~ N KN
¥KMVO B 1N KXMON 1XKVc SM P ¥ KN
hk a,][ ,hnminm
hk a,o^klbhg KN D ~ ~
hk a,]^[n I iN^]n ^ N N
- 118. Copyright © 2014 Oracle and/or its affiliates. All rights reserved. |
Spark MLlib
Spark MLlib
¥MR WV N Oi kd Ieb[
¥MR WV VSXOK¥ Oi kd Ieb[
¥MR WV VK Oi kd Ieb[ H llh
¥MR WV ¥SNQO Oi kd Ieb[
¥MR WV V QS SM Oi kd Ieb[
¥MR WV MK Oi kd Ieb[
¥MR WV ¥KXN W P ¥O Oi kd Ieb[
¥MR WV W Oi kd Ieb[ ~
¥MR WV UWOKX Oi kd Ieb[ G+I^ gl
¥MR W NOV WK ¥Sb Oi kd fh]^e,f mkbq
¥MR V KN W NOV D@BO Oi kd Oi kd Ieb[ ~
¥MR K O W NOV Oi kd D@BO Oi kd Ieb[
¥MR ¥O K¥O W NOV WK ¥Sb Oi kd fh]^e,f kmbq
RNP a¥S O Oi kd fh]^e,f mkbq D@BO –
Oi kd ¥MR ¥O K¥O D@BO Oi kd ~
¥MR _X ¥O K¥O Oi kd fh]^e,f mkbq
121