SlideShare une entreprise Scribd logo
1  sur  6
Télécharger pour lire hors ligne
package org.ajug;

import   cascading.cascade.Cascade;
import   cascading.cascade.CascadeConnector;
import   cascading.flow.Flow;
import   cascading.flow.FlowConnector;
import   cascading.pipe.Each;
import   cascading.pipe.Every;
import   cascading.pipe.GroupBy;
import   cascading.pipe.Pipe;
import   cascading.scheme.TextDelimited;
import   cascading.scheme.TextLine;
import   cascading.tap.Hfs;
import   cascading.tap.SinkMode;
import   cascading.tap.Tap;
import   cascading.tuple.Fields;

import java.util.Properties;

public class Main {

   public static void main(String[] args) {
       Properties properties = new Properties();
       FlowConnector.setApplicationJarClass(properties, Main.class);
       properties.put("mapred.reduce.tasks", 5);

        Pipe mainPipe = new Each("M&M", new Fields("line"), new Parser());
        mainPipe = new GroupBy(mainPipe, new Fields("COLOR"));
        mainPipe = new Every(mainPipe, Fields.ALL, new ColorAggregator(new
ColorData()));

         Tap sourceTap = new Hfs(new TextLine(), args[0]);

        TextDelimited scheme = new TextDelimited(new Fields("COLOR",
"AVG_WIDTH", "AVG_WEIGHT"), ",", """);
        scheme.setNumSinkParts(1); // make sure we only get one file


         Tap sinkTap = new Hfs(scheme, args[1], SinkMode.REPLACE);

         FlowConnector flowConnector = new FlowConnector(properties);
         CascadeConnector cascadeConnector = new CascadeConnector();

         Flow flow = flowConnector.connect(sourceTap, sinkTap, mainPipe);

         Cascade cascade = cascadeConnector.connect(flow);
         cascade.complete();     // Finally run everything

    }
}
================================================
package org.ajug;

import   cascading.cascade.Cascade;
import   cascading.cascade.CascadeConnector;
import   cascading.flow.Flow;
import   cascading.flow.FlowConnector;
import   cascading.pipe.*;
import   cascading.scheme.TextDelimited;
import   cascading.scheme.TextLine;
import   cascading.tap.Hfs;
import   cascading.tap.SinkMode;
import   cascading.tap.Tap;
import   cascading.tuple.Fields;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class MultiOutputMain {

   public static void main(String[] args) {
       Properties properties = new Properties();
       FlowConnector.setApplicationJarClass(properties, Main.class);
       properties.put("mapred.reduce.tasks", 5);

         Pipe sourcePipe = new Each("M&M", new Fields("line"), new Parser());

        Pipe totalPipe = new GroupBy("Total", sourcePipe, new Fields("ONE"));
        totalPipe = new Every(totalPipe, Fields.ALL, new TotalAggregator(new
TotalData()));

        Pipe mainPipe = new GroupBy("Color", sourcePipe, new Fields("COLOR"));
        mainPipe = new Every(mainPipe, Fields.ALL, new ColorAggregator(new
ColorData()));

         Tap sourceTap = new Hfs(new TextLine(), args[0]);

        TextDelimited scheme = new TextDelimited(new Fields("COLOR",
"AVG_WIDTH", "AVG_WEIGHT"), ",", """);
        scheme.setNumSinkParts(1); // make sure we only get one file
        Tap colorTap = new Hfs(scheme, args[1] + "/color", SinkMode.REPLACE);


        TextDelimited totalScheme = new TextDelimited(new
Fields("FINAL_WIDTH", "FINAL_WEIGHT"), ",", """);
        totalScheme.setNumSinkParts(1); // make sure we only get one file
        Tap totalTap = new Hfs(totalScheme, args[1] + "/total",
SinkMode.REPLACE);

         FlowConnector flowConnector = new FlowConnector(properties);
         CascadeConnector cascadeConnector = new CascadeConnector();

         Map<String, Tap> outputs = new HashMap<String, Tap>();
         outputs.put(totalPipe.getName(), totalTap);
         outputs.put(mainPipe.getName(), colorTap);

        Flow flow = flowConnector.connect(sourceTap, outputs, totalPipe,
mainPipe);

         Cascade cascade = cascadeConnector.connect(flow);
         cascade.complete();     // Finally run everything

    }
}
=======================================
package org.ajug;
import   cascading.flow.FlowProcess;
import   cascading.operation.Function;
import   cascading.operation.FunctionCall;
import   cascading.tuple.Fields;
import   cascading.tuple.Tuple;

import java.io.Serializable;


public class Parser extends cascading.operation.BaseOperation implements
Serializable, Function {

    public Parser() {
        super(new Fields("ONE","COLOR", "WIDTH", "WEIGHT"));   // should be
constants file ;)
    }


     public void operate(FlowProcess a_flow, FunctionCall a_call) {

         String sourceData = a_call.getArguments().getString(0);
         sourceData = sourceData.trim();
         if (sourceData == null || sourceData.length() == 0) {
             return;       // blank line read from the source file, so ignore
it
         }

         String values[] = sourceData.split(",");

         Tuple output = new Tuple();

         output.add("1");
         output.add(values[0]);
         output.add(values[1]);
         output.add(values[2]);
         a_call.getOutputCollector().add(output);
     }

}
==============================================
package org.ajug;


import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Accumulates width and weight samples and reports their averages.
 */
public class ColorData implements Serializable {

    private long m_num = 0;
    private double m_width = 0;
    private double m_weight = 0;

    /** Clears the sample count and both running sums. */
    public void reset() {
        m_weight = 0;
        m_width = 0;
        m_num = 0;
    }

    /**
     * Records one sample. Note the argument order: weight first, then width.
     *
     * @param a_weight weight of the sample
     * @param a_width  width of the sample
     */
    public void addData(double a_weight, double a_width) {
        m_num++;
        m_width = m_width + a_width;
        m_weight = m_weight + a_weight;
    }

    /**
     * Builds a two-element tuple holding the average width followed by the
     * average weight.
     *
     * @return the averages, or {@code null} when no sample has been recorded
     */
    public Tuple getTuple() {
        if (m_num != 0) {
            Tuple averages = new Tuple();
            averages.add(m_width / m_num);
            averages.add(m_weight / m_num);
            return averages;
        }
        return null;
    }
}
===============================================
package org.ajug;

import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Accumulates width and weight samples across all records and reports their
 * averages.
 */
public class TotalData implements Serializable {

    private long m_num = 0;
    private double m_width = 0;
    private double m_weight = 0;

    /** Clears the sample count and both running sums. */
    public void reset() {
        m_weight = 0;
        m_width = 0;
        m_num = 0;
    }

    /**
     * Records one sample. Note the argument order: weight first, then width.
     *
     * @param a_weight weight of the sample
     * @param a_width  width of the sample
     */
    public void addData(double a_weight, double a_width) {
        m_num++;
        m_width = m_width + a_width;
        m_weight = m_weight + a_weight;
    }

    /**
     * Builds a two-element tuple holding the average width followed by the
     * average weight.
     *
     * @return the averages, or {@code null} when no sample has been recorded
     */
    public Tuple getTuple() {
        if (m_num != 0) {
            Tuple averages = new Tuple();
            averages.add(m_width / m_num);
            averages.add(m_weight / m_num);
            return averages;
        }
        return null;
    }
}
==================================================
package org.ajug;
import   cascading.flow.FlowProcess;
import   cascading.operation.Aggregator;
import   cascading.operation.AggregatorCall;
import   cascading.operation.BaseOperation;
import   cascading.tuple.Fields;
import   cascading.tuple.Tuple;
import   cascading.tuple.TupleEntry;
import   org.apache.log4j.Logger;

/**
 * Aggregator that feeds the WEIGHT and WIDTH of every record in a group into
 * a {@link TotalData} and emits its result as FINAL_WIDTH and FINAL_WEIGHT.
 */
public class TotalAggregator extends BaseOperation<TotalData>
        implements Aggregator<TotalData> {

    static Logger m_logger = Logger.getLogger(TotalAggregator.class.getName());

    /** Accumulator that is reset and reused as the context for each group. */
    private TotalData m_row;

    /**
     * @param a_row accumulator instance to use as the aggregation context
     */
    public TotalAggregator(TotalData a_row) {
        super(new Fields("FINAL_WIDTH", "FINAL_WEIGHT"));
        m_row = a_row;
    }

    /** Resets the accumulator and installs it as the context for the new group. */
    public void start(FlowProcess flowProcess,
                      AggregatorCall<TotalData> aggregatorCall) {
        m_row.reset();
        aggregatorCall.setContext(m_row);
    }

    /** Emits the accumulated tuple; emits nothing when the accumulator has no data. */
    public void complete(FlowProcess flowProcess,
                         AggregatorCall<TotalData> aggregatorCall) {
        Tuple summary = aggregatorCall.getContext().getTuple();
        if (summary != null) {
            aggregatorCall.getOutputCollector().add(summary);
        }
    }

    /** Adds the WEIGHT and WIDTH of the current record to the accumulator. */
    public void aggregate(FlowProcess flowProcess,
                          AggregatorCall<TotalData> aggregatorCall) {
        TupleEntry record = aggregatorCall.getArguments();
        TotalData totals = aggregatorCall.getContext();

        double recordWeight = record.getDouble("WEIGHT");
        double recordWidth = record.getDouble("WIDTH");
        totals.addData(recordWeight, recordWidth);
    }
}

=========================================
package org.ajug;

import cascading.flow.FlowProcess;
import cascading.operation.Aggregator;
import cascading.operation.AggregatorCall;
import   cascading.operation.BaseOperation;
import   cascading.tuple.Fields;
import   cascading.tuple.Tuple;
import   cascading.tuple.TupleEntry;
import   org.apache.log4j.Logger;


/**
 * Aggregator that feeds the WEIGHT and WIDTH of every record in a group into
 * a {@link ColorData} and emits its result as AVG_WIDTH and AVG_WEIGHT.
 */
public class ColorAggregator extends BaseOperation<ColorData>
        implements Aggregator<ColorData> {

    static Logger m_logger = Logger.getLogger(ColorAggregator.class.getName());

    /** Accumulator that is reset and reused as the context for each group. */
    private ColorData m_row;

    /**
     * @param a_row accumulator instance to use as the aggregation context
     */
    public ColorAggregator(ColorData a_row) {
        super(new Fields("AVG_WIDTH", "AVG_WEIGHT"));
        m_row = a_row;
    }

    /** Resets the accumulator and installs it as the context for the new group. */
    public void start(FlowProcess flowProcess,
                      AggregatorCall<ColorData> aggregatorCall) {
        m_row.reset();
        aggregatorCall.setContext(m_row);
    }

    /** Emits the accumulated tuple; emits nothing when the accumulator has no data. */
    public void complete(FlowProcess flowProcess,
                         AggregatorCall<ColorData> aggregatorCall) {
        Tuple summary = aggregatorCall.getContext().getTuple();
        if (summary != null) {
            aggregatorCall.getOutputCollector().add(summary);
        }
    }

    /** Adds the WEIGHT and WIDTH of the current record to the accumulator. */
    public void aggregate(FlowProcess flowProcess,
                          AggregatorCall<ColorData> aggregatorCall) {
        TupleEntry record = aggregatorCall.getArguments();
        ColorData perColor = aggregatorCall.getContext();

        double recordWeight = record.getDouble("WEIGHT");
        double recordWidth = record.getDouble("WIDTH");
        perColor.addData(recordWeight, recordWidth);
    }
}

Contenu connexe

Tendances

Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...CloudxLab
 
Compact and safely: static DSL on Kotlin
Compact and safely: static DSL on KotlinCompact and safely: static DSL on Kotlin
Compact and safely: static DSL on KotlinDmitry Pranchuk
 
Greach, GroovyFx Workshop
Greach, GroovyFx WorkshopGreach, GroovyFx Workshop
Greach, GroovyFx WorkshopDierk König
 
Jggug 2010 330 Grails 1.3 観察
Jggug 2010 330 Grails 1.3 観察Jggug 2010 330 Grails 1.3 観察
Jggug 2010 330 Grails 1.3 観察Tsuyoshi Yamamoto
 
The Ring programming language version 1.9 book - Part 43 of 210
The Ring programming language version 1.9 book - Part 43 of 210The Ring programming language version 1.9 book - Part 43 of 210
The Ring programming language version 1.9 book - Part 43 of 210Mahmoud Samir Fayed
 
Michael Häusler – Everyday flink
Michael Häusler – Everyday flinkMichael Häusler – Everyday flink
Michael Häusler – Everyday flinkFlink Forward
 
Spring data ii
Spring data iiSpring data ii
Spring data ii명철 강
 
Reactive Programming for a demanding world: building event-driven and respons...
Reactive Programming for a demanding world: building event-driven and respons...Reactive Programming for a demanding world: building event-driven and respons...
Reactive Programming for a demanding world: building event-driven and respons...Mario Fusco
 
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...InfluxData
 
Столпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойСтолпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойSigma Software
 
Image magick++
Image magick++Image magick++
Image magick++Yubin Lim
 
Look Ma, “update DB to HTML5 using C++”, no hands! 
Look Ma, “update DB to HTML5 using C++”, no hands! Look Ma, “update DB to HTML5 using C++”, no hands! 
Look Ma, “update DB to HTML5 using C++”, no hands! aleks-f
 

Tendances (20)

Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
 
Angular2 rxjs
Angular2 rxjsAngular2 rxjs
Angular2 rxjs
 
Kitura Todolist tutorial
Kitura Todolist tutorialKitura Todolist tutorial
Kitura Todolist tutorial
 
Compact and safely: static DSL on Kotlin
Compact and safely: static DSL on KotlinCompact and safely: static DSL on Kotlin
Compact and safely: static DSL on Kotlin
 
Greach, GroovyFx Workshop
Greach, GroovyFx WorkshopGreach, GroovyFx Workshop
Greach, GroovyFx Workshop
 
Jggug 2010 330 Grails 1.3 観察
Jggug 2010 330 Grails 1.3 観察Jggug 2010 330 Grails 1.3 観察
Jggug 2010 330 Grails 1.3 観察
 
Rxjs ngvikings
Rxjs ngvikingsRxjs ngvikings
Rxjs ngvikings
 
The Ring programming language version 1.9 book - Part 43 of 210
The Ring programming language version 1.9 book - Part 43 of 210The Ring programming language version 1.9 book - Part 43 of 210
The Ring programming language version 1.9 book - Part 43 of 210
 
Michael Häusler – Everyday flink
Michael Häusler – Everyday flinkMichael Häusler – Everyday flink
Michael Häusler – Everyday flink
 
Spring data ii
Spring data iiSpring data ii
Spring data ii
 
Reactive Programming for a demanding world: building event-driven and respons...
Reactive Programming for a demanding world: building event-driven and respons...Reactive Programming for a demanding world: building event-driven and respons...
Reactive Programming for a demanding world: building event-driven and respons...
 
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
 
Столпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойСтолпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай Мозговой
 
Image magick++
Image magick++Image magick++
Image magick++
 
Look Ma, “update DB to HTML5 using C++”, no hands! 
Look Ma, “update DB to HTML5 using C++”, no hands! Look Ma, “update DB to HTML5 using C++”, no hands! 
Look Ma, “update DB to HTML5 using C++”, no hands! 
 
Spark_Documentation_Template1
Spark_Documentation_Template1Spark_Documentation_Template1
Spark_Documentation_Template1
 
Parallel streams in java 8
Parallel streams in java 8Parallel streams in java 8
Parallel streams in java 8
 
Oop assignment 02
Oop assignment 02Oop assignment 02
Oop assignment 02
 
Hadoop Puzzlers
Hadoop PuzzlersHadoop Puzzlers
Hadoop Puzzlers
 
Gwt and Xtend
Gwt and XtendGwt and Xtend
Gwt and Xtend
 

En vedette

Jenny, Katerina And Arynda
Jenny, Katerina And AryndaJenny, Katerina And Arynda
Jenny, Katerina And Aryndakaterinawsy
 
Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Christopher Curtin
 
Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Christopher Curtin
 
2011 march cloud computing atlanta
2011 march cloud computing atlanta2011 march cloud computing atlanta
2011 march cloud computing atlantaChristopher Curtin
 
UnConference for Georgia Southern Computer Science March 31, 2015
UnConference for Georgia Southern Computer Science March 31, 2015UnConference for Georgia Southern Computer Science March 31, 2015
UnConference for Georgia Southern Computer Science March 31, 2015Christopher Curtin
 

En vedette (7)

Jenny, Katerina And Arynda
Jenny, Katerina And AryndaJenny, Katerina And Arynda
Jenny, Katerina And Arynda
 
Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009
 
Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013
 
2011 march cloud computing atlanta
2011 march cloud computing atlanta2011 march cloud computing atlanta
2011 march cloud computing atlanta
 
Nosql East October 2009
Nosql East October 2009Nosql East October 2009
Nosql East October 2009
 
Ajug april 2011
Ajug april 2011Ajug april 2011
Ajug april 2011
 
UnConference for Georgia Southern Computer Science March 31, 2015
UnConference for Georgia Southern Computer Science March 31, 2015UnConference for Georgia Southern Computer Science March 31, 2015
UnConference for Georgia Southern Computer Science March 31, 2015
 

Similaire à AJUG April 2011 Cascading example

TypeScript Introduction
TypeScript IntroductionTypeScript Introduction
TypeScript IntroductionDmitry Sheiko
 
AJUG April 2011 Raw hadoop example
AJUG April 2011 Raw hadoop exampleAJUG April 2011 Raw hadoop example
AJUG April 2011 Raw hadoop exampleChristopher Curtin
 
VISUALIZAR REGISTROS EN UN JTABLE
VISUALIZAR REGISTROS EN UN JTABLEVISUALIZAR REGISTROS EN UN JTABLE
VISUALIZAR REGISTROS EN UN JTABLEDarwin Durand
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGMatthew McCullough
 
JJUG CCC 2011 Spring
JJUG CCC 2011 SpringJJUG CCC 2011 Spring
JJUG CCC 2011 SpringKiyotaka Oku
 
Hadoop Integration in Cassandra
Hadoop Integration in CassandraHadoop Integration in Cassandra
Hadoop Integration in CassandraJairam Chandar
 
Hazelcast
HazelcastHazelcast
Hazelcastoztalip
 
Clustering your Application with Hazelcast
Clustering your Application with HazelcastClustering your Application with Hazelcast
Clustering your Application with HazelcastHazelcast
 
Store and Process Big Data with Hadoop and Cassandra
Store and Process Big Data with Hadoop and CassandraStore and Process Big Data with Hadoop and Cassandra
Store and Process Big Data with Hadoop and CassandraDeependra Ariyadewa
 
Hadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docxHadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docx1MS20CS406
 
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseCodepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseSages
 
Nice to meet Kotlin
Nice to meet KotlinNice to meet Kotlin
Nice to meet KotlinJieyi Wu
 
The current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxThe current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxtodd241
 
The core libraries you always wanted - Google Guava
The core libraries you always wanted - Google GuavaThe core libraries you always wanted - Google Guava
The core libraries you always wanted - Google GuavaMite Mitreski
 
Easy Scaling with Open Source Data Structures, by Talip Ozturk
Easy Scaling with Open Source Data Structures, by Talip OzturkEasy Scaling with Open Source Data Structures, by Talip Ozturk
Easy Scaling with Open Source Data Structures, by Talip OzturkZeroTurnaround
 
Logic Equations Resolver J Script
Logic Equations Resolver   J ScriptLogic Equations Resolver   J Script
Logic Equations Resolver J ScriptRoman Agaev
 
Reactive programming on Android
Reactive programming on AndroidReactive programming on Android
Reactive programming on AndroidTomáš Kypta
 
Main class --------------------------import java.awt.FlowLayout.pdf
Main class --------------------------import java.awt.FlowLayout.pdfMain class --------------------------import java.awt.FlowLayout.pdf
Main class --------------------------import java.awt.FlowLayout.pdfanushkaent7
 

Similaire à AJUG April 2011 Cascading example (20)

TypeScript Introduction
TypeScript IntroductionTypeScript Introduction
TypeScript Introduction
 
AJUG April 2011 Raw hadoop example
AJUG April 2011 Raw hadoop exampleAJUG April 2011 Raw hadoop example
AJUG April 2011 Raw hadoop example
 
VISUALIZAR REGISTROS EN UN JTABLE
VISUALIZAR REGISTROS EN UN JTABLEVISUALIZAR REGISTROS EN UN JTABLE
VISUALIZAR REGISTROS EN UN JTABLE
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUG
 
Amazon elastic map reduce
Amazon elastic map reduceAmazon elastic map reduce
Amazon elastic map reduce
 
JJUG CCC 2011 Spring
JJUG CCC 2011 SpringJJUG CCC 2011 Spring
JJUG CCC 2011 Spring
 
Hadoop Integration in Cassandra
Hadoop Integration in CassandraHadoop Integration in Cassandra
Hadoop Integration in Cassandra
 
Hazelcast
HazelcastHazelcast
Hazelcast
 
Clustering your Application with Hazelcast
Clustering your Application with HazelcastClustering your Application with Hazelcast
Clustering your Application with Hazelcast
 
Store and Process Big Data with Hadoop and Cassandra
Store and Process Big Data with Hadoop and CassandraStore and Process Big Data with Hadoop and Cassandra
Store and Process Big Data with Hadoop and Cassandra
 
Hadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docxHadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docx
 
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseCodepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
 
Jersey Guice AOP
Jersey Guice AOPJersey Guice AOP
Jersey Guice AOP
 
Nice to meet Kotlin
Nice to meet KotlinNice to meet Kotlin
Nice to meet Kotlin
 
The current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxThe current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docx
 
The core libraries you always wanted - Google Guava
The core libraries you always wanted - Google GuavaThe core libraries you always wanted - Google Guava
The core libraries you always wanted - Google Guava
 
Easy Scaling with Open Source Data Structures, by Talip Ozturk
Easy Scaling with Open Source Data Structures, by Talip OzturkEasy Scaling with Open Source Data Structures, by Talip Ozturk
Easy Scaling with Open Source Data Structures, by Talip Ozturk
 
Logic Equations Resolver J Script
Logic Equations Resolver   J ScriptLogic Equations Resolver   J Script
Logic Equations Resolver J Script
 
Reactive programming on Android
Reactive programming on AndroidReactive programming on Android
Reactive programming on Android
 
Main class --------------------------import java.awt.FlowLayout.pdf
Main class --------------------------import java.awt.FlowLayout.pdfMain class --------------------------import java.awt.FlowLayout.pdf
Main class --------------------------import java.awt.FlowLayout.pdf
 

Dernier

2024 April Patch Tuesday
2024 April Patch Tuesday2024 April Patch Tuesday
2024 April Patch TuesdayIvanti
 
Abdul Kader Baba- Managing Cybersecurity Risks and Compliance Requirements i...
Abdul Kader Baba- Managing Cybersecurity Risks  and Compliance Requirements i...Abdul Kader Baba- Managing Cybersecurity Risks  and Compliance Requirements i...
Abdul Kader Baba- Managing Cybersecurity Risks and Compliance Requirements i...itnewsafrica
 
Generative AI - Gitex v1Generative AI - Gitex v1.pptx
Generative AI - Gitex v1Generative AI - Gitex v1.pptxGenerative AI - Gitex v1Generative AI - Gitex v1.pptx
Generative AI - Gitex v1Generative AI - Gitex v1.pptxfnnc6jmgwh
 
UiPath Community: Communication Mining from Zero to Hero
UiPath Community: Communication Mining from Zero to HeroUiPath Community: Communication Mining from Zero to Hero
UiPath Community: Communication Mining from Zero to HeroUiPathCommunity
 
How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.Curtis Poe
 
React Native vs Ionic - The Best Mobile App Framework
React Native vs Ionic - The Best Mobile App FrameworkReact Native vs Ionic - The Best Mobile App Framework
React Native vs Ionic - The Best Mobile App FrameworkPixlogix Infotech
 
Testing tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesTesting tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesKari Kakkonen
 
Decarbonising Buildings: Making a net-zero built environment a reality
Decarbonising Buildings: Making a net-zero built environment a realityDecarbonising Buildings: Making a net-zero built environment a reality
Decarbonising Buildings: Making a net-zero built environment a realityIES VE
 
Digital Identity is Under Attack: FIDO Paris Seminar.pptx
Digital Identity is Under Attack: FIDO Paris Seminar.pptxDigital Identity is Under Attack: FIDO Paris Seminar.pptx
Digital Identity is Under Attack: FIDO Paris Seminar.pptxLoriGlavin3
 
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesHow to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesThousandEyes
 
Connecting the Dots for Information Discovery.pdf
Connecting the Dots for Information Discovery.pdfConnecting the Dots for Information Discovery.pdf
Connecting the Dots for Information Discovery.pdfNeo4j
 
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxUse of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxLoriGlavin3
 
Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Hiroshi SHIBATA
 
Generative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfGenerative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfIngrid Airi González
 
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxThe Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxLoriGlavin3
 
A Framework for Development in the AI Age
A Framework for Development in the AI AgeA Framework for Development in the AI Age
A Framework for Development in the AI AgeCprime
 
Transcript: New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024Transcript: New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024BookNet Canada
 
The Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsThe Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsPixlogix Infotech
 
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical InfrastructureVarsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructureitnewsafrica
 
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS:  6 Ways to Automate Your Data IntegrationBridging Between CAD & GIS:  6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integrationmarketing932765
 

Dernier (20)

2024 April Patch Tuesday
2024 April Patch Tuesday2024 April Patch Tuesday
2024 April Patch Tuesday
 
Abdul Kader Baba- Managing Cybersecurity Risks and Compliance Requirements i...
Abdul Kader Baba- Managing Cybersecurity Risks  and Compliance Requirements i...Abdul Kader Baba- Managing Cybersecurity Risks  and Compliance Requirements i...
Abdul Kader Baba- Managing Cybersecurity Risks and Compliance Requirements i...
 
Generative AI - Gitex v1Generative AI - Gitex v1.pptx
Generative AI - Gitex v1Generative AI - Gitex v1.pptxGenerative AI - Gitex v1Generative AI - Gitex v1.pptx
Generative AI - Gitex v1Generative AI - Gitex v1.pptx
 
UiPath Community: Communication Mining from Zero to Hero
UiPath Community: Communication Mining from Zero to HeroUiPath Community: Communication Mining from Zero to Hero
UiPath Community: Communication Mining from Zero to Hero
 
How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.
 
React Native vs Ionic - The Best Mobile App Framework
React Native vs Ionic - The Best Mobile App FrameworkReact Native vs Ionic - The Best Mobile App Framework
React Native vs Ionic - The Best Mobile App Framework
 
Testing tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesTesting tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examples
 
Decarbonising Buildings: Making a net-zero built environment a reality
Decarbonising Buildings: Making a net-zero built environment a realityDecarbonising Buildings: Making a net-zero built environment a reality
Decarbonising Buildings: Making a net-zero built environment a reality
 
Digital Identity is Under Attack: FIDO Paris Seminar.pptx
Digital Identity is Under Attack: FIDO Paris Seminar.pptxDigital Identity is Under Attack: FIDO Paris Seminar.pptx
Digital Identity is Under Attack: FIDO Paris Seminar.pptx
 
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesHow to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
 
Connecting the Dots for Information Discovery.pdf
Connecting the Dots for Information Discovery.pdfConnecting the Dots for Information Discovery.pdf
Connecting the Dots for Information Discovery.pdf
 
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxUse of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
 
Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024
 
Generative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfGenerative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdf
 
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxThe Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
 
A Framework for Development in the AI Age
A Framework for Development in the AI AgeA Framework for Development in the AI Age
A Framework for Development in the AI Age
 
Transcript: New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024Transcript: New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
 
The Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsThe Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and Cons
 
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical InfrastructureVarsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
 
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS:  6 Ways to Automate Your Data IntegrationBridging Between CAD & GIS:  6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
 

AJUG April 2011 Cascading example

  • 1. package org.ajug;

import cascading.cascade.Cascade;
import cascading.cascade.CascadeConnector;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.pipe.Each;
import cascading.pipe.Every;
import cascading.pipe.GroupBy;
import cascading.pipe.Pipe;
import cascading.scheme.TextDelimited;
import cascading.scheme.TextLine;
import cascading.tap.Hfs;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tuple.Fields;

import java.util.Properties;

/**
 * Single-output job: parses every input line with {@link Parser}, groups the parsed
 * tuples by the COLOR field and runs {@link ColorAggregator} over each group.
 *
 * <p>Usage: {@code Main <input path> <output path>}. The input is read as text lines
 * from {@code args[0]}; the result is written to {@code args[1]} as comma-delimited,
 * double-quote-quoted text with the fields COLOR, AVG_WIDTH and AVG_WEIGHT.
 */
public class Main {

    /**
     * Builds the pipe assembly, connects it to its source and sink taps and runs it.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            throw new IllegalArgumentException("usage: Main <input path> <output path>");
        }

        Properties properties = new Properties();
        FlowConnector.setApplicationJarClass(properties, Main.class);
        // Stored as a String: java.util.Properties only exposes String values through
        // getProperty(), so an Integer value can be silently invisible to readers.
        properties.put("mapred.reduce.tasks", "5");

        // line -> Parser -> group by COLOR -> ColorAggregator
        Pipe mainPipe = new Each("M&M", new Fields("line"), new Parser());
        mainPipe = new GroupBy(mainPipe, new Fields("COLOR"));
        mainPipe = new Every(mainPipe, Fields.ALL, new ColorAggregator(new ColorData()));

        Tap sourceTap = new Hfs(new TextLine(), args[0]);

        // Delimiter is a comma; the quote character is a double quote (escaped here).
        TextDelimited scheme = new TextDelimited(
                new Fields("COLOR", "AVG_WIDTH", "AVG_WEIGHT"), ",", "\"");
        scheme.setNumSinkParts(1); // make sure we only get one file

        // SinkMode.REPLACE is passed so the sink is opened in replace mode.
        Tap sinkTap = new Hfs(scheme, args[1], SinkMode.REPLACE);

        FlowConnector flowConnector = new FlowConnector(properties);
        CascadeConnector cascadeConnector = new CascadeConnector();

        Flow flow = flowConnector.connect(sourceTap, sinkTap, mainPipe);

        Cascade cascade = cascadeConnector.connect(flow);
        cascade.complete(); // Finally run everything
    }
}
================================================
package org.ajug;

import cascading.cascade.Cascade;
import cascading.cascade.CascadeConnector;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.pipe.*;
import cascading.scheme.TextDelimited;
  • 2. import cascading.scheme.TextLine;
import cascading.tap.Hfs;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tuple.Fields;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * Two-output variant of the job: one parsed stream feeds two branches.
 *
 * <ul>
 *   <li>"Total" branch: groups by the ONE field and applies {@link TotalAggregator};
 *       written to {@code <output>/total} with fields FINAL_WIDTH, FINAL_WEIGHT.</li>
 *   <li>"Color" branch: groups by the COLOR field and applies {@link ColorAggregator};
 *       written to {@code <output>/color} with fields COLOR, AVG_WIDTH, AVG_WEIGHT.</li>
 * </ul>
 *
 * <p>Usage: {@code MultiOutputMain <input path> <output path>}.
 */
public class MultiOutputMain {

    /**
     * Builds both branches, binds each tail pipe to its sink by pipe name and runs the flow.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output directory
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "usage: MultiOutputMain <input path> <output path>");
        }

        Properties properties = new Properties();
        // Reference this class rather than Main so the job does not depend on a sibling
        // entry point being present on the classpath.
        FlowConnector.setApplicationJarClass(properties, MultiOutputMain.class);
        // Stored as a String: java.util.Properties only exposes String values through
        // getProperty(), so an Integer value can be silently invisible to readers.
        properties.put("mapred.reduce.tasks", "5");

        // Shared head of both branches.
        Pipe sourcePipe = new Each("M&M", new Fields("line"), new Parser());

        Pipe totalPipe = new GroupBy("Total", sourcePipe, new Fields("ONE"));
        totalPipe = new Every(totalPipe, Fields.ALL, new TotalAggregator(new TotalData()));

        Pipe mainPipe = new GroupBy("Color", sourcePipe, new Fields("COLOR"));
        mainPipe = new Every(mainPipe, Fields.ALL, new ColorAggregator(new ColorData()));

        Tap sourceTap = new Hfs(new TextLine(), args[0]);

        // Delimiter is a comma; the quote character is a double quote (escaped here).
        TextDelimited scheme = new TextDelimited(
                new Fields("COLOR", "AVG_WIDTH", "AVG_WEIGHT"), ",", "\"");
        scheme.setNumSinkParts(1); // make sure we only get one file
        Tap colorTap = new Hfs(scheme, args[1] + "/color", SinkMode.REPLACE);

        TextDelimited totalScheme = new TextDelimited(
                new Fields("FINAL_WIDTH", "FINAL_WEIGHT"), ",", "\"");
        totalScheme.setNumSinkParts(1); // make sure we only get one file
        Tap totalTap = new Hfs(totalScheme, args[1] + "/total", SinkMode.REPLACE);

        FlowConnector flowConnector = new FlowConnector(properties);
        CascadeConnector cascadeConnector = new CascadeConnector();

        // Sinks are keyed by the name of the tail pipe they receive.
        Map<String, Tap> outputs = new HashMap<String, Tap>();
        outputs.put(totalPipe.getName(), totalTap);
        outputs.put(mainPipe.getName(), colorTap);

        Flow flow = flowConnector.connect(sourceTap, outputs, totalPipe, mainPipe);

        Cascade cascade = cascadeConnector.connect(flow);
        cascade.complete(); // Finally run everything
    }
}
=======================================
package org.ajug;
  • 3. import cascading.flow.FlowProcess;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Function that splits one comma-separated input line into the fields
 * ONE, COLOR, WIDTH and WEIGHT.
 *
 * <p>ONE is always the literal string "1"; the other three fields are the first three
 * comma-separated values of the line, in that order.
 */
public class Parser extends cascading.operation.BaseOperation implements Serializable, Function {

    public Parser() {
        super(new Fields("ONE","COLOR", "WIDTH", "WEIGHT")); // should be constants file ;)
    }

    /**
     * Parses argument 0 of the call and emits one output tuple for it.
     *
     * <p>Null, empty and whitespace-only lines produce no output. A non-blank line with
     * fewer than three comma-separated values raises ArrayIndexOutOfBoundsException.
     *
     * @param a_flow the current flow process (unused)
     * @param a_call supplies the input argument and the output collector
     */
    public void operate(FlowProcess a_flow, FunctionCall a_call) {
        String sourceData = a_call.getArguments().getString(0);

        // The null test has to come before trim(); testing afterwards could never be
        // true and a null value would already have thrown a NullPointerException.
        if (sourceData == null) {
            return;
        }

        sourceData = sourceData.trim();
        if (sourceData.length() == 0) {
            return; // blank line read from the source file, so ignore it
        }

        String values[] = sourceData.split(",");

        Tuple output = new Tuple();
        output.add("1");
        output.add(values[0]);
        output.add(values[1]);
        output.add(values[2]);

        a_call.getOutputCollector().add(output);
    }
}
==============================================
package org.ajug;

import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Running sums of width and weight plus a sample count, used as the accumulator
 * for {@link ColorAggregator}.
 */
public class ColorData implements Serializable {
    private long m_num = 0;
    private double m_width = 0;
    private double m_weight = 0;

    /** Clears the count and both sums back to zero. */
    public void reset() {
        m_num = 0;
        m_width = 0;
        m_weight = 0;
    }

    /**
     * Adds one sample to the running sums. Note the argument order: weight first.
     *
     * @param a_weight weight of the sample
     * @param a_width  width of the sample
     */
    public void addData(double a_weight, double a_width) {
        m_weight += a_weight;
        m_width += a_width;
        m_num++;
    }

    /**
     * @return a two-value tuple (mean width, mean weight), in that order, or
     *         {@code null} when no sample has been added since the last reset
     */
    public Tuple getTuple() {
        if (m_num == 0) {
            return null;
        }
        Tuple rtnValue = new Tuple();
        rtnValue.add(m_width / m_num);
        rtnValue.add(m_weight / m_num);
        return rtnValue;
    }
}
===============================================
package org.ajug;

import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Running sums of width and weight plus a sample count, used as the accumulator
 * for {@link TotalAggregator}.
 */
public class TotalData implements Serializable {
    private long m_num = 0;
    private double m_width = 0;
    private double m_weight = 0;

    /** Clears the count and both sums back to zero. */
    public void reset() {
        m_num = 0;
        m_width = 0;
        m_weight = 0;
    }

    /**
     * Adds one sample to the running sums. Note the argument order: weight first.
     *
     * @param a_weight weight of the sample
     * @param a_width  width of the sample
     */
    public void addData(double a_weight, double a_width) {
        m_weight += a_weight;
        m_width += a_width;
        m_num++;
    }

    /**
     * @return a two-value tuple (mean width, mean weight), in that order, or
     *         {@code null} when no sample has been added since the last reset
     */
    public Tuple getTuple() {
        if (m_num == 0) {
            return null;
        }
        Tuple rtnValue = new Tuple();
        rtnValue.add(m_width / m_num);
        rtnValue.add(m_weight / m_num);
        return rtnValue;
    }
}
==================================================
package org.ajug;
  • 5. import cascading.flow.FlowProcess;
import cascading.operation.Aggregator;
import cascading.operation.AggregatorCall;
import cascading.operation.BaseOperation;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import org.apache.log4j.Logger;

/**
 * Aggregator that feeds the WEIGHT and WIDTH of every tuple in a group into a
 * {@link TotalData} accumulator and emits its result as FINAL_WIDTH, FINAL_WEIGHT.
 */
public class TotalAggregator extends BaseOperation<TotalData> implements Aggregator<TotalData> {
    static Logger m_logger = Logger.getLogger(TotalAggregator.class.getName());

    // Accumulator handed in by the caller; reset and installed as the context in start().
    private TotalData m_row;

    /**
     * @param a_row accumulator instance this aggregator will reuse as its context
     */
    public TotalAggregator(TotalData a_row) {
        super(new Fields("FINAL_WIDTH", "FINAL_WEIGHT"));
        m_row = a_row;
    }

    /** Resets the accumulator and installs it as the context of the call. */
    public void start(FlowProcess flowProcess, AggregatorCall<TotalData> aggregatorCall) {
        m_row.reset();
        aggregatorCall.setContext(m_row);
    }

    /** Reads WEIGHT and WIDTH from the current arguments and adds them to the context. */
    public void aggregate(FlowProcess flowProcess, AggregatorCall<TotalData> aggregatorCall) {
        TupleEntry entry = aggregatorCall.getArguments();
        double weight = entry.getDouble("WEIGHT");
        double width = entry.getDouble("WIDTH");
        aggregatorCall.getContext().addData(weight, width);
    }

    /** Emits the accumulator's tuple, or nothing when the accumulator returns null. */
    public void complete(FlowProcess flowProcess, AggregatorCall<TotalData> aggregatorCall) {
        Tuple averages = aggregatorCall.getContext().getTuple();
        if (averages != null) {
            aggregatorCall.getOutputCollector().add(averages);
        }
    }
}
=========================================
package org.ajug;

import cascading.flow.FlowProcess;
import cascading.operation.Aggregator;
import cascading.operation.AggregatorCall;
  • 6. import cascading.operation.BaseOperation;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import org.apache.log4j.Logger;

/**
 * Aggregator that feeds the WEIGHT and WIDTH of every tuple in a group into a
 * {@link ColorData} accumulator and emits its result as AVG_WIDTH, AVG_WEIGHT.
 */
public class ColorAggregator extends BaseOperation<ColorData> implements Aggregator<ColorData> {
    static Logger m_logger = Logger.getLogger(ColorAggregator.class.getName());

    // Accumulator handed in by the caller; reset and installed as the context in start().
    private ColorData m_row;

    /**
     * @param a_row accumulator instance this aggregator will reuse as its context
     */
    public ColorAggregator(ColorData a_row) {
        super(new Fields("AVG_WIDTH", "AVG_WEIGHT"));
        m_row = a_row;
    }

    /** Resets the accumulator and installs it as the context of the call. */
    public void start(FlowProcess flowProcess, AggregatorCall<ColorData> aggregatorCall) {
        m_row.reset();
        aggregatorCall.setContext(m_row);
    }

    /** Reads WEIGHT and WIDTH from the current arguments and adds them to the context. */
    public void aggregate(FlowProcess flowProcess, AggregatorCall<ColorData> aggregatorCall) {
        TupleEntry entry = aggregatorCall.getArguments();
        double weight = entry.getDouble("WEIGHT");
        double width = entry.getDouble("WIDTH");
        aggregatorCall.getContext().addData(weight, width);
    }

    /** Emits the accumulator's tuple, or nothing when the accumulator returns null. */
    public void complete(FlowProcess flowProcess, AggregatorCall<ColorData> aggregatorCall) {
        Tuple averages = aggregatorCall.getContext().getTuple();
        if (averages != null) {
            aggregatorCall.getOutputCollector().add(averages);
        }
    }
}