SlideShare une entreprise Scribd logo
1  sur  109
Télécharger pour lire hors ligne
Hadoop
3    Hadoop
Hadoop

•
    -
    -
    -

•   HDFS(Hadoop Distributed Filesystem)
HDFS

•
    -
        ‣   MB, GB, TB

    -
        ‣
    -
        ‣
        ‣
HDFS

•
    -
        ‣
    -
        ‣
    -
        ‣
        ‣
HDFS

•
    -        64MB




    -
        ‣
        ‣
        ‣
HDFS

•
    -   /

    -       (   )

    -       (   )
HDFS

•
    -
               (           )

    -              (
                       )
HDFS

•
    -
    -

    -
HDFS

•

    -          (   ,   )

    -
HDFS

•
    -


        NameNode   SecondaryNameNode
HDFS

•
    -
 open()
append()
 write()
           NameNode   SecondaryNameNode
HDFS

•
    -
 open()
append()
 write()
           NameNode   SecondaryNameNode
HDFS

•
    -
 open()
append()
 write()
           NameNode   SecondaryNameNode
HDFS

•
    -


        NameNode   SecondaryNameNode
HDFS

•
    -


        NameNode   SecondaryNameNode
HDFS

•
    -


        NameNode   SecondaryNameNode
HDFS

•
    -


        NameNode   SecondaryNameNode
•   hadoop fs -copyFromLocal <localsrc> ... <dst>

•   hadoop fs -copyToLocal <src> <localdst>

•   hadoop fs -ls <path>

•   hadoop fs -mkdir <path>


•   hadoop fs -help
Hadoop

                        •hadoop fs -ls file:///
                        •hadoop fs -ls hdfs:///
                        •hadoop fs -ls hftp:///
                         URI
                                                    java

        local             file          org.apache.hadoop.fs.LocalFileSystem
        HDFS             hdfs     org.apache.hadoop.hdfs.DistributedFileSystem
        HFTP             hftp         org.apache.hadoop.hdfs.HftpFileSystem
        HSFTP            hsftp       org.apache.hadoop.hdfs.HsftpFileSystem
        HAR               har          org.apache.hadoop.fs.HarFileSystem
         KFS              kfs       org.apache.hadoop.fs.kfs.KosmosFileSystem
         FTP              ftp         org.apache.hadoop.fs.ftp.FTPFileSystem
         S3
                         s3n     org.apache.hadoop.fs.s3native.NativeS3FileSystem
    (           )
         S3
                          s3            org.apache.hadoop.fs.s3.S3FileSystem
(                   )
•   Thrift

•   C
    -   libhdfs




•   FUSE(FileSystem in Userspace)

•   WebDAV

•
    -   HTTP, FTP(           )
Java

    •   Hadoop URL

/**
 * Copies the resource named by the first command-line argument to standard
 * output, reading it through {@link java.net.URL}.
 */
public class URLCat {

    /** Size in bytes of the buffer handed to {@code IOUtils.copyBytes}. */
    private static final int COPY_BUFFER_BYTES = 4096;

    // The JVM accepts a URLStreamHandlerFactory at most once, so the
    // registration lives in a static initializer that runs on class load.
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * @param args {@code args[0]} is the URL to print
     * @throws Exception if the URL cannot be parsed, opened, or copied
     */
    public static void main(String[] args) throws Exception {
        InputStream source = null;
        try {
            URL location = new URL(args[0]);
            source = location.openStream();
            // Final argument false: copyBytes leaves both streams open, so
            // System.out stays usable and the input is closed below.
            IOUtils.copyBytes(source, System.out, COPY_BUFFER_BYTES, false);
        } finally {
            // source is still null here when openStream() failed.
            IOUtils.closeStream(source);
        }
    }
}
Java

•   FileSystem API

    public class FileSystemCat {
    	 public static void main(String[] args) throws Exception {
    	 	 String uri = args[0];
    	 	 Configuration conf = new Configuration();
    	 	 FileSystem fs = FileSystem.get(URI.create(uri), conf);
    	 	 InputStream in = null;
    	 	 try {
    	 	 	 in = fs.open(new Path(uri));
    	 	 	 IOUtils.copyBytes(in, System.out, 4096, false);
    	 	 } finally {
    	 	 	 IOUtils.closeStream(in);
    	 	 }
    	 }
Java

•       FSDataInputStream
    // Declaration sketch only: shows that FSDataInputStream is a DataInputStream
    // that additionally implements Seekable and PositionedReadable.
    public class FSDataInputStream extends DataInputStream
        implements Seekable, PositionedReadable {

          // implementation elided
    }




        /**
         * Stream capability for moving to, and reporting, a position.
         * Semantics below follow the Hadoop API documentation; they are not
         * shown by this declaration itself.
         */
        public interface Seekable {
            /** Moves to offset {@code pos}; NOTE(review): presumably measured from the start of the file. */
            void seek(long pos) throws IOException;
            /** Returns the current position as a {@code long} offset. */
            long getPos() throws IOException;
            /** NOTE(review): presumably switches to another copy of the data at {@code targetPos} and reports whether one was found; confirm. */
            boolean seekToNewSource(long targetPos) throws IOException;
        }
Java

  •   FSDataInputStream

/**
 * Writes the same file to standard output twice, rewinding the stream with
 * {@code seek(0)} between the two copies.
 */
public class FileSystemDoubleCat {

    /**
     * @param args {@code args[0]} is the URI of the file to print
     * @throws Exception if the filesystem lookup, open, seek, or copy fails
     */
    public static void main(String[] args) throws Exception {
        final String uri = args[0];
        URI location = URI.create(uri);
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(location, conf);

        FSDataInputStream stream = null;
        try {
            stream = fileSystem.open(new Path(uri));
            for (int pass = 0; pass < 2; pass++) {
                if (pass > 0) {
                    // Rewind to the beginning before the second copy.
                    stream.seek(0);
                }
                IOUtils.copyBytes(stream, System.out, 4096, false);
            }
        } finally {
            // stream is still null here when open() failed.
            IOUtils.closeStream(stream);
        }
    }
}
Java

•       FSDataInputStream
    // Declaration sketch only: shows that FSDataInputStream is a DataInputStream
    // that additionally implements Seekable and PositionedReadable.
    public class FSDataInputStream extends DataInputStream
        implements Seekable, PositionedReadable {

         // implementation elided
    }




/**
 * Read operations that take an explicit {@code position} argument instead of
 * relying on a stream's current offset.
 */
public interface PositionedReadable {

    /**
     * Reads up to {@code length} bytes starting at {@code position} into
     * {@code buffer} beginning at index {@code offset}.
     *
     * @return an {@code int} count; NOTE(review): presumably the number of bytes actually read
     * @throws IOException on read failure
     */
    int read(long position, byte[] buffer, int offset, int length) throws IOException;

    /**
     * Variant of {@link #read(long, byte[], int, int)} that returns nothing;
     * NOTE(review): presumably fails unless all {@code length} bytes are read.
     *
     * @throws IOException on read failure
     */
    void readFully(long position, byte[] buffer, int offset, int length) throws IOException;

    /**
     * Overload without an explicit offset or length.
     *
     * @throws IOException on read failure
     */
    void readFully(long position, byte[] buffer) throws IOException;
}
Java

•
    -   public FSDataOutputStream create(Path f)
        throws IOException

    -   public FSDataOutputStream append(Path f)
        throws IOException
Java

•   FSDataOutputStream
    -   FileSystem   create(), append()

    -

        // Declaration sketch only: FSDataOutputStream is a DataOutputStream that
        // implements Syncable and exposes getPos(); bodies are elided.
        public class FSDataOutputStream extends DataOutputStream
            implements Syncable {

            public long getPos() throws IOException {
                // body elided; NOTE(review): presumably returns the current position in the output — confirm
            }

            // remaining members elided
        }
Java

•
    -   public boolean mkdirs(Path f) throws IOException
Java

  •
// Fetch the metadata record for one path, then read its individual attributes.
FileStatus status = fs.getFileStatus(new Path("hdfs://localhost/hogehoge"));

status.isDir(); // whether the path is a directory
status.getLen();     // length of the file
status.getModificationTime();            // last modification time
status.getReplication();            // replication factor
status.getBlockSize();         // block size (64MB by default)
status.getOwner();        // owner of the file
status.getGroup();        // group of the file
status.getPermission().toString();            // permissions, rendered as a string
Java

•
    -   public FileStatus[] listStatus(Path f) throws IOException;

    -   public FileStatus[] listStatus(Path f, PathFilter filter)
        throws IOException;

    -   public FileStatus[] listStatus(Path[] files)
        throws IOException;

    -   public FileStatus[] listStatus(Path[] files, PathFilter filter)
        throws IOException;
Java

•
    /**
     * Lists the given paths and prints the path component of every entry
     * returned by {@code FileSystem.listStatus(Path[])}.
     */
    public class ListStatus {

        /**
         * @param args paths to list; {@code args[0]} also selects the filesystem
         * @throws Exception if the filesystem lookup or the listing fails
         */
        public static void main(String[] args) throws Exception {
            // The filesystem is resolved from the first argument only.
            final String firstUri = args[0];
            Configuration configuration = new Configuration();
            FileSystem fileSystem = FileSystem.get(URI.create(firstUri), configuration);

            // Every argument, including the first, becomes a Path to list.
            Path[] targets = new Path[args.length];
            int next = 0;
            for (String arg : args) {
                targets[next++] = new Path(arg);
            }

            FileStatus[] entries = fileSystem.listStatus(targets);
            for (int i = 0; i < entries.length; i++) {
                Path entryPath = entries[i].getPath();
                System.out.println(entryPath.toUri().getPath());
            }
        }
    }
Java

•
    -   public FileStatus[] globStatus(Path pathPattern) throws IOException

    -   public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
        throws IOException
Java

•


    [ab]                        {a,b}


    [^ab]                       {a,b}

                            {a,b}           (a b       )
    [a-b]
                    a       b
                    {a,b}           (a b           )       a   b
    [^a-b]

    {a,b}                               a    b


     \c                     c                      c
Java

•
    /**
     * Predicate over paths, accepted by the {@code listStatus} and
     * {@code globStatus} overloads that take a filter argument.
     */
    public interface PathFilter {
        /**
         * @param path the path to test
         * @return NOTE(review): presumably {@code true} to keep the path in the results — confirm against the Hadoop Javadoc
         */
        boolean accept(Path path);
    }
Java

   •
         /**
          * {@link PathFilter} that rejects every path whose full string form
          * matches a regular expression and accepts all others.
          */
         public class RegexExcludePathFilter implements PathFilter {

             /**
              * Compiled once at construction; {@code String.matches} would
              * recompile the expression on every {@link #accept} call.
              */
             private final java.util.regex.Pattern pattern;

             /**
              * @param regex expression that must match the whole path string
              *              for the path to be excluded
              * @throws java.util.regex.PatternSyntaxException if {@code regex} is invalid
              */
             public RegexExcludePathFilter(String regex) {
                 this.pattern = java.util.regex.Pattern.compile(regex);
             }

             /**
              * @param path the path to test
              * @return {@code false} when {@code path.toString()} matches the
              *         expression in full, {@code true} otherwise
              */
             @Override
             public boolean accept(Path path) {
                 // matches() requires a whole-input match, same as String.matches.
                 return !pattern.matcher(path.toString()).matches();
             }
         }




fs.globStatus(new Path("/2007/*/*"), new RegexExcludePathFilter("^.*/2007/12/31$"));
Java

•
    -   public boolean delete(Path f, boolean recursive)
        throws IOException;
•
HDFS       DistributedFileSystem                              NameNode




            FSDataInputStream




               DataNode1             DataNode2    DataNode3              DataNode4



            block1                 block3        block1              block2

            block4                 block4        block2              block3
•
           open(new Path(“/aaa.txt”))
HDFS                                    DistributedFileSystem                              NameNode




                                         FSDataInputStream




                                            DataNode1             DataNode2    DataNode3              DataNode4



                                         block1                 block3        block1              block2

                                         block4                 block4        block2              block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (          )
HDFS                                    DistributedFileSystem                                     NameNode




                                         FSDataInputStream




                                            DataNode1                DataNode2        DataNode3              DataNode4



                                         block1                   block3             block1              block2

                                         block4                   block4             block2              block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (          )
HDFS                                    DistributedFileSystem                                     NameNode


                                                                                                   aaa.txt : block1, block2, block3, block4

                                                                                                   block1 : DataNode1, DataNode3
                                                                                                   block2 : DataNode3, DataNode4
                                         FSDataInputStream                                         block3 : DataNode2, DataNode3
                                                                                                   block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2        DataNode3                 DataNode4



                                         block1                   block3             block1                  block2

                                         block4                   block4             block2                  block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                   )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                 block3 : DataNode2, DataNode3      block3 : DataNode2, DataNode3
                                                                           block4 : DataNode1, DataNode2      block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                       block1                   block2

                                         block4                   block4                       block2                   block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                   )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                 block3 : DataNode2, DataNode3      block3 : DataNode2, DataNode3
                                                                           block4 : DataNode1, DataNode2      block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                       block1                   block2

                                         block4                   block4                       block2                   block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                   )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                 block3 : DataNode2, DataNode3      block3 : DataNode2, DataNode3
                                                                           block4 : DataNode1, DataNode2      block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                       block1                   block2

                                         block4                   block4                       block2                   block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                   )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                         read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

               close()                                                                                        block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                 block3 : DataNode2, DataNode3      block3 : DataNode2, DataNode3
                                                                           block4 : DataNode1, DataNode2      block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                       block1                   block2

                                         block4                   block4                       block2                   block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
                                                                “/aaa.txt”
           open(new Path(“/aaa.txt”))                                 (                )
HDFS                                    DistributedFileSystem                                                NameNode

                                                                 block1 : DataNode1, DataNode3
                        read()
                                                                 block2 : DataNode3, DataNode4                aaa.txt : block1, block2, block3, block4

                                                                                                              block1 : DataNode1, DataNode3
                                                                                                              block2 : DataNode3, DataNode4
                                         FSDataInputStream                                                    block3 : DataNode2, DataNode3
                                                                                                              block4 : DataNode1, DataNode2




                                            DataNode1                DataNode2                   DataNode3                 DataNode4



                                         block1                   block3                     block1                     block2

                                         block4                   block4                     block2                     block3
•
    -
    -
    -
•
    -
    -                  9.1.1

    -
                                    (/d1/r1/n1, /d1/r1/n1) = 0
                  d1           d2   (/d1/r1/n1, /d1/r1/n2) = 2

                                    (/d1/r1/n1, /d1/r2/n3) = 4
        r1             r2      r3
                                    (/d1/r1/n1, /d2/r3/n4) = 6

n1           n2        n3      n4
•
HDFS       DistributedFileSystem                           NameNode




           FSDataOutputStream




               DataNode1           DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                           NameNode




                                          FSDataOutputStream




                                              DataNode1           DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                           NameNode




                                          FSDataOutputStream




                                              DataNode1           DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                           NameNode




                                          FSDataOutputStream




                                              DataNode1           DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                           NameNode

                       write()



                                          FSDataOutputStream




                                              DataNode1           DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                           NameNode

                       write()



                                          FSDataOutputStream




                           ack




                                              DataNode1           DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                           NameNode

                       write()



                                          FSDataOutputStream




                           ack




                                              DataNode1           DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                           NameNode

                       write()



                                          FSDataOutputStream




                           ack




                                              DataNode1           DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                              NameNode

                       write()



                                          FSDataOutputStream                       block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2                DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                              NameNode

                       write()



                                          FSDataOutputStream                       block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2                DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                              NameNode

                       write()



                                          FSDataOutputStream                       block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2                DataNode3


                                            block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()



                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()



                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1                  block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()



                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1                  block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()



                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1                  block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()



                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1                  block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()



                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1                  block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()



                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1                  block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()


                  close()
                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                            ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1                  block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                                NameNode

                       write()


                  close()
                                          FSDataOutputStream                         block1 : DataNode1, DataNode2, DataNode3

                                                        DataStreamer




                            ack




                                              DataNode1                  DataNode2                DataNode3


                                            block1                     block1                  block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2   DataNode3
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2   DataNode3


                                            block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                  NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2   DataNode3


                                            block1                     block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                  NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2   DataNode3


                                            block1                     block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                  NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2   DataNode3


                                            block1                     block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                  NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2   DataNode3


                                            block2
                                            block1                     block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                  NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2   DataNode3


                                            block2
                                            block1                     block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                  NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2   DataNode3


                                            block2
                                            block1                     block1
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                     NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2      DataNode3


                                            block2
                                            block1                     block1        block2
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                     NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2      DataNode3


                                            block2
                                            block1                     block1        block2
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                     NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2      DataNode3


                                            block2
                                            block1                     block1        block2
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                     NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2      DataNode3


                                            block2
                                            block1                     block1        block2
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                     NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2      DataNode3


                                            block2
                                            block1                     block1        block2
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                   NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2      DataNode3


                                            block2
                                            block1                                 block2
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                   NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                DataNode2      DataNode3


                                            block2
                                            block1                                 block2
•
           create(new Path(“/aaa.txt”))

HDFS                                      DistributedFileSystem                                     NameNode

                       write()



                                          FSDataOutputStream

                                                        DataStreamer




                           ack




                                              DataNode1                  DataNode2      DataNode3


                                            block2
                                            block1                     block2        block2
•
    -                      dfs.replication.min(        1)



    -   (dfs.replication                          3)




    -
•
    1.       (       )

    2.

    3.

    4.   (       )
•
    -

             fs.create(new Path("p"));




    -

        OutputStream out = fs.create(new Path("p"));
        out.write("content".getBytes("UTF-8"));
        out.flush();
•
    -   FSDataOutputStream sync()

    -   sync()   close()


                 FSDataOutputStream out = fs.create(new Path("p"));
                 out.write("content".getBytes("UTF-8"));
                 out.flush();
                 out.sync();
•
    -
        ‣   sync()

        ‣            sync()

        ‣   sync()
distcp

•   2        HDFS

    -   hadoop distcp hdfs://namenode1/foo hdfs://namenode2/bar

    -   hadoop distcp -overwrite hdfs://namenode1/foo hdfs://namenode2/bar/foo

    -   hadoop distcp -update hdfs://namenode1/foo hdfs://namenode2/bar/foo


•   MapReduce

    -                   256MB                 (1GB                 4             )

    -                                   map             (
                        )

    -                map        1        (tasktracker)             20map
Hadoop

•

•   HAR


•   hadoop archive -archiveName files.har /my/files /my
Hadoop

•
    -
                                             (
                )

    -
    -   HAR            MapReduce
                                     (   7.2.1.4
        CombineFileInputFormat   )
•   HDFS
    -
    -
    -
    -


•   distcp

•   HAR
•

•

Contenu connexe

Tendances

How mysql handles ORDER BY, GROUP BY, and DISTINCT
How mysql handles ORDER BY, GROUP BY, and DISTINCTHow mysql handles ORDER BY, GROUP BY, and DISTINCT
How mysql handles ORDER BY, GROUP BY, and DISTINCT
Sergey Petrunya
 
MongoDB & Hadoop: Flexible Hourly Batch Processing Model
MongoDB & Hadoop: Flexible Hourly Batch Processing ModelMongoDB & Hadoop: Flexible Hourly Batch Processing Model
MongoDB & Hadoop: Flexible Hourly Batch Processing Model
Takahiro Inoue
 
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Ontico
 

Tendances (20)

Perl for System Automation - 01 Advanced File Processing
Perl for System Automation - 01 Advanced File ProcessingPerl for System Automation - 01 Advanced File Processing
Perl for System Automation - 01 Advanced File Processing
 
Unix Basics Commands
Unix Basics CommandsUnix Basics Commands
Unix Basics Commands
 
Tajo Seoul Meetup-201501
Tajo Seoul Meetup-201501Tajo Seoul Meetup-201501
Tajo Seoul Meetup-201501
 
Hypertable - massively scalable nosql database
Hypertable - massively scalable nosql databaseHypertable - massively scalable nosql database
Hypertable - massively scalable nosql database
 
Hypertable
HypertableHypertable
Hypertable
 
Database Architectures and Hypertable
Database Architectures and HypertableDatabase Architectures and Hypertable
Database Architectures and Hypertable
 
Hdfs connector api
Hdfs connector apiHdfs connector api
Hdfs connector api
 
Course 102: Lecture 3: Basic Concepts And Commands
Course 102: Lecture 3: Basic Concepts And Commands Course 102: Lecture 3: Basic Concepts And Commands
Course 102: Lecture 3: Basic Concepts And Commands
 
Unix Basics For Testers
Unix Basics For TestersUnix Basics For Testers
Unix Basics For Testers
 
Python mongo db-training-europython-2011
Python mongo db-training-europython-2011Python mongo db-training-europython-2011
Python mongo db-training-europython-2011
 
Postgresql search demystified
Postgresql search demystifiedPostgresql search demystified
Postgresql search demystified
 
Hadoop Interacting with HDFS
Hadoop Interacting with HDFSHadoop Interacting with HDFS
Hadoop Interacting with HDFS
 
MongoDB 在盛大大数据量下的应用
MongoDB 在盛大大数据量下的应用MongoDB 在盛大大数据量下的应用
MongoDB 在盛大大数据量下的应用
 
Percona Live 2017 ­- Sharded cluster tutorial
Percona Live 2017 ­- Sharded cluster tutorialPercona Live 2017 ­- Sharded cluster tutorial
Percona Live 2017 ­- Sharded cluster tutorial
 
How mysql handles ORDER BY, GROUP BY, and DISTINCT
How mysql handles ORDER BY, GROUP BY, and DISTINCTHow mysql handles ORDER BY, GROUP BY, and DISTINCT
How mysql handles ORDER BY, GROUP BY, and DISTINCT
 
2015 bioinformatics python_io_wim_vancriekinge
2015 bioinformatics python_io_wim_vancriekinge2015 bioinformatics python_io_wim_vancriekinge
2015 bioinformatics python_io_wim_vancriekinge
 
Fuse'ing python for rapid development of storage efficient FS
Fuse'ing python for rapid development of storage efficient FSFuse'ing python for rapid development of storage efficient FS
Fuse'ing python for rapid development of storage efficient FS
 
HDFS_Command_Reference
HDFS_Command_ReferenceHDFS_Command_Reference
HDFS_Command_Reference
 
MongoDB & Hadoop: Flexible Hourly Batch Processing Model
MongoDB & Hadoop: Flexible Hourly Batch Processing ModelMongoDB & Hadoop: Flexible Hourly Batch Processing Model
MongoDB & Hadoop: Flexible Hourly Batch Processing Model
 
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
 

Similaire à 第2回 Hadoop 輪読会

Javase7 1641812
Javase7 1641812Javase7 1641812
Javase7 1641812
Vinay H G
 
Building Restful Web Services with Java
Building Restful Web Services with JavaBuilding Restful Web Services with Java
Building Restful Web Services with Java
Vassil Popovski
 
HBase_-_data_operaet le opérations de calciletions_final.pptx
HBase_-_data_operaet le opérations de calciletions_final.pptxHBase_-_data_operaet le opérations de calciletions_final.pptx
HBase_-_data_operaet le opérations de calciletions_final.pptx
HmadSADAQ2
 

Similaire à 第2回 Hadoop 輪読会 (20)

WhatsNewNIO2.pdf
WhatsNewNIO2.pdfWhatsNewNIO2.pdf
WhatsNewNIO2.pdf
 
Big data using Hadoop, Hive, Sqoop with Installation
Big data using Hadoop, Hive, Sqoop with InstallationBig data using Hadoop, Hive, Sqoop with Installation
Big data using Hadoop, Hive, Sqoop with Installation
 
Accessing external hadoop data sources using pivotal e xtension framework (px...
Accessing external hadoop data sources using pivotal e xtension framework (px...Accessing external hadoop data sources using pivotal e xtension framework (px...
Accessing external hadoop data sources using pivotal e xtension framework (px...
 
Hadoop HDFS
Hadoop HDFS Hadoop HDFS
Hadoop HDFS
 
5. Ввод-вывод, доступ к файловой системе
5. Ввод-вывод, доступ к файловой системе5. Ввод-вывод, доступ к файловой системе
5. Ввод-вывод, доступ к файловой системе
 
RESTful Web Services with Jersey
RESTful Web Services with JerseyRESTful Web Services with Jersey
RESTful Web Services with Jersey
 
Java 7 - short intro to NIO.2
Java 7 - short intro to NIO.2Java 7 - short intro to NIO.2
Java 7 - short intro to NIO.2
 
Big data, just an introduction to Hadoop and Scripting Languages
Big data, just an introduction to Hadoop and Scripting LanguagesBig data, just an introduction to Hadoop and Scripting Languages
Big data, just an introduction to Hadoop and Scripting Languages
 
Javase7 1641812
Javase7 1641812Javase7 1641812
Javase7 1641812
 
Jug java7
Jug java7Jug java7
Jug java7
 
PyFilesystem
PyFilesystemPyFilesystem
PyFilesystem
 
HBaseCon 2012 | HBase Coprocessors – Deploy Shared Functionality Directly on ...
HBaseCon 2012 | HBase Coprocessors – Deploy Shared Functionality Directly on ...HBaseCon 2012 | HBase Coprocessors – Deploy Shared Functionality Directly on ...
HBaseCon 2012 | HBase Coprocessors – Deploy Shared Functionality Directly on ...
 
Node.js - A practical introduction (v2)
Node.js  - A practical introduction (v2)Node.js  - A practical introduction (v2)
Node.js - A practical introduction (v2)
 
Leveraging Hadoop in your PostgreSQL Environment
Leveraging Hadoop in your PostgreSQL EnvironmentLeveraging Hadoop in your PostgreSQL Environment
Leveraging Hadoop in your PostgreSQL Environment
 
Oscon Java Testing on the Fast Lane
Oscon Java Testing on the Fast LaneOscon Java Testing on the Fast Lane
Oscon Java Testing on the Fast Lane
 
Nov. 4, 2011 o reilly webcast-hbase- lars george
Nov. 4, 2011 o reilly webcast-hbase- lars georgeNov. 4, 2011 o reilly webcast-hbase- lars george
Nov. 4, 2011 o reilly webcast-hbase- lars george
 
Gsummit apis-2012
Gsummit apis-2012Gsummit apis-2012
Gsummit apis-2012
 
Gsummit apis-2013
Gsummit apis-2013Gsummit apis-2013
Gsummit apis-2013
 
Building Restful Web Services with Java
Building Restful Web Services with JavaBuilding Restful Web Services with Java
Building Restful Web Services with Java
 
HBase_-_data_operaet le opérations de calciletions_final.pptx
HBase_-_data_operaet le opérations de calciletions_final.pptxHBase_-_data_operaet le opérations de calciletions_final.pptx
HBase_-_data_operaet le opérations de calciletions_final.pptx
 

Plus de Toshihiro Suzuki

HBaseでグラフ構造を扱う(開発中)
HBaseでグラフ構造を扱う(開発中)HBaseでグラフ構造を扱う(開発中)
HBaseでグラフ構造を扱う(開発中)
Toshihiro Suzuki
 
MySQLによってタフになる会12章
MySQLによってタフになる会12章MySQLによってタフになる会12章
MySQLによってタフになる会12章
Toshihiro Suzuki
 

Plus de Toshihiro Suzuki (10)

Apache HBaseの現在 - 火山と呼ばれたHBaseは今どうなっているのか
Apache HBaseの現在 - 火山と呼ばれたHBaseは今どうなっているのかApache HBaseの現在 - 火山と呼ばれたHBaseは今どうなっているのか
Apache HBaseの現在 - 火山と呼ばれたHBaseは今どうなっているのか
 
第25回 Hadoopソースコードリーディング 「HBase 最新情報」
第25回 Hadoopソースコードリーディング 「HBase 最新情報」第25回 Hadoopソースコードリーディング 「HBase 最新情報」
第25回 Hadoopソースコードリーディング 「HBase 最新情報」
 
HDP ハンズオンセミナー
HDP ハンズオンセミナーHDP ハンズオンセミナー
HDP ハンズオンセミナー
 
Kuduを調べてみた #dogenzakalt
Kuduを調べてみた #dogenzakaltKuduを調べてみた #dogenzakalt
Kuduを調べてみた #dogenzakalt
 
HBaseを用いたグラフDB「Hornet」の設計と運用
HBaseを用いたグラフDB「Hornet」の設計と運用HBaseを用いたグラフDB「Hornet」の設計と運用
HBaseを用いたグラフDB「Hornet」の設計と運用
 
HBase at Ameba
HBase at AmebaHBase at Ameba
HBase at Ameba
 
HBaseを用いたグラフDB「Hornet」
HBaseを用いたグラフDB「Hornet」HBaseを用いたグラフDB「Hornet」
HBaseを用いたグラフDB「Hornet」
 
HBaseでグラフ構造を扱う(開発中)
HBaseでグラフ構造を扱う(開発中)HBaseでグラフ構造を扱う(開発中)
HBaseでグラフ構造を扱う(開発中)
 
Amebaサービスのログ解析基盤
Amebaサービスのログ解析基盤Amebaサービスのログ解析基盤
Amebaサービスのログ解析基盤
 
MySQLによってタフになる会12章
MySQLによってタフになる会12章MySQLによってタフになる会12章
MySQLによってタフになる会12章
 

Dernier

Architecting Cloud Native Applications
Architecting Cloud Native ApplicationsArchitecting Cloud Native Applications
Architecting Cloud Native Applications
WSO2
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
?#DUbAI#??##{{(☎️+971_581248768%)**%*]'#abortion pills for sale in dubai@
 

Dernier (20)

Cyberprint. Dark Pink Apt Group [EN].pdf
Cyberprint. Dark Pink Apt Group [EN].pdfCyberprint. Dark Pink Apt Group [EN].pdf
Cyberprint. Dark Pink Apt Group [EN].pdf
 
Exploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with MilvusExploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with Milvus
 
Biography Of Angeliki Cooney | Senior Vice President Life Sciences | Albany, ...
Biography Of Angeliki Cooney | Senior Vice President Life Sciences | Albany, ...Biography Of Angeliki Cooney | Senior Vice President Life Sciences | Albany, ...
Biography Of Angeliki Cooney | Senior Vice President Life Sciences | Albany, ...
 
Strategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a FresherStrategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a Fresher
 
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodPolkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
 
Architecting Cloud Native Applications
Architecting Cloud Native ApplicationsArchitecting Cloud Native Applications
Architecting Cloud Native Applications
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdfRising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
 
Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024
 
MS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsMS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectors
 
Boost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdfBoost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdf
 
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 
DBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor PresentationDBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor Presentation
 
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
 
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
 
CNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In PakistanCNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In Pakistan
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 

第2回 Hadoop 輪読会

  • 1. Hadoop 3 Hadoop
  • 2. Hadoop • - - - • HDFS(Hadoop Distributed Filesystem)
  • 3. HDFS • - ‣ MB, GB, TB - ‣ - ‣ ‣
  • 4. HDFS • - ‣ - ‣ - ‣ ‣
  • 5. HDFS • - 64MB - ‣ ‣ ‣
  • 6. HDFS • - / - ( ) - ( )
  • 7. HDFS • - ( ) - ( )
  • 8. HDFS • - - -
  • 9. HDFS • - ( , ) -
  • 10. HDFS • - NameNode SecondaryNameNode
  • 11. HDFS • - open() append() write() NameNode SecondaryNameNode
  • 12. HDFS • - open() append() write() NameNode SecondaryNameNode
  • 13. HDFS • - open() append() write() NameNode SecondaryNameNode
  • 14. HDFS • - NameNode SecondaryNameNode
  • 15. HDFS • - NameNode SecondaryNameNode
  • 16. HDFS • - NameNode SecondaryNameNode
  • 17. HDFS • - NameNode SecondaryNameNode
  • 18. hadoop fs -copyFromLocal <localsrc> ... <dst> • hadoop fs -copyToLocal <src> <localdst> • hadoop fs -ls <path> • hadoop fs -mkdir <path> • hadoop fs -help
  • 19. Hadoop •hadoop fs -ls file:/// •hadoop fs -ls hdfs:/// •hadoop fs -ls hftp:/// URI java local file org.apache.hadoop.fs.LocalFileSystem HDFS hdfs org.apache.hadoop.hdfs.DistributedFileSystem HFTP hftp org.apache.hadoop.hdfs.HftpFileSystem HSFTP hsftp org.apache.hadoop.hdfs.HsftpFileSystem HAR har org.apache.hadoop.fs.HarFileSystem KFS kfs org.apache.hadoop.fs.kfs.KosmosFileSystem FTP ftp org.apache.hadoop.fs.ftp.FTPFileSystem S3 s3n org.apache.hadoop.fs.s3native.NativeS3FileSystem ( ) S3 s3 org.apache.hadoop.fs.s3.S3FileSystem ( )
  • 20. Thrift • C - libhdfs • FUSE(FileSystem in Userspace) • WebDAV • - HTTP, FTP( )
  • 21. Java • Hadoop URL public class URLCat { static { URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory()); } public static void main(String[] args) throws Exception { InputStream in = null; try { in = new URL(args[0]).openStream(); IOUtils.copyBytes(in, System.out, 4096, false); } finally { IOUtils.closeStream(in); } } }
  • 22. Java • FileSystem API public class FileSystemCat { public static void main(String[] args) throws Exception { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); InputStream in = null; try { in = fs.open(new Path(uri)); IOUtils.copyBytes(in, System.out, 4096, false); } finally { IOUtils.closeStream(in); } } }
  • 23. Java • FSDataInputStream public class FSDataInputStream extends DataInputStream implements Seekable, PositionedReadable { // } public interface Seekable { void seek(long pos) throws IOException; long getPos() throws IOException; boolean seekToNewSource(long targetPos) throws IOException; }
  • 24. Java • FSDataInputStream public class FileSystemDoubleCat { public static void main(String[] args) throws Exception { String uri = args[0]; FileSystem fs = FileSystem.get(URI.create(uri), new Configuration()); FSDataInputStream in = null; try { in = fs.open(new Path(uri)); IOUtils.copyBytes(in, System.out, 4096, false); in.seek(0); IOUtils.copyBytes(in, System.out, 4096, false); } finally { IOUtils.closeStream(in); } } }
  • 25. Java • FSDataInputStream public class FSDataInputStream extends DataInputStream implements Seekable, PositionedReadable { // } public interface PositionedReadable { int read(long position, byte buffer[], int offset, int length) throws IOException; void readFully(long position, byte buffer[], int offset, int length) throws IOException; void readFully(long position, byte buffer[]) throws IOException; }
  • 26. Java • - public FSDataOutputStream create(Path f) throws IOException - public FSDataOutputStream append(Path f) throws IOException
  • 27. Java • FSDataOutputStream - FileSystem create(), append() - public class FSDataOutputStream extends DataOutputStream implements Syncable { public long getPos() throws IOException { // } // }
  • 28. Java • - public boolean mkdirs(Path f) throws IOException
  • 29. Java • FileStatus status = fs.getFileStatus(new Path("hdfs://localhost/hogehoge")); status.isDir(); // status.getLen(); // status.getModificationTime(); // status.getReplication(); // status.getBlockSize(); // ( 64MB) status.getOwner(); // status.getGroup(); // status.getPermission().toString(); //
  • 30. Java • - public FileStatus[] listStatus(Path f) throws IOException; - public FileStatus[] listStatus(Path f, PathFilter filter) throws IOException; - public FileStatus[] listStatus(Path[] files) throws IOException; - public FileStatus[] listStatus(Path[] files, PathFilter filter) throws IOException;
  • 31. Java • public class ListStatus { public static void main(String[] args) throws Exception { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path[] paths = new Path[args.length]; for (int i = 0; i < paths.length; i++) { paths[i] = new Path(args[i]); } FileStatus[] status = fs.listStatus(paths); for (FileStatus stat : status) { System.out.println(stat.getPath().toUri().getPath()); } } }
  • 32. Java • - public FileStatus[] globStatus(Path pathPattern) throws IOException - public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException
  • 33. Java • [ab] {a,b} [^ab] {a,b} {a,b} (a b ) [a-b] a b {a,b} (a b ) a b [^a-b] {a,b} a b \c c c
  • 34. Java • public interface PathFilter { boolean accept(Path path); }
  • 35. Java • public class RegexExcludePathFilter implements PathFilter { private final String regex; public RegexExcludePathFilter(String regex) { this.regex = regex; } @Override public boolean accept(Path path) { return !path.toString().matches(regex); } } fs.globStatus(new Path("/2007/*/*"), new RegexExcludePathFilter("^.*/2007/12/31$"));
  • 36. Java • - public boolean delete(Path f, boolean recursive) throws IOException;
  • 37. • HDFS DistributedFileSystem NameNode FSDataInputStream DataNode1 DataNode2 DataNode3 DataNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 38. open(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode FSDataInputStream DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 39. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode FSDataInputStream DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 40. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode aaa.txt : block1, block2, block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DataNode1 DataNode2 DataNode3 DataNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 41. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 42. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 43. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 44. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 45. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 46. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 47. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 48. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 49. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 close() block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 50. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 51. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 52. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 53. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 54. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 55. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 56. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 57. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 58. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 59. “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • 60. - - -
  • 61. - - 9.1.1 - (/d1/r1/n1, /d1/r1/n1) = 0 d1 d2 (/d1/r1/n1, /d1/r1/n2) = 2 (/d1/r1/n1, /d1/r2/n3) = 4 r1 r2 r3 (/d1/r1/n1, /d2/r3/n4) = 6 n1 n2 n3 n4
  • 62. • HDFS DistributedFileSystem NameNode FSDataOutputStream DataNode1 DataNode2 DataNode3
  • 63. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode FSDataOutputStream DateNode1 DateNode2 DateNode3
  • 64. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode FSDataOutputStream DateNode1 DateNode2 DateNode3
  • 65. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode FSDataOutputStream DateNode1 DateNode2 DateNode3
  • 66. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DateNode1 DateNode2 DateNode3
  • 67. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream ack DateNode1 DateNode2 DateNode3
  • 68. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream ack DateNode1 DateNode2 DateNode3
  • 69. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream ack DateNode1 DateNode2 DateNode3
  • 70. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3
  • 71. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3
  • 72. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3
  • 73. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3
  • 74. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1
  • 75. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1
  • 76. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • 77. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • 78. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • 79. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • 80. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • 81. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • 82. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() close() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • 83. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() close() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • 84. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3
  • 85. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block1
  • 86. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1
  • 87. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1
  • 88. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1
  • 89. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1
  • 90. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1
  • 91. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1
  • 92. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • 93. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • 94. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • 95. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • 96. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • 97. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block2
  • 98. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block2
  • 99. create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block2 block2
  • 100. - dfs.replication.min( 1) - (dfs.replication 3) -
  • 101. 1. ( ) 2. 3. 4. ( )
  • 102. - fs.create(new Path("p")); - OutputStream out = fs.create(new Path("p")); out.write("content".getBytes("UTF-8")); out.flush();
  • 103. - FSDataOutputStream sync() - sync() close() FSDataOutputStream out = fs.create(new Path("p")); out.write("content".getBytes("UTF-8")); out.flush(); out.sync();
  • 104. - ‣ sync() ‣ sync() ‣ sync()
  • 105. distcp • 2 HDFS - hadoop distcp hdfs://namenode1/foo hdfs://namenode2/bar - hadoop distcp -overwrite hdfs://namenode1/foo hdfs://namenode2/bar/foo - hadoop distcp -update hdfs://namenode1/foo hdfs://namenode2/bar/foo • MapReduce - 256MB (1GB 4 ) - map ( ) - map 1 (tasktracker) 20map
  • 106. Hadoop • • HAR • hadoop archive -archiveName files.har /my/files /my
  • 107. Hadoop • - ( ) - - HAR MapReduce ( 7.2.1.4 CombineFileInputFormat )
  • 108. HDFS - - - - • distcp • HAR