Contenu connexe Similaire à 第2回 Hadoop 輪読会 (20) Plus de Toshihiro Suzuki (10) 第2回 Hadoop 輪読会2. Hadoop
•
-
-
-
• HDFS(Hadoop Distributed Filesystem)
3. HDFS
•
-
‣ MB, GB, TB
-
‣
-
‣
‣
10. HDFS
•
-
NameNode SecondaryNameNode
11. HDFS
•
-
open()
append()
write()
NameNode SecondaryNameNode
12. HDFS
•
-
open()
append()
write()
NameNode SecondaryNameNode
13. HDFS
•
-
open()
append()
write()
NameNode SecondaryNameNode
14. HDFS
•
-
NameNode SecondaryNameNode
15. HDFS
•
-
NameNode SecondaryNameNode
16. HDFS
•
-
NameNode SecondaryNameNode
17. HDFS
•
-
NameNode SecondaryNameNode
18. • hadoop fs -copyFromLocal <localsrc> ... <dst>
• hadoop fs -copyToLocal <src> <localdst>
• hadoop fs -ls <path>
• hadoop fs -mkdir <path>
• hadoop fs -help
19. Hadoop
•hadoop fs -ls file:///
•hadoop fs -ls hdfs:///
•hadoop fs -ls hftp:///
URI
java
local file org.apache.hadoop.fs.LocalFileSystem
HDFS hdfs org.apache.hadoop.hdfs.DistributedFileSystem
HFTP hftp org.apache.hadoop.hdfs.HftpFileSystem
HSFTP hsftp org.apache.hadoop.hdfs.HsftpFileSystem
HAR har org.apache.hadoop.fs.HarFileSystem
KFS kfs org.apache.hadoop.fs.kfs.KosmosFileSystem
FTP ftp org.apache.hadoop.fs.ftp.FTPFileSystem
S3
s3n org.apache.hadoop.fs.s3native.NativeS3FileSystem
( )
S3
s3 org.apache.hadoop.fs.s3.S3FileSystem
( )
20. • Thrift
• C
- libhdfs
• FUSE(FileSystem in Userspace)
• WebDAV
•
- HTTP, FTP( )
21. Java
• Hadoop URL
/**
 * Prints the resource named by the first command-line argument to standard output,
 * opening it through {@code java.net.URL}.
 */
public class URLCat {

    /** Buffer size handed to IOUtils.copyBytes. */
    private static final int BUFFER_SIZE = 4096;

    // Registers an FsUrlStreamHandlerFactory with java.net.URL once, when this class is loaded.
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * @param args args[0] is the URL to read
     * @throws Exception if the URL cannot be opened or copied
     */
    public static void main(String[] args) throws Exception {
        dump(args[0]);
    }

    /** Opens the given URL, copies its bytes to System.out, and closes the stream afterwards. */
    private static void dump(String location) throws Exception {
        InputStream source = null;
        try {
            source = new URL(location).openStream();
            // NOTE(review): the trailing false presumably tells copyBytes not to close the streams — confirm.
            IOUtils.copyBytes(source, System.out, BUFFER_SIZE, false);
        } finally {
            // Reached on every exit path, including when openStream() throws and source is still null.
            IOUtils.closeStream(source);
        }
    }
}
22. Java
• FileSystem API
public class FileSystemCat {
public static void main(String[] args) throws Exception {
String uri = args[0];
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(uri), conf);
InputStream in = null;
try {
in = fs.open(new Path(uri));
IOUtils.copyBytes(in, System.out, 4096, false);
} finally {
IOUtils.closeStream(in);
}
}
23. Java
• FSDataInputStream
/**
 * Declaration-only excerpt: shows that the stream type extends DataInputStream
 * and implements both Seekable and PositionedReadable.
 */
public class FSDataInputStream extends DataInputStream
implements Seekable, PositionedReadable {
// class body not shown in this excerpt
}
/**
 * Interface declaring three position-related operations (seek, getPos, seekToNewSource).
 * Every method may throw IOException.
 */
public interface Seekable {
// NOTE(review): presumably repositions the stream to offset pos — confirm against the Hadoop Javadoc.
void seek(long pos) throws IOException;
// NOTE(review): presumably reports the current offset — confirm against the Hadoop Javadoc.
long getPos() throws IOException;
// NOTE(review): the meaning of the boolean result is not visible here — confirm against the Hadoop Javadoc.
boolean seekToNewSource(long targetPos) throws IOException;
}
24. Java
• FSDataInputStream
/**
 * Writes the file named by the first command-line argument to standard output twice,
 * calling seek(0) on the same stream between the two copies.
 */
public class FileSystemDoubleCat {

    /** Buffer size handed to IOUtils.copyBytes. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * @param args args[0] is the URI of the file to print
     * @throws Exception if resolving the file system, opening, seeking, or copying fails
     */
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        URI location = URI.create(uri);
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(location, conf);

        FSDataInputStream stream = null;
        try {
            stream = fs.open(new Path(uri));
            for (int pass = 0; pass < 2; pass++) {
                if (pass > 0) {
                    // Go back to offset 0 before the second copy.
                    stream.seek(0);
                }
                IOUtils.copyBytes(stream, System.out, BUFFER_SIZE, false);
            }
        } finally {
            // Reached on every exit path, including when open() throws and stream is still null.
            IOUtils.closeStream(stream);
        }
    }
}
25. Java
• FSDataInputStream
/**
 * Declaration-only excerpt: shows that the stream type extends DataInputStream
 * and implements both Seekable and PositionedReadable.
 */
public class FSDataInputStream extends DataInputStream
implements Seekable, PositionedReadable {
// class body not shown in this excerpt
}
/**
 * Interface declaring read operations that take an explicit position argument.
 * Every method may throw IOException.
 */
public interface PositionedReadable {
// NOTE(review): presumably returns the number of bytes placed into buffer starting at offset — confirm against the Hadoop Javadoc.
int read(long position, byte buffer[], int offset, int length)
throws IOException;
// Same parameters as read(...) but returns nothing.
// NOTE(review): presumably reads exactly length bytes or fails — confirm against the Hadoop Javadoc.
void readFully(long position, byte buffer[], int offset, int length)
throws IOException;
// Overload without offset/length.
// NOTE(review): presumably fills the whole buffer — confirm against the Hadoop Javadoc.
void readFully(long position, byte buffer[]) throws IOException;
}
26. Java
•
- public FSDataOutputStream create(Path f)
throws IOException
- public FSDataOutputStream append(Path f)
throws IOException
27. Java
• FSDataOutputStream
- FileSystem create(), append()
-
/**
 * Excerpt of the output stream type: it extends DataOutputStream, implements Syncable,
 * and declares getPos(). Bodies are elided.
 */
public class FSDataOutputStream extends DataOutputStream
implements Syncable {
/** Returns a long position value; may throw IOException. */
public long getPos() throws IOException {
// method body not shown in this excerpt
}
// remaining members not shown in this excerpt
}
28. Java
•
- public boolean mkdirs(Path f) throws IOException
29. Java
•
// Fetches the FileStatus of a single path and calls each of its accessors.
// NOTE(review): the per-line descriptions below follow the accessor names — confirm against the FileStatus Javadoc.
FileStatus status = fs.getFileStatus(new Path("hdfs://localhost/hogehoge"));
status.isDir(); // whether the path is a directory
status.getLen(); // file length (presumably in bytes)
status.getModificationTime(); // last modification time
status.getReplication(); // replication factor
status.getBlockSize(); // block size (the slide notes 64MB, presumably the default)
status.getOwner(); // owner
status.getGroup(); // group
status.getPermission().toString(); // permission, rendered as a string
30. Java
•
- public FileStatus[] listStatus(Path f) throws IOException;
- public FileStatus[] listStatus(Path f, PathFilter filter)
throws IOException;
- public FileStatus[] listStatus(Path[] files)
throws IOException;
- public FileStatus[] listStatus(Path[] files, PathFilter filter)
throws IOException;
31. Java
•
/**
 * Lists the entries found under every path given on the command line and prints
 * the path component of each entry's URI, one per line.
 */
public class ListStatus {
    /**
     * @param args paths to list; args[0] is also used to resolve the FileSystem
     * @throws Exception if resolving the file system or listing fails
     */
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);

        // Wrap every argument (including the first) in a Path.
        Path[] targets = new Path[args.length];
        int next = 0;
        for (String arg : args) {
            targets[next++] = new Path(arg);
        }

        FileStatus[] entries = fs.listStatus(targets);
        for (int i = 0; i < entries.length; i++) {
            String printable = entries[i].getPath().toUri().getPath();
            System.out.println(printable);
        }
    }
}
32. Java
•
- public FileStatus[] globStatus(Path pathPattern) throws IOException
- public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException
33. Java
•
[ab] {a,b}
[^ab] {a,b}
{a,b} (a b )
[a-b]
a b
{a,b} (a b ) a b
[^a-b]
{a,b} a b
\c c c
34. Java
•
/**
 * Single-method interface: accept takes a Path and returns a boolean.
 * The listStatus and globStatus overloads shown in this deck take a PathFilter as their filter argument.
 */
public interface PathFilter {
// NOTE(review): presumably true means the path is kept — confirm against the Hadoop Javadoc.
boolean accept(Path path);
}
35. Java
•
/**
 * PathFilter that rejects every path whose string form fully matches a regular expression
 * and accepts all others.
 */
public class RegexExcludePathFilter implements PathFilter {
    // Compiled once here instead of on every accept() call, which is what String.matches would do.
    // Fully qualified so the snippet needs no extra import line.
    private final java.util.regex.Pattern excluded;

    /**
     * @param regex regular expression for paths to exclude; it must match the whole path string
     * @throws java.util.regex.PatternSyntaxException if regex is not a valid pattern
     *         (reported at construction rather than on the first accept call)
     */
    public RegexExcludePathFilter(String regex) {
        this.excluded = java.util.regex.Pattern.compile(regex);
    }

    /**
     * @param path candidate path
     * @return false when path.toString() matches the exclusion pattern in full, true otherwise
     */
    @Override
    public boolean accept(Path path) {
        return !excluded.matcher(path.toString()).matches();
    }
}
fs.globStatus(new Path("/2007/*/*"), new RegexExcludePathFilter("^.*/2007/12/31$"));
36. Java
•
- public boolean delete(Path f, boolean recursive)
throws IOException;
37. •
HDFS DistributedFileSystem NameNode
FSDataInputStream
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
38. •
open(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
FSDataInputStream
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
39. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
FSDataInputStream
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
40. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
41. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
42. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
43. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
44. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
45. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
46. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
47. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
48. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
49. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
close() block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
50. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
51. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
52. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
53. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
54. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
55. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
56. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
57. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
58. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
59. •
“/aaa.txt”
open(new Path(“/aaa.txt”)) ( )
HDFS DistributedFileSystem NameNode
block1 : DataNode1, DataNode3
read()
block2 : DataNode3, DataNode4 aaa.txt : block1, block2, block3, block4
block1 : DataNode1, DataNode3
block2 : DataNode3, DataNode4
FSDataInputStream block3 : DataNode2, DataNode3
block4 : DataNode1, DataNode2
DataNode1 DataNode2 DataNode3 DataNode4
block1 block3 block1 block2
block4 block4 block2 block3
61. •
-
- 9.1.1
-
(/d1/r1/n1, /d1/r1/n1) = 0
d1 d2 (/d1/r1/n1, /d1/r1/n2) = 2
(/d1/r1/n1, /d1/r2/n3) = 4
r1 r2 r3
(/d1/r1/n1, /d2/r3/n4) = 6
n1 n2 n3 n4
62. •
HDFS DistributedFileSystem NameNode
FSDataOutputStream
DataNode1 DataNode2 DataNode3
63. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
FSDataOutputStream
DataNode1 DataNode2 DataNode3
64. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
FSDataOutputStream
DataNode1 DataNode2 DataNode3
65. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
FSDataOutputStream
DataNode1 DataNode2 DataNode3
66. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataNode1 DataNode2 DataNode3
67. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
ack
DataNode1 DataNode2 DataNode3
68. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
ack
DataNode1 DataNode2 DataNode3
69. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
ack
DataNode1 DataNode2 DataNode3
70. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
71. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
72. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
73. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
74. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1
75. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1
76. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1 block1
77. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1 block1
78. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1 block1
79. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1 block1
80. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1 block1
81. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1 block1
82. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
close()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1 block1
83. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
close()
FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1 block1
84. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
85. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1
86. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1
87. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1
88. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block1 block1
89. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block1
90. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block1
91. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block1
92. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block1 block2
93. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block1 block2
94. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block1 block2
95. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block1 block2
96. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block1 block2
97. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block2
98. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block2
99. •
create(new Path(“/aaa.txt”))
HDFS DistributedFileSystem NameNode
write()
FSDataOutputStream
DataStreamer
ack
DataNode1 DataNode2 DataNode3
block2
block1 block2 block2
100. •
- dfs.replication.min( 1)
- (dfs.replication 3)
-
101. •
1. ( )
2.
3.
4. ( )
102. •
-
fs.create(new Path("p"));
-
OutputStream out = fs.create(new Path("p"));
out.write("content".getBytes("UTF-8"));
out.flush();
103. •
- FSDataOutputStream sync()
- sync() close()
FSDataOutputStream out = fs.create(new Path("p"));
out.write("content".getBytes("UTF-8"));
out.flush();
out.sync();
104. •
-
‣ sync()
‣ sync()
‣ sync()
105. distcp
• 2 HDFS
- hadoop distcp hdfs://namenode1/foo hdfs://namenode2/bar
- hadoop distcp -overwrite hdfs://namenode1/foo hdfs://namenode2/bar/foo
- hadoop distcp -update hdfs://namenode1/foo hdfs://namenode2/bar/foo
• MapReduce
- 256MB (1GB 4 )
- map (
)
- map 1 (tasktracker) 20map
106. Hadoop
•
• HAR
• hadoop archive -archiveName files.har /my/files /my
107. Hadoop
•
-
(
)
-
- HAR MapReduce
( 7.2.1.4
CombineFileInputFormat )
108. • HDFS
-
-
-
-
• distcp
• HAR