准备:
确保hadoop2.2.0集群正常执行
1.eclipse中建立javaproject,导入hadoop2.2.0相关jar包
2.在src根文件夹下拷入log4j.properties,通过log4j查看具体日志
log4j.rootLogger=debug, stdout, R
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=firestorm.log
log4j.appender.R.MaxFileSize=100KB
log4j.appender.R.MaxBackupIndex=1
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%p %t %c - %m%n
log4j.logger.com.codefutures=DEBUG
3.拷入一个可运行的hadoop程序,我用的是一个HdfsDAO。能够先保证HDFS操作能运行
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;
public class HdfsDAO {
private static final String HDFS = "hdfs://192.168.0.160:9000/";
public HdfsDAO(Configuration conf) {
this(HDFS, conf);
}
public HdfsDAO(String hdfs, Configuration conf) {
this.hdfsPath = hdfs;
this.conf = conf;
}
private String hdfsPath;
private Configuration conf;
public static void main(String[] args) throws IOException {
JobConf conf = config();
HdfsDAO hdfs = new HdfsDAO(conf);
// hdfs.copyFile("datafile/item.csv", "/tmp/new");
// hdfs.ls("/tmp/new");
hdfs.ls("/");
}
public static JobConf config(){
JobConf conf = new JobConf(HdfsDAO.class);
conf.setJobName("HdfsDAO");
conf.addResource("classpath:/hadoop/core-site.xml");
conf.addResource("classpath:/hadoop/hdfs-site.xml");
conf.addResource("classpath:/hadoop/mapred-site.xml");
return conf;
}
public void mkdirs(String folder) throws IOException {
Path path = new Path(folder);
FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
if (!fs.exists(path)) {
fs.mkdirs(path);
System.out.println("Create: " + folder);
}
fs.close();
}
public void rmr(String folder) throws IOException {
Path path = new Path(folder);
FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
fs.deleteOnExit(path);
System.out.println("Delete: " + folder);
fs.close();
}
public void ls(String folder) throws IOException {
Path path = new Path(folder);
FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
FileStatus[] list = fs.listStatus(path);
System.out.println("ls: " + folder);
System.out.println("==========================================================");
for (FileStatus f : list) {
System.out.printf("name: %s, folder: %s, size: %d\n", f.getPath(), f.isDir(), f.getLen());
}
System.out.println("==========================================================");
fs.close();
}
public void createFile(String file, String content) throws IOException {
FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
byte[] buff = content.getBytes();
FSDataOutputStream os = null;
try {
os = fs.create(new Path(file));
os.write(buff, 0, buff.length);
System.out.println("Create: " + file);
} finally {
if (os != null)
os.close();
}
fs.close();
}
public void copyFile(String local, String remote) throws IOException {
FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
fs.copyFromLocalFile(new Path(local), new Path(remote));
System.out.println("copy from: " + local + " to " + remote);
fs.close();
}
public void download(String remote, String local) throws IOException {
Path path = new Path(remote);
FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
fs.copyToLocalFile(path, new Path(local));
System.out.println("download: from" + remote + " to " + local);
fs.close();
}
public void cat(String remoteFile) throws IOException {
Path path = new Path(remoteFile);
FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
FSDataInputStream fsdis = null;
System.out.println("cat: " + remoteFile);
try {
fsdis =fs.open(path);
IOUtils.copyBytes(fsdis, System.out, 4096, false);
} finally {
IOUtils.closeStream(fsdis);
fs.close();
}
}
public void location() throws IOException {
// String folder = hdfsPath + "create/";
// String file = "t2.txt";
// FileSystem fs = FileSystem.get(URI.create(hdfsPath), new
// Configuration());
// FileStatus f = fs.getFileStatus(new Path(folder + file));
// BlockLocation[] list = fs.getFileBlockLocations(f, 0, f.getLen());
//
// System.out.println("File Location: " + folder + file);
// for (BlockLocation bl : list) {
// String[] hosts = bl.getHosts();
// for (String host : hosts) {
// System.out.println("host:" + host);
// }
// }
// fs.close();
}
}
4.执行HdfsDAO
报错:
java.io.IOException: HADOOP_HOME or hadoop.home.dir are not set.
at org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:225)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:250)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76)
at org.apache.hadoop.conf.Configuration.getTrimmedStrings(Configuration.java:1546)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:519)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:453)
at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:136)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2433)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:88)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2467)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2449)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367)
at HdfsDAO.copyFile(HdfsDAO.java:94)
at HdfsDAO.main(HdfsDAO.java:34)
ERROR - Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:278)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:300)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:293)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76)
at org.apache.hadoop.conf.Configuration.getTrimmedStrings(Configuration.java:1546)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:519)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:453)
at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:136)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2433)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:88)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2467)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2449)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367)
at HdfsDAO.copyFile(HdfsDAO.java:94)
at HdfsDAO.main(HdfsDAO.java:34)
解决:
首先。在win7中环境变量设置HADOOP_HOME,指向win7中的hadoop2.2.0根文件夹。
将其复制到 $HADOOP_HOME/bin 下。
5.又一次执行,报错
Exception in thread "main" java.net.ConnectException: Call From WIN-CMM62V9I3VG/192.168.0.40 to singlehadoop:9000 failed on connection exception: java.net.ConnectException: Connection refused: no further information; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
at java.lang.reflect.Constructor.newInstance(Unknown Source)
at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:783)
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:730)
at org.apache.hadoop.ipc.Client.call(Client.java:1351)
at org.apache.hadoop.ipc.Client.call(Client.java:1300)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
at com.sun.proxy.$Proxy9.getListing(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy9.getListing(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getListing(ClientNamenodeProtocolTranslatorPB.java:482)
at org.apache.hadoop.hdfs.DFSClient.listPaths(DFSClient.java:1660)
at org.apache.hadoop.hdfs.DFSClient.listPaths(DFSClient.java:1643)
at org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:640)
at org.apache.hadoop.hdfs.DistributedFileSystem.access$600(DistributedFileSystem.java:92)
at org.apache.hadoop.hdfs.DistributedFileSystem$14.doCall(DistributedFileSystem.java:702)
at org.apache.hadoop.hdfs.DistributedFileSystem$14.doCall(DistributedFileSystem.java:698)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:698)
at HdfsDAO.ls(HdfsDAO.java:69)
at HdfsDAO.main(HdfsDAO.java:36)
Caused by: java.net.ConnectException: Connection refused: no further information
at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
at sun.nio.ch.SocketChannelImpl.finishConnect(Unknown Source)
at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:529)
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:493)
at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:547)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:642)
at org.apache.hadoop.ipc.Client$Connection.access$2600(Client.java:314)
at org.apache.hadoop.ipc.Client.getConnection(Client.java:1399)
at org.apache.hadoop.ipc.Client.call(Client.java:1318)
... 21 more
DEBUG - Stopping client
解决:
发现core-site.xml中
<property>
<name>fs.default.name</name>
<value>hdfs://singlehadoop:8020</value>
</property>
port是8020,不是9000,所以改动程序中下面语句中的port为8020
private static final String HDFS = "hdfs://192.168.0.160:8020/";
6.又一次启动,顺利运行
DEBUG - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginSuccess with annotation @org.apache.hadoop.metrics2.annotation.Metric(valueName=Time, about=, value=[Rate of successful kerberos logins and latency (milliseconds)], always=false, type=DEFAULT, sampleName=Ops)
DEBUG - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginFailure with annotation @org.apache.hadoop.metrics2.annotation.Metric(valueName=Time, about=, value=[Rate of failed kerberos logins and latency (milliseconds)], always=false, type=DEFAULT, sampleName=Ops)
DEBUG - UgiMetrics, User and group related metrics
DEBUG - Kerberos krb5 configuration not found, setting default realm to empty
DEBUG - Creating new Groups object
DEBUG - Trying to load the custom-built native-hadoop library...
DEBUG - Failed to load native-hadoop with error: java.lang.UnsatisfiedLinkError: no hadoop in java.library.path
DEBUG - java.library.path=D:\Program Files\Java\jre7\bin;C:\Windows\Sun\Java\bin;C:\Windows\system32;C:\Windows;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files (x86)\Intel\iCLS Client\;C:\Program Files\Intel\iCLS Client\;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Program Files\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\OpenCL SDK\3.0\bin\x86;C:\Program Files (x86)\Intel\OpenCL SDK\3.0\bin\x64;D:\Program Files\Java\jdk1.7.0_40\bin;D:\Program Files\Java\jdk1.7.0_40\jre\bin;D:\Program Files\TortoiseSVN\bin;D:\Program Files (x86)\ant\bin;D:\Program Files\maven3\bin;.
WARN - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
DEBUG - Falling back to shell based
DEBUG - Group mapping impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping
DEBUG - Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback; cacheTimeout=300000
DEBUG - hadoop login
DEBUG - hadoop login commit
DEBUG - using local user:NTUserPrincipal: Administrator
DEBUG - UGI loginUser:Administrator (auth:SIMPLE)
DEBUG - dfs.client.use.legacy.blockreader.local = false
DEBUG - dfs.client.read.shortcircuit = false
DEBUG - dfs.client.domain.socket.data.traffic = false
DEBUG - dfs.domain.socket.path =
DEBUG - StartupProgress, NameNode startup progress
DEBUG - multipleLinearRandomRetry = null
DEBUG - rpcKind=RPC_PROTOCOL_BUFFER, rpcRequestWrapperClass=class org.apache.hadoop.ipc.ProtobufRpcEngine$RpcRequestWrapper, rpcInvoker=org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker@1afde4a3
DEBUG - Both short-circuit local reads and UNIX domain socket are disabled.
DEBUG - The ping interval is 60000 ms.
DEBUG - Connecting to /192.168.0.160:8020
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: starting, having connections 1
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator sending #0
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator got value #0
DEBUG - Call: getListing took 136ms
ls: /
==========================================================
name: hdfs://192.168.0.160:8020/data, folder: true, size: 0
name: hdfs://192.168.0.160:8020/fulong, folder: true, size: 0
name: hdfs://192.168.0.160:8020/test, folder: true, size: 0
name: hdfs://192.168.0.160:8020/tmp, folder: true, size: 0
name: hdfs://192.168.0.160:8020/user, folder: true, size: 0
name: hdfs://192.168.0.160:8020/workspace, folder: true, size: 0
==========================================================
DEBUG - Stopping client
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: closed
DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: stopped, remaining connections 0
以下尝试跑一个hadoop2.2.0自带的wordcount程序
1.首先还是将wordcount的源代码拷入project的src
执行报错:
ERROR - PriviledgedActionException as:Administrator (auth:SIMPLE) cause:java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
Exception in thread "main" java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:120)
at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:82)
at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:75)
at org.apache.hadoop.mapreduce.Job$9.run(Job.java:1238)
at org.apache.hadoop.mapreduce.Job$9.run(Job.java:1234)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Unknown Source)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapreduce.Job.connect(Job.java:1233)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1262)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286)
at org.apache.hadoop.examples.WordCount.main(WordCount.java:84)
DEBUG - Stopping client
解决:
拷入两个jar包
hadoop-mapreduce-client-common-2.2.0.jar
hadoop-mapreduce-client-jobclient-2.2.0.jar
2.再次执行,以上错误解决,出现新的报错
Exception in thread "main" DEBUG - Stopping client
DEBUG - IPC Client (152472387) connection to singlehadoop/192.168.0.160:8020 from Administrator: closed
DEBUG - IPC Client (152472387) connection to singlehadoop/192.168.0.160:8020 from Administrator: stopped, remaining connections 0
java.lang.NoClassDefFoundError: org/apache/hadoop/yarn/util/Apps
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(Unknown Source)
at java.security.SecureClassLoader.defineClass(Unknown Source)
at java.net.URLClassLoader.defineClass(Unknown Source)
at java.net.URLClassLoader.access$100(Unknown Source)
at java.net.URLClassLoader$1.run(Unknown Source)
at java.net.URLClassLoader$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:93)
at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:157)
at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:636)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:430)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Unknown Source)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286)
at org.apache.hadoop.examples.WordCount.main(WordCount.java:84)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.yarn.util.Apps
at java.net.URLClassLoader$1.run(Unknown Source)
at java.net.URLClassLoader$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
... 24 more
解决:
把share\hadoop\yarn下的jar包全引入project
3.再次执行。以上错误解决,出现新的报错
Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:435)
at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:977)
at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:177)
at org.apache.hadoop.util.DiskChecker.checkDirAccess(DiskChecker.java:164)
at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:98)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:285)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:344)
at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:150)
at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:131)
at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:115)
at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:131)
at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:157)
at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:636)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:430)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Unknown Source)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286)
at org.apache.hadoop.examples.WordCount.main(WordCount.java:84)
解决:
这次直接来个彻底的。把整个下载的bin文件夹覆盖了$HADOOP_HOME\bin
而且在环境变量的PATH中加上%HADOOP_HOME%\bin
3.再次执行,以上错误解决,出现新的报错
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=Administrator, access=WRITE, inode="/workspace/wordcount":casliyang:supergroup:drwxr-xr-x
解决:
在hdfs-site.xml中增加
<property> <name>dfs.permissions</name>
<value>false</value>
</property>
4.再次执行,程序顺利执行
总结:
1.将hadoop-2.2.0.tar.gz解压一份放到win7的程序文件夹下,注意hadoop版本号一定要和集群的版本号一致,然后拷贝集群中的下面几个配置文件覆盖到win7本地的相应文件夹:
core-site.xml
hdfs-site.xml
mapred-site.xml
yarn-site.xml
2.在eclipse中新建javaproject后,最好直接引入全部hadoop2.2.0相关的jar包,包含下面几个文件夹下的jar包:
share\hadoop\common
share\hadoop\hdfs
share\hadoop\mapreduce
share\hadoop\yarn
注:假设使用hadoop的eclipse插件,就无需该步骤,但2.2.0的插件需自行编译,编译过程參见我的还有一篇博客:
3.须要在win7中环境变量设置%HADOOP_HOME%。并把%HADOOP_HOME%\bin增加PATH环境变量中
5.注意參考hadoop集群的配置。Eclipse中的程序配置“hadoop地址:port”的代码需和hadoop集群的配置一致
<property>
<name>fs.default.name</name>
<value>hdfs://singlehadoop:8020</value>
</property>
6.在hadoop集群的hdfs-site.xml中增加例如以下属性。关闭权限校验。
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>