/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import javax.net.SocketFactory;
import javax.security.auth.login.LoginException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.OpenFileInfo;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem.DiskStatus;
import org.apache.hadoop.hdfs.metrics.DFSClientMetrics;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockChecksumHeader;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlockWithFileName;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.LocatedBlockWithMetaInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocksWithMetaInfo;
import org.apache.hadoop.hdfs.protocol.LocatedDirectoryListing;
import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.ProtocolCompatible;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryProxy;
import org.apache.hadoop.ipc.Client;
import org.apache.hadoop.ipc.ProtocolProxy;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.ScriptBasedMapping;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;

/********************************************************
 * DFSClient can connect to a Hadoop Filesystem and
 * perform basic file tasks.  It uses the ClientProtocol
 * to communicate with a NameNode daemon, and connects
 * directly to DataNodes to read/write block data.
 *
 * Hadoop DFS users should obtain an instance of
 * DistributedFileSystem, which uses DFSClient to handle
 * filesystem tasks.
 *
 ********************************************************/
public class DFSClient implements FSConstants, java.io.Closeable {
  public static final Log LOG = LogFactory.getLog(DFSClient.class);
  public static final int MAX_BLOCK_ACQUIRE_FAILURES = 3;
  static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB
  static final long NUM_BYTES_CHECK_READ_SPEED = 128 * 1024;
  static byte[] emptyByteArray = new byte[0];

  public ClientProtocol namenode;
  private ClientProtocol rpcNamenode;
  // Namenode proxy that supports method-based compatibility
  public ProtocolProxy<ClientProtocol> namenodeProtocolProxy = null;
  public Object namenodeProxySyncObj = new Object();
  final UnixUserGroupInformation ugi;
  volatile boolean clientRunning = true;
  static Random r = new Random();
  final String clientName;
  final LeaseChecker leasechecker;
  Configuration conf;
  long defaultBlockSize;
  private short defaultReplication;
  SocketFactory socketFactory;
  int socketTimeout;
  int socketReadExtentionTimeout;
  int datanodeWriteTimeout;
  int datanodeWriteExtentionTimeout;
  int timeoutValue;  // read timeout for the socket
  final int writePacketSize;
  final long minReadSpeedBps;
  final FileSystem.Statistics stats;
  int maxBlockAcquireFailures;
  final int hdfsTimeout;    // timeout value for a DFS operation.
  // The amount of time to wait before aborting a close file.
  private final long closeFileTimeout;
  private long namenodeVersion = ClientProtocol.versionID;
  DFSClientMetrics metrics = new DFSClientMetrics();
  protected Integer dataTransferVersion = -1;
  boolean shortCircuitLocalReads = false;
  final InetAddress localHost;
  private InetSocketAddress nameNodeAddr;
  private DatanodeInfo pseuDatanodeInfoForLocalhost;
  private String localhostNetworkLocation = null;
  DNSToSwitchMapping dnsToSwitchMapping;
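
  /*
   * Illustrative usage sketch (not part of the original source; the namenode
   * URI and file path below are hypothetical). As the class comment above
   * notes, applications obtain a DistributedFileSystem through the FileSystem
   * factory rather than constructing a DFSClient directly; the FileSystem
   * delegates to DFSClient, which fetches block locations from the NameNode
   * over ClientProtocol and streams block data directly from DataNodes:
   *
   *   Configuration conf = new Configuration();
   *   FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:9000/"), conf);
   *   FSDataInputStream in = fs.open(new Path("/user/example/data.txt"));
   *   byte[] buffer = new byte[4096];
   *   try {
   *     in.readFully(0, buffer);   // positioned read, served via DFSClient
   *   } finally {
   *     IOUtils.closeStream(in);
   *   }
   */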