# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# flake8: noqa

"""
PyArrow is the python implementation of Apache Arrow.

Apache Arrow is a cross-language development platform for in-memory data.
It specifies a standardized language-independent columnar memory format for
flat and hierarchical data, organized for efficient analytic operations on
modern hardware. It also provides computational libraries and zero-copy
streaming messaging and interprocess communication.

For more information see the official page at https://arrow.apache.org
"""

import importlib as _importlib
import os as _os
import platform as _platform
import sys as _sys

try:
    from ._generated_version import version as __version__
except ImportError:
    # Package is not installed, parse git tag at runtime
    try:
        import setuptools_scm

        # Code duplicated from setup.py to avoid a dependency on each other
        def parse_git(root, **kwargs):
            """
            Parse function for setuptools_scm that ignores tags for non-C++
            subprojects, e.g. apache-arrow-js-XXX tags.
            """
            from setuptools_scm.git import parse
            kwargs['describe_command'] = \
                "git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'"
            return parse(root, **kwargs)

        __version__ = setuptools_scm.get_version('../', parse=parse_git)
    except ImportError:
        # Neither a generated version file nor setuptools_scm is available.
        __version__ = None

from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo,
                         set_timezone_db_path, MonthDayNano, VersionInfo,
                         build_info, cpp_build_info, cpp_version,
                         cpp_version_info, runtime_info, cpu_count,
                         set_cpu_count, enable_signal_handlers,
                         io_thread_count, set_io_thread_count)


def show_versions():
    """
    Print various version information, to help with error reporting.
    """
    def print_entry(label, value):
        # Left-align the label in a 26-character column; keep a space after
        # the colon so the value does not run into the separator.
        print(f"{label: <26}: {value: <8}")

    cpp_info = build_info.cpp_build_info

    print("pyarrow version info\n--------------------")
    # An empty package kind means the build did not record one.
    print_entry("Package kind", cpp_info.package_kind or "not indicated")
    print_entry("Arrow C++ library version", cpp_info.version)
    print_entry("Arrow C++ compiler",
                f"{cpp_info.compiler_id} {cpp_info.compiler_version}")
    print_entry("Arrow C++ compiler flags", cpp_info.compiler_flags)
    print_entry("Arrow C++ git revision", cpp_info.git_id)
    print_entry("Arrow C++ git description", cpp_info.git_description)
    print_entry("Arrow C++ build type", cpp_info.build_type)
    print_entry("PyArrow build type", build_info.build_type)


def _module_is_available(module):
    """Return True if the ``pyarrow.<module>`` submodule can be imported."""
    try:
        _importlib.import_module(f'pyarrow.{module}')
    except ImportError:
        return False
    else:
        return True


def _filesystem_is_available(fs):
    """Return True if ``pyarrow.fs`` imports and exposes an attribute ``fs``."""
    try:
        import pyarrow.fs
    except ImportError:
        return False

    try:
        getattr(pyarrow.fs, fs)
    except (ImportError, AttributeError):
        return False
    else:
        return True


def show_info():
    """
    Print detailed version and platform information, for error reporting
    """
    show_versions()

    def print_entry(label, value):
        # Same layout as in show_versions(), with a narrower label column.
        print(f"{label: <20}: {value: <8}")

    def print_availability(names, is_available):
        # One row per name: "Enabled" when the probe succeeds, "-" otherwise.
        for name in names:
            print_entry(name, "Enabled" if is_available(name) else "-")

    info = runtime_info()
    print("\nPlatform:")
    # Separate OS and architecture with a space (e.g. "Linux x86_64").
    print_entry("OS / Arch", f"{_platform.system()} {_platform.machine()}")
    print_entry("SIMD Level", info.simd_level)
    print_entry("Detected SIMD Level", info.detected_simd_level)

    # default_memory_pool, supported_memory_backends and Codec are imported
    # further down in this module; they are resolved at call time.
    pool = default_memory_pool()
    print("\nMemory:")
    print_entry("Default backend", pool.backend_name)
    print_entry("Bytes allocated", f"{pool.bytes_allocated()} bytes")
    print_entry("Max memory", f"{pool.max_memory()} bytes")
    print_entry("Supported Backends", ', '.join(supported_memory_backends()))

    print("\nOptional modules:")
    modules = ["csv", "cuda", "dataset", "feather", "flight", "fs", "gandiva",
               "json", "orc", "parquet"]
    print_availability(modules, _module_is_available)

    print("\nFilesystems:")
    filesystems = ["AzureFileSystem", "GcsFileSystem",
                   "HadoopFileSystem", "S3FileSystem"]
    print_availability(filesystems, _filesystem_is_available)

    print("\nCompression Codecs:")
    codecs = ["brotli", "bz2", "gzip", "lz4_frame", "lz4", "snappy", "zstd"]
    print_availability(codecs, Codec.is_available)


from pyarrow.lib import (null, bool_,
                         int8, int16, int32, int64,
                         uint8, uint16, uint32, uint64,
                         time32, time64, timestamp, date32, date64, duration,
                         month_day_nano_interval,
                         float16, float32, float64,
                         binary, string, utf8, binary_view, string_view,
                         large_binary, large_string, large_utf8,
                         decimal32, decimal64, decimal128, decimal256,
                         list_, large_list, list_view, large_list_view,
                         map_, struct,
                         union, sparse_union, dense_union,
                         dictionary,
                         run_end_encoded,
                         bool8, fixed_shape_tensor, json_, opaque, uuid,
                         field,
                         type_for_alias,
                         DataType, DictionaryType, StructType,
                         ListType, LargeListType, FixedSizeListType,
                         ListViewType, LargeListViewType,
                         MapType, UnionType, SparseUnionType, DenseUnionType,
                         TimestampType, Time32Type, Time64Type, DurationType,
                         FixedSizeBinaryType,
                         Decimal32Type, Decimal64Type, Decimal128Type,
                         Decimal256Type,
                         BaseExtensionType, ExtensionType,
                         RunEndEncodedType, Bool8Type, FixedShapeTensorType,
                         JsonType, OpaqueType, UuidType,
                         UnknownExtensionType,
                         register_extension_type, unregister_extension_type,
                         DictionaryMemo,
                         KeyValueMetadata,
                         Field,
                         Schema,
                         schema,
                         unify_schemas,
                         Array, Tensor,
                         array, chunked_array, record_batch, nulls, repeat,
                         SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
                         SparseCSFTensor,
                         infer_type, from_numpy_dtype,
                         arange,
                         NullArray,
                         NumericArray, IntegerArray, FloatingPointArray,
                         BooleanArray,
                         Int8Array, UInt8Array,
                         Int16Array, UInt16Array,
                         Int32Array, UInt32Array,
                         Int64Array, UInt64Array,
                         HalfFloatArray, FloatArray, DoubleArray,
                         ListArray, LargeListArray, FixedSizeListArray,
                         ListViewArray, LargeListViewArray,
                         MapArray, UnionArray,
                         BinaryArray, StringArray,
                         LargeBinaryArray, LargeStringArray,
                         BinaryViewArray, StringViewArray,
                         FixedSizeBinaryArray,
                         DictionaryArray,
                         Date32Array, Date64Array, TimestampArray,
                         Time32Array, Time64Array, DurationArray,
                         MonthDayNanoIntervalArray,
                         Decimal32Array, Decimal64Array, Decimal128Array,
                         Decimal256Array,
                         StructArray, ExtensionArray,
                         RunEndEncodedArray, Bool8Array, FixedShapeTensorArray,
                         JsonArray, OpaqueArray, UuidArray,
                         scalar, NA, _NULL as NULL, Scalar,
                         NullScalar, BooleanScalar,
                         Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
                         UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
                         HalfFloatScalar, FloatScalar, DoubleScalar,
                         Decimal32Scalar, Decimal64Scalar, Decimal128Scalar,
                         Decimal256Scalar,
                         ListScalar, LargeListScalar, FixedSizeListScalar,
                         ListViewScalar, LargeListViewScalar,
                         Date32Scalar, Date64Scalar,
                         Time32Scalar, Time64Scalar,
                         TimestampScalar, DurationScalar,
                         MonthDayNanoIntervalScalar,
                         BinaryScalar, LargeBinaryScalar, BinaryViewScalar,
                         StringScalar, LargeStringScalar, StringViewScalar,
                         FixedSizeBinaryScalar, DictionaryScalar,
                         MapScalar, StructScalar, UnionScalar,
                         RunEndEncodedScalar, Bool8Scalar, ExtensionScalar,
                         FixedShapeTensorScalar, JsonScalar, OpaqueScalar,
                         UuidScalar)

# Buffers, allocation
from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
                         default_cpu_memory_manager)

from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
                         Codec, compress, decompress, allocate_buffer)

from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
                         total_allocated_bytes, set_memory_pool,
                         default_memory_pool, system_memory_pool,
                         jemalloc_memory_pool, mimalloc_memory_pool,
                         logging_memory_pool, proxy_memory_pool,
                         log_memory_allocations, jemalloc_set_decay_ms,
                         supported_memory_backends)

# I/O
from pyarrow.lib import (NativeFile, PythonFile,
                         BufferedInputStream, BufferedOutputStream,
                         CacheOptions,
                         CompressedInputStream, CompressedOutputStream,
                         TransformInputStream, transcoding_input_stream,
                         FixedSizeBufferWriter,
                         BufferReader, BufferOutputStream,
                         OSFile, MemoryMappedFile, memory_map,
                         create_memory_map, MockOutputStream,
                         input_stream, output_stream,
                         have_libhdfs)

from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
                         concat_arrays, concat_tables, TableGroupBy,
                         RecordBatchReader, concat_batches)

# Exceptions
from pyarrow.lib import (ArrowCancelled,
                         ArrowCapacityError,
                         ArrowException,
                         ArrowKeyError,
                         ArrowIndexError,
                         ArrowInvalid,
                         ArrowIOError,
                         ArrowMemoryError,
                         ArrowNotImplementedError,
                         ArrowTypeError,
                         ArrowSerializationError)

from pyarrow.ipc import serialize_pandas, deserialize_pandas
import pyarrow.ipc as ipc

import pyarrow.types as types
# ----------------------------------------------------------------------
# Deprecations

from pyarrow.util import _deprecate_api, _deprecate_class


# TODO: Deprecate these somehow in the pyarrow namespace
from pyarrow.ipc import (Message, MessageReader, MetadataVersion,
                         RecordBatchFileReader, RecordBatchFileWriter,
                         RecordBatchStreamReader, RecordBatchStreamWriter)

# ----------------------------------------------------------------------
# Returning absolute path to the pyarrow include directory (if bundled, e.g. in
# wheels)


def get_include():
    """
    Return absolute path to directory containing Arrow C++ include
    headers. Similar to numpy.get_include
    """
    return _os.path.join(_os.path.dirname(__file__), 'include')


def _get_pkg_config_executable():
    """Return the pkg-config command, honouring the PKG_CONFIG env variable."""
    return _os.environ.get('PKG_CONFIG', 'pkg-config')


def _has_pkg_config(pkgname):
    """
    Return True if ``pkg-config --exists <pkgname>`` exits with status 0.

    Returns False when the pkg-config executable itself cannot be found.
    """
    import subprocess
    try:
        return subprocess.call([_get_pkg_config_executable(),
                                '--exists', pkgname]) == 0
    except FileNotFoundError:
        return False


def _read_pkg_config_variable(pkgname, cli_args):
    """
    Run pkg-config for ``pkgname`` with the extra ``cli_args`` and return its
    standard output, decoded as UTF-8 with trailing whitespace removed.

    Raises
    ------
    RuntimeError
        If pkg-config exits with a non-zero status; the message carries
        pkg-config's standard error output.
    """
    import subprocess
    cmd = [_get_pkg_config_executable(), pkgname] + cli_args
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("pkg-config failed: " + err.decode('utf8'))
    return out.rstrip().decode('utf8')


def get_libraries():
    """
    Return list of library names to include in the `libraries` argument for C
    or Cython extensions using pyarrow
    """
    return ['arrow_python', 'arrow']


def create_library_symlinks():
    """
    With Linux and macOS wheels, the bundled shared libraries have an embedded
    ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them
    with -larrow won't work unless we create symlinks at locations like
    site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses
    prior problems we had with shipping two copies of the shared libraries to
    permit third party projects like turbodbc to build their C++ extensions
    against the pyarrow wheels.

    This function must only be invoked once and only when the shared libraries
    are bundled with the Python package, which should only apply to wheel-based
    installs. It requires write access to the site-packages/pyarrow directory
    and so depending on your system may need to be run with root.
    """
    import glob
    if _sys.platform == 'win32':
        return
    package_cwd = _os.path.dirname(__file__)

    if _sys.platform == 'linux':
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))

        def get_symlink_path(hard_path):
            # libarrow.so.17 -> libarrow.so
            return hard_path.rsplit('.', 1)[0]
    else:
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))

        def get_symlink_path(hard_path):
            # libarrow.17.dylib -> libarrow.dylib
            return '.'.join((hard_path.rsplit('.', 2)[0], 'dylib'))

    for lib_hard_path in bundled_libs:
        symlink_path = get_symlink_path(lib_hard_path)
        # lexists() also reports dangling symlinks; exists() would say False
        # for those and os.symlink() would then raise FileExistsError.
        if _os.path.lexists(symlink_path):
            continue
        try:
            _os.symlink(lib_hard_path, symlink_path)
        except PermissionError:
            # Best effort: report which link could not be created and go on.
            print(f"Tried creating symlink {symlink_path}. If you need to "
                  "link to bundled shared libraries, run "
                  "pyarrow.create_library_symlinks() as root")


def get_library_dirs():
    """
    Return lists of directories likely to contain Arrow C++ libraries for
    linking C or Cython extensions using pyarrow

    Raises
    ------
    ValueError
        If ``pkg-config --libs-only-L`` returns non-empty output that does
        not start with ``-L``.
    RuntimeError
        If a pkg-config query for a package reported as existing fails.
    """
    package_cwd = _os.path.dirname(__file__)
    library_dirs = [package_cwd]

    def append_library_dir(library_dir):
        # Keep first-seen order and avoid duplicate entries.
        if library_dir not in library_dirs:
            library_dirs.append(library_dir)

    # Search library paths via pkg-config. This is necessary if the user
    # installed libarrow and the other shared libraries manually and they
    # are not shipped inside the pyarrow package (see also ARROW-2976).
    for pkgname in ["arrow", "arrow_python"]:
        if _has_pkg_config(pkgname):
            library_dir = _read_pkg_config_variable(pkgname,
                                                    ["--libs-only-L"])
            # pkg-config output could be empty if Arrow is installed
            # as a system package.
            if library_dir:
                if not library_dir.startswith("-L"):
                    raise ValueError(
                        "pkg-config --libs-only-L returned unexpected "
                        f"value {library_dir!r}")
                # NOTE(review): the whole output is treated as a single -L
                # entry; output carrying several -L flags is not split here.
                append_library_dir(library_dir[2:])

    if _sys.platform == 'win32':
        # TODO(wesm): Is this necessary, or does setuptools within a conda
        # installation add Library\lib to the linker path for MSVC?
        python_base_install = _os.path.dirname(_sys.executable)
        library_dir = _os.path.join(python_base_install, 'Library', 'lib')

        if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
            append_library_dir(library_dir)

        # GH-45530: Add pyarrow.libs dir containing delvewheel-mangled
        # msvcp140.dll
        pyarrow_libs_dir = _os.path.abspath(
            _os.path.join(_os.path.dirname(__file__), _os.pardir,
                          "pyarrow.libs")
        )
        if _os.path.exists(pyarrow_libs_dir):
            append_library_dir(pyarrow_libs_dir)

    # ARROW-4074: Allow for ARROW_HOME to be set to some other directory
    if _os.environ.get('ARROW_HOME'):
        append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
    else:
        # Python wheels bundle the Arrow libraries in the pyarrow directory.
        append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))

    return library_dirs