C Data Interface#

Arrow supports exchanging data without copying or serialization within the same process through the Arrow C data interface, even between different language runtimes.

Java to Python#

See Integrating PyArrow with Java to implement Java to Python communication using the C Data Interface.

Java to C++#

See Building Arrow C++ to build the Arrow C++ libraries:

$ git clone https://github.com/apache/arrow.git
$ cd arrow/cpp
$ mkdir build   # from inside the `cpp` subdirectory
$ cd build
$ cmake .. --preset ninja-debug-minimal
$ cmake --build .
$ tree debug/
debug/
├── libarrow.800.0.0.dylib
├── libarrow.800.dylib -> libarrow.800.0.0.dylib
└── libarrow.dylib -> libarrow.800.dylib

Share an Int64 array from C++ to Java#

C++ Side

Implement a function in CDataCppBridge.h that exports an array via the C Data Interface:

#include<iostream>#include<arrow/api.h>#include<arrow/c/bridge.h>voidFillInt64Array(constuintptr_tc_schema_ptr,constuintptr_tc_array_ptr){arrow::Int64Builderbuilder;builder.Append(1);builder.Append(2);builder.Append(3);builder.AppendNull();builder.Append(5);builder.Append(6);builder.Append(7);builder.Append(8);builder.Append(9);builder.Append(10);std::shared_ptr<arrow::Array>array=*builder.Finish();structArrowSchema*c_schema=reinterpret_cast<structArrowSchema*>(c_schema_ptr);autoc_schema_status=arrow::ExportType(*array->type(),c_schema);if(!c_schema_status.ok())c_schema_status.Abort();structArrowArray*c_array=reinterpret_cast<structArrowArray*>(c_array_ptr);autoc_array_status=arrow::ExportArray(*array,c_array);if(!c_array_status.ok())c_array_status.Abort();}

Java Side

For this example, we will use JavaCPP to call our C++ function from Java, without writing JNI bindings ourselves.

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Java side of the "C++ to Java" example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>java-cdata-example</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <!-- Single place to pin the version shared by all Arrow artifacts below. -->
        <arrow.version>9.0.0</arrow.version>
    </properties>

    <dependencies>
        <!-- JavaCPP: generates the binding used to call the C++ function. -->
        <dependency>
            <groupId>org.bytedeco</groupId>
            <artifactId>javacpp</artifactId>
            <version>1.5.7</version>
        </dependency>

        <!-- Arrow Java modules. -->
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-c-data</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-vector</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-memory-core</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-memory-netty</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-format</artifactId>
            <version>${arrow.version}</version>
        </dependency>
    </dependencies>
</project>
importorg.bytedeco.javacpp.annotation.Platform;importorg.bytedeco.javacpp.annotation.Properties;importorg.bytedeco.javacpp.tools.InfoMap;importorg.bytedeco.javacpp.tools.InfoMapper;@Properties(target="CDataJavaToCppExample",value=@Platform(include={"CDataCppBridge.h"},compiler={"cpp17"},linkpath={"/arrow/cpp/build/debug/"},link={"arrow"}))publicclassCDataJavaConfigimplementsInfoMapper{@Overridepublicvoidmap(InfoMapinfoMap){}}
# Compile our Java code
$ javac -cp javacpp-1.5.7.jar CDataJavaConfig.java

# Generate CDataJavaToCppExample
$ java -jar javacpp-1.5.7.jar CDataJavaConfig.java

# Generate libjniCDataJavaToCppExample.dylib
$ java -jar javacpp-1.5.7.jar CDataJavaToCppExample.java

# Validate libjniCDataJavaToCppExample.dylib created
$ otool -L macosx-x86_64/libjniCDataJavaToCppExample.dylib
macosx-x86_64/libjniCDataJavaToCppExample.dylib:
    libjniCDataJavaToCppExample.dylib (compatibility version 0.0.0, current version 0.0.0)
    @rpath/libarrow.800.dylib (compatibility version 800.0.0, current version 800.0.0)
    /usr/lib/libc++.1.dylib (compatibility version 1.0.0, current version 1200.3.0)
    /usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1311.0.0)

Java Test

Let’s create a Java class to test our bridge:

importorg.apache.arrow.c.ArrowArray;importorg.apache.arrow.c.ArrowSchema;importorg.apache.arrow.c.Data;importorg.apache.arrow.memory.BufferAllocator;importorg.apache.arrow.memory.RootAllocator;importorg.apache.arrow.vector.BigIntVector;publicclassTestCDataInterface{publicstaticvoidmain(String[]args){try(BufferAllocatorallocator=newRootAllocator();ArrowSchemaarrowSchema=ArrowSchema.allocateNew(allocator);ArrowArrayarrowArray=ArrowArray.allocateNew(allocator)){CDataJavaToCppExample.FillInt64Array(arrowSchema.memoryAddress(),arrowArray.memoryAddress());try(BigIntVectorbigIntVector=(BigIntVector)Data.importVector(allocator,arrowArray,arrowSchema,null)){System.out.println("C++-allocated array: "+bigIntVector);}}}}
C++-allocated array: [1, 2, 3, null, 5, 6, 7, 8, 9, 10]

Share an Int32 array from Java to C++#

Java Side

For this example, we will build a JAR with all dependencies bundled.

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Java side of the "Java to C++" example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>cpptojava</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <!-- Single place to pin the version shared by the Arrow artifacts below. -->
        <arrow.version>9.0.0</arrow.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-c-data</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-memory-netty</artifactId>
            <version>${arrow.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Runs the "single" goal in the package phase with the
                 jar-with-dependencies descriptor. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                        <configuration>
                            <descriptorRefs>
                                <descriptorRef>jar-with-dependencies</descriptorRef>
                            </descriptorRefs>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
importorg.apache.arrow.c.ArrowArray;importorg.apache.arrow.c.ArrowSchema;importorg.apache.arrow.c.Data;importorg.apache.arrow.memory.BufferAllocator;importorg.apache.arrow.memory.RootAllocator;importorg.apache.arrow.vector.FieldVector;importorg.apache.arrow.vector.IntVector;importorg.apache.arrow.vector.VectorSchemaRoot;importjava.util.Arrays;publicclassToBeCalledByCpp{finalstaticBufferAllocatorallocator=newRootAllocator();/**     * Create a {@link FieldVector} and export it via the C Data Interface     * @param schemaAddress Schema memory address to wrap     * @param arrayAddress Array memory address to wrap     */publicstaticvoidfillVector(longschemaAddress,longarrayAddress){try(ArrowArrayarrow_array=ArrowArray.wrap(arrayAddress);ArrowSchemaarrow_schema=ArrowSchema.wrap(schemaAddress)){Data.exportVector(allocator,populateFieldVectorToExport(),null,arrow_array,arrow_schema);}}/**     * Create a {@link VectorSchemaRoot} and export it via the C Data Interface     * @param schemaAddress Schema memory address to wrap     * @param arrayAddress Array memory address to wrap     */publicstaticvoidfillVectorSchemaRoot(longschemaAddress,longarrayAddress){try(ArrowArrayarrow_array=ArrowArray.wrap(arrayAddress);ArrowSchemaarrow_schema=ArrowSchema.wrap(schemaAddress)){Data.exportVectorSchemaRoot(allocator,populateVectorSchemaRootToExport(),null,arrow_array,arrow_schema);}}privatestaticFieldVectorpopulateFieldVectorToExport(){IntVectorintVector=newIntVector("int-to-export",allocator);intVector.allocateNew(3);intVector.setSafe(0,1);intVector.setSafe(1,2);intVector.setSafe(2,3);intVector.setValueCount(3);System.out.println("[Java] FieldVector: \n"+intVector);returnintVector;}privatestaticVectorSchemaRootpopulateVectorSchemaRootToExport(){IntVectorintVector=newIntVector("age-to-export",allocator);intVector.setSafe(0,10);intVector.setSafe(1,20);intVector.setSafe(2,30);VectorSchemaRootroot=newVectorSchemaRoot(Arrays.asList(intVector));root.setRowCount(3);System.out.println("[Java] 
VectorSchemaRoot: \n"+root.contentToTSVString());returnroot;}}

Build the JAR and copy it to the C++ project.

$ mvn clean install
$ cp target/cpptojava-1.0-SNAPSHOT-jar-with-dependencies.jar <c++ project path>/cpptojava.jar

C++ Side

This application uses JNI to call Java code, but transfers data (zero-copy) via the C Data Interface instead.

#include<iostream>#include<jni.h>#include<arrow/api.h>#include<arrow/c/bridge.h>JNIEnv*CreateVM(JavaVM**jvm){JNIEnv*env;JavaVMInitArgsvm_args;JavaVMOptionoptions[2];options[0].optionString="-Djava.class.path=cpptojava.jar";options[1].optionString="-DXcheck:jni:pedantic";vm_args.version=JNI_VERSION_10;vm_args.nOptions=2;vm_args.options=options;intstatus=JNI_CreateJavaVM(jvm,(void**)&env,&vm_args);if(status<0){std::cerr<<"\n<<<<< Unable to Launch JVM >>>>>\n"<<std::endl;returnnullptr;}returnenv;}intmain(){JNIEnv*env;JavaVM*jvm;env=CreateVM(&jvm);if(env==nullptr)returnEXIT_FAILURE;jclassjavaClassToBeCalledByCpp=env->FindClass("ToBeCalledByCpp");if(javaClassToBeCalledByCpp!=nullptr){jmethodIDfillVector=env->GetStaticMethodID(javaClassToBeCalledByCpp,"fillVector","(JJ)V");if(fillVector!=nullptr){structArrowSchemaarrowSchema;structArrowArrayarrowArray;std::cout<<"\n<<<<< C++ to Java for Arrays >>>>>\n"<<std::endl;env->CallStaticVoidMethod(javaClassToBeCalledByCpp,fillVector,static_cast<jlong>(reinterpret_cast<uintptr_t>(&arrowSchema)),static_cast<jlong>(reinterpret_cast<uintptr_t>(&arrowArray)));autoresultImportArray=arrow::ImportArray(&arrowArray,&arrowSchema);std::shared_ptr<arrow::Array>array=resultImportArray.ValueOrDie();std::cout<<"[C++] Array: "<<array->ToString()<<std::endl;}else{std::cerr<<"Could not find fillVector method\n"<<std::endl;returnEXIT_FAILURE;}jmethodIDfillVectorSchemaRoot=env->GetStaticMethodID(javaClassToBeCalledByCpp,"fillVectorSchemaRoot","(JJ)V");if(fillVectorSchemaRoot!=nullptr){structArrowSchemaarrowSchema;structArrowArrayarrowArray;std::cout<<"\n<<<<< C++ to Java for RecordBatch 
>>>>>\n"<<std::endl;env->CallStaticVoidMethod(javaClassToBeCalledByCpp,fillVectorSchemaRoot,static_cast<jlong>(reinterpret_cast<uintptr_t>(&arrowSchema)),static_cast<jlong>(reinterpret_cast<uintptr_t>(&arrowArray)));autoresultImportVectorSchemaRoot=arrow::ImportRecordBatch(&arrowArray,&arrowSchema);std::shared_ptr<arrow::RecordBatch>recordBatch=resultImportVectorSchemaRoot.ValueOrDie();std::cout<<"[C++] RecordBatch: "<<recordBatch->ToString()<<std::endl;}else{std::cerr<<"Could not find fillVectorSchemaRoot method\n"<<std::endl;returnEXIT_FAILURE;}}else{std::cout<<"Could not find ToBeCalledByCpp class\n"<<std::endl;returnEXIT_FAILURE;}jvm->DestroyJavaVM();returnEXIT_SUCCESS;}

CMakeLists.txt definition file:

# Build definition for the C++ program that embeds a JVM and imports Arrow data from Java.
cmake_minimum_required(VERSION 3.19)
project(cdatacpptojava)

# External dependencies: JNI (for the embedded JVM) and Arrow C++.
find_package(JNI REQUIRED)
find_package(Arrow REQUIRED)
message(STATUS "Arrow version: ${ARROW_VERSION}")

include_directories(${JNI_INCLUDE_DIRS})
set(CMAKE_CXX_STANDARD 17)

add_executable(${PROJECT_NAME} main.cpp)
target_link_libraries(${PROJECT_NAME}
    PRIVATE
        Arrow::arrow_shared
        ${JNI_LIBRARIES}
)

Result

<<<<< C++ to Java for Arrays >>>>>

[Java] FieldVector:
[1, 2, 3]
[C++] Array: [
  1,
  2,
  3
]

<<<<< C++ to Java for RecordBatch >>>>>

[Java] VectorSchemaRoot:
age-to-export
10
20
30

[C++] RecordBatch: age-to-export:   [
    10,
    20,
    30
  ]