Load a Parquet to replace a table

Load a Parquet file from Cloud Storage, replacing a table.

Explore further

For detailed documentation that includes this code sample, see the following:

Loading Parquet data from Cloud Storage

Code sample

Go

Before trying this sample, follow the Go setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Go API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

import("context""fmt""cloud.google.com/go/bigquery")// importParquetTruncate demonstrates loading Apache Parquet data from Cloud Storage into a table// and overwriting/truncating existing data in the table.funcimportParquetTruncate(projectID,datasetID,tableIDstring)error{// projectID := "my-project-id"// datasetID := "mydataset"// tableID := "mytable"ctx:=context.Background()client,err:=bigquery.NewClient(ctx,projectID)iferr!=nil{returnfmt.Errorf("bigquery.NewClient: %w",err)}deferclient.Close()gcsRef:=bigquery.NewGCSReference("gs://cloud-samples-data/bigquery/us-states/us-states.parquet")gcsRef.SourceFormat=bigquery.ParquetgcsRef.AutoDetect=trueloader:=client.Dataset(datasetID).Table(tableID).LoaderFrom(gcsRef)loader.WriteDisposition=bigquery.WriteTruncatejob,err:=loader.Run(ctx)iferr!=nil{returnerr}status,err:=job.Wait(ctx)iferr!=nil{returnerr}ifstatus.Err()!=nil{returnfmt.Errorf("job completed with error: %w",status.Err())}returnnil}

Java

Before trying this sample, follow the Java setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Java API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

importcom.google.cloud.bigquery.BigQuery;importcom.google.cloud.bigquery.BigQueryException;importcom.google.cloud.bigquery.BigQueryOptions;importcom.google.cloud.bigquery.FormatOptions;importcom.google.cloud.bigquery.Job;importcom.google.cloud.bigquery.JobInfo;importcom.google.cloud.bigquery.JobInfo.WriteDisposition;importcom.google.cloud.bigquery.LoadJobConfiguration;importcom.google.cloud.bigquery.TableId;importjava.math.BigInteger;publicclassLoadParquetReplaceTable{publicstaticvoidmain(String[]args){// TODO(developer): Replace these variables before running the sample.StringdatasetName="MY_DATASET_NAME";StringsourceUri="gs://cloud-samples-data/bigquery/us-states/us-states.parquet";StringtableName="us_states";loadParquetReplaceTable(datasetName,tableName,sourceUri);}publicstaticvoidloadParquetReplaceTable(StringdatasetName,StringtableName,StringsourceUri){try{// Initialize client that will be used to send requests. This client only needs to be created// once, and can be reused for multiple requests.BigQuerybigquery=BigQueryOptions.getDefaultInstance().getService();// Imports a GCS file into a table and overwrites table data if table already exists.// This sample loads CSV file at:// https://storage.googleapis.com/cloud-samples-data/bigquery/us-states/us-states.csvTableIdtableId=TableId.of(datasetName,tableName);// For more information on LoadJobConfiguration see:// https://googleapis.dev/java/google-cloud-clients/latest/com/google/cloud/bigquery/LoadJobConfiguration.Builder.htmlLoadJobConfigurationconfiguration=LoadJobConfiguration.builder(tableId,sourceUri).setFormatOptions(FormatOptions.parquet())// Set the write disposition to overwrite existing table data..setWriteDisposition(WriteDisposition.WRITE_TRUNCATE).build();// For more information on Job see:// https://googleapis.dev/java/google-cloud-clients/latest/index.html?com/google/cloud/bigquery/package-summary.html// Load the tableJobjob=bigquery.create(JobInfo.of(configuration));// Load data from a GCS 
parquet file into the table// Blocks until this load table job completes its execution, either failing or succeeding.JobcompletedJob=job.waitFor();if(completedJob==null){System.out.println("Job not executed since it no longer exists.");return;}elseif(completedJob.getStatus().getError()!=null){System.out.println("BigQuery was unable to load into the table due to an error: \n"+job.getStatus().getError());return;}// Check number of rows loaded into the tableBigIntegernumRows=bigquery.getTable(tableId).getNumRows();System.out.printf("Loaded %d rows. \n",numRows);System.out.println("GCS parquet overwrote existing table successfully.");}catch(BigQueryException|InterruptedExceptione){System.out.println("Table extraction job was interrupted. \n"+e.toString());}}}

Node.js

Before trying this sample, follow the Node.js setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Node.js API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

// Import the Google Cloud client libraries
const {BigQuery} = require('@google-cloud/bigquery');
const {Storage} = require('@google-cloud/storage');

// Instantiate clients
const bigquery = new BigQuery();
const storage = new Storage();

/**
 * This sample loads the Parquet file at
 * https://storage.googleapis.com/cloud-samples-data/bigquery/us-states/us-states.parquet
 *
 * TODO(developer): Replace the following lines with the path to your file.
 */
const bucketName = 'cloud-samples-data';
const filename = 'bigquery/us-states/us-states.parquet';

/**
 * Imports a GCS Parquet file into a table and overwrites
 * table data if the table already exists.
 *
 * TODO(developer): Pass your own dataset and table IDs, or change the
 * defaults below, before running the sample.
 *
 * @param {string} [datasetId='my_dataset'] ID of the dataset holding the table.
 * @param {string} [tableId='my_table'] ID of the destination table.
 * @throws Rejects if the load request fails; also throws the entries of
 *     `job.status.errors` when the finished job reports any.
 */
async function loadParquetFromGCSTruncate(
  datasetId = 'my_dataset',
  tableId = 'my_table'
) {
  // Configure the load job. For full list of options, see:
  // https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad
  const metadata = {
    sourceFormat: 'PARQUET',
    // Set the write disposition to overwrite existing table data.
    writeDisposition: 'WRITE_TRUNCATE',
  };

  // Load data from a Google Cloud Storage file into the table
  const [job] = await bigquery
    .dataset(datasetId)
    .table(tableId)
    .load(storage.bucket(bucketName).file(filename), metadata);

  // load() waits for the job to finish
  console.log(`Job ${job.id} completed.`);
  console.log(
    `Write disposition used: ${job.configuration.load.writeDisposition}.`
  );

  // Surface any errors recorded on the finished job instead of ignoring them.
  const errors = job.status && job.status.errors;
  if (errors && errors.length > 0) {
    throw errors;
  }
}

PHP

Before trying this sample, follow the PHP setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery PHP API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

use Google\Cloud\BigQuery\BigQueryClient;

/**
 * Load a Parquet file from Cloud Storage into a table using the
 * WRITE_TRUNCATE write disposition.
 *
 * Prints a success message, or the job's error message when the job status
 * carries an errorResult.
 *
 * @param string $projectId The project Id of your Google Cloud Project.
 * @param string $datasetId The BigQuery dataset ID.
 * @param string $tableId The BigQuery table ID.
 *
 * @throws \Exception When the job is not complete after being reloaded.
 */
function import_from_storage_parquet_truncate(
    string $projectId,
    string $datasetId,
    string $tableId
): void {
    // Build the client and resolve the destination table handle.
    $client = new BigQueryClient(['projectId' => $projectId]);
    $destination = $client->dataset($datasetId)->table($tableId);

    // Describe the load: Parquet source, truncate-on-write.
    $sourceUri = 'gs://cloud-samples-data/bigquery/us-states/us-states.parquet';
    $config = $destination
        ->loadFromStorage($sourceUri)
        ->sourceFormat('PARQUET')
        ->writeDisposition('WRITE_TRUNCATE');

    // Run the job, then refresh its state before inspecting it.
    $loadJob = $destination->runJob($config);
    $loadJob->reload();
    if (!$loadJob->isComplete()) {
        throw new \Exception('Job has not yet completed', 500);
    }

    // Report the outcome based on the job's status.
    $details = $loadJob->info();
    if (!isset($details['status']['errorResult'])) {
        print('Data imported successfully' . PHP_EOL);
        return;
    }
    $message = $details['status']['errorResult']['message'];
    printf('Error running job: %s' . PHP_EOL, $message);
}

Python

Before trying this sample, follow the Python setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Python API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

import io

from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
# table_id = "your-project.your_dataset.your_table_name"

# Seed the table with one row first, so that the truncating load below has
# existing data to replace.
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ],
)

body = io.BytesIO(b"Washington,WA")
client.load_table_from_file(body, table_id, job_config=job_config).result()
previous_rows = client.get_table(table_id).num_rows
assert previous_rows > 0

# Configure the Parquet load to overwrite whatever the table currently holds.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    source_format=bigquery.SourceFormat.PARQUET,
)

uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"
load_job = client.load_table_from_uri(
    uri, table_id, job_config=job_config
)  # Make an API request.

load_job.result()  # Waits for the job to complete.

destination_table = client.get_table(table_id)
print("Loaded {} rows.".format(destination_table.num_rows))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.

Movatterモバイル変換

Load a Parquet to replace a table. Stay organized with collections. Save and categorize content based on your preferences.

Explore further

Code sample

Go

Java

Node.js

PHP

Python

What's next

Load a Parquet to replace a table