Add a column using a load job
Add a new column to a BigQuery table while appending rows using a load job.
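All five samples below set the same two options on the load job configuration: an append write disposition and the ALLOW_FIELD_ADDITION schema update option. Because 'REQUIRED' columns cannot be added to an existing table, the new column must be 'NULLABLE'. As a minimal sketch of the pattern in Python, assuming hypothetical dataset, table, and file names (the full Python sample later on this page adds schema inspection and output):

from google.cloud import bigquery

client = bigquery.Client()

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    # Append rows instead of overwriting the table.
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
    # Let the load job add new NULLABLE columns to the table's schema.
    schema_update_options=[bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION],
    # The widened schema: the existing column plus the new NULLABLE one.
    schema=[
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    ],
)

# Hypothetical file and table names; replace with your own.
with open("path/to/your_file.csv", "rb") as source_file:
    job = client.load_table_from_file(
        source_file, "my_dataset.my_table", job_config=job_config
    )
job.result()  # Wait for the load job to finish.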
Explore further
For detailed documentation that includes this code sample, see Modifying table schemas.
Code sample
Go
Before trying this sample, follow the Go setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Go API reference documentation.
To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.
import("context""fmt""os""cloud.google.com/go/bigquery")// createTableAndWidenLoad demonstrates augmenting a table's schema to add a new column via a load job.funccreateTableAndWidenLoad(projectID,datasetID,tableID,filenamestring)error{// projectID := "my-project-id"// datasetID := "mydataset"// tableID := "mytable"ctx:=context.Background()client,err:=bigquery.NewClient(ctx,projectID)iferr!=nil{returnfmt.Errorf("bigquery.NewClient: %w",err)}deferclient.Close()sampleSchema:=bigquery.Schema{{Name:"full_name",Type:bigquery.StringFieldType},}meta:=&bigquery.TableMetadata{Schema:sampleSchema,}tableRef:=client.Dataset(datasetID).Table(tableID)iferr:=tableRef.Create(ctx,meta);err!=nil{returnerr}// Now, import data from a local file, but specify field additions are allowed.// Because the data has a second column (age), the schema is amended as part of// the load.f,err:=os.Open(filename)iferr!=nil{returnerr}source:=bigquery.NewReaderSource(f)source.AutoDetect=true// Allow BigQuery to determine schema.source.SkipLeadingRows=1// CSV has a single header line.loader:=client.Dataset(datasetID).Table(tableID).LoaderFrom(source)loader.SchemaUpdateOptions=[]string{"ALLOW_FIELD_ADDITION"}job,err:=loader.Run(ctx)iferr!=nil{returnerr}status,err:=job.Wait(ctx)iferr!=nil{returnerr}iferr:=status.Err();err!=nil{returnerr}returnnil}Java
Java

Before trying this sample, follow the Java setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Java API reference documentation.
To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.JobInfo.SchemaUpdateOption;
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.TableId;
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

// Sample to append a column to an existing table.
public class AddColumnLoadAppend {

  public static void main(String[] args) {
    // TODO(developer): Replace these variables before running the sample.
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";
    String sourceUri = "/path/to/file.csv";
    // Add a new column to a BigQuery table while appending rows via a load job.
    // 'REQUIRED' fields cannot be added to an existing schema, so the additional
    // column must be 'NULLABLE'.
    Schema schema =
        Schema.of(
            Field.newBuilder("name", LegacySQLTypeName.STRING)
                .setMode(Field.Mode.REQUIRED)
                .build());
    List<Field> fields = schema.getFields();
    // The additional column to add during the load job.
    Field newField =
        Field.newBuilder("post_abbr", LegacySQLTypeName.STRING)
            .setMode(Field.Mode.NULLABLE)
            .build();
    List<Field> newFields = new ArrayList<>(fields);
    newFields.add(newField);
    Schema newSchema = Schema.of(newFields);
    addColumnLoadAppend(datasetName, tableName, sourceUri, newSchema);
  }

  public static void addColumnLoadAppend(
      String datasetName, String tableName, String sourceUri, Schema newSchema) {
    try {
      // Initialize client that will be used to send requests. This client only
      // needs to be created once, and can be reused for multiple requests.
      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

      TableId tableId = TableId.of(datasetName, tableName);
      LoadJobConfiguration loadJobConfig =
          LoadJobConfiguration.builder(tableId, sourceUri)
              .setFormatOptions(FormatOptions.csv())
              .setWriteDisposition(WriteDisposition.WRITE_APPEND)
              .setSchema(newSchema)
              .setSchemaUpdateOptions(ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION))
              .build();

      // Create a job ID so that we can safely retry.
      JobId jobId = JobId.of(UUID.randomUUID().toString());
      Job loadJob = bigquery.create(JobInfo.newBuilder(loadJobConfig).setJobId(jobId).build());

      // Load data from the CSV source into the table.
      // Blocks until this load table job completes its execution, either failing or succeeding.
      Job job = loadJob.waitFor();
      // Check for errors.
      if (job.isDone() && job.getStatus().getError() == null) {
        System.out.println("Column successfully added during load append job");
      } else {
        System.out.println(
            "BigQuery was unable to load into the table due to an error:"
                + job.getStatus().getError());
      }
    } catch (BigQueryException | InterruptedException e) {
      System.out.println("Column not added during load append \n" + e.toString());
    }
  }
}
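Two details worth noting in the Java sample: the widened schema is built by copying the existing fields and appending the new NULLABLE field before the job is configured, and the job is created with an explicit, randomly generated JobId so that the creation request can be retried safely without accidentally starting a duplicate load.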
Node.js

Before trying this sample, follow the Node.js setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Node.js API reference documentation.
To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.
// Import the Google Cloud client libraries
const {BigQuery} = require('@google-cloud/bigquery');

// Instantiate client
const bigquery = new BigQuery();

async function addColumnLoadAppend() {
  // Adds a new column to a BigQuery table while appending rows via a load job.

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const fileName = '/path/to/file.csv';
  // const datasetId = 'my_dataset';
  // const tableId = 'my_table';

  // In this example, the existing table contains only the 'Name', 'Age',
  // & 'Weight' columns. 'REQUIRED' fields cannot be added to an existing
  // schema, so the additional column must be 'NULLABLE'.
  const schema = 'Name:STRING, Age:INTEGER, Weight:FLOAT, IsMagic:BOOLEAN';

  // Retrieve destination table reference
  const [table] = await bigquery.dataset(datasetId).table(tableId).get();
  const destinationTableRef = table.metadata.tableReference;

  // Set load job options
  const options = {
    schema: schema,
    schemaUpdateOptions: ['ALLOW_FIELD_ADDITION'],
    writeDisposition: 'WRITE_APPEND',
    destinationTable: destinationTableRef,
  };

  // Load data from a local file into the table
  const [job] = await bigquery
    .dataset(datasetId)
    .table(tableId)
    .load(fileName, options);

  console.log(`Job ${job.id} completed.`);
  console.log('New Schema:');
  console.log(job.configuration.load.schema.fields);

  // Check the job's status for errors
  const errors = job.status.errors;
  if (errors && errors.length > 0) {
    throw errors;
  }
}
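The schema here uses the Node.js client's compact string form, a comma-separated list of name:TYPE pairs covering the three existing columns plus the new IsMagic column. Fields declared this way default to NULLABLE mode, which satisfies the rule that columns added to an existing table cannot be REQUIRED.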
PHP

Before trying this sample, follow the PHP setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery PHP API reference documentation.
To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.
use Google\Cloud\BigQuery\BigQueryClient;

/**
 * Append a column using a load job.
 *
 * @param string $projectId The project Id of your Google Cloud Project.
 * @param string $datasetId The BigQuery dataset ID.
 * @param string $tableId The BigQuery table ID.
 */
function add_column_load_append(
    string $projectId,
    string $datasetId,
    string $tableId
): void {
    $bigQuery = new BigQueryClient([
        'projectId' => $projectId,
    ]);
    $dataset = $bigQuery->dataset($datasetId);
    $table = $dataset->table($tableId);
    // In this example, the existing table contains only the 'Name' and 'Title' columns.
    // A new column, 'Description', is added by the load job.
    $schema = [
        'fields' => [
            ['name' => 'name', 'type' => 'string', 'mode' => 'nullable'],
            ['name' => 'title', 'type' => 'string', 'mode' => 'nullable'],
            ['name' => 'description', 'type' => 'string', 'mode' => 'nullable']
        ]
    ];
    $source = __DIR__ . '/../test/data/test_data_extra_column.csv';

    // Set job configs
    $loadConfig = $table->load(fopen($source, 'r'));
    $loadConfig->destinationTable($table);
    $loadConfig->schema($schema);
    $loadConfig->schemaUpdateOptions(['ALLOW_FIELD_ADDITION']);
    $loadConfig->sourceFormat('CSV');
    $loadConfig->writeDisposition('WRITE_APPEND');

    // Run the job with load config
    $job = $bigQuery->runJob($loadConfig);

    // Print all the columns
    $columns = $table->info()['schema']['fields'];
    printf('The columns in the table are ');
    foreach ($columns as $column) {
        printf('%s ', $column['name']);
    }
}
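Because runJob() polls until the job completes (unlike startJob(), which returns immediately), the column names printed at the end reflect the table's schema after the load, including the new description field.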
Python

Before trying this sample, follow the Python setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Python API reference documentation.
To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.
# from google.cloud import bigquery
# client = bigquery.Client()
# project = client.project
# dataset_ref = bigquery.DatasetReference(project, 'my_dataset')
# filepath = 'path/to/your_file.csv'

# Retrieves the destination table and checks the length of the schema.
table_id = "my_table"
table_ref = dataset_ref.table(table_id)
table = client.get_table(table_ref)
print("Table {} contains {} columns.".format(table_id, len(table.schema)))

# Configures the load job to append the data to the destination table,
# allowing field addition.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
job_config.schema_update_options = [
    bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION
]
# In this example, the existing table contains only the 'full_name' column.
# 'REQUIRED' fields cannot be added to an existing schema, so the
# additional column must be 'NULLABLE'.
job_config.schema = [
    bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
]
job_config.source_format = bigquery.SourceFormat.CSV
job_config.skip_leading_rows = 1

with open(filepath, "rb") as source_file:
    job = client.load_table_from_file(
        source_file,
        table_ref,
        location="US",  # Must match the destination dataset location.
        job_config=job_config,
    )  # API request

job.result()  # Waits for table load to complete.
print(
    "Loaded {} rows into {}:{}.".format(
        job.output_rows, dataset_ref.dataset_id, table_ref.table_id
    )
)

# Checks the updated length of the schema.
table = client.get_table(table)
print("Table {} now contains {} columns.".format(table_id, len(table.schema)))
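To run the Python sample end to end, you first need the one-column table and a CSV carrying the extra column. A minimal setup sketch, assuming the hypothetical names used above and invented data rows:

from google.cloud import bigquery

client = bigquery.Client()
dataset_ref = bigquery.DatasetReference(client.project, "my_dataset")

# The existing table starts with a single REQUIRED column.
table = bigquery.Table(
    dataset_ref.table("my_table"),
    schema=[bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")],
)
client.create_table(table)  # API request; the dataset must already exist.

# The CSV carries a second column, 'age', which the load job
# adds to the table's schema. Hypothetical data rows.
with open("path/to/your_file.csv", "w") as f:
    f.write("full_name,age\n")
    f.write("Ada Lovelace,36\n")
    f.write("Alan Turing,41\n")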
What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.
Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.