Bounding box detection

Experimental

This feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the Service Specific Terms. Pre-GA features are available "as is" and might have limited support. For more information, see the launch stage descriptions.

In this experimental launch, we are providing developers with a powerful tool for object detection and localization within images and video. By accurately identifying and delineating objects with bounding boxes, developers can unlock a wide range of applications and enhance the intelligence of their projects.

Key Benefits:

  • Simple: Integrate object detection capabilities into your applications with ease, regardless of your computer vision expertise.
  • Customizable: Produce bounding boxes based on custom instructions (e.g. "I want to see bounding boxes of all the green objects in this image"), without having to train a custom model.

Technical Details:

  • Input: Your prompt and associated images or video frames.
  • Output: Bounding boxes in the [y_min, x_min, y_max, x_max] format. The top-left corner is the origin. The x and y axes run horizontally and vertically, respectively. Coordinate values are normalized to 0-1000 for every image.
  • Visualization: AI Studio users will see bounding boxes plotted within the UI. Vertex AI users should visualize their bounding boxes through custom visualization code.

Python

Install

pip install --upgrade google-genai

To learn more, see the SDK reference documentation.

Set environment variables to use the Gen AI SDK with Vertex AI:

# Replace the `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` values
# with appropriate values for your project.
export GOOGLE_CLOUD_PROJECT=GOOGLE_CLOUD_PROJECT
export GOOGLE_CLOUD_LOCATION=global
export GOOGLE_GENAI_USE_VERTEXAI=True

import requests
from google import genai
from google.genai.types import (
    GenerateContentConfig,
    HarmBlockThreshold,
    HarmCategory,
    HttpOptions,
    Part,
    SafetySetting,
)
from PIL import Image, ImageColor, ImageDraw
from pydantic import BaseModel


# Helper class to represent a bounding box
class BoundingBox(BaseModel):
    """
    Represents a bounding box with its 2D coordinates and associated label.

    Attributes:
        box_2d (list[int]): A list of integers representing the 2D coordinates of the bounding box,
                            typically in the format [y_min, x_min, y_max, x_max].
        label (str): A string representing the label or class associated with the object within the bounding box.
    """

    box_2d: list[int]
    label: str


# Helper function to plot bounding boxes on an image
def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> None:
    """
    Plots bounding boxes on an image with labels, using PIL and normalized coordinates.

    The image is downloaded from ``image_uri``, each box is drawn as a coloured
    outline with its label, and the result is opened in the default viewer.

    Args:
        image_uri: The URI of the image file.
        bounding_boxes: A list of BoundingBox objects. Each box's coordinates are in
                        normalized [y_min, x_min, y_max, x_max] format.

    Raises:
        requests.HTTPError: If the image download returns an error status.
    """
    download = requests.get(image_uri, stream=True, timeout=10)
    # Fail loudly on 4xx/5xx instead of handing an HTML error page to PIL.
    download.raise_for_status()

    with Image.open(download.raw) as im:
        width, height = im.size
        draw = ImageDraw.Draw(im)

        colors = list(ImageColor.colormap.keys())

        for i, bbox in enumerate(bounding_boxes):
            # Scale normalized coordinates to image dimensions
            abs_y_min = int(bbox.box_2d[0] / 1000 * height)
            abs_x_min = int(bbox.box_2d[1] / 1000 * width)
            abs_y_max = int(bbox.box_2d[2] / 1000 * height)
            abs_x_max = int(bbox.box_2d[3] / 1000 * width)

            # Cycle through the named colours so neighbouring boxes differ.
            color = colors[i % len(colors)]

            # Draw the rectangle using the correct (x, y) pairs
            draw.rectangle(
                ((abs_x_min, abs_y_min), (abs_x_max, abs_y_max)),
                outline=color,
                width=4,
            )
            if bbox.label:
                # Position the text at the top-left corner of the box
                draw.text((abs_x_min + 8, abs_y_min + 6), bbox.label, fill=color)

        im.show()


client = genai.Client(http_options=HttpOptions(api_version="v1"))

config = GenerateContentConfig(
    system_instruction="""
    Return bounding boxes as an array with labels.
    Never return masks. Limit to 25 objects.
    If an object is present multiple times, give each object a unique label
    according to its distinct characteristics (colors, size, position, etc..).
    """,
    temperature=0.5,
    safety_settings=[
        SafetySetting(
            category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            threshold=HarmBlockThreshold.BLOCK_ONLY_HIGH,
        ),
    ],
    response_mime_type="application/json",
    response_schema=list[BoundingBox],
)

image_uri = "https://storage.googleapis.com/generativeai-downloads/images/socks.jpg"

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[
        Part.from_uri(
            file_uri=image_uri,
            mime_type="image/jpeg",
        ),
        "Output the positions of the socks with a face. Label according to position in the image.",
    ],
    config=config,
)
print(response.text)

# NOTE(review): `response.parsed` is assumed to be None/empty when the reply
# could not be parsed into list[BoundingBox]; skip plotting in that case
# rather than crash inside the helper. Confirm against the SDK reference.
if response.parsed:
    plot_bounding_boxes(image_uri, response.parsed)
else:
    print("No bounding boxes were returned; nothing to plot.")

# Example response:
# [
#     {"box_2d": [6, 246, 386, 526], "label": "top-left light blue sock with cat face"},
#     {"box_2d": [234, 649, 650, 863], "label": "top-right light blue sock with cat face"},
# ]

Go

Learn how to install or update the Gen AI SDK for Go.

To learn more, see the SDK reference documentation.

Set environment variables to use the Gen AI SDK with Vertex AI:

# Replace the `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` values
# with appropriate values for your project.
export GOOGLE_CLOUD_PROJECT=GOOGLE_CLOUD_PROJECT
export GOOGLE_CLOUD_LOCATION=global
export GOOGLE_GENAI_USE_VERTEXAI=True

import("context""encoding/json""fmt""image""image/color""image/draw""image/jpeg""io""net/http""google.golang.org/genai")//BoundingBoxrepresentsaboundingboxwithcoordinatesandlabel.typeBoundingBoxstruct{Box2D[]int`json:"box_2d"`Labelstring`json:"label"`}//plotBoundingBoxesdownloadstheimageandoverlaysboundingboxes.funcplotBoundingBoxes(imageURIstring,boundingBoxes[]BoundingBox)error{resp,err:=http.Get(imageURI)iferr!=nil{returnfmt.Errorf("failed to download image: %w",err)}deferresp.Body.Close()img,err:=jpeg.Decode(resp.Body)iferr!=nil{returnfmt.Errorf("failed to decode image: %w",err)}bounds:=img.Bounds()rgba:=image.NewRGBA(bounds)draw.Draw(rgba,bounds,img,bounds.Min,draw.Src)//Simpleredcolorforboundingboxesred:=color.RGBA{255,0,0,255}for_,bbox:=rangeboundingBoxes{//scalenormalizedcoordinates[01000]toabsolutepixelsyMin:=bbox.Box2D[0]*bounds.Dy()/1000xMin:=bbox.Box2D[1]*bounds.Dx()/1000yMax:=bbox.Box2D[2]*bounds.Dy()/1000xMax:=bbox.Box2D[3]*bounds.Dx()/1000//drawrectangleborderforx:=xMin;x <=xMax;x++{rgba.Set(x,yMin,red)rgba.Set(x,yMax,red)}fory:=yMin;y <=yMax;y++{rgba.Set(xMin,y,red)rgba.Set(xMax,y,red)}}returnnil}funcgenerateBoundingBoxesWithText(wio.Writer)error{ctx:=context.Background()client,err:=genai.NewClient(ctx, &genai.ClientConfig{HTTPOptions:genai.HTTPOptions{APIVersion:"v1"},})iferr!=nil{returnfmt.Errorf("failed to create genai client: %w",err)}imageURI:="https://storage.googleapis.com/generativeai-downloads/images/socks.jpg"//Schemadefinitionfor[]BoundingBoxschema:= &genai.Schema{Type:genai.TypeArray,Items: &genai.Schema{Type:genai.TypeObject,Properties:map[string]*genai.Schema{"box_2d":{Type:genai.TypeArray,Items: &genai.Schema{Type:genai.TypeInteger},},"label":{Type:genai.TypeString},},Required:[]string{"box_2d","label"},},}config:= &genai.GenerateContentConfig{SystemInstruction: &genai.Content{Parts:[]*genai.Part{{Text: "Return bounding boxes as an array with labels. Never return masks. 
Limit to 25 objects.",}},},Temperature:float32Ptr(0.5),ResponseMIMEType:"application/json",ResponseSchema:schema,SafetySettings:[]*genai.SafetySetting{{Category:genai.HarmCategoryDangerousContent,Threshold:genai.HarmBlockThresholdBlockOnlyHigh,},},}contents:=[]*genai.Content{{Role:genai.RoleUser,Parts:[]*genai.Part{{FileData: &genai.FileData{FileURI:imageURI,MIMEType:"image/jpeg",},},{Text:"Output the positions of the socks with a face. Label according to position in the image."},},},}resp,err:=client.Models.GenerateContent(ctx,"gemini-2.5-flash",contents,config)iferr!=nil{returnfmt.Errorf("failed to generate content: %w",err)}fmt.Fprintln(w,resp.Text())//Parseinto[]BoundingBoxvarboxes[]BoundingBoxiferr:=json.Unmarshal([]byte(resp.Text()), &boxes);err!=nil{returnfmt.Errorf("failed to parse bounding boxes: %w",err)}//Exampleresponse://Box:(962,113)-(2158,1631)Label:topleftsockwithface//Box:(2656,721)-(3953,2976)Label:toprightsockwithface//...returnplotBoundingBoxes(imageURI,boxes)}funcfloat32Ptr(vfloat32)*float32{return &v}

Node.js

Install

npm install @google/genai

To learn more, see the SDK reference documentation.

Set environment variables to use the Gen AI SDK with Vertex AI:

# Replace the `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` values
# with appropriate values for your project.
export GOOGLE_CLOUD_PROJECT=GOOGLE_CLOUD_PROJECT
export GOOGLE_CLOUD_LOCATION=global
export GOOGLE_GENAI_USE_VERTEXAI=True

const {GoogleGenAI} = require('@google/genai');
const {createCanvas, loadImage} = require('canvas');
const fetch = require('node-fetch');
const fs = require('fs');

const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || 'global';

/**
 * Downloads the resource at `uri` and returns its bytes as a base64 string.
 *
 * @param {string} uri URL of the image to download.
 * @returns {Promise<string>} Base64-encoded body of the response.
 * @throws {Error} If the server answers with a non-2xx status.
 */
async function fetchImageAsBase64(uri) {
  const response = await fetch(uri);
  // Do not base64-encode an error page and send it to the model as an image.
  if (!response.ok) {
    throw new Error(
      `Failed to download image (${response.status} ${response.statusText}): ${uri}`
    );
  }
  const buffer = await response.buffer();
  return buffer.toString('base64');
}

/**
 * Draws the given bounding boxes and labels on the image and saves the result
 * as `output.png` in the current working directory.
 *
 * @param {string} imageUri URL of the image to annotate.
 * @param {Array<{box_2d: number[], label: string}>} boundingBoxes Boxes in
 *     [y_min, x_min, y_max, x_max] format, normalized to 0-1000.
 * @returns {Promise<void>}
 */
async function plotBoundingBoxes(imageUri, boundingBoxes) {
  console.log('Creating bounding boxes');
  const image = await loadImage(imageUri);
  const canvas = createCanvas(image.width, image.height);
  const ctx = canvas.getContext('2d');

  ctx.drawImage(image, 0, 0);

  const colors = ['red', 'blue', 'green', 'orange'];

  boundingBoxes.forEach((bbox, i) => {
    const [yMin, xMin, yMax, xMax] = bbox.box_2d;

    // Scale normalized coordinates (0-1000) to pixel positions.
    const absYMin = Math.floor((yMin / 1000) * image.height);
    const absXMin = Math.floor((xMin / 1000) * image.width);
    const absYMax = Math.floor((yMax / 1000) * image.height);
    const absXMax = Math.floor((xMax / 1000) * image.width);

    // Cycle through the palette so neighbouring boxes differ.
    ctx.strokeStyle = colors[i % colors.length];
    ctx.lineWidth = 4;
    ctx.strokeRect(absXMin, absYMin, absXMax - absXMin, absYMax - absYMin);

    ctx.fillStyle = colors[i % colors.length];
    ctx.font = '20px Arial';
    ctx.fillText(bbox.label, absXMin + 8, absYMin + 20);
  });

  fs.writeFileSync('output.png', canvas.toBuffer('image/png'));
  console.log('Saved output to file: output.png');
}

/**
 * Asks the model for bounding boxes of the socks with a face in a sample
 * image, logs them, plots them to `output.png`, and returns them.
 *
 * @param {string} [projectId] Google Cloud project ID; defaults to the
 *     GOOGLE_CLOUD_PROJECT environment variable.
 * @param {string} [location] Google Cloud location; defaults to the
 *     GOOGLE_CLOUD_LOCATION environment variable or 'global'.
 * @returns {Promise<Array<{box_2d: number[], label: string}>>} Parsed boxes.
 * @throws {Error} If the image cannot be downloaded or the response carries
 *     no text part to parse.
 */
async function createBoundingBox(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  const systemInstruction = `
    Return bounding boxes as an array with labels.
    Never return masks. Limit to 25 objects.
    If an object is present multiple times, give each object a unique label
    according to its distinct characteristics (colors, size, position, etc).
  `;

  const safetySettings = [
    {
      category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
      threshold: 'BLOCK_ONLY_HIGH',
    },
  ];

  const imageUri =
    'https://storage.googleapis.com/generativeai-downloads/images/socks.jpg';
  const base64Image = await fetchImageAsBase64(imageUri);

  const boundingBoxSchema = {
    type: 'ARRAY',
    description: 'List of bounding boxes for detected objects',
    items: {
      type: 'OBJECT',
      title: 'BoundingBox',
      description: 'Represents a bounding box with coordinates and label',
      properties: {
        box_2d: {
          type: 'ARRAY',
          description:
            'Bounding box coordinates in format [y_min, x_min, y_max, x_max]',
          items: {
            type: 'INTEGER',
            format: 'int32',
          },
          minItems: 4,
          maxItems: 4,
        },
        label: {
          type: 'STRING',
          description: 'Label describing the object within the bounding box',
        },
      },
      required: ['box_2d', 'label'],
    },
  };

  const response = await client.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: [
      {
        role: 'user',
        parts: [
          {
            text: 'Output the positions of the socks with a face. Label according to position in the image',
          },
          {
            inlineData: {
              data: base64Image,
              mimeType: 'image/jpeg',
            },
          },
        ],
      },
    ],
    config: {
      systemInstruction: systemInstruction,
      safetySettings: safetySettings,
      responseMimeType: 'application/json',
      temperature: 0.5,
      responseSchema: boundingBoxSchema,
    },
  });

  // Walk the response defensively so a missing candidate or part yields a
  // clear error rather than a TypeError.
  const firstCandidate = response.candidates && response.candidates[0];
  const firstPart =
    firstCandidate &&
    firstCandidate.content &&
    firstCandidate.content.parts &&
    firstCandidate.content.parts[0];
  if (!firstPart || typeof firstPart.text !== 'string') {
    throw new Error('Model response did not contain any text to parse');
  }

  const boundingBoxes = JSON.parse(firstPart.text);

  console.log('Bounding boxes:', boundingBoxes);

  await plotBoundingBoxes(imageUri, boundingBoxes);
  return boundingBoxes;
}

Java

Learn how to install or update the Gen AI SDK for Java.

To learn more, see the SDK reference documentation.

Set environment variables to use the Gen AI SDK with Vertex AI:

# Replace the `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` values
# with appropriate values for your project.
export GOOGLE_CLOUD_PROJECT=GOOGLE_CLOUD_PROJECT
export GOOGLE_CLOUD_LOCATION=global
export GOOGLE_GENAI_USE_VERTEXAI=True

importstaticcom.google.genai.types.Type.Known.ARRAY;importstaticcom.google.genai.types.Type.Known.INTEGER;importstaticcom.google.genai.types.Type.Known.OBJECT;importstaticcom.google.genai.types.Type.Known.STRING;importcom.google.genai.Client;importcom.google.genai.types.Content;importcom.google.genai.types.GenerateContentConfig;importcom.google.genai.types.GenerateContentResponse;importcom.google.genai.types.HarmBlockThreshold;importcom.google.genai.types.HarmCategory;importcom.google.genai.types.HttpOptions;importcom.google.genai.types.Part;importcom.google.genai.types.SafetySetting;importcom.google.genai.types.Schema;importcom.google.gson.Gson;importcom.google.gson.reflect.TypeToken;importjava.awt.BasicStroke;importjava.awt.Color;importjava.awt.Font;importjava.awt.Graphics2D;importjava.awt.image.BufferedImage;importjava.io.File;importjava.io.IOException;importjava.io.InputStream;importjava.lang.reflect.Type;importjava.net.URL;importjava.util.Arrays;importjava.util.List;importjava.util.Map;importjavax.imageio.ImageIO;publicclassBoundingBoxWithTxtImg{publicstaticclassBoundingBox{List<Integer>box2d;Stringlabel;publicList<Integer>getBox2d(){returnbox2d;}publicStringgetLabel(){returnlabel;}}//Plotboundingboxesonanimageandsaveittoafile.publicstaticvoidplotBoundingBoxes(StringimageUrl,List<BoundingBox>boundingBoxes)throwsIOException{URLurl=newURL(imageUrl);BufferedImageimage=ImageIO.read(url);intwidth=image.getWidth();intheight=image.getHeight();Graphics2Dgraphics2D=image.createGraphics();graphics2D.setStroke(newBasicStroke(4));graphics2D.setFont(newFont("Arial",Font.PLAIN,18));//Definealistofcolorstocyclethrough.List<Color>colors=Arrays.asList(Color.RED,Color.GREEN,Color.BLUE,Color.YELLOW,Color.CYAN,Color.MAGENTA,Color.ORANGE);for(inti=0;i 
<boundingBoxes.size();i++){BoundingBoxboundingBox=boundingBoxes.get((i));List<Integer>box2d=boundingBox.getBox2d();//Scalenormalizedcoordinates(0-1000)toimagedimensions.inttopY=(int)(box2d.get(0)/1000.0*height);intleftX=(int)(box2d.get(1)/1000.0*width);intbottomY=(int)(box2d.get(2)/1000.0*height);intrightX=(int)(box2d.get(3)/1000.0*width);Colorcolor=colors.get(i%colors.size());graphics2D.setColor(color);//Drawtherectangle.graphics2D.drawRect(leftX,topY,rightX-leftX,bottomY-topY);//Drawthelabeltext.if(boundingBox.getLabel()!=null &&!boundingBox.getLabel().isEmpty()){graphics2D.drawString(boundingBox.getLabel(),leftX+8,topY+20);}}graphics2D.dispose();//Writetheimagetoafile.StringoutputFilePath="resources/output/bounding-boxes-socks.jpg";ImageIO.write(image,"jpg",newFile(outputFilePath));System.out.println("Successfully saved image to: "+outputFilePath);}publicstaticvoidmain(String[]args)throwsIOException{//TODO(developer):Replacethesevariablesbeforerunningthesample.Stringmodel="gemini-2.5-flash";generateContent(model);}//ShowshowtosendamultimodalprompttothemodelandgetastructuredJSONresponse//containingboundingboxdata,andthenusesthatdatatodrawtheboxesontheoriginal//image,savingittoanewfile.publicstaticStringgenerateContent(StringmodelId)throwsIOException{//ClientInitialization.Oncecreated,itcanbereusedformultiplerequests.try(Clientclient=Client.builder().location("global").httpOptions(HttpOptions.builder().apiVersion("v1").build()).vertexAI(true).build()){StringsystemInstruction="Return bounding boxes as an array with labels.\n"+" Never return masks. 
Limit to 25 objects.\n"+" If an object is present multiple times, give each object a unique label\n"+" according to its distinct characteristics (colors, size, position, etc..).";//Definetheresponseschema.SchemaresponseSchema=Schema.builder().type(ARRAY).items(Schema.builder().type(OBJECT).properties(Map.of("box2d",Schema.builder().type(ARRAY).items(Schema.builder().type(INTEGER).build()).build(),"label",Schema.builder().type(STRING).build())).required("box2d","label").build()).build();//DefinetheGenerateContentConfigandsettheresponseschema.GenerateContentConfigcontentConfig=GenerateContentConfig.builder().systemInstruction(Content.fromParts(Part.fromText(systemInstruction))).temperature(0.5F).safetySettings(SafetySetting.builder().category(HarmCategory.Known.HARM_CATEGORY_DANGEROUS_CONTENT).threshold(HarmBlockThreshold.Known.BLOCK_ONLY_HIGH).build()).responseMimeType("application/json").responseSchema(responseSchema).build();StringimageUri="https://storage.googleapis.com/generativeai-downloads/images/socks.jpg";URLurl=newURL(imageUri);try(InputStreaminputStream=url.openStream()){byte[]imageBytes=inputStream.readAllBytes();Stringprompt="Output the positions of the socks with a face."+" Label according to position in the image";GenerateContentResponseresponse=client.models.generateContent(modelId,Content.fromParts(Part.fromBytes(imageBytes,"image/jpeg"),Part.fromText(prompt)),contentConfig);System.out.println(response.text());//Exampleresponse://[//{"box2d":[24,24,521,526],"label":"top left light blue cat face sock"},//{"box2d":[238,627,649,863],"label":"top right light blue cat face sock"}//]//UseGsontoparsetheJSONstringintoalistofBoundingBoxobjects.Gsongson=newGson();TypeboundingBoxListType=newTypeToken<List<BoundingBox>>(){}.getType();List<BoundingBox>boundingBoxes=gson.fromJson(response.text(),boundingBoxListType);//Plottheboundingboxesontheimage.if(boundingBoxes!=null){plotBoundingBoxes(imageUri,boundingBoxes);}returnresponse.text();}}}}

Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.

Last updated 2026-02-19 UTC.