webcoderz/pydantic-aiPublic

forked frompydantic/pydantic-ai

NotificationsYou must be signed in to change notification settings
Fork0
Star0

Commit925a377

leandrodamascena

and

Kludex

authored

feat(bedrock): add VideoUrl input for BedrockConverseModel (pydantic#1435)

Co-authored-by: Marcelo Trylesinski <marcelotryle@gmail.com>

1 parent2f8538d commit925a377Copy full SHA for 925a377

File tree

16 files changed

+903

-25

lines changed

docs
- input.md
pydantic_ai_slim
- pydantic_ai
  - __init__.py
  - messages.py
  - models
- pyproject.toml
tests
- assets
  - small_video.mp4
- conftest.py
- models
  - cassettes/test_bedrock
    - test_video_as_binary_content_input.yaml
    - test_video_url_input.yaml
  - test_bedrock.py
  - test_mistral.py
  - test_model_test.py
- test_messages.py

16 files changed

+903

-25

lines changed

`‎docs/input.md`

Lines changed: 9 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`		`-#Image, Audio & Document Input`
	`1`	`+#Image, Audio, Video & Document Input`
`2`	`2`
`3`		`-Some LLMs are now capable of understandingbothaudio, image and document content.`
	`3`	`+Some LLMs are now capable of understanding audio, video, image and document content.`
`4`	`4`
`5`	`5`	`##Image Input`
`6`	`6`
`@@ -52,6 +52,13 @@ print(result.data)`
`52`	`52`
`53`	`53`	You can provide audio input using either[`AudioUrl`][pydantic_ai.AudioUrl] or[`BinaryContent`][pydantic_ai.BinaryContent]. The process is analogous to the examples above.
`54`	`54`
	`55`	`+##Video Input`
	`56`	`+`
	`57`	`+!!! info`
	`58`	`+ Some models do not support video input. Please check the model's documentation to confirm whether it supports video input.`
	`59`	`+`
	`60`	+You can provide video input using either[`VideoUrl`][pydantic_ai.VideoUrl] or[`BinaryContent`][pydantic_ai.BinaryContent]. The process is analogous to the examples above.
	`61`	`+`
`55`	`62`	`##Document Input`
`56`	`63`
`57`	`64`	`!!! info`

`‎pydantic_ai_slim/pydantic_ai/init.py`

Lines changed: 2 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@`
`10`	`10`	`UsageLimitExceeded,`
`11`	`11`	`UserError,`
`12`	`12`	`)`
`13`		`-from .messagesimportAudioUrl,BinaryContent,DocumentUrl,ImageUrl`
	`13`	`+from .messagesimportAudioUrl,BinaryContent,DocumentUrl,ImageUrl,VideoUrl`
`14`	`14`	`from .toolsimportRunContext,Tool`
`15`	`15`
`16`	`16`	`__all__= (`
`@@ -33,6 +33,7 @@`
`33`	`33`	`# messages`
`34`	`34`	`'ImageUrl',`
`35`	`35`	`'AudioUrl',`
	`36`	`+'VideoUrl',`
`36`	`37`	`'DocumentUrl',`
`37`	`38`	`'BinaryContent',`
`38`	`39`	`# tools`

`‎pydantic_ai_slim/pydantic_ai/messages.py`

Lines changed: 98 additions & 18 deletions

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,34 @@`
`15`	`15`	`from ._utilsimportgenerate_tool_call_idas_generate_tool_call_id,now_utcas_now_utc`
`16`	`16`	`from .exceptionsimportUnexpectedModelBehavior`
`17`	`17`
	`18`	`+AudioMediaType:TypeAlias=Literal['audio/wav','audio/mpeg']`
	`19`	`+ImageMediaType:TypeAlias=Literal['image/jpeg','image/png','image/gif','image/webp']`
	`20`	`+DocumentMediaType:TypeAlias=Literal[`
	`21`	`+'application/pdf',`
	`22`	`+'text/plain',`
	`23`	`+'text/csv',`
	`24`	`+'application/vnd.openxmlformats-officedocument.wordprocessingml.document',`
	`25`	`+'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',`
	`26`	`+'text/html',`
	`27`	`+'text/markdown',`
	`28`	`+'application/vnd.ms-excel',`
	`29`	`+]`
	`30`	`+VideoMediaType:TypeAlias=Literal[`
	`31`	`+'video/x-matroska',`
	`32`	`+'video/quicktime',`
	`33`	`+'video/mp4',`
	`34`	`+'video/webm',`
	`35`	`+'video/x-flv',`
	`36`	`+'video/mpeg',`
	`37`	`+'video/x-ms-wmv',`
	`38`	`+'video/3gpp',`
	`39`	`+]`
	`40`	`+`
	`41`	`+AudioFormat:TypeAlias=Literal['wav','mp3']`
	`42`	`+ImageFormat:TypeAlias=Literal['jpeg','png','gif','webp']`
	`43`	`+DocumentFormat:TypeAlias=Literal['csv','doc','docx','html','md','pdf','txt','xls','xlsx']`
	`44`	`+VideoFormat:TypeAlias=Literal['mkv','mov','mp4','webm','flv','mpeg','mpg','wmv','three_gp']`
	`45`	`+`
`18`	`46`
`19`	`47`	`@dataclass`
`20`	`48`	`classSystemPromptPart:`
`@@ -42,6 +70,47 @@ def otel_event(self) -> Event:`
`42`	`70`	`returnEvent('gen_ai.system.message',body={'content':self.content,'role':'system'})`
`43`	`71`
`44`	`72`
	`73`	`+@dataclass`
	`74`	`+classVideoUrl:`
	`75`	`+"""A URL to a video."""`
	`76`	`+`
	`77`	`+url:str`
	`78`	`+"""The URL of the video."""`
	`79`	`+`
	`80`	`+kind:Literal['video-url']='video-url'`
	`81`	`+"""Type identifier, this is available on all parts as a discriminator."""`
	`82`	`+`
	`83`	`+@property`
	`84`	`+defmedia_type(self)->VideoMediaType:# pragma: no cover`
	`85`	`+"""Return the media type of the video, based on the url."""`
	`86`	`+ifself.url.endswith('.mkv'):`
	`87`	`+return'video/x-matroska'`
	`88`	`+elifself.url.endswith('.mov'):`
	`89`	`+return'video/quicktime'`
	`90`	`+elifself.url.endswith('.mp4'):`
	`91`	`+return'video/mp4'`
	`92`	`+elifself.url.endswith('.webm'):`
	`93`	`+return'video/webm'`
	`94`	`+elifself.url.endswith('.flv'):`
	`95`	`+return'video/x-flv'`
	`96`	`+elifself.url.endswith(('.mpeg','.mpg')):`
	`97`	`+return'video/mpeg'`
	`98`	`+elifself.url.endswith('.wmv'):`
	`99`	`+return'video/x-ms-wmv'`
	`100`	`+elifself.url.endswith('.three_gp'):`
	`101`	`+return'video/3gpp'`
	`102`	`+else:`
	`103`	`+raiseValueError(f'Unknown video file extension:{self.url}')`
	`104`	`+`
	`105`	`+@property`
	`106`	`+defformat(self)->VideoFormat:`
	`107`	`+"""The file format of the video.`
	`108`	`+`
	`109`	`+ The choice of supported formats was based on the Bedrock Converse API. Other APIs don't require a format.`
	`110`	`+ """`
	`111`	`+return_video_format(self.media_type)`
	`112`	`+`
	`113`	`+`
`45`	`114`	`@dataclass`
`46`	`115`	`classAudioUrl:`
`47`	`116`	`"""A URL to an audio file."""`
`@@ -123,23 +192,6 @@ def format(self) -> DocumentFormat:`
`123`	`192`	`return_document_format(self.media_type)`
`124`	`193`
`125`	`194`
`126`		`-AudioMediaType:TypeAlias=Literal['audio/wav','audio/mpeg']`
`127`		`-ImageMediaType:TypeAlias=Literal['image/jpeg','image/png','image/gif','image/webp']`
`128`		`-DocumentMediaType:TypeAlias=Literal[`
`129`		`-'application/pdf',`
`130`		`-'text/plain',`
`131`		`-'text/csv',`
`132`		`-'application/vnd.openxmlformats-officedocument.wordprocessingml.document',`
`133`		`-'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',`
`134`		`-'text/html',`
`135`		`-'text/markdown',`
`136`		`-'application/vnd.ms-excel',`
`137`		`-]`
`138`		`-AudioFormat:TypeAlias=Literal['wav','mp3']`
`139`		`-ImageFormat:TypeAlias=Literal['jpeg','png','gif','webp']`
`140`		`-DocumentFormat:TypeAlias=Literal['csv','doc','docx','html','md','pdf','txt','xls','xlsx']`
`141`		`-`
`142`		`-`
`143`	`195`	`@dataclass`
`144`	`196`	`classBinaryContent:`
`145`	`197`	`"""Binary content, e.g. an audio or image file."""`
`@@ -163,6 +215,11 @@ def is_image(self) -> bool:`
`163`	`215`	"""Return `True` if the media type is an image type."""
`164`	`216`	`returnself.media_type.startswith('image/')`
`165`	`217`
	`218`	`+@property`
	`219`	`+defis_video(self)->bool:`
	`220`	+"""Return `True` if the media type is a video type."""
	`221`	`+returnself.media_type.startswith('video/')`
	`222`	`+`
`166`	`223`	`@property`
`167`	`224`	`defis_document(self)->bool:`
`168`	`225`	"""Return `True` if the media type is a document type."""
`@@ -189,10 +246,12 @@ def format(self) -> str:`
`189`	`246`	`return_image_format(self.media_type)`
`190`	`247`	`elifself.is_document:`
`191`	`248`	`return_document_format(self.media_type)`
	`249`	`+elifself.is_video:`
	`250`	`+return_video_format(self.media_type)`
`192`	`251`	`raiseValueError(f'Unknown media type:{self.media_type}')`
`193`	`252`
`194`	`253`
`195`		`-UserContent:TypeAlias='str \| ImageUrl \| AudioUrl \| DocumentUrl \| BinaryContent'`
	`254`	`+UserContent:TypeAlias='str \| ImageUrl \| AudioUrl \| DocumentUrl \|VideoUrl \|BinaryContent'`
`196`	`255`
`197`	`256`
`198`	`257`	`def_document_format(media_type:str)->DocumentFormat:`
`@@ -229,6 +288,27 @@ def _image_format(media_type: str) -> ImageFormat:`
`229`	`288`	`raiseValueError(f'Unknown image media type:{media_type}')`
`230`	`289`
`231`	`290`
	`291`	`+def_video_format(media_type:str)->VideoFormat:`
	`292`	`+ifmedia_type=='video/x-matroska':`
	`293`	`+return'mkv'`
	`294`	`+elifmedia_type=='video/quicktime':`
	`295`	`+return'mov'`
	`296`	`+elifmedia_type=='video/mp4':`
	`297`	`+return'mp4'`
	`298`	`+elifmedia_type=='video/webm':`
	`299`	`+return'webm'`
	`300`	`+elifmedia_type=='video/x-flv':`
	`301`	`+return'flv'`
	`302`	`+elifmedia_type=='video/mpeg':`
	`303`	`+return'mpeg'`
	`304`	`+elifmedia_type=='video/x-ms-wmv':`
	`305`	`+return'wmv'`
	`306`	`+elifmedia_type=='video/3gpp':`
	`307`	`+return'three_gp'`
	`308`	`+else:# pragma: no cover`
	`309`	`+raiseValueError(f'Unknown video media type:{media_type}')`
	`310`	`+`
	`311`	`+`
`232`	`312`	`@dataclass`
`233`	`313`	`classUserPromptPart:`
`234`	`314`	`"""A user prompt, generally written by the end user.`

`‎pydantic_ai_slim/pydantic_ai/models/bedrock.py`

Lines changed: 15 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,7 @@`
`29`	`29`	`ToolCallPart,`
`30`	`30`	`ToolReturnPart,`
`31`	`31`	`UserPromptPart,`
	`32`	`+VideoUrl,`
`32`	`33`	`)`
`33`	`34`	`frompydantic_ai.modelsimportModel,ModelRequestParameters,StreamedResponse,cached_async_http_client`
`34`	`35`	`frompydantic_ai.providersimportProvider,infer_provider`
`@@ -52,6 +53,7 @@`
`52`	`53`	`SystemContentBlockTypeDef,`
`53`	`54`	`ToolChoiceTypeDef,`
`54`	`55`	`ToolTypeDef,`
	`56`	`+VideoBlockTypeDef,`
`55`	`57`	`)`
`56`	`58`
`57`	`59`
`@@ -381,21 +383,33 @@ async def _map_user_prompt(part: UserPromptPart) -> list[MessageUnionTypeDef]:`
`381`	`383`	`elifitem.is_image:`
`382`	`384`	`assertformatin ('jpeg','png','gif','webp')`
`383`	`385`	`content.append({'image': {'format':format,'source': {'bytes':item.data}}})`
	`386`	`+elifitem.is_video:`
	`387`	`+assertformatin ('mkv','mov','mp4','webm','flv','mpeg','mpg','wmv','three_gp')`
	`388`	`+content.append({'video': {'format':format,'source': {'bytes':item.data}}})`
`384`	`389`	`else:`
`385`	`390`	`raiseNotImplementedError('Binary content is not supported yet.')`
`386`		`-elifisinstance(item, (ImageUrl,DocumentUrl)):`
	`391`	`+elifisinstance(item, (ImageUrl,DocumentUrl,VideoUrl)):`
`387`	`392`	`response=awaitcached_async_http_client().get(item.url)`
`388`	`393`	`response.raise_for_status()`
`389`	`394`	`ifitem.kind=='image-url':`
`390`	`395`	`format=item.media_type.split('/')[1]`
`391`	`396`	`assertformatin ('jpeg','png','gif','webp'),f'Unsupported image format:{format}'`
`392`	`397`	`image:ImageBlockTypeDef= {'format':format,'source': {'bytes':response.content}}`
`393`	`398`	`content.append({'image':image})`
	`399`	`+`
`394`	`400`	`elifitem.kind=='document-url':`
`395`	`401`	`document_count+=1`
`396`	`402`	`name=f'Document{document_count}'`
`397`	`403`	`data=response.content`
`398`	`404`	`content.append({'document': {'name':name,'format':item.format,'source': {'bytes':data}}})`
	`405`	`+`
	`406`	`+elifitem.kind=='video-url':`
	`407`	`+format=item.media_type.split('/')[1]`
	`408`	`+assertformatin ('mkv','mov','mp4','webm','flv','mpeg','mpg','wmv','three_gp'), (`
	`409`	`+f'Unsupported video format:{format}'`
	`410`	`+ )`
	`411`	`+video:VideoBlockTypeDef= {'format':format,'source': {'bytes':response.content}}`
	`412`	`+content.append({'video':video})`
`399`	`413`	`elifisinstance(item,AudioUrl):# pragma: no cover`
`400`	`414`	`raiseNotImplementedError('Audio is not supported yet.')`
`401`	`415`	`else:`

`‎pydantic_ai_slim/pydantic_ai/models/gemini.py`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -34,6 +34,7 @@`
`34`	`34`	`ToolCallPart,`
`35`	`35`	`ToolReturnPart,`
`36`	`36`	`UserPromptPart,`
	`37`	`+VideoUrl,`
`37`	`38`	`)`
`38`	`39`	`from ..settingsimportModelSettings`
`39`	`40`	`from ..toolsimportToolDefinition`
`@@ -335,6 +336,8 @@ async def _map_user_prompt(part: UserPromptPart) -> list[_GeminiPartUnion]:`
`335`	`336`	`inline_data={'data':base64.b64encode(response.content).decode('utf-8'),'mime_type':mime_type}`
`336`	`337`	`)`
`337`	`338`	`content.append(inline_data)`
	`339`	`+elifisinstance(item,VideoUrl):# pragma: no cover`
	`340`	`+raiseNotImplementedError('VideoUrl is not supported for Gemini.')`
`338`	`341`	`else:`
`339`	`342`	`assert_never(item)`
`340`	`343`	`returncontent`

`‎pydantic_ai_slim/pydantic_ai/models/mistral.py`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,7 @@`
`29`	`29`	`ToolCallPart,`
`30`	`30`	`ToolReturnPart,`
`31`	`31`	`UserPromptPart,`
	`32`	`+VideoUrl,`
`32`	`33`	`)`
`33`	`34`	`from ..providersimportProvider,infer_provider`
`34`	`35`	`from ..resultimportUsage`
`@@ -503,6 +504,8 @@ def _map_user_prompt(part: UserPromptPart) -> MistralUserMessage:`
`503`	`504`	`raiseRuntimeError('Only image binary content is supported for Mistral.')`
`504`	`505`	`elifisinstance(item,DocumentUrl):`
`505`	`506`	`raiseRuntimeError('DocumentUrl is not supported in Mistral.')`
	`507`	`+elifisinstance(item,VideoUrl):`
	`508`	`+raiseRuntimeError('VideoUrl is not supported in Mistral.')`
`506`	`509`	`else:# pragma: no cover`
`507`	`510`	`raiseRuntimeError(f'Unsupported content type:{type(item)}')`
`508`	`511`	`returnMistralUserMessage(content=content)`

`‎pydantic_ai_slim/pydantic_ai/models/openai.py`

Lines changed: 5 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,7 @@`
`30`	`30`	`ToolCallPart,`
`31`	`31`	`ToolReturnPart,`
`32`	`32`	`UserPromptPart,`
	`33`	`+VideoUrl,`
`33`	`34`	`)`
`34`	`35`	`from ..settingsimportModelSettings`
`35`	`36`	`from ..toolsimportToolDefinition`
`@@ -448,6 +449,8 @@ async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessa`
`448`	`449`	`# file_data = f'data:{media_type};base64,{base64_encoded}'`
`449`	`450`	`# file = File(file={'file_data': file_data, 'file_name': item.url, 'file_id': item.url}, type='file')`
`450`	`451`	`# content.append(file)`
	`452`	`+elifisinstance(item,VideoUrl):# pragma: no cover`
	`453`	`+raiseNotImplementedError('VideoUrl is not supported for OpenAI')`
`451`	`454`	`else:`
`452`	`455`	`assert_never(item)`
`453`	`456`	`returnchat.ChatCompletionUserMessageParam(role='user',content=content)`
`@@ -765,6 +768,8 @@ async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessagePa`
`765`	`768`	`filename=f'filename.{item.format}',`
`766`	`769`	`)`
`767`	`770`	`)`
	`771`	`+elifisinstance(item,VideoUrl):# pragma: no cover`
	`772`	`+raiseNotImplementedError('VideoUrl is not supported for OpenAI.')`
`768`	`773`	`else:`
`769`	`774`	`assert_never(item)`
`770`	`775`	`returnresponses.EasyInputMessageParam(role='user',content=content)`

`‎pydantic_ai_slim/pyproject.toml`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,7 @@ vertexai = ["google-auth>=2.36.0", "requests>=2.32.3"]`
`62`	`62`	`anthropic = ["anthropic>=0.49.0"]`
`63`	`63`	`groq = ["groq>=0.15.0"]`
`64`	`64`	`mistral = ["mistralai>=1.2.5"]`
`65`		`-bedrock = ["boto3>=1.34.116"]`
	`65`	`+bedrock = ["boto3>=1.35.74"]`
`66`	`66`	`# Tools`
`67`	`67`	`duckduckgo = ["duckduckgo-search>=7.0.0"]`
`68`	`68`	`tavily = ["tavily-python>=0.5.0"]`

`‎tests/assets/small_video.mp4`

612 KB

Binary file not shown.

`‎tests/conftest.py`

Lines changed: 6 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -232,6 +232,12 @@ def image_content(assets_path: Path) -> BinaryContent:`
`232`	`232`	`returnBinaryContent(data=image_bytes,media_type='image/png')`
`233`	`233`
`234`	`234`
	`235`	`+@pytest.fixture(scope='session')`
	`236`	`+defvideo_content(assets_path:Path)->BinaryContent:`
	`237`	`+video_bytes=assets_path.joinpath('small_video.mp4').read_bytes()`
	`238`	`+returnBinaryContent(data=video_bytes,media_type='video/mp4')`
	`239`	`+`
	`240`	`+`
`235`	`241`	`@pytest.fixture(scope='session')`
`236`	`242`	`defdocument_content(assets_path:Path)->BinaryContent:`
`237`	`243`	`pdf_bytes=assets_path.joinpath('dummy.pdf').read_bytes()`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit925a377

File tree

16 files changed

16 files changed

`‎docs/input.md`

`‎pydantic_ai_slim/pydantic_ai/init.py`

`‎pydantic_ai_slim/pydantic_ai/messages.py`

`‎pydantic_ai_slim/pydantic_ai/models/bedrock.py`

`‎pydantic_ai_slim/pydantic_ai/models/gemini.py`

`‎pydantic_ai_slim/pydantic_ai/models/mistral.py`

`‎pydantic_ai_slim/pydantic_ai/models/openai.py`

`‎pydantic_ai_slim/pyproject.toml`

`‎tests/assets/small_video.mp4`

`‎tests/conftest.py`

0 commit comments