Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit925a377

Browse files
feat(bedrock): add VideoUrl input for BedrockConverseModel (pydantic#1435)
Co-authored-by: Marcelo Trylesinski <marcelotryle@gmail.com>
1 parent2f8538d commit925a377

File tree

16 files changed

+903
-25
lines changed

16 files changed

+903
-25
lines changed

‎docs/input.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
#Image, Audio & Document Input
1+
# Image, Audio, Video & Document Input
22

3-
Some LLMs are now capable of understanding both audio, image and document content.
3+
Some LLMs are now capable of understanding audio, video, image and document content.
44

55
##Image Input
66

@@ -52,6 +52,13 @@ print(result.data)
5252

5353
You can provide audio input using either [`AudioUrl`][pydantic_ai.AudioUrl] or [`BinaryContent`][pydantic_ai.BinaryContent]. The process is analogous to the examples above.
5454

55+
## Video Input
56+
57+
!!! info
58+
Some models do not support video input. Please check the model's documentation to confirm whether it supports video input.
59+
60+
You can provide video input using either [`VideoUrl`][pydantic_ai.VideoUrl] or [`BinaryContent`][pydantic_ai.BinaryContent]. The process is analogous to the examples above.
61+
5562
##Document Input
5663

5764
!!! info

‎pydantic_ai_slim/pydantic_ai/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
UsageLimitExceeded,
1111
UserError,
1212
)
13-
from .messagesimportAudioUrl,BinaryContent,DocumentUrl,ImageUrl
13+
from .messagesimportAudioUrl,BinaryContent,DocumentUrl,ImageUrl,VideoUrl
1414
from .toolsimportRunContext,Tool
1515

1616
__all__= (
@@ -33,6 +33,7 @@
3333
# messages
3434
'ImageUrl',
3535
'AudioUrl',
36+
'VideoUrl',
3637
'DocumentUrl',
3738
'BinaryContent',
3839
# tools

‎pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 98 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,34 @@
1515
from ._utilsimportgenerate_tool_call_idas_generate_tool_call_id,now_utcas_now_utc
1616
from .exceptionsimportUnexpectedModelBehavior
1717

18+
AudioMediaType:TypeAlias=Literal['audio/wav','audio/mpeg']
19+
ImageMediaType:TypeAlias=Literal['image/jpeg','image/png','image/gif','image/webp']
20+
DocumentMediaType:TypeAlias=Literal[
21+
'application/pdf',
22+
'text/plain',
23+
'text/csv',
24+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
25+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
26+
'text/html',
27+
'text/markdown',
28+
'application/vnd.ms-excel',
29+
]
30+
VideoMediaType:TypeAlias=Literal[
31+
'video/x-matroska',
32+
'video/quicktime',
33+
'video/mp4',
34+
'video/webm',
35+
'video/x-flv',
36+
'video/mpeg',
37+
'video/x-ms-wmv',
38+
'video/3gpp',
39+
]
40+
41+
AudioFormat:TypeAlias=Literal['wav','mp3']
42+
ImageFormat:TypeAlias=Literal['jpeg','png','gif','webp']
43+
DocumentFormat:TypeAlias=Literal['csv','doc','docx','html','md','pdf','txt','xls','xlsx']
44+
VideoFormat:TypeAlias=Literal['mkv','mov','mp4','webm','flv','mpeg','mpg','wmv','three_gp']
45+
1846

1947
@dataclass
2048
classSystemPromptPart:
@@ -42,6 +70,47 @@ def otel_event(self) -> Event:
4270
returnEvent('gen_ai.system.message',body={'content':self.content,'role':'system'})
4371

4472

73+
@dataclass
74+
classVideoUrl:
75+
"""A URL to a video."""
76+
77+
url:str
78+
"""The URL of the video."""
79+
80+
kind:Literal['video-url']='video-url'
81+
"""Type identifier, this is available on all parts as a discriminator."""
82+
83+
@property
84+
defmedia_type(self)->VideoMediaType:# pragma: no cover
85+
"""Return the media type of the video, based on the url."""
86+
ifself.url.endswith('.mkv'):
87+
return'video/x-matroska'
88+
elifself.url.endswith('.mov'):
89+
return'video/quicktime'
90+
elifself.url.endswith('.mp4'):
91+
return'video/mp4'
92+
elifself.url.endswith('.webm'):
93+
return'video/webm'
94+
elifself.url.endswith('.flv'):
95+
return'video/x-flv'
96+
elifself.url.endswith(('.mpeg','.mpg')):
97+
return'video/mpeg'
98+
elifself.url.endswith('.wmv'):
99+
return'video/x-ms-wmv'
100+
elifself.url.endswith('.three_gp'):
101+
return'video/3gpp'
102+
else:
103+
raiseValueError(f'Unknown video file extension:{self.url}')
104+
105+
@property
106+
defformat(self)->VideoFormat:
107+
"""The file format of the video.
108+
109+
The choice of supported formats was based on the Bedrock Converse API. Other APIs don't require a format.
110+
"""
111+
return_video_format(self.media_type)
112+
113+
45114
@dataclass
46115
classAudioUrl:
47116
"""A URL to an audio file."""
@@ -123,23 +192,6 @@ def format(self) -> DocumentFormat:
123192
return_document_format(self.media_type)
124193

125194

126-
AudioMediaType:TypeAlias=Literal['audio/wav','audio/mpeg']
127-
ImageMediaType:TypeAlias=Literal['image/jpeg','image/png','image/gif','image/webp']
128-
DocumentMediaType:TypeAlias=Literal[
129-
'application/pdf',
130-
'text/plain',
131-
'text/csv',
132-
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
133-
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
134-
'text/html',
135-
'text/markdown',
136-
'application/vnd.ms-excel',
137-
]
138-
AudioFormat:TypeAlias=Literal['wav','mp3']
139-
ImageFormat:TypeAlias=Literal['jpeg','png','gif','webp']
140-
DocumentFormat:TypeAlias=Literal['csv','doc','docx','html','md','pdf','txt','xls','xlsx']
141-
142-
143195
@dataclass
144196
classBinaryContent:
145197
"""Binary content, e.g. an audio or image file."""
@@ -163,6 +215,11 @@ def is_image(self) -> bool:
163215
"""Return `True` if the media type is an image type."""
164216
returnself.media_type.startswith('image/')
165217

218+
@property
219+
defis_video(self)->bool:
220+
"""Return `True` if the media type is a video type."""
221+
returnself.media_type.startswith('video/')
222+
166223
@property
167224
defis_document(self)->bool:
168225
"""Return `True` if the media type is a document type."""
@@ -189,10 +246,12 @@ def format(self) -> str:
189246
return_image_format(self.media_type)
190247
elifself.is_document:
191248
return_document_format(self.media_type)
249+
elifself.is_video:
250+
return_video_format(self.media_type)
192251
raiseValueError(f'Unknown media type:{self.media_type}')
193252

194253

195-
UserContent:TypeAlias='str | ImageUrl | AudioUrl | DocumentUrl | BinaryContent'
254+
UserContent:TypeAlias='str | ImageUrl | AudioUrl | DocumentUrl |VideoUrl |BinaryContent'
196255

197256

198257
def_document_format(media_type:str)->DocumentFormat:
@@ -229,6 +288,27 @@ def _image_format(media_type: str) -> ImageFormat:
229288
raiseValueError(f'Unknown image media type:{media_type}')
230289

231290

291+
def_video_format(media_type:str)->VideoFormat:
292+
ifmedia_type=='video/x-matroska':
293+
return'mkv'
294+
elifmedia_type=='video/quicktime':
295+
return'mov'
296+
elifmedia_type=='video/mp4':
297+
return'mp4'
298+
elifmedia_type=='video/webm':
299+
return'webm'
300+
elifmedia_type=='video/x-flv':
301+
return'flv'
302+
elifmedia_type=='video/mpeg':
303+
return'mpeg'
304+
elifmedia_type=='video/x-ms-wmv':
305+
return'wmv'
306+
elifmedia_type=='video/3gpp':
307+
return'three_gp'
308+
else:# pragma: no cover
309+
raiseValueError(f'Unknown video media type:{media_type}')
310+
311+
232312
@dataclass
233313
classUserPromptPart:
234314
"""A user prompt, generally written by the end user.

‎pydantic_ai_slim/pydantic_ai/models/bedrock.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ToolCallPart,
3030
ToolReturnPart,
3131
UserPromptPart,
32+
VideoUrl,
3233
)
3334
frompydantic_ai.modelsimportModel,ModelRequestParameters,StreamedResponse,cached_async_http_client
3435
frompydantic_ai.providersimportProvider,infer_provider
@@ -52,6 +53,7 @@
5253
SystemContentBlockTypeDef,
5354
ToolChoiceTypeDef,
5455
ToolTypeDef,
56+
VideoBlockTypeDef,
5557
)
5658

5759

@@ -381,21 +383,33 @@ async def _map_user_prompt(part: UserPromptPart) -> list[MessageUnionTypeDef]:
381383
elifitem.is_image:
382384
assertformatin ('jpeg','png','gif','webp')
383385
content.append({'image': {'format':format,'source': {'bytes':item.data}}})
386+
elifitem.is_video:
387+
assertformatin ('mkv','mov','mp4','webm','flv','mpeg','mpg','wmv','three_gp')
388+
content.append({'video': {'format':format,'source': {'bytes':item.data}}})
384389
else:
385390
raiseNotImplementedError('Binary content is not supported yet.')
386-
elifisinstance(item, (ImageUrl,DocumentUrl)):
391+
elifisinstance(item, (ImageUrl,DocumentUrl,VideoUrl)):
387392
response=awaitcached_async_http_client().get(item.url)
388393
response.raise_for_status()
389394
ifitem.kind=='image-url':
390395
format=item.media_type.split('/')[1]
391396
assertformatin ('jpeg','png','gif','webp'),f'Unsupported image format:{format}'
392397
image:ImageBlockTypeDef= {'format':format,'source': {'bytes':response.content}}
393398
content.append({'image':image})
399+
394400
elifitem.kind=='document-url':
395401
document_count+=1
396402
name=f'Document{document_count}'
397403
data=response.content
398404
content.append({'document': {'name':name,'format':item.format,'source': {'bytes':data}}})
405+
406+
elifitem.kind=='video-url':
407+
format=item.media_type.split('/')[1]
408+
assertformatin ('mkv','mov','mp4','webm','flv','mpeg','mpg','wmv','three_gp'), (
409+
f'Unsupported video format:{format}'
410+
)
411+
video:VideoBlockTypeDef= {'format':format,'source': {'bytes':response.content}}
412+
content.append({'video':video})
399413
elifisinstance(item,AudioUrl):# pragma: no cover
400414
raiseNotImplementedError('Audio is not supported yet.')
401415
else:

‎pydantic_ai_slim/pydantic_ai/models/gemini.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
ToolCallPart,
3535
ToolReturnPart,
3636
UserPromptPart,
37+
VideoUrl,
3738
)
3839
from ..settingsimportModelSettings
3940
from ..toolsimportToolDefinition
@@ -335,6 +336,8 @@ async def _map_user_prompt(part: UserPromptPart) -> list[_GeminiPartUnion]:
335336
inline_data={'data':base64.b64encode(response.content).decode('utf-8'),'mime_type':mime_type}
336337
)
337338
content.append(inline_data)
339+
elifisinstance(item,VideoUrl):# pragma: no cover
340+
raiseNotImplementedError('VideoUrl is not supported for Gemini.')
338341
else:
339342
assert_never(item)
340343
returncontent

‎pydantic_ai_slim/pydantic_ai/models/mistral.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ToolCallPart,
3030
ToolReturnPart,
3131
UserPromptPart,
32+
VideoUrl,
3233
)
3334
from ..providersimportProvider,infer_provider
3435
from ..resultimportUsage
@@ -503,6 +504,8 @@ def _map_user_prompt(part: UserPromptPart) -> MistralUserMessage:
503504
raiseRuntimeError('Only image binary content is supported for Mistral.')
504505
elifisinstance(item,DocumentUrl):
505506
raiseRuntimeError('DocumentUrl is not supported in Mistral.')
507+
elifisinstance(item,VideoUrl):
508+
raiseRuntimeError('VideoUrl is not supported in Mistral.')
506509
else:# pragma: no cover
507510
raiseRuntimeError(f'Unsupported content type:{type(item)}')
508511
returnMistralUserMessage(content=content)

‎pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
ToolCallPart,
3131
ToolReturnPart,
3232
UserPromptPart,
33+
VideoUrl,
3334
)
3435
from ..settingsimportModelSettings
3536
from ..toolsimportToolDefinition
@@ -448,6 +449,8 @@ async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessa
448449
# file_data = f'data:{media_type};base64,{base64_encoded}'
449450
# file = File(file={'file_data': file_data, 'file_name': item.url, 'file_id': item.url}, type='file')
450451
# content.append(file)
452+
elifisinstance(item,VideoUrl):# pragma: no cover
453+
raiseNotImplementedError('VideoUrl is not supported for OpenAI')
451454
else:
452455
assert_never(item)
453456
returnchat.ChatCompletionUserMessageParam(role='user',content=content)
@@ -765,6 +768,8 @@ async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessagePa
765768
filename=f'filename.{item.format}',
766769
)
767770
)
771+
elifisinstance(item,VideoUrl):# pragma: no cover
772+
raiseNotImplementedError('VideoUrl is not supported for OpenAI.')
768773
else:
769774
assert_never(item)
770775
returnresponses.EasyInputMessageParam(role='user',content=content)

‎pydantic_ai_slim/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ vertexai = ["google-auth>=2.36.0", "requests>=2.32.3"]
6262
anthropic = ["anthropic>=0.49.0"]
6363
groq = ["groq>=0.15.0"]
6464
mistral = ["mistralai>=1.2.5"]
65-
bedrock = ["boto3>=1.34.116"]
65+
bedrock = ["boto3>=1.35.74"]
6666
# Tools
6767
duckduckgo = ["duckduckgo-search>=7.0.0"]
6868
tavily = ["tavily-python>=0.5.0"]

‎tests/assets/small_video.mp4

612 KB
Binary file not shown.

‎tests/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ def image_content(assets_path: Path) -> BinaryContent:
232232
returnBinaryContent(data=image_bytes,media_type='image/png')
233233

234234

235+
@pytest.fixture(scope='session')
236+
defvideo_content(assets_path:Path)->BinaryContent:
237+
video_bytes=assets_path.joinpath('small_video.mp4').read_bytes()
238+
returnBinaryContent(data=video_bytes,media_type='video/mp4')
239+
240+
235241
@pytest.fixture(scope='session')
236242
defdocument_content(assets_path:Path)->BinaryContent:
237243
pdf_bytes=assets_path.joinpath('dummy.pdf').read_bytes()

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp