Uh oh!
There was an error while loading.Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork366
Add a runtime type checker for metadata objects #3400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base:main
Are you sure you want to change the base?
Uh oh!
There was an error while loading.Please reload this page.
Changes fromall commits
b2f4ff07adff5935c720307e231585b48df4fe9ae4212515332cd30921d61886125c1bcf0615b7fce136b467747943e148d1be08cea3ed12a098cc2fc06ab41d4bd72d061fe1bbd8ba711f74991892df1eda19ecbb7e84e4cc0385be71a87971945b30d48a8a483c73c7096b19eb287bFile filter
Filter by extension
Conversations
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| Add a runtime type checker for ``JSON`` types, and a variety of ``TypedDict`` classes necessary for | ||
| modelling Zarr metadata documents. This increases the type-safety of our internal metadata routines, | ||
| and provides Zarr users with types they can use to model Zarr metadata. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,16 @@ | ||
| from __future__ import annotations | ||
| from abc import abstractmethod | ||
| from typing import TYPE_CHECKING, Generic, TypeVar | ||
| from zarr.abc.metadata import Metadata | ||
| from zarr.core.buffer import Buffer, NDBuffer | ||
| from zarr.core.common import ( # noqa: F401 CodecJSON re-exported for backwards compatibility | ||
| CodecJSON, | ||
| CodecJSON_V2, | ||
| CodecJSON_V3, | ||
| concurrent_map, | ||
| ) | ||
| from zarr.core.config import config | ||
| if TYPE_CHECKING: | ||
| @@ -37,27 +39,6 @@ | ||
| CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer) | ||
| CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer) | ||
Comment on lines -43 to -44 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably should re-export these types here for backwards compatibility. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done in 30d48a8 | ||
| class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]): | ||
| """Generic base class for codecs. | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -53,6 +53,8 @@ | ||
| ZARR_JSON, | ||
| ZARRAY_JSON, | ||
| ZATTRS_JSON, | ||
| ArrayMetadataJSON_V2, | ||
| ArrayMetadataJSON_V3, | ||
| DimensionNames, | ||
| MemoryOrder, | ||
| ShapeLike, | ||
| @@ -103,11 +105,8 @@ | ||
| ) | ||
| from zarr.core.metadata import ( | ||
| ArrayMetadata, | ||
| ArrayV2Metadata, | ||
| ArrayV3Metadata, | ||
| T_ArrayMetadata, | ||
| ) | ||
| from zarr.core.metadata.v2 import ( | ||
| @@ -116,11 +115,12 @@ | ||
| parse_compressor, | ||
| parse_filters, | ||
| ) | ||
| from zarr.core.sync import sync | ||
| from zarr.core.type_check import check_type | ||
| from zarr.errors import ( | ||
| ArrayNotFoundError, | ||
| MetadataValidationError, | ||
| NodeTypeValidationError, | ||
| ZarrDeprecationWarning, | ||
| ZarrUserWarning, | ||
| ) | ||
| @@ -175,25 +175,32 @@ class DefaultFillValue: | ||
| DEFAULT_FILL_VALUE = DefaultFillValue() | ||
@overload
def parse_array_metadata(data: ArrayV2Metadata | ArrayMetadataJSON_V2) -> ArrayV2Metadata: ...


@overload
def parse_array_metadata(data: ArrayV3Metadata | ArrayMetadataJSON_V3) -> ArrayV3Metadata: ...


def parse_array_metadata(
    data: ArrayV2Metadata | ArrayMetadataJSON_V2 | ArrayV3Metadata | ArrayMetadataJSON_V3,
) -> ArrayV2Metadata | ArrayV3Metadata:
    """
    Normalize array metadata to a metadata object.

    If the input is already a metadata object it is returned unchanged. Otherwise the input is
    treated as a dict representation of a Zarr metadata document, and the metadata class that
    matches its ``"zarr_format"`` entry is instantiated from it via ``from_dict``.

    Parameters
    ----------
    data
        An array metadata object, or the dict form of a Zarr format 2 or format 3 array
        metadata document.

    Returns
    -------
    ArrayV2Metadata | ArrayV3Metadata
        The input itself when it is already a metadata object, otherwise the metadata object
        built from the dict.

    Raises
    ------
    ValueError
        If ``"zarr_format"`` is missing from the dict or is neither 2 nor 3.
    """
    if isinstance(data, ArrayMetadata):
        return data

    # Use .get so that a document without a "zarr_format" key is reported through the same
    # ValueError as an unsupported version, instead of surfacing as a bare KeyError.
    zarr_format = data.get("zarr_format")
    if zarr_format == 3:
        return ArrayV3Metadata.from_dict(data)  # type: ignore[arg-type]
    if zarr_format == 2:
        return ArrayV2Metadata.from_dict(data)  # type: ignore[arg-type]
    raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")
| def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None) -> CodecPipeline: | ||
| @@ -213,9 +220,27 @@ def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None | ||
| raise TypeError # pragma: no cover | ||
| @overload | ||
| async def get_array_metadata( | ||
| store_path: StorePath, zarr_format: Literal[3] | ||
| ) -> ArrayMetadataJSON_V3: ... | ||
| @overload | ||
| async def get_array_metadata( | ||
| store_path: StorePath, zarr_format: Literal[2] | ||
| ) -> ArrayMetadataJSON_V2: ... | ||
| @overload | ||
| async def get_array_metadata( | ||
| store_path: StorePath, zarr_format: None | ||
| ) -> ArrayMetadataJSON_V3 | ArrayMetadataJSON_V2: ... | ||
| async def get_array_metadata( | ||
| store_path: StorePath, zarr_format: ZarrFormat | None = 3 | ||
| ) ->ArrayMetadataJSON_V3 | ArrayMetadataJSON_V2: | ||
| if zarr_format == 2: | ||
| zarray_bytes, zattrs_bytes = await gather( | ||
| (store_path / ZARRAY_JSON).get(prototype=cpu_buffer_prototype), | ||
| @@ -260,19 +285,25 @@ async def get_array_metadata( | ||
| msg = f"Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '{zarr_format}'." # type: ignore[unreachable] | ||
| raise MetadataValidationError(msg) | ||
| metadata_dict:ArrayMetadataJSON_V2 | ArrayMetadataJSON_V3 | ||
| if zarr_format == 2: | ||
| # V2 arrays are comprised of a .zarray and .zattrs objects | ||
| assert zarray_bytes is not None | ||
| metadata_dict = json.loads(zarray_bytes.to_bytes()) | ||
| zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} | ||
| metadata_dict["attributes"] = zattrs_dict | ||
| tycheck = check_type(metadata_dict, ArrayMetadataJSON_V2) | ||
| if not tycheck.success: | ||
| msg = "The .zarray object at {store_path} is not a valid Zarr array metadata object. " | ||
| raise NodeTypeValidationError("zarray", "Zarr array metadata object", metadata_dict) | ||
| else: | ||
| # V3 arrays are comprised of a zarr.json object | ||
| assert zarr_json_bytes is not None | ||
| metadata_dict = json.loads(zarr_json_bytes.to_bytes()) | ||
| tycheck = check_type(metadata_dict, ArrayMetadataJSON_V3) | ||
| if not tycheck.success: | ||
| msg = "The zarr.json object at {store_path} is not a valid Zarr array metadata object. " | ||
| raise NodeTypeValidationError("zarr.json", "Zarr array metadata object", metadata_dict) | ||
| return metadata_dict | ||
| @@ -311,22 +342,22 @@ class AsyncArray(Generic[T_ArrayMetadata]): | ||
| @overload | ||
| def __init__( | ||
| self: AsyncArray[ArrayV2Metadata], | ||
| metadata: ArrayV2Metadata |ArrayMetadataJSON_V2, | ||
| store_path: StorePath, | ||
| config: ArrayConfigLike | None = None, | ||
| ) -> None: ... | ||
| @overload | ||
| def __init__( | ||
| self: AsyncArray[ArrayV3Metadata], | ||
| metadata: ArrayV3Metadata |ArrayMetadataJSON_V3, | ||
| store_path: StorePath, | ||
| config: ArrayConfigLike | None = None, | ||
| ) -> None: ... | ||
| def __init__( | ||
| self, | ||
| metadata: ArrayMetadata |ArrayMetadataJSON_V2 | ArrayMetadataJSON_V3, | ||
| store_path: StorePath, | ||
| config: ArrayConfigLike | None = None, | ||
| ) -> None: | ||
| @@ -945,7 +976,7 @@ def from_dict( | ||
| ValueError | ||
| If the dictionary data is invalid or incompatible with either Zarr format 2 or 3 array creation. | ||
| """ | ||
| metadata = parse_array_metadata(data) # type: ignore[call-overload] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Is the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. we need the overload because the | ||
| return cls(metadata=metadata, store_path=store_path) | ||
| @classmethod | ||
| @@ -978,9 +1009,7 @@ async def open( | ||
| """ | ||
| store_path = await make_store_path(store) | ||
| metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) | ||
| return cls(store_path=store_path, metadata=metadata_dict) | ||
| @property | ||
| def store(self) -> Store: | ||
Uh oh!
There was an error while loading.Please reload this page.
Uh oh!
There was an error while loading.Please reload this page.