1
# Some of this code is adapted from
# https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py
3
+
4
+ from typing import Any ,BinaryIO ,Dict ,List ,Optional ,Tuple ,Union
5
+ import sys
6
+ import os
7
+ import logging
8
+ import importlib .util
9
+ from types import ModuleType
10
+
11
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

# ``importlib.metadata`` is part of the standard library from Python 3.8 on;
# older interpreters fall back to the ``importlib_metadata`` package. Either
# way the rest of this module refers to it as ``importlib_metadata``.
if sys.version_info >= (3, 8):
    import importlib.metadata as importlib_metadata
else:
    import importlib_metadata
18
+
19
+ ###########################################
20
+ ############ Layout Model Deps ############
21
+ ###########################################
22
+
23
# PyTorch is reported as available only when the ``torch`` module can be
# located AND its distribution metadata can be read.
_torch_available = importlib.util.find_spec("torch") is not None
try:
    _torch_version = importlib_metadata.version("torch")
except importlib_metadata.PackageNotFoundError:
    _torch_available = False
else:
    # %-style arguments: the message is only formatted if DEBUG is enabled.
    logger.debug("PyTorch version %s available.", _torch_version)
29
+
30
# Detectron2 is reported as available only when the ``detectron2`` module can
# be located AND its distribution metadata can be read.
_detectron2_available = importlib.util.find_spec("detectron2") is not None
try:
    _detectron2_version = importlib_metadata.version("detectron2")
except importlib_metadata.PackageNotFoundError:
    _detectron2_available = False
else:
    # %-style arguments: the message is only formatted if DEBUG is enabled.
    logger.debug("Detectron2 version %s available", _detectron2_version)
36
+
37
# PaddlePaddle is reported as available only when the ``paddle`` module can be
# located AND the ``paddlepaddle`` distribution metadata can be read.
_paddle_available = importlib.util.find_spec("paddle") is not None
try:
    # The name of the paddlepaddle library:
    #   Install name: pip install paddlepaddle
    #   Import name:  import paddle
    _paddle_version = importlib_metadata.version("paddlepaddle")
except importlib_metadata.PackageNotFoundError:
    _paddle_available = False
else:
    # %-style arguments: the message is only formatted if DEBUG is enabled.
    logger.debug("Paddle version %s available.", _paddle_version)
46
+
47
+ ###########################################
48
+ ############## OCR Tool Deps ##############
49
+ ###########################################
50
+
51
# Pytesseract is reported as available only when the ``pytesseract`` module
# can be located AND its distribution metadata can be read.
_pytesseract_available = importlib.util.find_spec("pytesseract") is not None
try:
    _pytesseract_version = importlib_metadata.version("pytesseract")
except importlib_metadata.PackageNotFoundError:
    _pytesseract_available = False
else:
    # %-style arguments: the message is only formatted if DEBUG is enabled.
    logger.debug("Pytesseract version %s available.", _pytesseract_version)
57
+
58
# ``find_spec`` imports the parent packages of a dotted name, so it raises
# ModuleNotFoundError (rather than returning None) when ``google`` or
# ``google.cloud`` is not installed. Treat that case as "not available"
# instead of letting the import of this module fail.
try:
    _gcv_available = importlib.util.find_spec("google.cloud.vision") is not None
except ModuleNotFoundError:
    _gcv_available = False
try:
    # The distribution name is slightly different from the import name:
    #   Install name: pip install google-cloud-vision
    #   Import name:  import google.cloud.vision
    _gcv_version = importlib_metadata.version("google-cloud-vision")
except importlib_metadata.PackageNotFoundError:
    _gcv_available = False
else:
    # %-style arguments: the message is only formatted if DEBUG is enabled.
    logger.debug("Google Cloud Vision Utils version %s available.", _gcv_version)
66
+
67
+
68
def is_torch_available() -> bool:
    """Return the module-level ``_torch_available`` flag set at import time."""
    return _torch_available
70
+
71
+
72
def is_torch_cuda_available():
    """
    Return ``torch.cuda.is_available()`` when PyTorch is available, else ``False``.
    """
    if not is_torch_available():
        return False

    # Imported here rather than at module level so that this file can be
    # loaded in environments where PyTorch is not installed.
    import torch

    return torch.cuda.is_available()
79
+
80
+
81
def is_paddle_available() -> bool:
    """Return the module-level ``_paddle_available`` flag set at import time."""
    return _paddle_available
83
+
84
+
85
def is_detectron2_available() -> bool:
    """Return the module-level ``_detectron2_available`` flag set at import time."""
    return _detectron2_available
87
+
88
+
89
def is_pytesseract_available() -> bool:
    """Return the module-level ``_pytesseract_available`` flag set at import time."""
    return _pytesseract_available
91
+
92
+
93
def is_gcv_available() -> bool:
    """Return the module-level ``_gcv_available`` flag set at import time."""
    return _gcv_available
95
+
96
+
97
# Message templates used when a backend is missing. Each one has a single
# positional placeholder, ``{0}``, to be filled via ``str.format`` with the
# name of the object that needs the backend.

# PyTorch
PYTORCH_IMPORT_ERROR = """
{0} requires the PyTorch library but it was not found in your environment. Checkout the instructions on the
installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
"""

# Detectron2
DETECTRON2_IMPORT_ERROR = """
{0} requires the detectron2 library but it was not found in your environment. Checkout the instructions on the
installation page: https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md and follow the ones
that match your environment. Typically the following would work for MacOS or Linux CPU machines:
pip install 'git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2'
"""

# PaddlePaddle
PADDLE_IMPORT_ERROR = """
{0} requires the PaddlePaddle library but it was not found in your environment. Checkout the instructions on the
installation page: https://github.com/PaddlePaddle/Paddle and follow the ones that match your environment.
"""

# PyTesseract
PYTESSERACT_IMPORT_ERROR = """
{0} requires the PyTesseract library but it was not found in your environment. You can install it with pip:
`pip install pytesseract`
"""

# Google Cloud Vision
GCV_IMPORT_ERROR = """
{0} requires the Google Cloud Vision Python utils but it was not found in your environment. You can install it with pip:
`pip install google-cloud-vision==1`
"""
123
+
124
# Maps a backend name to a pair ``(availability_check, error_template)``:
# a zero-argument callable returning whether the backend is available, and the
# message template to format when it is not.
BACKENDS_MAPPING = {
    "torch": (is_torch_available, PYTORCH_IMPORT_ERROR),
    "detectron2": (is_detectron2_available, DETECTRON2_IMPORT_ERROR),
    "paddle": (is_paddle_available, PADDLE_IMPORT_ERROR),
    "pytesseract": (is_pytesseract_available, PYTESSERACT_IMPORT_ERROR),
    "google-cloud-vision": (is_gcv_available, GCV_IMPORT_ERROR),
}
133
+
134
+
135
def requires_backends(obj, backends):
    """
    Raise ``ImportError`` if any of the requested backends is unavailable.

    Args:
        obj: The object that needs the backends. Its ``__name__`` (or, when it
            has none, the name of its class) is inserted into the error message.
        backends: A single key of ``BACKENDS_MAPPING`` or a list/tuple of keys.

    Raises:
        ImportError: When at least one backend reports itself as unavailable.
            The message contains the installation hint of every *missing*
            backend only; backends that are available are not mentioned.
        KeyError: When a name in ``backends`` is not in ``BACKENDS_MAPPING``.
    """
    if not isinstance(backends, (list, tuple)):
        backends = [backends]

    name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__

    # Collect the hints of the unavailable backends only, so that the user is
    # not told to install libraries that are already present.
    missing = [
        error_template.format(name)
        for is_available, error_template in (
            BACKENDS_MAPPING[backend] for backend in backends
        )
        if not is_available()
    ]
    if missing:
        raise ImportError("".join(missing))
144
+
145
+
146
class _LazyModule(ModuleType):
    """
    Module class that surfaces all objects but only performs associated imports when the objects are requested.

    ``import_structure`` maps each submodule name to the names it provides.
    Accessing a submodule name, or one of the provided names, triggers a
    relative import of that submodule; the result is then stored on the
    instance so later lookups do not reach ``__getattr__`` again.
    """

    # Adapted from HuggingFace
    # https://github.com/huggingface/transformers/blob/c37573806ab3526dd805c49cbe2489ad4d68a9d7/src/transformers/file_utils.py#L1990

    def __init__(
        self, name, module_file, import_structure, module_spec=None, extra_objects=None
    ):
        """
        Args:
            name: Name of the module; also used as its ``__package__``.
            module_file: Value for ``__file__``; its directory becomes ``__path__``.
            import_structure: Mapping from submodule name to an iterable of
                the attribute names that submodule provides.
            module_spec: Value for ``__spec__``.
            extra_objects: Optional mapping of names to objects that are
                returned directly, without importing anything.
        """
        super().__init__(name)
        self._modules = set(import_structure.keys())
        # Reverse lookup: attribute name -> submodule that defines it.
        self._class_to_module = {}
        for key, values in import_structure.items():
            for value in values:
                self._class_to_module[value] = key
        # Needed for autocompletion in an IDE
        self.__all__ = list(import_structure.keys()) + [
            value for values in import_structure.values() for value in values
        ]
        self.__file__ = module_file
        self.__spec__ = module_spec
        self.__path__ = [os.path.dirname(module_file)]
        self._objects = {} if extra_objects is None else extra_objects
        self._name = name
        self._import_structure = import_structure

        # Following [PEP 366](https://www.python.org/dev/peps/pep-0366/)
        # The __package__ variable should be set
        # https://docs.python.org/3/reference/import.html#__package__
        self.__package__ = self.__name__

    # Needed for autocompletion in an IDE
    def __dir__(self):
        """Return the regular module listing plus the lazily provided names."""
        result = list(super().__dir__())
        # Names that were already resolved are stored on the instance by
        # ``__getattr__`` and therefore already listed; add only the others
        # so that no name is reported twice.
        seen = set(result)
        for attr in self.__all__:
            if attr not in seen:
                seen.add(attr)
                result.append(attr)
        return result

    def __getattr__(self, name: str) -> Any:
        """
        Resolve ``name`` on first access and store it on the instance.

        Raises:
            AttributeError: If ``name`` is neither an extra object, a known
                submodule, nor a name listed in the import structure.
        """
        if name in self._objects:
            return self._objects[name]
        if name in self._modules:
            value = self._get_module(name)
        elif name in self._class_to_module:
            module = self._get_module(self._class_to_module[name])
            value = getattr(module, name)
        else:
            raise AttributeError(f"module {self.__name__} has no attribute {name}")

        # Store the result so the next lookup finds it as a normal attribute.
        setattr(self, name, value)
        return value

    def _get_module(self, module_name: str):
        """Import ``module_name`` relative to this module and return it."""
        return importlib.import_module("." + module_name, self.__name__)

    def __reduce__(self):
        # NOTE(review): only name, file and import structure are passed back;
        # ``module_spec`` and ``extra_objects`` are not part of the tuple.
        return (self.__class__, (self._name, self.__file__, self._import_structure))