2222from app .translator .core .custom_types .tokens import OperatorType
2323from app .translator .core .custom_types .values import ValueType
2424from app .translator .core .models .query_tokens .field_value import FieldValue
25+ from app .translator .core .models .query_tokens .function_value import FunctionValue
2526from app .translator .core .models .query_tokens .identifier import Identifier
27+ from app .translator .core .models .query_tokens .keyword import Keyword
2628from app .translator .core .tokenizer import QueryTokenizer
2729from app .translator .platforms .base .sql .str_value_manager import sql_str_value_manager
2830from app .translator .tools .utils import get_match_group
@@ -49,6 +51,7 @@ class SqlTokenizer(QueryTokenizer):
4951 )
    # Any scalar value: number, boolean, or single-quoted string.
    _value_pattern = rf"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
    # Parenthesized IN-style list: comma-separated integers, or comma-separated
    # single-quoted strings where '' escapes an embedded quote.
    multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*')*)\)"""  # noqa: E501
    # Matches a `regexp_like(<field>, '<regex>')` call; the regex body is captured
    # into the named group ValueType.regex_value (backslash escapes permitted).
    re_field_value_pattern = rf"""regexp_like\({field_pattern},\s*'(?P<{ValueType.regex_value}>(?:[:a-zA-Z\*\?0-9=+%#№;\-_,"\.$&^@!\{{\}}\[\]\s?<>|]|\\\'|\\)+)'\)"""  # noqa: E501

    # SQL LIKE wildcard character used by this platform.
    wildcard_symbol = "%"
5457
@@ -77,6 +80,22 @@ def create_field_value(field_name: str, operator: Identifier, value: Union[str,
7780field_name = field_name .strip ('"' )
7881return FieldValue (source_name = field_name ,operator = operator ,value = value )
7982
83+ def _search_re_field_value (self ,query :str )-> Optional [tuple [FieldValue ,str ]]:
84+ if match := re .match (self .re_field_value_pattern ,query ,re .IGNORECASE ):
85+ group_dict = match .groupdict ()
86+ field_name = group_dict ["field_name" ]
87+ value = self .str_value_manager .from_re_str_to_container (group_dict [ValueType .regex_value ])
88+ operator = Identifier (token_type = OperatorType .REGEX )
89+ return self .create_field_value (field_name ,operator ,value ),query [match .end () :]
90+
8091def tokenize (self ,query :str )-> list :
8192query = re .sub (r"\s*ESCAPE\s*'.'" ,"" ,query )# remove `ESCAPE 'escape_char'` in LIKE expr
8293return super ().tokenize (query )
94+
95+ def _get_next_token (
96+ self ,query :str
97+ )-> tuple [Union [FieldValue ,FunctionValue ,Keyword ,Identifier ,list [Union [FieldValue ,Identifier ]]],str ]:
98+ query = query .strip ("\n " ).strip (" " ).strip ("\n " )
99+ if search_result := self ._search_re_field_value (query ):
100+ return search_result
101+ return super ()._get_next_token (query )