Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit cf9753e

Browse files
authored
Merge pull request #36 from UncoderIO/escaping_logic
Added escaping logic support
2 parents c0a4420 + 5b57082 commit cf9753e

File tree

27 files changed

+338
-161
lines changed

27 files changed

+338
-161
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
fromapp.translator.tools.custom_enumimportCustomEnum
2+
3+
4+
class ValueType(CustomEnum):
    """Identifiers for the kinds of values handled by tokenizers and escape managers.

    The member strings are interpolated into tokenizer regexes as named-group
    identifiers (``(?P<{ValueType.number_value}>...)``), passed back to
    ``get_match_group`` as ``group_name``, and used as keys of
    ``EscapeManager.escape_map``.
    """

    # Generic value; also the default ``value_type`` of ``EscapeManager.escape``.
    value = "value"

    # Numeric literal.
    number_value = "num_value"

    # Values distinguished by the quoting style they were written with.
    double_quotes_value = "d_q_value"
    single_quotes_value = "s_q_value"
    back_quotes_value = "b_q_value"
    no_quotes_value = "no_q_value"

    # Boolean literal.
    bool_value = "bool_value"

    # Regular-expression value.
    regular_expression_value = "re_value"

    # Bounds captured from range expressions.
    greater_than_or_equal = "gte_value"
    less_than_or_equal = "lte_value"
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
importre
2+
fromabcimportABC
3+
fromtypingimportUnion
4+
5+
fromapp.translator.core.custom_types.valuesimportValueType
6+
fromapp.translator.core.models.escape_detailsimportEscapeDetails
7+
8+
9+
class EscapeManager(ABC):
    """Base class that escapes and unescapes query values.

    Subclasses override ``escape_map`` with one ``EscapeDetails`` entry per
    value type they want escaped; value types without an entry are returned
    unchanged by ``escape``.
    """

    # value-type key -> regex pattern / replacement template applied by escape()
    escape_map: dict[str, EscapeDetails] = {}

    def escape(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]:
        """Return ``value`` with the escaping registered for ``value_type`` applied.

        Args:
            value: The value to escape. Integers are returned as is.
            value_type: Key looked up in ``escape_map``.

        Returns:
            The escaped string, or the untouched value when it is an integer
            or when no escape details are registered for ``value_type``.
        """
        if isinstance(value, int):
            return value
        if escape_details := self.escape_map.get(value_type):
            symbols_pattern = re.compile(escape_details.pattern)
            value = symbols_pattern.sub(escape_details.escape_symbols, value)
        return value

    def remove_escape(self, value: Union[str, int]) -> Union[str, int]:
        """Return ``value`` with backslash escape sequences decoded.

        Args:
            value: The value to unescape. Integers are returned as is.

        Returns:
            The string with its escape sequences interpreted by the
            ``unicode_escape`` codec.
        """
        if isinstance(value, int):
            return value
        # ``unicode_escape`` decodes raw bytes as Latin-1, so going through
        # UTF-8 bytes would corrupt every non-ASCII character. Encoding as
        # Latin-1 with ``backslashreplace`` keeps Latin-1 characters as single
        # bytes and turns everything else into \uXXXX / \UXXXXXXXX escapes,
        # which the decoder then restores. ASCII input decodes exactly as before.
        return value.encode("latin-1", "backslashreplace").decode("unicode_escape")
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fromdataclassesimportdataclass
2+
3+
4+
@dataclass
class EscapeDetails:
    """Regex pattern and replacement template describing one escaping rule.

    ``EscapeManager.escape`` compiles ``pattern`` and substitutes every match
    with ``escape_symbols``.
    """

    # Regular expression selecting the characters to escape; the default
    # replacement refers to capture group 1, so the pattern should define it.
    pattern: str = None
    # Replacement template: a literal backslash followed by capture group 1.
    # Written as a raw string so that "\g" is not an invalid string escape;
    # the runtime value is unchanged (three backslashes followed by "g<1>").
    escape_symbols: str = r"\\\g<1>"

‎translator/app/translator/core/render.py‎

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
fromtypingimportUnion,List,Dict
2222

2323
fromapp.translator.constimportDEFAULT_VALUE_TYPE
24+
fromapp.translator.core.custom_types.valuesimportValueType
25+
fromapp.translator.core.escape_managerimportEscapeManager
2426
fromapp.translator.core.exceptions.coreimportNotImplementedException,StrictPlatformException
2527
fromapp.translator.core.exceptions.parserimportUnsupportedOperatorException
2628
fromapp.translator.core.functionsimportPlatformFunctions
@@ -34,6 +36,7 @@
3436

3537
classBaseQueryFieldValue(ABC):
3638
details:PlatformDetails=None
39+
escape_manager:EscapeManager=None
3740

3841
def__init__(self,or_token):
3942
self.field_value= {
@@ -84,6 +87,10 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
8487
defkeywords(self,field:str,value:DEFAULT_VALUE_TYPE)->str:
8588
raiseNotImplementedException
8689

90+
defapply_value(self,value:Union[str,int],value_type:str=ValueType.value)->Union[str,int]:
91+
updated_value=self.escape_manager.escape(value,value_type)
92+
returnupdated_value
93+
8794
defapply_field_value(self,field,operator,value):
8895
ifmodifier_function:=self.field_value.get(operator.token_type):
8996
returnmodifier_function(field,value)

‎translator/app/translator/core/tokenizer.py‎

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
importre
2121
fromtypingimportTuple,Union,List,Any,Optional,Type
2222

23+
fromapp.translator.core.custom_types.valuesimportValueType
24+
fromapp.translator.core.escape_managerimportEscapeManager
2325
fromapp.translator.core.exceptions.parserimport (
2426
UnsupportedOperatorException,
2527
TokenizerGeneralException,
@@ -60,6 +62,7 @@ class QueryTokenizer(BaseTokenizer):
6062

6163
multi_value_delimiter=","
6264
wildcard_symbol=None
65+
escape_manager:EscapeManager=None
6366

6467
def__init_subclass__(cls,**kwargs):
6568
cls._validate_re_patterns()
@@ -100,7 +103,7 @@ def search_operator(self, query, field_name) -> str:
100103
returnoperator
101104

102105
defget_operator_and_value(self,match:re.Match,operator:str=OperatorType.EQ)->Tuple[str,Any]:
103-
returnoperator,get_match_group(match,group_name='value')
106+
returnoperator,get_match_group(match,group_name=ValueType.value)
104107

105108
@staticmethod
106109
defclean_multi_value(value:Union[int,str])->Union[int,str]:

‎translator/app/translator/platforms/athena/tokenizer.py‎

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
importre
2020
fromtypingimportTuple,Any
2121

22+
fromapp.translator.core.custom_types.valuesimportValueType
2223
fromapp.translator.core.models.identifierimportIdentifier
2324
fromapp.translator.core.tokenizerimportQueryTokenizer
2425
fromapp.translator.core.custom_types.tokensimportOperatorType
@@ -41,11 +42,11 @@ class AthenaTokenizer(QueryTokenizer):
4142
}
4243

4344
field_pattern=r'(?P<field_name>"[a-zA-Z\._\-\s]+"|[a-zA-Z\._\-]+)'
44-
num_value_pattern=r"(?P<num_value>\d+(?:\.\d+)*)\s*"
45-
bool_value_pattern=r"(?P<bool_value>true|false)\s*"
46-
single_quotes_value_pattern=r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'"""
45+
num_value_pattern=fr"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)\s*"
46+
bool_value_pattern=fr"(?P<{ValueType.bool_value}>true|false)\s*"
47+
single_quotes_value_pattern=fr"""'(?P<{ValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*)'"""
4748
_value_pattern=fr"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
48-
multi_value_pattern=r"""\((?P<value>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*')*)\)"""
49+
multi_value_pattern=fr"""\((?P<{ValueType.value}>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*')*)\)"""
4950

5051
wildcard_symbol="%"
5152

@@ -54,13 +55,13 @@ def should_process_value_wildcard_symbols(operator: str) -> bool:
5455
returnoperator.lower()in ("like",)
5556

5657
defget_operator_and_value(self,match:re.Match,operator:str=OperatorType.EQ)->Tuple[str,Any]:
57-
if (num_value:=get_match_group(match,group_name='num_value'))isnotNone:
58+
if (num_value:=get_match_group(match,group_name=ValueType.number_value))isnotNone:
5859
returnoperator,num_value
5960

60-
elif (bool_value:=get_match_group(match,group_name='bool_value'))isnotNone:
61+
elif (bool_value:=get_match_group(match,group_name=ValueType.bool_value))isnotNone:
6162
returnoperator,bool_value
6263

63-
elif (s_q_value:=get_match_group(match,group_name='s_q_value'))isnotNone:
64+
elif (s_q_value:=get_match_group(match,group_name=ValueType.single_quotes_value))isnotNone:
6465
returnoperator,s_q_value
6566

6667
returnsuper().get_operator_and_value(match,operator)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
fromapp.translator.core.custom_types.valuesimportValueType
2+
fromapp.translator.core.escape_managerimportEscapeManager
3+
fromapp.translator.core.models.escape_detailsimportEscapeDetails
4+
5+
6+
class LuceneEscapeManager(EscapeManager):
    """Escape rules applied to Lucene query values."""

    # Each character of the class below is captured as group 1 and re-emitted
    # with a backslash in front of it.
    escape_map = {
        ValueType.value: EscapeDetails(
            pattern=r'([_!@#$%^&*=+()\[\]{}|;:\'",.<>?/`~\-\s\\])',
            escape_symbols=r"\\\1",
        ),
    }


# Module-level instance imported by the Lucene render and tokenizer modules.
lucene_escape_manager = LuceneEscapeManager()

‎translator/app/translator/platforms/base/lucene/renders/lucene.py‎

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,15 @@
2121
fromapp.translator.constimportDEFAULT_VALUE_TYPE
2222
fromapp.translator.core.renderimportBaseQueryRender
2323
fromapp.translator.core.renderimportBaseQueryFieldValue
24+
fromapp.translator.platforms.base.lucene.escape_managerimportlucene_escape_manager
2425

2526

2627
classLuceneFieldValue(BaseQueryFieldValue):
27-
28-
defapply_value(self,value:Union[str,int]):
29-
returnvalue
28+
escape_manager=lucene_escape_manager
3029

3130
defequal_modifier(self,field:str,value:DEFAULT_VALUE_TYPE)->str:
3231
ifisinstance(value,list):
33-
values=self.or_token.join(self.apply_value(f'{v}')forvinvalue)
32+
values=self.or_token.join(f'{self.apply_value(v)}'forvinvalue)
3433
returnf"{field}:({values})"
3534
returnf'{field}:{self.apply_value(value)}'
3635

@@ -48,29 +47,29 @@ def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
4847

4948
defnot_equal_modifier(self,field:str,value:DEFAULT_VALUE_TYPE)->str:
5049
ifisinstance(value,list):
51-
values=self.or_token.join(self.apply_value(f'{v}')forvinvalue)
50+
values=self.or_token.join(f'{self.apply_value(v)}'forvinvalue)
5251
returnf"NOT ({field} = ({values})"
5352
returnf'NOT ({field} ={self.apply_value(value)})'
5453

5554
defcontains_modifier(self,field:str,value:DEFAULT_VALUE_TYPE)->str:
5655
ifisinstance(value,list):
57-
values=self.or_token.join(self.apply_value(f'*{v}*')forvinvalue)
56+
values=self.or_token.join(f'*{self.apply_value(v)}*'forvinvalue)
5857
returnf"{field}:({values})"
59-
prepared_value=self.apply_value(f"*{value}*")
58+
prepared_value=f"*{self.apply_value(value)}*"
6059
returnf'{field}:{prepared_value}'
6160

6261
defendswith_modifier(self,field:str,value:DEFAULT_VALUE_TYPE)->str:
6362
ifisinstance(value,list):
64-
values=self.or_token.join(self.apply_value(f'*{v}')forvinvalue)
63+
values=self.or_token.join(f'*{self.apply_value(v)}'forvinvalue)
6564
returnf"{field}:({values})"
66-
prepared_value=self.apply_value(f"*{value}")
65+
prepared_value=f"*{self.apply_value(value)}"
6766
returnf'{field}:{prepared_value}'
6867

6968
defstartswith_modifier(self,field:str,value:DEFAULT_VALUE_TYPE)->str:
7069
ifisinstance(value,list):
71-
values=self.or_token.join(self.apply_value(f'{v}*')forvinvalue)
70+
values=self.or_token.join(f'{self.apply_value(v)}*'forvinvalue)
7271
returnf"{field}:({values})"
73-
prepared_value=self.apply_value(f"{value}*")
72+
prepared_value=f"{self.apply_value(value)}*"
7473
returnf'{field}:{prepared_value}'
7574

7675
defregex_modifier(self,field:str,value:DEFAULT_VALUE_TYPE)->str:
@@ -81,7 +80,7 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
8180
defkeywords(self,field:str,value:DEFAULT_VALUE_TYPE)->str:
8281
ifisinstance(value,list):
8382
returnf"({self.or_token.join(self.keywords(field=field,value=v)forvinvalue)})"
84-
returnself.apply_value(f"*{value}*")
83+
returnf"*{self.apply_value(value)}*"
8584

8685

8786
classLuceneQueryRender(BaseQueryRender):

‎translator/app/translator/platforms/base/lucene/tokenizer.py‎

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919

2020
fromtypingimportTuple,Union,List,Any
2121

22+
fromapp.translator.core.custom_types.valuesimportValueType
2223
fromapp.translator.core.exceptions.parserimportTokenizerGeneralException
2324
fromapp.translator.core.mixins.logicimportANDLogicOperatorMixin
2425
fromapp.translator.core.models.fieldimportKeyword,Field
2526
fromapp.translator.core.models.identifierimportIdentifier
2627
fromapp.translator.core.tokenizerimportQueryTokenizer
2728
fromapp.translator.core.custom_types.tokensimportOperatorType
29+
fromapp.translator.platforms.base.lucene.escape_managerimportlucene_escape_manager
2830
fromapp.translator.tools.utilsimportget_match_group
2931

3032

@@ -41,19 +43,21 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
4143
field_pattern=r"(?P<field_name>[a-zA-Z\.\-_]+)"
4244
match_operator_pattern=r"(?:___field___\s*(?P<match_operator>:\[\*\sTO|:\[|:<|:>|:))\s*"
4345
_num_value_pattern=r"\d+(?:\.\d+)*"
44-
num_value_pattern=fr"(?P<num_value>{_num_value_pattern})\s*"
45-
double_quotes_value_pattern=r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
46-
no_quotes_value_pattern=r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
47-
re_value_pattern=r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\[\]\s?]+)/\s*"
48-
gte_value_pattern=fr"\[\s*(?P<gte_value>{_num_value_pattern})\s+TO\s+\*\s*\]"
49-
lte_value_pattern=fr"\[\s*\*\s+TO\s+(?P<lte_value>{_num_value_pattern})\s*\]"
46+
num_value_pattern=fr"(?P<{ValueType.number_value}>{_num_value_pattern})\s*"
47+
double_quotes_value_pattern=fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\)*)"\s*'
48+
no_quotes_value_pattern=fr"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
49+
re_value_pattern=fr"/(?P<{ValueType.regular_expression_value}>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{{\}}\[\]\s?]+)/\s*"
50+
gte_value_pattern=fr"\[\s*(?P<{ValueType.greater_than_or_equal}>{_num_value_pattern})\s+TO\s+\*\s*\]"
51+
lte_value_pattern=fr"\[\s*\*\s+TO\s+(?P<{ValueType.less_than_or_equal}>{_num_value_pattern})\s*\]"
5052
range_value_pattern=fr"{gte_value_pattern}|{lte_value_pattern}"
5153
_value_pattern=fr"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}|{range_value_pattern}"
52-
keyword_pattern=r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)"
54+
keyword_pattern=fr"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{{|\\\}}|\\\:|\\)+)(?:\s+|\)|$)"
5355

54-
multi_value_pattern=r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
56+
multi_value_pattern=fr"""\((?P<{ValueType.value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
5557
multi_value_check_pattern=r"___field___\s*___operator___\s*\("
5658

59+
escape_manager=lucene_escape_manager
60+
5761
wildcard_symbol="*"
5862

5963
@staticmethod
@@ -69,22 +73,22 @@ def clean_quotes(value: Union[str, int]):
6973
returnvalue
7074

7175
defget_operator_and_value(self,match:re.Match,operator:str=OperatorType.EQ)->Tuple[str,Any]:
72-
if (num_value:=get_match_group(match,group_name='num_value'))isnotNone:
76+
if (num_value:=get_match_group(match,group_name=ValueType.number_value))isnotNone:
7377
returnoperator,num_value
7478

75-
elif (re_value:=get_match_group(match,group_name='re_value'))isnotNone:
79+
elif (re_value:=get_match_group(match,group_name=ValueType.regular_expression_value))isnotNone:
7680
returnOperatorType.REGEX,re_value
7781

78-
elif (n_q_value:=get_match_group(match,group_name='n_q_value'))isnotNone:
82+
elif (n_q_value:=get_match_group(match,group_name=ValueType.no_quotes_value))isnotNone:
7983
returnoperator,n_q_value
8084

81-
elif (d_q_value:=get_match_group(match,group_name='d_q_value'))isnotNone:
85+
elif (d_q_value:=get_match_group(match,group_name=ValueType.double_quotes_value))isnotNone:
8286
returnoperator,d_q_value
8387

84-
elif (gte_value:=get_match_group(match,group_name='gte_value'))isnotNone:
88+
elif (gte_value:=get_match_group(match,group_name=ValueType.greater_than_or_equal))isnotNone:
8589
returnOperatorType.GTE,gte_value
8690

87-
elif (lte_value:=get_match_group(match,group_name='lte_value'))isnotNone:
91+
elif (lte_value:=get_match_group(match,group_name=ValueType.less_than_or_equal))isnotNone:
8892
returnOperatorType.LTE,lte_value
8993

9094
returnsuper().get_operator_and_value(match,operator)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
fromapp.translator.core.custom_types.valuesimportValueType
2+
fromapp.translator.core.escape_managerimportEscapeManager
3+
fromapp.translator.core.models.escape_detailsimportEscapeDetails
4+
5+
6+
class SplEscapeManager(EscapeManager):
    """Escape rules applied to SPL query values."""

    # Group 1 matches a double quote, or a backslash that is neither preceded
    # by a backslash nor followed by "*", "?" or another backslash. No
    # ``escape_symbols`` is given, so the EscapeDetails default is used.
    escape_map = {
        ValueType.value: EscapeDetails(
            pattern='("|(?<!\\\\)\\\\(?![*?\\\\]))',
        ),
    }


# Module-level instance of the SPL escape manager.
spl_escape_manager = SplEscapeManager()

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp