Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Added escaping logic support #36

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
nazargesyk merged 3 commits into main from escaping_logic
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions translator/app/translator/core/custom_types/values.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
from app.translator.tools.custom_enum import CustomEnum


class ValueType(CustomEnum):
    """Names of the value kinds shared by tokenizers and escape managers.

    The member strings are interpolated into tokenizer regexes as named-group
    identifiers and are used as keys of ``EscapeManager.escape_map``.
    """

    # Generic value: the group name read by the base tokenizer and the
    # default ``value_type`` used when escaping.
    value = "value"

    # Literal kinds, told apart by how the value is written in the query.
    number_value = "num_value"
    double_quotes_value = "d_q_value"
    single_quotes_value = "s_q_value"
    back_quotes_value = "b_q_value"
    no_quotes_value = "no_q_value"
    bool_value = "bool_value"
    regular_expression_value = "re_value"

    # Bounds captured from range expressions.
    greater_than_or_equal = "gte_value"
    less_than_or_equal = "lte_value"
24 changes: 24 additions & 0 deletions translator/app/translator/core/escape_manager.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
import re
from abc import ABC
from typing import Union

from app.translator.core.custom_types.values import ValueType
from app.translator.core.models.escape_details import EscapeDetails


class EscapeManager(ABC):
    """Base class for platform-specific escaping of query values.

    Subclasses supply ``escape_map``, which maps a value type to the
    ``EscapeDetails`` (regex pattern plus replacement template) applied to
    values of that type.
    """

    # Empty by default, so ``escape`` returns every value unchanged until a
    # subclass registers rules.
    escape_map: dict[str, EscapeDetails] = {}

    def escape(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]:
        """Escape ``value`` with the rule registered for ``value_type``.

        Args:
            value: The value to escape. Integers are returned untouched.
            value_type: Key into ``escape_map`` selecting the rule to apply.

        Returns:
            The escaped string, or ``value`` unchanged when it is an integer
            or no rule is registered for ``value_type``.
        """
        if isinstance(value, int):
            return value
        if escape_details := self.escape_map.get(value_type):
            value = re.sub(escape_details.pattern, escape_details.escape_symbols, value)
        return value

    def remove_escape(self, value: Union[str, int]) -> Union[str, int]:
        """Resolve backslash escape sequences in ``value``.

        Args:
            value: The value to unescape. Integers are returned untouched.

        Returns:
            ``value`` with its escape sequences decoded by the
            ``unicode_escape`` codec.

        Raises:
            UnicodeDecodeError: If ``value`` holds a malformed escape
                sequence, such as a lone trailing backslash.
        """
        if isinstance(value, int):
            return value
        # The "unicode_escape" codec decodes its input as Latin-1, so feeding
        # it UTF-8 bytes turned every non-ASCII character into mojibake.
        # Encoding as Latin-1 with "backslashreplace" keeps Latin-1 characters
        # as single bytes and rewrites all others as \uXXXX / \UXXXXXXXX
        # escapes, which the decode step then restores.
        return value.encode("latin-1", "backslashreplace").decode("unicode_escape")
7 changes: 7 additions & 0 deletions translator/app/translator/core/models/escape_details.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
from dataclasses import dataclass


@dataclass
class EscapeDetails:
    """Regex rule describing how one kind of value is escaped.

    ``pattern`` is the regular expression selecting the text to escape and
    ``escape_symbols`` is the ``re.sub`` replacement template applied to
    each match.
    """

    # Regex source; with the default template the text to escape must be
    # captured by group 1. ``None`` means no pattern has been set.
    pattern: str = None
    # Default template: a literal backslash followed by capture group 1.
    # Written as a raw string so ``\g`` is not parsed as an (invalid)
    # string escape; the resulting value is unchanged.
    escape_symbols: str = r"\\\g<1>"
7 changes: 7 additions & 0 deletions translator/app/translator/core/render.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -21,6 +21,8 @@
from typing import Union, List, Dict

from app.translator.const import DEFAULT_VALUE_TYPE
from app.translator.core.custom_types.values import ValueType
from app.translator.core.escape_manager import EscapeManager
from app.translator.core.exceptions.core import NotImplementedException, StrictPlatformException
from app.translator.core.exceptions.parser import UnsupportedOperatorException
from app.translator.core.functions import PlatformFunctions
Expand All@@ -34,6 +36,7 @@

class BaseQueryFieldValue(ABC):
details: PlatformDetails = None
escape_manager: EscapeManager = None

def __init__(self, or_token):
self.field_value = {
Expand DownExpand Up@@ -84,6 +87,10 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
raise NotImplementedException

def apply_value(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]:
updated_value = self.escape_manager.escape(value, value_type)
return updated_value

def apply_field_value(self, field, operator, value):
if modifier_function := self.field_value.get(operator.token_type):
return modifier_function(field, value)
Expand Down
5 changes: 4 additions & 1 deletion translator/app/translator/core/tokenizer.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -20,6 +20,8 @@
import re
from typing import Tuple, Union, List, Any, Optional, Type

from app.translator.core.custom_types.values import ValueType
from app.translator.core.escape_manager import EscapeManager
from app.translator.core.exceptions.parser import (
UnsupportedOperatorException,
TokenizerGeneralException,
Expand DownExpand Up@@ -60,6 +62,7 @@ class QueryTokenizer(BaseTokenizer):

multi_value_delimiter = ","
wildcard_symbol = None
escape_manager: EscapeManager = None

def __init_subclass__(cls, **kwargs):
cls._validate_re_patterns()
Expand DownExpand Up@@ -100,7 +103,7 @@ def search_operator(self, query, field_name) -> str:
return operator

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
return operator, get_match_group(match, group_name='value')
return operator, get_match_group(match, group_name=ValueType.value)

@staticmethod
def clean_multi_value(value: Union[int, str]) -> Union[int, str]:
Expand Down
15 changes: 8 additions & 7 deletions translator/app/translator/platforms/athena/tokenizer.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -19,6 +19,7 @@
import re
from typing import Tuple, Any

from app.translator.core.custom_types.values import ValueType
from app.translator.core.models.identifier import Identifier
from app.translator.core.tokenizer import QueryTokenizer
from app.translator.core.custom_types.tokens import OperatorType
Expand All@@ -41,11 +42,11 @@ class AthenaTokenizer(QueryTokenizer):
}

field_pattern = r'(?P<field_name>"[a-zA-Z\._\-\s]+"|[a-zA-Z\._\-]+)'
num_value_pattern =r"(?P<num_value>\d+(?:\.\d+)*)\s*"
bool_value_pattern =r"(?P<bool_value>true|false)\s*"
single_quotes_value_pattern =r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'"""
num_value_pattern =fr"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)\s*"
bool_value_pattern =fr"(?P<{ValueType.bool_value}>true|false)\s*"
single_quotes_value_pattern =fr"""'(?P<{ValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*)'"""
_value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
multi_value_pattern =r"""\((?P<value>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*')*)\)"""
multi_value_pattern =fr"""\((?P<{ValueType.value}>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*')*)\)"""

wildcard_symbol = "%"

Expand All@@ -54,13 +55,13 @@ def should_process_value_wildcard_symbols(operator: str) -> bool:
return operator.lower() in ("like",)

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if (num_value := get_match_group(match, group_name='num_value')) is not None:
if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None:
return operator, num_value

elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
elif (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None:
return operator, bool_value

elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
elif (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None:
return operator, s_q_value

return super().get_operator_and_value(match, operator)
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
from app.translator.core.custom_types.values import ValueType
from app.translator.core.escape_manager import EscapeManager
from app.translator.core.models.escape_details import EscapeDetails


class LuceneEscapeManager(EscapeManager):
    """Escape rules for Lucene query values."""

    # A single character class lists every symbol that gets a backslash
    # prefix when a generic value is escaped; whitespace and the backslash
    # itself are part of the class.
    escape_map = {
        ValueType.value: EscapeDetails(
            pattern=r'([_!@#$%^&*=+()\[\]{}|;:\'",.<>?/`~\-\s\\])',
            escape_symbols=r"\\\1",
        ),
    }


lucene_escape_manager = LuceneEscapeManager()
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -21,16 +21,15 @@
from app.translator.const import DEFAULT_VALUE_TYPE
from app.translator.core.render import BaseQueryRender
from app.translator.core.render import BaseQueryFieldValue
from app.translator.platforms.base.lucene.escape_manager import lucene_escape_manager


class LuceneFieldValue(BaseQueryFieldValue):

def apply_value(self, value: Union[str, int]):
return value
escape_manager = lucene_escape_manager

def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
values = self.or_token.join(self.apply_value(f'{v}') for v in value)
values = self.or_token.join(f'{self.apply_value(v)}' for v in value)
return f"{field}:({values})"
return f'{field}:{self.apply_value(value)}'

Expand All@@ -48,29 +47,29 @@ def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:

def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
    """Render a negated comparison of ``field`` against ``value``.

    For a list, each element is passed through ``apply_value`` and the
    results are joined with ``or_token`` inside one parenthesised group.

    Args:
        field: Name of the field being compared.
        value: A single value or a list of values.

    Returns:
        The rendered ``NOT (...)`` expression.
    """
    if isinstance(value, list):
        values = self.or_token.join(f'{self.apply_value(v)}' for v in value)
        # The closing parenthesis of the outer NOT group was missing, which
        # produced an unbalanced expression for list values.
        return f"NOT ({field} = ({values}))"
    return f'NOT ({field} = {self.apply_value(value)})'

def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
values = self.or_token.join(self.apply_value(f'*{v}*') for v in value)
values = self.or_token.join(f'*{self.apply_value(v)}*' for v in value)
return f"{field}:({values})"
prepared_value =self.apply_value(f"*{value}*")
prepared_value = f"*{self.apply_value(value)}*"
return f'{field}:{prepared_value}'

def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
values = self.or_token.join(self.apply_value(f'*{v}') for v in value)
values = self.or_token.join(f'*{self.apply_value(v)}' for v in value)
return f"{field}:({values})"
prepared_value =self.apply_value(f"*{value}")
prepared_value = f"*{self.apply_value(value)}"
return f'{field}:{prepared_value}'

def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
values = self.or_token.join(self.apply_value(f'{v}*') for v in value)
values = self.or_token.join(f'{self.apply_value(v)}*' for v in value)
return f"{field}:({values})"
prepared_value = self.apply_value(f"{value}*")
prepared_value =f"{self.apply_value(value)}*"
return f'{field}:{prepared_value}'

def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
Expand All@@ -81,7 +80,7 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
returnself.apply_value(f"*{value}*")
return f"*{self.apply_value(value)}*"


class LuceneQueryRender(BaseQueryRender):
Expand Down
32 changes: 18 additions & 14 deletions translator/app/translator/platforms/base/lucene/tokenizer.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -19,12 +19,14 @@

from typing import Tuple, Union, List, Any

from app.translator.core.custom_types.values import ValueType
from app.translator.core.exceptions.parser import TokenizerGeneralException
from app.translator.core.mixins.logic import ANDLogicOperatorMixin
from app.translator.core.models.field import Keyword, Field
from app.translator.core.models.identifier import Identifier
from app.translator.core.tokenizer import QueryTokenizer
from app.translator.core.custom_types.tokens import OperatorType
from app.translator.platforms.base.lucene.escape_manager import lucene_escape_manager
from app.translator.tools.utils import get_match_group


Expand All@@ -41,19 +43,21 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:\[\*\sTO|:\[|:<|:>|:))\s*"
_num_value_pattern = r"\d+(?:\.\d+)*"
num_value_pattern = fr"(?P<num_value>{_num_value_pattern})\s*"
double_quotes_value_pattern =r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
no_quotes_value_pattern =r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
re_value_pattern =r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\[\]\s?]+)/\s*"
gte_value_pattern = fr"\[\s*(?P<gte_value>{_num_value_pattern})\s+TO\s+\*\s*\]"
lte_value_pattern = fr"\[\s*\*\s+TO\s+(?P<lte_value>{_num_value_pattern})\s*\]"
num_value_pattern = fr"(?P<{ValueType.number_value}>{_num_value_pattern})\s*"
double_quotes_value_pattern =fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\)*)"\s*'
no_quotes_value_pattern =fr"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
re_value_pattern =fr"/(?P<{ValueType.regular_expression_value}>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{{\}}\[\]\s?]+)/\s*"
gte_value_pattern = fr"\[\s*(?P<{ValueType.greater_than_or_equal}>{_num_value_pattern})\s+TO\s+\*\s*\]"
lte_value_pattern = fr"\[\s*\*\s+TO\s+(?P<{ValueType.less_than_or_equal}>{_num_value_pattern})\s*\]"
range_value_pattern = fr"{gte_value_pattern}|{lte_value_pattern}"
_value_pattern = fr"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}|{range_value_pattern}"
keyword_pattern =r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)"
keyword_pattern =fr"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{{|\\\}}|\\\:|\\)+)(?:\s+|\)|$)"

multi_value_pattern =r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
multi_value_pattern =fr"""\((?P<{ValueType.value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
multi_value_check_pattern = r"___field___\s*___operator___\s*\("

escape_manager = lucene_escape_manager

wildcard_symbol = "*"

@staticmethod
Expand All@@ -69,22 +73,22 @@ def clean_quotes(value: Union[str, int]):
return value

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if (num_value := get_match_group(match, group_name='num_value')) is not None:
if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None:
return operator, num_value

elif (re_value := get_match_group(match, group_name='re_value')) is not None:
elif (re_value := get_match_group(match, group_name=ValueType.regular_expression_value)) is not None:
return OperatorType.REGEX, re_value

elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None:
elif (n_q_value := get_match_group(match, group_name=ValueType.no_quotes_value)) is not None:
return operator, n_q_value

elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
elif (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None:
return operator, d_q_value

elif (gte_value := get_match_group(match, group_name='gte_value')) is not None:
elif (gte_value := get_match_group(match, group_name=ValueType.greater_than_or_equal)) is not None:
return OperatorType.GTE, gte_value

elif (lte_value := get_match_group(match, group_name='lte_value')) is not None:
elif (lte_value := get_match_group(match, group_name=ValueType.less_than_or_equal)) is not None:
return OperatorType.LTE, lte_value

return super().get_operator_and_value(match, operator)
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
from app.translator.core.custom_types.values import ValueType
from app.translator.core.escape_manager import EscapeManager
from app.translator.core.models.escape_details import EscapeDetails


class SplEscapeManager(EscapeManager):
    """Escape rules for SPL query values."""

    escape_map = {
        ValueType.value: EscapeDetails(
            # Matches a double quote, or a backslash that is neither preceded
            # by a backslash nor followed by ``*``, ``?`` or another backslash.
            # ``escape_symbols`` is omitted, so the EscapeDetails default is used.
            pattern=r'("|(?<!\\)\\(?![*?\\]))',
        ),
    }


spl_escape_manager = SplEscapeManager()
22 changes: 12 additions & 10 deletions translator/app/translator/platforms/base/spl/renders/spl.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -21,51 +21,53 @@
from app.translator.const import DEFAULT_VALUE_TYPE
from app.translator.core.exceptions.render import UnsupportedRenderMethod
from app.translator.core.render import BaseQueryRender, BaseQueryFieldValue
from app.translator.platforms.base.spl.escape_manager import spl_escape_manager


class SplFieldValue(BaseQueryFieldValue):
escape_manager = spl_escape_manager

def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.equal_modifier(field=field, value=v) for v in value])})"
return f'{field}="{value}"'
return f'{field}="{self.apply_value(value)}"'

def less_modifier(self, field: str, value: Union[int, str]) -> str:
return f'{field}<"{value}"'
return f'{field}<"{self.apply_value(value)}"'

def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
return f'{field}<="{value}"'
return f'{field}<="{self.apply_value(value)}"'

def greater_modifier(self, field: str, value: Union[int, str]) -> str:
return f'{field}>"{value}"'
return f'{field}>"{self.apply_value(value)}"'

def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
return f'{field}>="{value}"'
return f'{field}>="{self.apply_value(value)}"'

def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})"
return f'{field}!="{value}"'
return f'{field}!="{self.apply_value(value)}"'

def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.contains_modifier(field=field, value=v) for v in value])})"
return f'{field}="*{value}*"'
return f'{field}="*{self.apply_value(value)}*"'

def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.endswith_modifier(field=field, value=v) for v in value])})"
return f'{field}="*{value}"'
return f'{field}="*{self.apply_value(value)}"'

def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.startswith_modifier(field=field, value=v) for v in value])})"
return f'{field}="{value}*"'
return f'{field}="{self.apply_value(value)}*"'

def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
return f'"{value}"'
return f'"{self.apply_value(value)}"'

def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
raise UnsupportedRenderMethod(platform_name=self.details.name, method="Regex Expression")
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp