1
+ #!/usr/bin/env python3
2
+ """
3
+ Create a focused terminology dictionary for the most important Python terms.
4
+
5
+ This script extracts the most critical Python terminology for translation consistency.
6
+ """
7
+
8
+ import csv
9
+ from collections import defaultdict ,Counter
10
+
11
+
12
# Vocabularies shared by term selection and categorisation, so the two steps
# can never disagree about which terms belong to which group.
_CORE_CONCEPTS = frozenset({
    'class', 'function', 'method', 'module', 'package', 'object', 'type',
})
_BUILTIN_TYPES = frozenset({
    'int', 'str', 'list', 'dict', 'tuple', 'set', 'float', 'bool', 'complex',
})
_KEYWORDS_AND_CONSTANTS = frozenset({
    'none', 'true', 'false', 'return', 'import', 'def', 'async', 'await',
    'lambda', 'yield', 'raise', 'try', 'except', 'finally', 'with', 'as',
})
_PRIORITY_VOCABULARY = _CORE_CONCEPTS | _BUILTIN_TYPES | _KEYWORDS_AND_CONSTANTS

# Lower-case fragments; a term containing any of them is always kept.
_CONCEPT_FRAGMENTS = (
    'exception', 'error', 'iterator', 'generator', 'decorator', 'property',
    'classmethod', 'staticmethod', 'metaclass', 'inheritance', 'polymorphism',
)

# Case-sensitive fragments that mark a term as a technical identifier.
_TECHNICAL_PATTERNS = ('()', 'Error', 'Exception', 'Class')

# Column order of the focused dictionary CSV.
_OUTPUT_FIELDS = [
    'source_term', 'translated_term', 'frequency', 'files_count',
    'priority', 'category', 'example_files',
]


def _is_important(source_term, frequency, files_count):
    """Return True when a term is worth keeping in the focused dictionary."""
    lowered = source_term.lower()

    # High priority: core concepts, built-in types, keywords and constants.
    if lowered in _PRIORITY_VOCABULARY:
        return True

    # High priority: common Python concepts (case-insensitive substring match).
    if any(fragment in lowered for fragment in _CONCEPT_FRAGMENTS):
        return True

    # High priority: widespread usage across many files.
    if files_count >= 20 and frequency >= 10:
        return True

    # Medium priority: code elements, i.e. anything in backticks or a dunder
    # name. The parentheses spell out the precedence the original relied on.
    if '`' in source_term or (
        source_term.startswith('__') and source_term.endswith('__')
    ):
        return True

    # Medium priority: terms with technical patterns (case-sensitive).
    return any(pattern in source_term for pattern in _TECHNICAL_PATTERNS)


def _categorize(source_term, files_count):
    """Return the ``(category, priority)`` pair for an already selected term."""
    lowered = source_term.lower()

    if lowered in _CORE_CONCEPTS:
        return 'Core Concepts', 'High'
    if lowered in _BUILTIN_TYPES:
        return 'Built-in Types', 'High'
    if lowered in _KEYWORDS_AND_CONSTANTS:
        return 'Keywords/Constants', 'High'
    if 'error' in lowered or 'exception' in lowered:
        return 'Exceptions', 'High'
    if '`' in source_term:
        return 'Code Elements', 'Medium'
    if files_count >= 50:
        return 'Common Terms', 'High'
    return 'Other', 'Medium'


def create_focused_dictionary(
    input_path="terminology_dictionary.csv",
    output_path="focused_terminology_dictionary.csv",
):
    """Create a focused dictionary with the most important terms.

    Reads the full terminology CSV, keeps the rows selected by
    ``_is_important``, sorts them by descending frequency and writes them to
    a new CSV with two extra columns, ``priority`` and ``category``. A short
    summary, including a per-category breakdown, is printed to stdout.

    Args:
        input_path: CSV to read. It must provide the columns ``source_term``,
            ``translated_term``, ``frequency``, ``files_count`` and
            ``example_files``.
        output_path: CSV to create or overwrite.

    Raises:
        OSError: if either file cannot be opened.
        KeyError: if a required column is missing from the input.
        ValueError: if ``frequency`` or ``files_count`` is not an integer.
    """
    important_terms = []

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(input_path, 'r', encoding='utf-8', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            source_term = row['source_term'].strip()
            frequency = int(row['frequency'])
            files_count = int(row['files_count'])

            if _is_important(source_term, frequency, files_count):
                important_terms.append(row)

    # Most common terms first; the sort is stable, so ties keep input order.
    important_terms.sort(key=lambda row: int(row['frequency']), reverse=True)

    # Tally categories while writing, so the printed breakdown always matches
    # the category column in the output file.
    category_counts = Counter()

    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_OUTPUT_FIELDS)
        writer.writeheader()

        for term_data in important_terms:
            source_term = term_data['source_term'].strip()
            category, priority = _categorize(
                source_term, int(term_data['files_count'])
            )
            category_counts[category] += 1

            writer.writerow({
                'source_term': source_term,
                'translated_term': term_data['translated_term'],
                'frequency': term_data['frequency'],
                'files_count': term_data['files_count'],
                'priority': priority,
                'category': category,
                'example_files': term_data['example_files'],
            })

    print(f"Created focused terminology dictionary with {len(important_terms)} important terms")

    # Categories are listed in the order they first appear in the output.
    print("\n Category breakdown:")
    for category, count in category_counts.items():
        print(f"{category}: {count} terms")
137
+
138
+
139
# Script entry point: build the focused dictionary only when this file is run
# directly, so importing the module has no side effects.
if __name__ == "__main__":
    create_focused_dictionary()