1+ #!/usr/bin/env python3
2+ """
3+ Create a focused terminology dictionary for the most important Python terms.
4+
5+ This script extracts the most critical Python terminology for translation consistency.
6+ """
7+
8+ import csv
9+ from collections import defaultdict ,Counter
10+
11+
# Term groups shared by the selection step and the categorisation step, so
# the two can never drift apart. All entries are lower-case because terms are
# lower-cased before the membership test.
_CORE_CONCEPTS = frozenset({
    'class', 'function', 'method', 'module', 'package', 'object', 'type',
})
_BUILTIN_TYPES = frozenset({
    'int', 'str', 'list', 'dict', 'tuple', 'set', 'float', 'bool', 'complex',
})
_KEYWORDS_AND_CONSTANTS = frozenset({
    'none', 'true', 'false', 'return', 'import', 'def', 'async', 'await',
    'lambda', 'yield', 'raise', 'try', 'except', 'finally', 'with', 'as',
})

# Substrings (matched case-insensitively) that mark a common Python concept.
_CONCEPT_SUBSTRINGS = (
    'exception', 'error', 'iterator', 'generator', 'decorator', 'property',
    'classmethod', 'staticmethod', 'metaclass', 'inheritance', 'polymorphism',
)

# Case-sensitive fragments that suggest a technical identifier.
_TECHNICAL_PATTERNS = ('()', 'Error', 'Exception', 'Class')

# Column order of the focused dictionary CSV.
_OUTPUT_FIELDS = [
    'source_term', 'translated_term', 'frequency', 'files_count',
    'priority', 'category', 'example_files',
]


def _is_important(source_term, frequency, files_count):
    """Return True if the term belongs in the focused dictionary."""
    lowered = source_term.lower()

    # High priority: core concepts, built-in types and keywords.
    if (
        lowered in _CORE_CONCEPTS
        or lowered in _BUILTIN_TYPES
        or lowered in _KEYWORDS_AND_CONSTANTS
    ):
        return True

    # High priority: common Python concepts (substring match).
    if any(concept in lowered for concept in _CONCEPT_SUBSTRINGS):
        return True

    # High priority: terms that are both widespread and frequent.
    if files_count >= 20 and frequency >= 10:
        return True

    # Medium priority: inline code (backticks) or dunder names. The
    # parentheses only make the original operator precedence explicit.
    if '`' in source_term or (
        source_term.startswith('__') and source_term.endswith('__')
    ):
        return True

    # Medium priority: terms that look like technical identifiers.
    return any(pattern in source_term for pattern in _TECHNICAL_PATTERNS)


def _categorize(source_term, files_count):
    """Return the ``(category, priority)`` pair for a selected term."""
    lowered = source_term.lower()

    if lowered in _CORE_CONCEPTS:
        return 'Core Concepts', 'High'
    if lowered in _BUILTIN_TYPES:
        return 'Built-in Types', 'High'
    if lowered in _KEYWORDS_AND_CONSTANTS:
        return 'Keywords/Constants', 'High'
    if 'error' in lowered or 'exception' in lowered:
        return 'Exceptions', 'High'
    if '`' in source_term:
        return 'Code Elements', 'Medium'
    if files_count >= 50:
        return 'Common Terms', 'High'
    return 'Other', 'Medium'


def create_focused_dictionary(
    input_path="terminology_dictionary.csv",
    output_path="focused_terminology_dictionary.csv",
):
    """Create a focused dictionary with the most important terms.

    Reads the full terminology CSV, keeps the rows judged important by
    ``_is_important``, sorts them by descending frequency, writes them to a
    new CSV with added ``priority`` and ``category`` columns, and prints a
    summary with a per-category count.

    Args:
        input_path: CSV to read. It must provide the columns ``source_term``,
            ``translated_term``, ``frequency``, ``files_count`` and
            ``example_files``.
        output_path: CSV to create (overwritten if it already exists).

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        KeyError: If a required column is missing from the input.
        ValueError: If ``frequency`` or ``files_count`` is not an integer.
    """
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(input_path, 'r', newline='', encoding='utf-8') as csvfile:
        important_terms = [
            row
            for row in csv.DictReader(csvfile)
            if _is_important(
                row['source_term'].strip(),
                int(row['frequency']),
                int(row['files_count']),
            )
        ]

    # Most frequent terms first; the sort is stable, so ties keep file order.
    important_terms.sort(key=lambda row: int(row['frequency']), reverse=True)

    # Count categories while writing so the printed statistics always match
    # what was actually written to the file.
    categories = Counter()

    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_OUTPUT_FIELDS)
        writer.writeheader()

        for term_data in important_terms:
            source_term = term_data['source_term'].strip()
            category, priority = _categorize(
                source_term, int(term_data['files_count'])
            )
            categories[category] += 1

            writer.writerow({
                'source_term': source_term,
                'translated_term': term_data['translated_term'],
                'frequency': term_data['frequency'],
                'files_count': term_data['files_count'],
                'priority': priority,
                'category': category,
                'example_files': term_data['example_files'],
            })

    print(
        "Created focused terminology dictionary with "
        f"{len(important_terms)} important terms"
    )

    # Print category statistics in order of first appearance.
    print("\n Category breakdown:")
    for category, count in categories.items():
        print(f"{category}: {count} terms")
137+
138+
# Script entry point: build the focused dictionary only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    create_focused_dictionary()