#%%
'''
pip install googletrans==3.1.0a0

from googletrans import Translator
from google.colab import files

uploaded = files.upload()
'''
import pandas as pd

# Load the CSV export and keep only the three text columns that the later
# cells tokenise and clean.
# NOTE(review): the path is specific to one machine; adjust when running elsewhere.
df = pd.read_csv("C:/Users/GAURAV/Downloads/data.csv")
df = df[['first_party', 'property_description', 'second_party']]
print(df.head(10))

#pip install indic-nlp-library

#pip install xlrd==1.2.0

'''
from indicnlp.tokenize import indic_tokenize
def tokenization(indic_string):
    tokens = []
    for t in indic_tokenize.trivial_tokenize(indic_string):
        tokens.append(t)
    return tokens
prop=['first_party','property_description','second_party']
for i in prop:
    try:
        df[i] = df[i].apply(lambda x: tokenization(x))
    except:
        df[i] = df[i].astype(str)
        df[i] = df[i].apply(lambda x: tokenization(x))

df['first_party'] = df['first_party'].apply(lambda x: tokenization(x))
df["second_party"] = df["second_party"].astype(str)
df['second_party'] = df['second_party'].apply(lambda x: tokenization(x))
'''
#%%
from indicnlp.tokenize import indic_tokenize


def tokenization(indic_string):
    """Split *indic_string* into a list of tokens.

    Delegates to ``indic_tokenize.trivial_tokenize`` from the Indic NLP
    library and materialises everything it yields as a list.

    Args:
        indic_string: Text to tokenise.

    Returns:
        list: The tokens, in the order the tokenizer produced them.
    """
    return list(indic_tokenize.trivial_tokenize(indic_string))
# Columns of ``df`` that are tokenised and then stripped of unwanted tokens.
prop = ['first_party', 'property_description', 'second_party']

# Removal vocabulary. It is defined once here instead of being rebuilt (twice)
# inside the per-column loop; the entries themselves are unchanged.
stopwords_hi = [
    'तुम','मेरी','मुझे','क्योंकि','हम','प्रति','अबकी','आगे','माननीय','शहर','बताएं','कौनसी',
    'क्लिक','किसकी','बड़े','मैं','and','रही','आज','लें','आपके','मिलकर','सब','मेरे',
    'जी','श्री','वैसा','आपका','अंदर','अत','अपना','अपनी','अपने','अभी','आदि','आप',
    'इत्यादि','इन','इनका','इन्हीं','इन्हें','इन्हों','इस','इसका','इसकी','इसके','इसमें','इसी',
    'इसे','उन','उनका','उनकी','उनके','उनको','उन्हीं','उन्हें','उन्हों','उस','उसके','उसी',
    'उसे','एक','एवं','एस','ऐसे','और','कई','कर','करता','करते','करना','करने',
    'करें','कहते','कहा','का','काफ़ी','कि','कितना','किन्हें','किन्हों','किया','किर','किस',
    'किसी','किसे','की','कुछ','कुल','के','को','कोई','कौन','कौनसा','गया','घर',
    'जब','जहाँ','जा','जितना','जिन','जिन्हें','जिन्हों','जिस','जिसे','जीधर','जैसा','जैसे',
    'जो','तक','तब','तरह','तिन','तिन्हें','तिन्हों','तिस','तिसे','तो','था','थी',
    'थे','दबारा','दिया','दुसरा','दूसरे','दो','द्वारा','न','नहीं','ना','निहायत','नीचे',
    'ने','पर','पर','पहले','पूरा','पे','फिर','बनी','बही','बहुत','बाद','बाला',
    'बिलकुल','भी','भीतर','मगर','मानो','मे','में','यदि','यह','यहाँ','यही','या',
    'यिह','ये','रखें','रहा','रहे','ऱ्वासा','लिए','लिये','लेकिन','व','वर्ग','वह',
    'वह','वहाँ','वहीं','वाले','वुह','वे','वग़ैरह','संग','सकता','सकते','सबसे','सभी',
    'साथ','साबुत','साभ','सारा','से','सो','ही','हुआ','हुई','हुए','है','हैं',
    'हो','होता','होती','होते','होना','होने','अपनि','जेसे','होति','सभि','तिंहों','इंहों',
    'दवारा','इसि','किंहें','थि','उंहों','ओर','जिंहें','वहिं','अभि','बनि','हि','उंहिं',
    'उंहें','हें','वगेरह','एसे','रवासा','कोन','निचे','काफि','उसि','पुरा','भितर','हे',
    'बहि','वहां','कोइ','यहां','जिंहों','तिंहें','किसि','कइ','यहि','इंहिं','जिधर','इंहें',
    'अदि','इतयादि','हुइ','कोनसा','इसकि','दुसरे','जहां','अप','किंहों','उनकि','भि','वरग',
    'हुअ','जेसा','नहिं',"-मे "," मे ","मे.","-मे,"," मे,","-मै "," मै ","मै.","मै,",
    "-मेस "," मेस ","-मेस."," मेस.","मेसर्स","एम/एस",
]
stopwords_en = [
    'i','me','my','myself','we','our','ours','ourselves','you',"you're","you've","you'll",
    "you'd",'your','yours','yourself','yourselves','he','him','his','himself','she',"she's",
    'her','hers','herself','it',"it's",'its','itself','they','them','their','theirs',
    'themselves','what','which','who','whom','this','that',"that'll",'these','those','am',
    'is','are','was','were','be','been','being','have','has','had','having','do','does',
    'did','doing','a','an','the','and','but','if','or','because','as','until','while','of',
    'at','by','for','with','about','against','between','into','through','during','before',
    'after','above','below','to','from','up','down','in','out','on','off','over','under',
    'again','further','then','once','here','there','when','where','why','how','all','any',
    'both','each','few','more','most','other','some','such','no','nor','not','only','own',
    'same','so','than','too','very','s','t','can','will','just','don',"don't",'should',
    "should've",'now','d','ll','m','o','re','ve','y','ain','aren',"aren't",'couldn',
    "couldn't",'didn',"didn't",'doesn',"doesn't",'hadn',"hadn't",'hasn',"hasn't",'haven',
    "haven't",'isn',"isn't",'ma','mightn',"mightn't",'mustn',"mustn't",'needn',"needn't",
    'shan',"shan't",'shouldn',"shouldn't",'wasn',"wasn't",'weren',"weren't",'won',"won't",
    'wouldn',"wouldn't","-ms "," ms ","m/s ","m/s.",
]
# NOTE(review): the entry "', " (quote, comma, space) is what the original
# literal '\', ' evaluates to; it looks like a backslash entry was intended.
# It is kept as-is here -- confirm before changing.
punctuations = [
    'nn','n','।','/','`','+',"', ",'?','▁(','$','@','[','_',"'",'!',',',':','^','|',']',
    '=','%','&','.',')','(','*','',';','-','{','}','|','"',
]
# Extra terms removed in addition to the stop words and punctuation.
trial = ['एकूण',"क्षेत्रफि","क्षेत्रफळ","क्षेत्र","क्षेञ"]
to_be_removed = stopwords_hi + punctuations + stopwords_en + trial

# Hash-based lookup: each token is checked in O(1) instead of scanning the list.
_REMOVAL_SET = frozenset(to_be_removed)


def _drop_unwanted(tokens):
    """Return *tokens* without any entry that appears in ``to_be_removed``."""
    return [tok for tok in tokens if tok not in _REMOVAL_SET]


for column in prop:
    # astype(str) leaves existing strings unchanged and converts everything
    # else to text the tokenizer accepts -- the same cast the original applied
    # in its fallback branch, so no blanket try/except is needed.
    # NOTE(review): for object-dtype columns a missing value becomes the text
    # 'nan' and is tokenised as such, exactly as before.
    tokenised = df[column].astype(str).apply(tokenization)
    # Assign the whole column at once; element-wise ``df[col][row] = ...`` is
    # chained assignment and may write to a temporary copy.
    df[column] = tokenised.apply(_drop_unwanted)

df.head(20)

#pip install English-to-Hindi
#%%
from googletrans import Translator

# Smoke test: send one sample Devanagari string through googletrans
# (source language 'hi', target 'en') and print the translated text.
# The sample is bound to ``sample_text`` rather than ``str`` so the builtin
# ``str`` stays usable (e.g. for ``.astype(str)``) in the same session.
sample_text = "सलील जनार्दन बोरवंडकर"
translator = Translator()
translated = translator.translate(sample_text, src='hi', dest='en')
print(translated.text)
# %%