|
1 |
| -# to use CPU uncomment below code |
2 |
| -# import os |
3 |
| -# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 |
4 |
| -# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" |
5 |
| - |
6 |
| -# import tensorflow as tf |
7 |
| - |
8 |
| -# config = tf.ConfigProto(intra_op_parallelism_threads=5, |
9 |
| -# inter_op_parallelism_threads=5, |
10 |
| -# allow_soft_placement=True, |
11 |
| -# device_count = {'CPU' : 1, |
12 |
| -# 'GPU' : 0} |
13 |
| -# ) |
14 |
| - |
15 |
| - |
16 |
| -from keras.preprocessing.text import Tokenizer |
17 |
| -from keras.preprocessing.sequence import pad_sequences |
18 |
| -from keras.utils import to_categorical |
19 |
| -from keras.callbacks import ModelCheckpoint, TensorBoard |
| 1 | +import tensorflow as tf |
| 2 | +gpus = tf.config.experimental.list_physical_devices('GPU') |
| 3 | +if gpus: |
| 4 | +    # only use GPU memory that we need, not allocate all the GPU memory |
| 5 | +    tf.config.experimental.set_memory_growth(gpus[0], enable=True) |
| 6 | + |
| 7 | +from tensorflow.keras.preprocessing.text import Tokenizer |
| 8 | +from tensorflow.keras.preprocessing.sequence import pad_sequences |
| 9 | +from tensorflow.keras.utils import to_categorical |
| 10 | +from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard |
20 | 11 | from sklearn.model_selection import train_test_split
|
21 | 12 | import time
|
22 | 13 | import numpy as np
|
23 | 14 | import pickle
|
24 | 15 |
|
25 |
| -from utils import get_embedding_vectors, get_model, SEQUENCE_LENGTH, EMBEDDING_SIZE, TEST_SIZE |
26 |
| -from utils import BATCH_SIZE, EPOCHS, int2label, label2int |
| 16 | +from utils import get_model, SEQUENCE_LENGTH, TEST_SIZE |
| 17 | +from utils import BATCH_SIZE, EPOCHS, label2int |
27 | 18 |
|
28 | 19 |
|
29 | 20 | def load_data():
|
@@ -69,26 +60,25 @@ def load_data():
|
69 | 60 |
|
70 | 61 | y = [ label2int[label] for label in y ]
|
71 | 62 | y = to_categorical(y)
|
72 |
| - |
73 | 63 | print(y[0])
|
74 | 64 |
|
75 | 65 | # split and shuffle
|
76 | 66 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=7)
|
77 |
| - |
| 67 | +# print our data shapes |
| 68 | +print("X_train.shape:", X_train.shape) |
| 69 | +print("X_test.shape:", X_test.shape) |
| 70 | +print("y_train.shape:", y_train.shape) |
| 71 | +print("y_test.shape:", y_test.shape) |
78 | 72 | # constructs the model with 128 LSTM units
|
79 | 73 | model = get_model(tokenizer=tokenizer, lstm_units=128)
|
80 | 74 |
|
81 | 75 | # initialize our ModelCheckpoint and TensorBoard callbacks
|
82 | 76 | # model checkpoint for saving best weights
|
83 |
| -model_checkpoint = ModelCheckpoint("results/spam_classifier_{val_loss:.2f}", save_best_only=True, |
| 77 | +model_checkpoint = ModelCheckpoint("results/spam_classifier_{val_loss:.2f}.h5", save_best_only=True, |
84 | 78 | verbose=1)
|
85 | 79 | # for better visualization
|
86 | 80 | tensorboard = TensorBoard(f"logs/spam_classifier_{time.time()}")
|
87 |
| -# print our data shapes |
88 |
| -print("X_train.shape:", X_train.shape) |
89 |
| -print("X_test.shape:", X_test.shape) |
90 |
| -print("y_train.shape:", y_train.shape) |
91 |
| -print("y_test.shape:", y_test.shape) |
| 81 | + |
92 | 82 | # train the model
|
93 | 83 | model.fit(X_train, y_train, validation_data=(X_test, y_test),
|
94 | 84 | batch_size=BATCH_SIZE, epochs=EPOCHS,
|
|