Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c50100f

Browse files
committed
add dimensionality reduction feature extraction tutorial
1 parent a29ed38 commit c50100f

File tree

5 files changed

+455
-0
lines changed

5 files changed

+455
-0
lines changed

‎README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
9191
-[Logistic Regression using PyTorch in Python](https://www.thepythoncode.com/article/logistic-regression-using-pytorch). ([code](machine-learning/logistic-regression-in-pytorch))
9292
-[Dropout Regularization using PyTorch in Python](https://www.thepythoncode.com/article/dropout-regularization-in-pytorch). ([code](machine-learning/dropout-in-pytorch))
9393
-[K-Fold Cross Validation using Scikit-Learn in Python](https://www.thepythoncode.com/article/kfold-cross-validation-using-sklearn-in-python). ([code](machine-learning/k-fold-cross-validation-sklearn))
94+
-[Dimensionality Reduction: Feature Extraction using Scikit-learn in Python](https://www.thepythoncode.com/article/dimensionality-reduction-using-feature-extraction-sklearn). ([code](machine-learning/dimensionality-reduction-feature-extraction))
9495

9596
-###[General Python Topics](https://www.thepythoncode.com/topic/general-python-topics)
9697
-[How to Make Facebook Messenger bot in Python](https://www.thepythoncode.com/article/make-bot-fbchat-python). ([code](general/messenger-bot))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,341 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type":"code",
5+
"execution_count":null,
6+
"metadata": {
7+
"id":"NNamP65y8eGf"
8+
},
9+
"outputs": [],
10+
"source": [
11+
"from sklearn import datasets\n",
12+
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
13+
"from sklearn.decomposition import PCA, KernelPCA\n",
14+
"from sklearn.datasets import make_circles\n",
15+
"from sklearn.preprocessing import StandardScaler\n",
16+
"from sklearn.decomposition import NMF\n",
17+
"from sklearn.decomposition import TruncatedSVD\n",
18+
"from scipy.sparse import csr_matrix"
19+
]
20+
},
21+
{
22+
"cell_type":"code",
23+
"execution_count":2,
24+
"metadata": {
25+
"colab": {
26+
"base_uri":"https://localhost:8080/"
27+
},
28+
"id":"fvJfKhFq8hQc",
29+
"outputId":"acbc4c59-acbd-4ff4-bacb-e54b55e0312f"
30+
},
31+
"outputs": [
32+
{
33+
"name":"stdout",
34+
"output_type":"stream",
35+
"text": [
36+
"Original number of features: 64\n",
37+
"Reduced number of features: 40\n"
38+
]
39+
}
40+
],
41+
"source": [
42+
"# Load the data\n",
43+
"digits = datasets.load_digits()\n",
44+
"# Feature matrix standardization\n",
45+
"features = StandardScaler().fit_transform(digits.data)\n",
46+
"# Perform PCA while retaining 95% of variance\n",
47+
"pca = PCA(n_components=0.95, whiten=True)\n",
48+
"# perform PCA\n",
49+
"pcafeatures = pca.fit_transform(features)\n",
50+
"# Display results\n",
51+
"print(\"Original number of features:\", features.shape[1])\n",
52+
"print(\"Reduced number of features:\", pcafeatures.shape[1])"
53+
]
54+
},
55+
{
56+
"cell_type":"code",
57+
"execution_count":3,
58+
"metadata": {
59+
"colab": {
60+
"base_uri":"https://localhost:8080/"
61+
},
62+
"id":"jyU800Lf8it4",
63+
"outputId":"0d4c73bf-7d08-48e6-a44f-a5647a2e0c11"
64+
},
65+
"outputs": [
66+
{
67+
"name":"stdout",
68+
"output_type":"stream",
69+
"text": [
70+
"Original number of features: 2\n",
71+
"Reduced number of features: 1\n"
72+
]
73+
}
74+
],
75+
"source": [
76+
"# Creation of the linearly inseparable data\n",
77+
"features, _ = make_circles(n_samples=2000, random_state=1, noise=0.1, factor=0.1)\n",
78+
"# kernel PCA with radial basis function (RBF) kernel application\n",
79+
"k_pca = KernelPCA(kernel=\"rbf\", gamma=16, n_components=1)\n",
80+
"k_pcaf = k_pca.fit_transform(features)\n",
81+
"print(\"Original number of features:\", features.shape[1])\n",
82+
"print(\"Reduced number of features:\", k_pcaf.shape[1])"
83+
]
84+
},
85+
{
86+
"cell_type":"code",
87+
"execution_count":4,
88+
"metadata": {
89+
"colab": {
90+
"base_uri":"https://localhost:8080/"
91+
},
92+
"id":"IfCo5TA28kn6",
93+
"outputId":"312956a9-9fb5-4296-d766-a3e642649da1"
94+
},
95+
"outputs": [
96+
{
97+
"name":"stdout",
98+
"output_type":"stream",
99+
"text": [
100+
"number of features(original): 4\n",
101+
"number of features that was reduced: 1\n"
102+
]
103+
}
104+
],
105+
"source": [
106+
"# Iris flower dataset loading:\n",
107+
"iris = datasets.load_iris()\n",
108+
"features = iris.data\n",
109+
"target = iris.target\n",
110+
"# Create LDA and use it to transform the features\n",
111+
"lda = LinearDiscriminantAnalysis(n_components=1)\n",
112+
"features_lda = lda.fit(features, target).transform(features)\n",
113+
"# Print the number of features\n",
114+
"print(\"number of features(original):\", features.shape[1])\n",
115+
"print(\"number of features that was reduced:\", features_lda.shape[1])"
116+
]
117+
},
118+
{
119+
"cell_type":"code",
120+
"execution_count":5,
121+
"metadata": {
122+
"colab": {
123+
"base_uri":"https://localhost:8080/"
124+
},
125+
"id":"yjQBlMtM8mQu",
126+
"outputId":"800279fb-f44b-43e8-9210-a35b8e190fc7"
127+
},
128+
"outputs": [
129+
{
130+
"data": {
131+
"text/plain": [
132+
"array([0.9912126])"
133+
]
134+
},
135+
"execution_count":5,
136+
"metadata": {},
137+
"output_type":"execute_result"
138+
}
139+
],
140+
"source": [
141+
"lda.explained_variance_ratio_"
142+
]
143+
},
144+
{
145+
"cell_type":"code",
146+
"execution_count":10,
147+
"metadata": {
148+
"colab": {
149+
"base_uri":"https://localhost:8080/"
150+
},
151+
"id":"tHOWTxn18nf7",
152+
"outputId":"ae3c857a-0ca8-4508-affc-b5ea4dff6788"
153+
},
154+
"outputs": [
155+
{
156+
"data": {
157+
"text/plain": [
158+
"1"
159+
]
160+
},
161+
"execution_count":10,
162+
"metadata": {},
163+
"output_type":"execute_result"
164+
}
165+
],
166+
"source": [
167+
"# Load Iris flower dataset:\n",
168+
"iris123 = datasets.load_iris()\n",
169+
"features = iris123.data\n",
170+
"target = iris123.target\n",
171+
"# Create and run LDA\n",
172+
"lda_r = LinearDiscriminantAnalysis(n_components=None)\n",
173+
"features_lda = lda_r.fit(features, target)\n",
174+
"# array of explained variance ratios\n",
175+
"lda_var_r = lda_r.explained_variance_ratio_\n",
176+
"# function creation\n",
177+
"def select_n_c(v_ratio, g_var: float) -> int:\n",
178+
" # initial variance explained setting\n",
179+
" total_v = 0.0\n",
180+
" # number of features initialisation\n",
181+
" n_components = 0\n",
182+
" # If we consider explained variance of each feature:\n",
183+
" for explained_v in v_ratio:\n",
184+
" # explained variance addition to the total\n",
185+
" total_v += explained_v\n",
186+
" # add one to number of components\n",
187+
" n_components += 1\n",
188+
"# if we attain our goal level of explained variance\n",
189+
" if total_v >= g_var:\n",
190+
" # end the loop\n",
191+
" break\n",
192+
" # return the number of components\n",
193+
" return n_components\n",
194+
"\n",
195+
"# run the function\n",
196+
"select_n_c(lda_var_r, 0.95)"
197+
]
198+
},
199+
{
200+
"cell_type":"code",
201+
"execution_count":7,
202+
"metadata": {
203+
"colab": {
204+
"base_uri":"https://localhost:8080/"
205+
},
206+
"id":"12zwY1Du8o6i",
207+
"outputId":"e9178fdf-2195-41cc-f4c3-a1e52c030df5"
208+
},
209+
"outputs": [
210+
{
211+
"name":"stderr",
212+
"output_type":"stream",
213+
"text": [
214+
"/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_nmf.py:294: FutureWarning: The 'init' value, when 'init=None' and n_components is less than n_samples and n_features, will be changed from 'nndsvd' to 'nndsvda' in 1.1 (renaming of 0.26).\n",
215+
" FutureWarning,\n"
216+
]
217+
},
218+
{
219+
"name":"stdout",
220+
"output_type":"stream",
221+
"text": [
222+
"Original number of features: 64\n",
223+
"Reduced number of features: 12\n"
224+
]
225+
},
226+
{
227+
"name":"stderr",
228+
"output_type":"stream",
229+
"text": [
230+
"/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_nmf.py:1641: ConvergenceWarning: Maximum number of iterations 200 reached. Increase it to improve convergence.\n",
231+
" ConvergenceWarning,\n"
232+
]
233+
}
234+
],
235+
"source": [
236+
"# data loading\n",
237+
"digit = datasets.load_digits()\n",
238+
"# feature matrix loading\n",
239+
"feature_m = digit.data\n",
240+
"# Creation, fit and application of NMF\n",
241+
"n_mf = NMF(n_components=12, random_state=1)\n",
242+
"features_nmf = n_mf.fit_transform(feature_m)\n",
243+
"# Show results\n",
244+
"print(\"Original number of features:\", feature_m.shape[1])\n",
245+
"print(\"Reduced number of features:\", features_nmf.shape[1])"
246+
]
247+
},
248+
{
249+
"cell_type":"code",
250+
"execution_count":8,
251+
"metadata": {
252+
"colab": {
253+
"base_uri":"https://localhost:8080/"
254+
},
255+
"id":"wrEYF9Ql8qtU",
256+
"outputId":"c28d28be-4f0b-4bd7-bb56-fde6ead38a45"
257+
},
258+
"outputs": [
259+
{
260+
"name":"stdout",
261+
"output_type":"stream",
262+
"text": [
263+
"Original number of features: 64\n",
264+
"Reduced number of features: 12\n"
265+
]
266+
}
267+
],
268+
"source": [
269+
"# data loading\n",
270+
"digit123 = datasets.load_digits()\n",
271+
"# feature matrix Standardization\n",
272+
"features_m = StandardScaler().fit_transform(digit123.data)\n",
273+
"# sparse matrix creation\n",
274+
"f_sparse = csr_matrix(features_m)\n",
275+
"# TSVD creation\n",
276+
"tsvd = TruncatedSVD(n_components=12)\n",
277+
"# sparse matrix TSVD\n",
278+
"features_sp_tsvd = tsvd.fit(f_sparse).transform(f_sparse)\n",
279+
"# results\n",
280+
"print(\"Original number of features:\", f_sparse.shape[1])\n",
281+
"print(\"Reduced number of features:\", features_sp_tsvd.shape[1])"
282+
]
283+
},
284+
{
285+
"cell_type":"code",
286+
"execution_count":9,
287+
"metadata": {
288+
"colab": {
289+
"base_uri":"https://localhost:8080/"
290+
},
291+
"id":"xRQ_nUf_8sZA",
292+
"outputId":"19b8d99c-b330-406d-e728-407c18d82f20"
293+
},
294+
"outputs": [
295+
{
296+
"data": {
297+
"text/plain": [
298+
"0.3003938539283667"
299+
]
300+
},
301+
"execution_count":9,
302+
"metadata": {},
303+
"output_type":"execute_result"
304+
}
305+
],
306+
"source": [
307+
"# Sum of first three components' explained variance ratios\n",
308+
"tsvd.explained_variance_ratio_[0:3].sum()"
309+
]
310+
},
311+
{
312+
"cell_type":"code",
313+
"execution_count":null,
314+
"metadata": {
315+
"id":"zbExVkXp8vpi"
316+
},
317+
"outputs": [],
318+
"source": []
319+
}
320+
],
321+
"metadata": {
322+
"colab": {
323+
"name":"DimentionalityReductionUsingFeatureExtraction_PythonCodeTutorial.ipynb",
324+
"provenance": []
325+
},
326+
"interpreter": {
327+
"hash":"f89a88aed07bbcd763ac68893150ace71e487877d8c6527a76855322f20001c6"
328+
},
329+
"kernelspec": {
330+
"display_name":"Python 3.9.12 64-bit",
331+
"language":"python",
332+
"name":"python3"
333+
},
334+
"language_info": {
335+
"name":"python",
336+
"version":"3.9.12"
337+
}
338+
},
339+
"nbformat":4,
340+
"nbformat_minor":0
341+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# [Dimensionality Reduction: Feature Extraction using Scikit-learn in Python](https://www.thepythoncode.com/article/dimensionality-reduction-using-feature-extraction-sklearn)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp