Commit 89997e7

Pushing the docs to dev/ for branch: master, commit 228109cd5c12c0c3374e37f13b48d7382d15a5a7

1 parent 8258e8a · commit 89997e7

File tree

1,170 files changed: +3647 -3647 lines changed


dev/_downloads/1abc4484d4183963e2039c8c679497eb/plot_sgd_comparison.ipynb

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n (\"SGD\", SGDClassifier(max_iter=100)),\n (\"ASGD\", SGDClassifier(average=True, max_iter=1000)),\n (\"Perceptron\", Perceptron(tol=1e-3)),\n (\"Passive-Aggressive I\", PassiveAggressiveClassifier(loss='hinge',\n C=1.0, tol=1e-4)),\n (\"Passive-Aggressive II\", PassiveAggressiveClassifier(loss='squared_hinge',\n C=1.0, tol=1e-4)),\n (\"SAG\", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))\n]\n\nxx = 1. - np.array(heldout)\n\nfor name, clf in classifiers:\n print(\"training %s\" % name)\n rng = np.random.RandomState(42)\n yy = []\n for i in heldout:\n yy_ = []\n for r in range(rounds):\n X_train, X_test, y_train, y_test = \\\n train_test_split(X, y, test_size=i, random_state=rng)\n clf.fit(X_train, y_train)\n y_pred = clf.predict(X_test)\n yy_.append(1 - np.mean(y_pred == y_test))\n yy.append(np.mean(yy_))\n plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
+"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n (\"SGD\", SGDClassifier(max_iter=100)),\n (\"ASGD\", SGDClassifier(average=True)),\n (\"Perceptron\", Perceptron()),\n (\"Passive-Aggressive I\", PassiveAggressiveClassifier(loss='hinge',\n C=1.0, tol=1e-4)),\n (\"Passive-Aggressive II\", PassiveAggressiveClassifier(loss='squared_hinge',\n C=1.0, tol=1e-4)),\n (\"SAG\", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))\n]\n\nxx = 1. - np.array(heldout)\n\nfor name, clf in classifiers:\n print(\"training %s\" % name)\n rng = np.random.RandomState(42)\n yy = []\n for i in heldout:\n yy_ = []\n for r in range(rounds):\n X_train, X_test, y_train, y_test = \\\n train_test_split(X, y, test_size=i, random_state=rng)\n clf.fit(X_train, y_train)\n y_pred = clf.predict(X_test)\n yy_.append(1 - np.mean(y_pred == y_test))\n yy.append(np.mean(yy_))\n plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
 ]
 }
 ],

dev/_downloads/3650884f0a646ba96d2e47df0a6fb935/plot_sgd_comparison.py

Lines changed: 2 additions & 2 deletions

@@ -25,8 +25,8 @@

 classifiers = [
     ("SGD", SGDClassifier(max_iter=100)),
-    ("ASGD", SGDClassifier(average=True, max_iter=1000)),
-    ("Perceptron", Perceptron(tol=1e-3)),
+    ("ASGD", SGDClassifier(average=True)),
+    ("Perceptron", Perceptron()),
     ("Passive-Aggressive I", PassiveAggressiveClassifier(loss='hinge',
                                                          C=1.0, tol=1e-4)),
     ("Passive-Aggressive II", PassiveAggressiveClassifier(loss='squared_hinge',

dev/_downloads/388641d133587cc11aa26f2dbef4b950/plot_document_classification_20newsgroups.py

Lines changed: 1 addition & 1 deletion

@@ -247,7 +247,7 @@ def benchmark(clf):
 results = []
 for clf, name in (
         (RidgeClassifier(tol=1e-2, solver="sag"), "Ridge Classifier"),
-        (Perceptron(max_iter=50, tol=1e-3), "Perceptron"),
+        (Perceptron(max_iter=50), "Perceptron"),
         (PassiveAggressiveClassifier(max_iter=50, tol=1e-3),
          "Passive-Aggressive"),
         (KNeighborsClassifier(n_neighbors=10), "kNN"),
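The hunk header shows this loop feeds each estimator to a benchmark() helper defined earlier in the example. A minimal sketch of that fit-and-score pattern follows; the signature and dataset are hypothetical stand-ins, since the example's real benchmark(clf) closes over the vectorized 20newsgroups arrays and also reports timing and per-class metrics:

from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def benchmark(clf, X_train, y_train, X_test, y_test):
    # fit on the training split, score on the held-out split
    clf.fit(X_train, y_train)
    return accuracy_score(y_test, clf.predict(X_test))

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print(benchmark(Perceptron(max_iter=50), X_train, y_train, X_test, y_test))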

dev/_downloads/3b31bf37034a6ece04667cd422e5ff79/plot_document_classification_20newsgroups.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dev/_downloads/80692cf167e9ea27b27e5bd144159c82/plot_out_of_core_classification.py

Lines changed: 1 addition & 1 deletion

@@ -208,7 +208,7 @@ def progress(blocknum, bs, size):
 # Here are some classifiers that support the `partial_fit` method
 partial_fit_classifiers = {
     'SGD': SGDClassifier(max_iter=5),
-    'Perceptron': Perceptron(tol=1e-3),
+    'Perceptron': Perceptron(),
     'NB Multinomial': MultinomialNB(alpha=0.01),
     'Passive-Aggressive': PassiveAggressiveClassifier(tol=1e-3),
 }
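As the comment in this hunk says, every estimator in the dict supports partial_fit, which is what lets the example train on a document stream without holding it all in memory. A minimal sketch of that incremental pattern, on synthetic data (not part of the commit):

import numpy as np
from sklearn.linear_model import Perceptron

rng = np.random.RandomState(0)
clf = Perceptron()
classes = np.array([0, 1])  # every label must be declared up front

for _ in range(10):  # stands in for a stream of mini-batches
    X_batch = rng.randn(100, 20)
    y_batch = rng.randint(0, 2, size=100)
    # classes= is required on the first call so weights can be allocated
    clf.partial_fit(X_batch, y_batch, classes=classes)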

dev/_downloads/b86db3a111b621a7beeaa9d099608e5b/plot_out_of_core_classification.ipynb

Lines changed: 1 addition & 1 deletion

@@ -62,7 +62,7 @@
 },
 "outputs": [],
 "source": [
-"vectorizer = HashingVectorizer(decode_error='ignore', n_features=2 ** 18,\n alternate_sign=False)\n\n\n# Iterator over parsed Reuters SGML files.\ndata_stream = stream_reuters_documents()\n\n# We learn a binary classification between the \"acq\" class and all the others.\n# \"acq\" was chosen as it is more or less evenly distributed in the Reuters\n# files. For other datasets, one should take care of creating a test set with\n# a realistic portion of positive instances.\nall_classes = np.array([0, 1])\npositive_class = 'acq'\n\n# Here are some classifiers that support the `partial_fit` method\npartial_fit_classifiers = {\n 'SGD': SGDClassifier(max_iter=5),\n 'Perceptron': Perceptron(tol=1e-3),\n 'NB Multinomial': MultinomialNB(alpha=0.01),\n 'Passive-Aggressive': PassiveAggressiveClassifier(tol=1e-3),\n}\n\n\ndef get_minibatch(doc_iter, size, pos_class=positive_class):\n \"\"\"Extract a minibatch of examples, return a tuple X_text, y.\n\n Note: size is before excluding invalid docs with no topics assigned.\n\n \"\"\"\n data = [('{title}\\n\\n{body}'.format(**doc), pos_class in doc['topics'])\n for doc in itertools.islice(doc_iter, size)\n if doc['topics']]\n if not len(data):\n return np.asarray([], dtype=int), np.asarray([], dtype=int)\n X_text, y = zip(*data)\n return X_text, np.asarray(y, dtype=int)\n\n\ndef iter_minibatches(doc_iter, minibatch_size):\n \"\"\"Generator of minibatches.\"\"\"\n X_text, y = get_minibatch(doc_iter, minibatch_size)\n while len(X_text):\n yield X_text, y\n X_text, y = get_minibatch(doc_iter, minibatch_size)\n\n\n# test data statistics\ntest_stats = {'n_test': 0, 'n_test_pos': 0}\n\n# First we hold out a number of examples to estimate accuracy\nn_test_documents = 1000\ntick = time.time()\nX_test_text, y_test = get_minibatch(data_stream, 1000)\nparsing_time = time.time() - tick\ntick = time.time()\nX_test = vectorizer.transform(X_test_text)\nvectorizing_time = time.time() - tick\ntest_stats['n_test'] += len(y_test)\ntest_stats['n_test_pos'] += sum(y_test)\nprint(\"Test set is %d documents (%d positive)\" % (len(y_test), sum(y_test)))\n\n\ndef progress(cls_name, stats):\n \"\"\"Report progress information, return a string.\"\"\"\n duration = time.time() - stats['t0']\n s = \"%20s classifier : \\t\" % cls_name\n s += \"%(n_train)6d train docs (%(n_train_pos)6d positive) \" % stats\n s += \"%(n_test)6d test docs (%(n_test_pos)6d positive) \" % test_stats\n s += \"accuracy: %(accuracy).3f \" % stats\n s += \"in %.2fs (%5d docs/s)\" % (duration, stats['n_train'] / duration)\n return s\n\n\ncls_stats = {}\n\nfor cls_name in partial_fit_classifiers:\n stats = {'n_train': 0, 'n_train_pos': 0,\n 'accuracy': 0.0, 'accuracy_history': [(0, 0)], 't0': time.time(),\n 'runtime_history': [(0, 0)], 'total_fit_time': 0.0}\n cls_stats[cls_name] = stats\n\nget_minibatch(data_stream, n_test_documents)\n# Discard test set\n\n# We will feed the classifier with mini-batches of 1000 documents; this means\n# we have at most 1000 docs in memory at any time. The smaller the document\n# batch, the bigger the relative overhead of the partial fit methods.\nminibatch_size = 1000\n\n# Create the data_stream that parses Reuters SGML files and iterates on\n# documents as a stream.\nminibatch_iterators = iter_minibatches(data_stream, minibatch_size)\ntotal_vect_time = 0.0\n\n# Main loop : iterate on mini-batches of examples\nfor i, (X_train_text, y_train) in enumerate(minibatch_iterators):\n\n tick = time.time()\n X_train = vectorizer.transform(X_train_text)\n total_vect_time += time.time() - tick\n\n for cls_name, cls in partial_fit_classifiers.items():\n tick = time.time()\n # update estimator with examples in the current mini-batch\n cls.partial_fit(X_train, y_train, classes=all_classes)\n\n # accumulate test accuracy stats\n cls_stats[cls_name]['total_fit_time'] += time.time() - tick\n cls_stats[cls_name]['n_train'] += X_train.shape[0]\n cls_stats[cls_name]['n_train_pos'] += sum(y_train)\n tick = time.time()\n cls_stats[cls_name]['accuracy'] = cls.score(X_test, y_test)\n cls_stats[cls_name]['prediction_time'] = time.time() - tick\n acc_history = (cls_stats[cls_name]['accuracy'],\n cls_stats[cls_name]['n_train'])\n cls_stats[cls_name]['accuracy_history'].append(acc_history)\n run_history = (cls_stats[cls_name]['accuracy'],\n total_vect_time + cls_stats[cls_name]['total_fit_time'])\n cls_stats[cls_name]['runtime_history'].append(run_history)\n\n if i % 3 == 0:\n print(progress(cls_name, cls_stats[cls_name]))\n if i % 3 == 0:\n print('\\n')"
+"vectorizer = HashingVectorizer(decode_error='ignore', n_features=2 ** 18,\n alternate_sign=False)\n\n\n# Iterator over parsed Reuters SGML files.\ndata_stream = stream_reuters_documents()\n\n# We learn a binary classification between the \"acq\" class and all the others.\n# \"acq\" was chosen as it is more or less evenly distributed in the Reuters\n# files. For other datasets, one should take care of creating a test set with\n# a realistic portion of positive instances.\nall_classes = np.array([0, 1])\npositive_class = 'acq'\n\n# Here are some classifiers that support the `partial_fit` method\npartial_fit_classifiers = {\n 'SGD': SGDClassifier(max_iter=5),\n 'Perceptron': Perceptron(),\n 'NB Multinomial': MultinomialNB(alpha=0.01),\n 'Passive-Aggressive': PassiveAggressiveClassifier(tol=1e-3),\n}\n\n\ndef get_minibatch(doc_iter, size, pos_class=positive_class):\n \"\"\"Extract a minibatch of examples, return a tuple X_text, y.\n\n Note: size is before excluding invalid docs with no topics assigned.\n\n \"\"\"\n data = [('{title}\\n\\n{body}'.format(**doc), pos_class in doc['topics'])\n for doc in itertools.islice(doc_iter, size)\n if doc['topics']]\n if not len(data):\n return np.asarray([], dtype=int), np.asarray([], dtype=int)\n X_text, y = zip(*data)\n return X_text, np.asarray(y, dtype=int)\n\n\ndef iter_minibatches(doc_iter, minibatch_size):\n \"\"\"Generator of minibatches.\"\"\"\n X_text, y = get_minibatch(doc_iter, minibatch_size)\n while len(X_text):\n yield X_text, y\n X_text, y = get_minibatch(doc_iter, minibatch_size)\n\n\n# test data statistics\ntest_stats = {'n_test': 0, 'n_test_pos': 0}\n\n# First we hold out a number of examples to estimate accuracy\nn_test_documents = 1000\ntick = time.time()\nX_test_text, y_test = get_minibatch(data_stream, 1000)\nparsing_time = time.time() - tick\ntick = time.time()\nX_test = vectorizer.transform(X_test_text)\nvectorizing_time = time.time() - tick\ntest_stats['n_test'] += len(y_test)\ntest_stats['n_test_pos'] += sum(y_test)\nprint(\"Test set is %d documents (%d positive)\" % (len(y_test), sum(y_test)))\n\n\ndef progress(cls_name, stats):\n \"\"\"Report progress information, return a string.\"\"\"\n duration = time.time() - stats['t0']\n s = \"%20s classifier : \\t\" % cls_name\n s += \"%(n_train)6d train docs (%(n_train_pos)6d positive) \" % stats\n s += \"%(n_test)6d test docs (%(n_test_pos)6d positive) \" % test_stats\n s += \"accuracy: %(accuracy).3f \" % stats\n s += \"in %.2fs (%5d docs/s)\" % (duration, stats['n_train'] / duration)\n return s\n\n\ncls_stats = {}\n\nfor cls_name in partial_fit_classifiers:\n stats = {'n_train': 0, 'n_train_pos': 0,\n 'accuracy': 0.0, 'accuracy_history': [(0, 0)], 't0': time.time(),\n 'runtime_history': [(0, 0)], 'total_fit_time': 0.0}\n cls_stats[cls_name] = stats\n\nget_minibatch(data_stream, n_test_documents)\n# Discard test set\n\n# We will feed the classifier with mini-batches of 1000 documents; this means\n# we have at most 1000 docs in memory at any time. The smaller the document\n# batch, the bigger the relative overhead of the partial fit methods.\nminibatch_size = 1000\n\n# Create the data_stream that parses Reuters SGML files and iterates on\n# documents as a stream.\nminibatch_iterators = iter_minibatches(data_stream, minibatch_size)\ntotal_vect_time = 0.0\n\n# Main loop : iterate on mini-batches of examples\nfor i, (X_train_text, y_train) in enumerate(minibatch_iterators):\n\n tick = time.time()\n X_train = vectorizer.transform(X_train_text)\n total_vect_time += time.time() - tick\n\n for cls_name, cls in partial_fit_classifiers.items():\n tick = time.time()\n # update estimator with examples in the current mini-batch\n cls.partial_fit(X_train, y_train, classes=all_classes)\n\n # accumulate test accuracy stats\n cls_stats[cls_name]['total_fit_time'] += time.time() - tick\n cls_stats[cls_name]['n_train'] += X_train.shape[0]\n cls_stats[cls_name]['n_train_pos'] += sum(y_train)\n tick = time.time()\n cls_stats[cls_name]['accuracy'] = cls.score(X_test, y_test)\n cls_stats[cls_name]['prediction_time'] = time.time() - tick\n acc_history = (cls_stats[cls_name]['accuracy'],\n cls_stats[cls_name]['n_train'])\n cls_stats[cls_name]['accuracy_history'].append(acc_history)\n run_history = (cls_stats[cls_name]['accuracy'],\n total_vect_time + cls_stats[cls_name]['total_fit_time'])\n cls_stats[cls_name]['runtime_history'].append(run_history)\n\n if i % 3 == 0:\n print(progress(cls_name, cls_stats[cls_name]))\n if i % 3 == 0:\n print('\\n')"
 ]
 },
 {
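The unchanged top of this cell builds a HashingVectorizer, which is what makes the streaming setup workable: hashing is stateless, so there is no vocabulary to fit and each mini-batch can be transformed independently into a fixed-width matrix. A small illustration (not part of the commit):

from sklearn.feature_extraction.text import HashingVectorizer

vec = HashingVectorizer(decode_error='ignore', n_features=2 ** 18,
                        alternate_sign=False)
# transform() needs no prior fit(); the output width is fixed by n_features
X = vec.transform(["first mini-batch document", "another, later document"])
print(X.shape)  # (2, 262144)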

dev/_downloads/scikit-learn-docs.pdf

3.11 KB
Binary file not shown.

dev/_images/iris.png

(Additional binary images under dev/_images/ changed; only their size deltas were rendered here, ranging from -490 Bytes to +5.21 KB.)

dev/_sources/auto_examples/applications/plot_face_recognition.rst.txt

Lines changed: 17 additions & 17 deletions

