In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
from sklearn import datasets
iris = datasets.load_iris()
In [3]:
iris.keys()
Out[3]:
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
In [21]:
X = iris.data[:,0].reshape(-1,1)
In [22]:
y = (iris.target == 0).astype(np.int8)
In [23]:
y
Out[23]:
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)
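Only the first 50 samples (Iris setosa) get label 1; the remaining 100 get 0. As a quick sanity check of the class balance, an extra cell could count the labels (a minimal sketch, not part of the original run):

# counts per label: index 0 = not setosa, index 1 = setosa; expect [100, 50]
np.bincount(y)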
In [24]:
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression(solver='lbfgs', C=1, random_state=1)
log_reg.fit(X, y)
Out[24]:
LogisticRegression(C=1, random_state=1)
In [25]:
log_reg.intercept_, log_reg.coef_
Out[25]:
(array([18.5455613]), array([[-3.45772499]]))
In [26]:
log_reg.score(X, y)
Out[26]:
0.8933333333333333
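With only sepal length as a feature the two classes overlap, so training accuracy stays below 1.0 (0.893 corresponds to 134 of the 150 samples classified correctly). A confusion matrix makes the errors explicit; a small optional sketch, not part of the original notebook:

from sklearn.metrics import confusion_matrix
# rows = true class (0 = not setosa, 1 = setosa), columns = predicted class
confusion_matrix(y, log_reg.predict(X))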
In [28]:
x_new = [[5],[6.5]]
log_reg.predict(x_new)
Out[28]:
array([1, 0], dtype=int8)
In [29]:
log_reg.predict_proba(x_new)
Out[29]:
array([[0.22150173, 0.77849827],
       [0.98072817, 0.01927183]])
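These probabilities can be reproduced by hand from the fitted parameters with the sigmoid σ(z) = 1 / (1 + e^(−z)), where z = b + w1·x1. A minimal sketch (assuming the log_reg fit above) whose printed value should match the Setosa column of predict_proba:

b, w1 = log_reg.intercept_[0], log_reg.coef_[0, 0]
for x1 in (5, 6.5):
    z = b + w1 * x1                    # decision function value
    p_setosa = 1 / (1 + np.exp(-z))    # sigmoid -> estimated P(y=1)
    print(x1, p_setosa)                # should agree with predict_proba[:, 1]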
In [34]:
from sklearn.linear_model import SGDClassifier
sgd_clf = SGDClassifier(loss='log_loss', penalty='l2', alpha=0.0001, random_state=1)
sgd_clf.fit(X, y)
sgd_clf.intercept_, sgd_clf.coef_
Out[34]:
(array([321.68580859]), array([[-61.90239437]]))
In [36]:
sgd_clf.score(X, y)
Out[36]:
0.8733333333333333
In [39]:
X_new = [[5], [6.5]]
sgd_clf.predict(X_new)
Out[39]:
array([1, 0], dtype=int8)
In [40]:
sgd_clf.predict_proba(X_new)
Out[40]:
array([[5.16377895e-06, 9.99994836e-01],
       [1.00000000e+00, 9.14592585e-36]])
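The SGD fit ends up with a much larger weight (|w1| ≈ 62 versus ≈ 3.5 for the lbfgs fit), so its sigmoid is far steeper and the predicted probabilities saturate near 0 and 1. Its decision boundary can be found the same way as for log_reg in the next cell; a short sketch, not part of the original run:

# x1 where the SGD model's estimated probability crosses 0.5: x1 = -b / w1
-sgd_clf.intercept_ / sgd_clf.coef_    # roughly 5.2 with the weights above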
In [42]:
# h = 0 at the decision boundary, i.e. 0 = b + w1*x1  =>  x1 = -b/w1
decision_boundary = - log_reg.intercept_ / log_reg.coef_
decision_boundary
Out[42]:
array([[5.36351542]])
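At the boundary the model is maximally uncertain, so both class probabilities should come out near 0.5. A quick optional check, not part of the original notebook:

# probabilities at the decision boundary should be approximately [0.5, 0.5]
log_reg.predict_proba(decision_boundary.reshape(-1, 1))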
In [44]:
XS = np.linspace(3, 9, 100).reshape(-1,1)
y_proba = log_reg.predict_proba(XS)
In [62]:
plt.figure(figsize=(8,3))
plt.plot(X[y==1], y[y==1], 'b.', label='Setosa')
plt.plot(X[y==0], y[y==0], 'rx', label='Not Setosa')
plt.plot(XS, y_proba[:,0], 'r:')
plt.plot(XS, y_proba[:,1], 'b:')
plt.plot([decision_boundary[0],decision_boundary[0]], [0,1], 'k--', label='Boundary')
plt.xlabel('$x_1$', fontsize=14)
plt.ylabel(r'$\hat{p}$', fontsize=14)
plt.legend(loc='center right', fontsize=14)
plt.show()