Code snippets for page Node List

# -*- coding: utf-8 -*-
# Generated by codesnippet sphinx extension on 2016-03-08

import mdp
import numpy as np
np.random.seed(0)
import numpy as np
from sklearn import linear_model
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = linear_model.SGDRegressor()
clf.fit(X, y)

# Expected:
## SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
##              fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
##              loss='squared_loss', n_iter=5, penalty='l2', power_t=0.25,
##              random_state=None, shuffle=True, verbose=0, warm_start=False)
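
# A minimal follow-up sketch: the fitted regressor predicts with predict();
# the output depends on the random data above, so no expected value is shown.
clf.predict(X[:1])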

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = QuadraticDiscriminantAnalysis()
clf.fit(X, y)

# Expected:
## QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0,
##                               store_covariances=False, tol=0.0001)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
# Expected:
## KNeighborsClassifier(...)
print(neigh.predict([[1.1]]))
# Expected:
## [0]
print(neigh.predict_proba([[0.9]]))
# Expected:
## [[ 0.66666667  0.33333333]]

from sklearn.neighbors import NearestCentroid
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = NearestCentroid()
clf.fit(X, y)
# Expected:
## NearestCentroid(metric='euclidean', shrink_threshold=None)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn import svm, grid_search, datasets
iris = datasets.load_iris()
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
svr = svm.SVC()
clf = grid_search.GridSearchCV(svr, parameters)
clf.fit(iris.data, iris.target)

# Expected:
## GridSearchCV(cv=None, error_score=...,
##        estimator=SVC(C=1.0, cache_size=..., class_weight=..., coef0=...,
##                      decision_function_shape=None, degree=..., gamma=...,
##                      kernel='rbf', max_iter=-1, probability=False,
##                      random_state=None, shrinking=True, tol=...,
##                      verbose=False),
##        fit_params={}, iid=..., n_jobs=1,
##        param_grid=..., pre_dispatch=..., refit=...,
##        scoring=..., verbose=...)

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
# Expected:
## LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
##               solver='svd', store_covariance=False, tol=0.0001)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn import linear_model
clf = linear_model.ARDRegression()
clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])

# Expected:
## ARDRegression(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
##         copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
##         n_iter=300, normalize=False, threshold_lambda=10000.0, tol=0.001,
##         verbose=False)
clf.predict([[1, 1]])
# Expected:
## array([ 1.])

from sklearn.cross_decomposition import PLSCanonical
X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
plsca = PLSCanonical(n_components=2)
plsca.fit(X, Y)

# Expected:
## PLSCanonical(algorithm='nipals', copy=True, max_iter=500, n_components=2,
##              scale=True, tol=1e-06)
X_c, Y_c = plsca.transform(X, Y)
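# A minimal follow-up sketch: transform() projects both blocks onto the
# shared latent space, so each score matrix is (n_samples, n_components).
X_c.shape
# Expected:
## (4, 2)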

import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, Y)
# Expected:
## GaussianNB()
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
clf_pf = GaussianNB()
clf_pf.partial_fit(X, Y, np.unique(Y))
# Expected:
## GaussianNB()
print(clf_pf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelSpreading
label_prop_model = LabelSpreading()
iris = datasets.load_iris()
# Mark a random subset of the labels as unlabeled (-1) for semi-supervision.
random_unlabeled_points = np.where(np.random.randint(0, 2,
    size=len(iris.target)))
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1
label_prop_model.fit(iris.data, labels)

# Expected:
## LabelSpreading(...)

import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=2, init='random', random_state=0)
model.fit(X)
# Expected:
## NMF(alpha=0.0, beta=1, eta=0.1, init='random', l1_ratio=0.0, max_iter=200,
##   n_components=2, nls_max_iter=2000, random_state=0, shuffle=False,
##   solver='cd', sparseness=None, tol=0.0001, verbose=0)

model.components_
# Expected:
## array([[ 2.09783018,  0.30560234],
##        [ 2.13443044,  2.13171694]])
model.reconstruction_err_
# Expected:
## 0.00115993...

import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import SVC
clf = SVC()
clf.fit(X, y)
# Expected:
## SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
##     decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
##     max_iter=-1, probability=False, random_state=None, shrinking=True,
##     tol=0.001, verbose=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn.feature_extraction import DictVectorizer
v = DictVectorizer(sparse=False)
D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
X = v.fit_transform(D)
X
# Expected:
## array([[ 2.,  0.,  1.],
##        [ 0.,  1.,  3.]])
v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0}, {'baz': 1.0, 'foo': 3.0}]
# Expected:
## True
v.transform({'foo': 4, 'unseen_feature': 3})
# Expected:
## array([[ 0.,  0.,  4.]])

from sklearn.linear_model import RandomizedLasso
randomized_lasso = RandomizedLasso()
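# A minimal fit sketch, assuming the 0.17-era stability-selection API:
# fit(X, y) resamples the data, refits the lasso on each subsample, and
# records per-feature selection frequencies in scores_ (shape (n_features,)).
import numpy as np
np.random.seed(0)
X = np.random.randn(20, 5)
y = np.random.randn(20)
randomized_lasso.fit(X, y)
randomized_lasso.scores_.shape
# Expected:
## (5,)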

from sklearn.kernel_ridge import KernelRidge
import numpy as np
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
clf = KernelRidge(alpha=1.0)
clf.fit(X, y)
# Expected:
## KernelRidge(alpha=1.0, coef0=1, degree=3, gamma=None, kernel='linear',
##             kernel_params=None)
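
# A minimal follow-up sketch: the fitted model predicts via the standard
# predict() method; exact values depend on the random data above.
clf.predict(X[:1])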

import numpy as np
X = np.random.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(X, y)
# Expected:
## MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
print(clf.predict(X[2:3]))
# Expected:
## [3]

from sklearn.preprocessing import PolynomialFeatures
X = np.arange(6).reshape(3, 2)
X
# Expected:
## array([[0, 1],
##        [2, 3],
##        [4, 5]])
poly = PolynomialFeatures(2)
poly.fit_transform(X)
# Expected:
## array([[  1.,   0.,   1.,   0.,   0.,   1.],
##        [  1.,   2.,   3.,   4.,   6.,   9.],
##        [  1.,   4.,   5.,  16.,  20.,  25.]])
poly = PolynomialFeatures(interaction_only=True)
poly.fit_transform(X)
# Expected:
## array([[  1.,   0.,   1.,   0.],
##        [  1.,   2.,   3.,   6.],
##        [  1.,   4.,   5.,  20.]])

from sklearn.cluster import Birch
X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
brc = Birch(branching_factor=50, n_clusters=None, threshold=0.5,
            compute_labels=True)
brc.fit(X)
# Expected:
## Birch(branching_factor=50, compute_labels=True, copy=True, n_clusters=None,
##    threshold=0.5)
brc.predict(X)
# Expected:
## array([0, 0, 0, 1, 1, 1])

import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation
label_prop_model = LabelPropagation()
iris = datasets.load_iris()
# Mark a random subset of the labels as unlabeled (-1) for semi-supervision.
random_unlabeled_points = np.where(np.random.randint(0, 2,
    size=len(iris.target)))
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1
label_prop_model.fit(iris.data, labels)

# Expected:
## LabelPropagation(...)

import numpy as np
from sklearn.gaussian_process import GaussianProcess
X = np.array([[1., 3., 5., 6., 7., 8.]]).T
y = (X * np.sin(X)).ravel()
gp = GaussianProcess(theta0=0.1, thetaL=.001, thetaU=1.)
gp.fit(X, y)
# Expected:
## GaussianProcess(beta0=None...
##         ...

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import RadiusNeighborsRegressor
neigh = RadiusNeighborsRegressor(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsRegressor(...)
print(neigh.predict([[1.5]]))
# Expected:
## [ 0.5]

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()
enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]])
# Expected:
## OneHotEncoder(categorical_features='all', dtype=<... 'float'>,
##        handle_unknown='error', n_values='auto', sparse=True)
enc.n_values_
# Expected:
## array([2, 3, 4])
enc.feature_indices_
# Expected:
## array([0, 2, 5, 9])
enc.transform([[0, 1, 1]]).toarray()
# Expected:
## array([[ 1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.]])

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsRegressor
neigh = KNeighborsRegressor(n_neighbors=2)
neigh.fit(X, y)
# Expected:
## KNeighborsRegressor(...)
print(neigh.predict([[1.5]]))
# Expected:
## [ 0.5]

from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit([1, 2, 2, 6])
# Expected:
## LabelEncoder()
le.classes_
# Expected:
## array([1, 2, 6])
le.transform([1, 1, 2, 6])
# Expected:
## array([0, 0, 1, 2]...)
le.inverse_transform([0, 0, 1, 2])
# Expected:
## array([1, 1, 2, 6])

le = preprocessing.LabelEncoder()
le.fit(["paris", "paris", "tokyo", "amsterdam"])
# Expected:
## LabelEncoder()
list(le.classes_)
# Expected:
## ['amsterdam', 'paris', 'tokyo']
le.transform(["tokyo", "tokyo", "paris"])
# Expected:
## array([2, 2, 1]...)
list(le.inverse_transform([2, 2, 1]))
# Expected:
## ['tokyo', 'tokyo', 'paris']

from sklearn.linear_model import Ridge
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = Ridge(alpha=1.0)
clf.fit(X, y)
# Expected:
## Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
##       normalize=False, random_state=None, solver='auto', tol=0.001)
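
# A minimal follow-up sketch: predictions for new rows come from predict();
# the values here depend on the seeded random data, so none are shown.
clf.predict(X[:1])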

import numpy as np
from sklearn.decomposition import RandomizedPCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = RandomizedPCA(n_components=2)
pca.fit(X)
# Expected:
## RandomizedPCA(copy=True, iterated_power=3, n_components=2,
##        random_state=None, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [ 0.99244...  0.00755...]

from sklearn import linear_model
clf = linear_model.LassoLarsIC(criterion='bic')
clf.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])

# Expected:
## LassoLarsIC(copy_X=True, criterion='bic', eps=..., fit_intercept=True,
##       max_iter=500, normalize=True, positive=False, precompute='auto',
##       verbose=False)
print(clf.coef_)
# Expected:
## [ 0.  -1.11...]

from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
selector = RFE(estimator, n_features_to_select=5, step=1)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True,  True,  True,  True,  True,
##         False, False, False, False, False], dtype=bool)
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
pca.fit(X)
# Expected:
## PCA(copy=True, n_components=2, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [ 0.99244...  0.00755...]

from sklearn import linear_model
clf = linear_model.MultiTaskLasso(alpha=0.1)
clf.fit([[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskLasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
##         normalize=False, random_state=None, selection='cyclic', tol=0.0001,
##         warm_start=False)
print(clf.coef_)
# Expected:
## [[ 0.89393398  0.        ]
##  [ 0.89393398  0.        ]]
print(clf.intercept_)
# Expected:
## [ 0.10606602  0.10606602]

from sklearn.linear_model import RandomizedLogisticRegression
randomized_logistic = RandomizedLogisticRegression()
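# A minimal fit sketch, assuming the API mirrors RandomizedLasso: fit(X, y)
# takes class labels and fills scores_ with per-feature selection frequencies.
import numpy as np
np.random.seed(0)
X = np.random.randn(20, 5)
y = np.array([0, 1] * 10)
randomized_logistic.fit(X, y)
randomized_logistic.scores_.shape
# Expected:
## (5,)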

from sklearn import linear_model
clf = linear_model.MultiTaskElasticNet(alpha=0.1)
clf.fit([[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])

# Expected:
## MultiTaskElasticNet(alpha=0.1, copy_X=True, fit_intercept=True,
##         l1_ratio=0.5, max_iter=1000, normalize=False, random_state=None,
##         selection='cyclic', tol=0.0001, warm_start=False)
print(clf.coef_)
# Expected:
## [[ 0.45663524  0.45612256]
##  [ 0.45663524  0.45612256]]
print(clf.intercept_)
# Expected:
## [ 0.0872422  0.0872422]

import numpy as np
from sklearn import mixture
np.random.seed(1)
g = mixture.GMM(n_components=2)
obs = np.concatenate((np.random.randn(100, 1),
                      10 + np.random.randn(300, 1)))
g.fit(obs)
# Expected:
## GMM(covariance_type='diag', init_params='wmc', min_covar=0.001,
##         n_components=2, n_init=1, n_iter=100, params='wmc',
##         random_state=None, thresh=None, tol=0.001, verbose=0)
np.round(g.weights_, 2)
# Expected:
## array([ 0.75,  0.25])
np.round(g.means_, 2)
# Expected:
## array([[ 10.05],
##        [  0.06]])
np.round(g.covars_, 2)
# Expected:
## array([[[ 1.02]],
##        [[ 0.96]]])
g.predict([[0], [2], [9], [10]])
# Expected:
## array([1, 1, 0, 0]...)
np.round(g.score([[0], [2], [9], [10]]), 2)
# Expected:
## array([-2.19, -4.58, -1.75, -1.21])
g.fit(20 * [[0]] + 20 * [[10]])
# Expected:
## GMM(covariance_type='diag', init_params='wmc', min_covar=0.001,
##         n_components=2, n_init=1, n_iter=100, params='wmc',
##         random_state=None, thresh=None, tol=0.001, verbose=0)
np.round(g.weights_, 2)
# Expected:
## array([ 0.5,  0.5])

from sklearn.datasets import load_iris
from sklearn.cross_validation import cross_val_score
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=0)
iris = load_iris()
cross_val_score(clf, iris.data, iris.target, cv=10)

# Expected:
## array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,
##         0.93...,  0.93...,  1.     ,  0.93...,  1.      ])

import numpy as np
X = np.random.randint(2, size=(6, 100))
Y = np.array([1, 2, 3, 4, 4, 5])
from sklearn.naive_bayes import BernoulliNB
clf = BernoulliNB()
clf.fit(X, Y)
# Expected:
## BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)
print(clf.predict(X[2:3]))
# Expected:
## [3]

from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
mlb.fit_transform([(1, 2), (3,)])
# Expected:
## array([[1, 1, 0],
##        [0, 0, 1]])
mlb.classes_
# Expected:
## array([1, 2, 3])

mlb.fit_transform([set(['sci-fi', 'thriller']), set(['comedy'])])
# Expected:
## array([[0, 1, 1],
##        [1, 0, 0]])
list(mlb.classes_)
# Expected:
## ['comedy', 'sci-fi', 'thriller']

import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import NuSVC
clf = NuSVC()
clf.fit(X, y)
# Expected:
## NuSVC(cache_size=200, class_weight=None, coef0=0.0,
##       decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
##       max_iter=-1, nu=0.5, probability=False, random_state=None,
##       shrinking=True, tol=0.001, verbose=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn import linear_model
clf = linear_model.LassoLars(alpha=0.01)
clf.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])

# Expected:
## LassoLars(alpha=0.01, copy_X=True, eps=..., fit_intercept=True,
##      fit_path=True, max_iter=500, normalize=True, positive=False,
##      precompute='auto', verbose=False)
print(clf.coef_)
# Expected:
## [ 0.         -0.963257...]

from sklearn import linear_model
clf = linear_model.Lasso(alpha=0.1)
clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
##    normalize=False, positive=False, precompute=False, random_state=None,
##    selection='cyclic', tol=0.0001, warm_start=False)
print(clf.coef_)
# Expected:
## [ 0.85  0.  ]
print(clf.intercept_)
# Expected:
## 0.15

from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import sparse_random_matrix
X = sparse_random_matrix(100, 100, density=0.01, random_state=42)
svd = TruncatedSVD(n_components=5, random_state=42)
svd.fit(X)
# Expected:
## TruncatedSVD(algorithm='randomized', n_components=5, n_iter=5,
##         random_state=42, tol=0.0)
print(svd.explained_variance_ratio_)
# Expected:
## [ 0.0782... 0.0552... 0.0544... 0.0499... 0.0413...]
print(svd.explained_variance_ratio_.sum())
# Expected:
## 0.279...

from sklearn.svm import NuSVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = NuSVR(C=1.0, nu=0.1)
clf.fit(X, y)
# Expected:
## NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='auto',
##       kernel='rbf', max_iter=-1, nu=0.1, shrinking=True, tol=0.001,
##       verbose=False)
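
# A minimal follow-up sketch: NuSVR supports predict() like any regressor;
# the resulting values depend on the seeded random data above.
clf.predict(X[:1])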

import numpy as np
from sklearn import linear_model
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
Y = np.array([1, 1, 2, 2])
clf = linear_model.SGDClassifier()
clf.fit(X, Y)

# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
##         eta0=0.0, fit_intercept=True, l1_ratio=0.15,
##         learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
##         penalty='l2', power_t=0.5, random_state=None, shuffle=True,
##         verbose=0, warm_start=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn.cross_decomposition import CCA
X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
cca = CCA(n_components=1)
cca.fit(X, Y)

# Expected:
## CCA(copy=True, max_iter=500, n_components=1, scale=True, tol=1e-06)
X_c, Y_c = cca.transform(X, Y)

import numpy as np
from sklearn.neural_network import BernoulliRBM
X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
model = BernoulliRBM(n_components=2)
model.fit(X)
# Expected:
## BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=2, n_iter=10,
##        random_state=None, verbose=0)
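
# A minimal follow-up sketch: transform() maps samples to their hidden-unit
# representation, one column per component.
model.transform(X).shape
# Expected:
## (4, 2)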

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import RadiusNeighborsClassifier
neigh = RadiusNeighborsClassifier(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsClassifier(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0]

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')
eclf1 = eclf1.fit(X, y)
print(eclf1.predict(X))
# Expected:
## [1 1 1 2 2 2]
eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft')
eclf2 = eclf2.fit(X, y)
print(eclf2.predict(X))
# Expected:
## [1 1 1 2 2 2]
eclf3 = VotingClassifier(estimators=[
       ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
       voting='soft', weights=[2,1,1])
eclf3 = eclf3.fit(X, y)
print(eclf3.predict(X))
# Expected:
## [1 1 1 2 2 2]

from sklearn.datasets import load_boston
from sklearn.cross_validation import cross_val_score
from sklearn.tree import DecisionTreeRegressor
boston = load_boston()
regressor = DecisionTreeRegressor(random_state=0)
cross_val_score(regressor, boston.data, boston.target, cv=10)

# Expected:
## array([ 0.61..., 0.57..., -0.34..., 0.41..., 0.75...,
##         0.07..., 0.29..., 0.33..., -1.42..., -1.77...])

from sklearn import linear_model
clf = linear_model.Lars(n_nonzero_coefs=1)
clf.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])

# Expected:
## Lars(copy_X=True, eps=..., fit_intercept=True, fit_path=True,
##    n_nonzero_coefs=1, normalize=True, positive=False, precompute='auto',
##    verbose=False)
print(clf.coef_)
# Expected:
## [ 0. -1.11...]

from sklearn.svm import SVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = SVR(C=1.0, epsilon=0.2)
clf.fit(X, y)
# Expected:
## SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='auto',
##     kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
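
# A minimal follow-up sketch: calling predict() on a few training rows; the
# exact outputs depend on the seeded random data, so no expected block is given.
clf.predict(X[:2])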

from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
selector = RFECV(estimator, step=1, cv=5)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True,  True,  True,  True,  True,
##         False, False, False, False, False], dtype=bool)
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

from sklearn import linear_model
clf = linear_model.BayesianRidge()
clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])

# Expected:
## BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
##         copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
##         n_iter=300, normalize=False, tol=0.001, verbose=False)
clf.predict([[1, 1]])
# Expected:
## array([ 1.])

from sklearn import linear_model
clf = linear_model.MultiTaskElasticNetCV()
clf.fit([[0, 0], [1, 1], [2, 2]],
        [[0, 0], [1, 1], [2, 2]])

# Expected:
## MultiTaskElasticNetCV(alphas=None, copy_X=True, cv=None, eps=0.001,
##        fit_intercept=True, l1_ratio=0.5, max_iter=1000, n_alphas=100,
##        n_jobs=1, normalize=False, random_state=None, selection='cyclic',
##        tol=0.0001, verbose=0)
print(clf.coef_)
# Expected:
## [[ 0.52875032  0.46958558]
##  [ 0.52875032  0.46958558]]
print(clf.intercept_)
# Expected:
## [ 0.00166409  0.00166409]

from sklearn.cross_decomposition import PLSRegression
X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
pls2 = PLSRegression(n_components=2)
pls2.fit(X, Y)

# Expected:
## PLSRegression(copy=True, max_iter=500, n_components=2, scale=True,
##         tol=1e-06)
Y_pred = pls2.predict(X)
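# A minimal follow-up sketch: predict() returns one row per sample and one
# column per response, so Y_pred has the same shape as Y.
Y_pred.shape
# Expected:
## (4, 2)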

from sklearn.feature_extraction import FeatureHasher
h = FeatureHasher(n_features=10)
D = [{'dog': 1, 'cat': 2, 'elephant': 4}, {'dog': 2, 'run': 5}]
f = h.transform(D)
f.toarray()
# Expected:
## array([[ 0.,  0., -4., -1.,  0.,  0.,  0.,  0.,  0.,  2.],
##        [ 0.,  0.,  0., -2., -5.,  0.,  0.,  0.,  0.,  0.]])

from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
lb.fit([1, 2, 6, 4, 2])
# Expected:
## LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
lb.classes_
# Expected:
## array([1, 2, 4, 6])
lb.transform([1, 6])
# Expected:
## array([[1, 0, 0, 0],
##        [0, 0, 0, 1]])

lb = preprocessing.LabelBinarizer()
lb.fit_transform(['yes', 'no', 'no', 'yes'])
# Expected:
## array([[1],
##        [0],
##        [0],
##        [1]])

import numpy as np
lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))
# Expected:
## LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
lb.classes_
# Expected:
## array([0, 1, 2])
lb.transform([0, 1, 2, 1])
# Expected:
## array([[1, 0, 0],
##        [0, 1, 0],
##        [0, 0, 1],
##        [0, 1, 0]])