Chapter 2 - Housing
Main Steps
1. Look at the big picture
2. Get the data
3. Discover and visualize the data to gain insights
4. Prepare the data for Machine Learning algorithms
5. Select a model and train it
6. Fine-tune the model
7. Present the solution
8. Launch, monitor, and maintain the system
Note: automate as much as possible so you can easily get fresh data.
# Fetch the data
import os
import tarfile
from six.moves import urllib
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
HOUSING_PATH = "datasets/housing"
HOUSING_URL = DOWNLOAD_ROOT + HOUSING_PATH + "/housing.tgz"
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    if not os.path.isdir(housing_path):
        os.makedirs(housing_path)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    urllib.request.urlretrieve(housing_url, tgz_path)
    housing_tgz = tarfile.open(tgz_path)
    housing_tgz.extractall(path=housing_path)
    housing_tgz.close()

fetch_housing_data()
# Load the data and take a first look at it
import pandas as pd
def load_housing_data(housing_path=HOUSING_PATH):
    csv_path = os.path.join(housing_path, "housing.csv")
    return pd.read_csv(csv_path)

housing = load_housing_data()
housing.head()
housing.info()
housing["ocean_proximity"].value_counts()
housing.describe()
%matplotlib inline
# Jupyter notebook magic command: embed generated figures in the notebook
import matplotlib.pyplot as plt
housing.hist(bins=50, figsize=(20,15))
plt.show()
%magic
Notice: avoid data snooping bias by creating a test set before exploring the data further.
import numpy as np
import numpy.random as rnd
rnd.seed(42)  # to make this notebook's output identical at every run; using the same seed yields the same random sequence
# without a fixed seed, a different sequence is generated on every run
def split_train_test(data, test_ratio):
    shuffled_indices = rnd.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]
train_set, test_set = split_train_test(housing, 0.2)
print(len(train_set), len(test_set))
Although seed(number) yields the same pseudo-random sequence on every run, both solutions break the next time you fetch an updated dataset. A more robust approach is to use each instance's identifier to decide whether or not it should go into the test set. The housing data has no identifier column, so either use housing.reset_index() to add a row index as the id, or build a unique identifier from stable features.
import hashlib
def test_set_check(identifier, test_ratio, hash):
    # take the last byte of the hash as an integer and compare it with 256 * test_ratio;
    # instances below the threshold go into the test set
    return hash(np.int64(identifier)).digest()[-1] < 256 * test_ratio
def split_train_test_by_id(data, test_ratio, id_column, hash=hashlib.md5):
    ids = data[id_column]
    # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html#pandas.DataFrame.apply
    in_test_set = ids.apply(lambda id_: test_set_check(id_, test_ratio, hash))
    # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.loc.html#pandas.DataFrame.loc
    return data.loc[~in_test_set], data.loc[in_test_set]
# Option 1: use the row index as the id and hash it
housing_with_id_1 = housing.reset_index() #http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reset_index.html
train_set_1, test_set_1 = split_train_test_by_id(housing_with_id_1, 0.2, "index")
print(len(train_set_1),len(test_set_1))
# Option 2: combine longitude and latitude into an id and hash it
housing_with_id = housing.copy()
housing_with_id["id"] = housing["longitude"]*1000 + housing["latitude"]
train_set_2, test_set_2 = split_train_test_by_id(housing_with_id, 0.2, "id")
print(len(train_set_2),len(test_set_2))
# Option 3: simply use Scikit-Learn's train_test_split
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=42)
print(len(train_set),len(test_set))
# Stratified sampling: the test set should be representative of the whole dataset, and each stratum needs a sufficient number of instances
housing["income_cat"] = np.ceil(housing["median_income"]/1.5)
housing["income_cat"].where(housing["income_cat"]<5, 5.0, inplace=True) #小於5的保留,大於的納入5。http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.where.html#pandas.DataFrame.where
from sklearn.model_selection import StratifiedShuffleSplit
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42) #http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ShuffleSplit.html
for train_index, test_index in split.split(housing, housing["income_cat"]):
    strat_train_set = housing.loc[train_index]
    strat_test_set = housing.loc[test_index]
housing["income_cat"].value_counts()/len(housing)
strat_train_set["income_cat"].value_counts()/len(strat_train_set)
for set_ in (strat_test_set, strat_train_set):
    set_.drop(["income_cat"], axis=1, inplace=True)  # remove income_cat to restore the data to its original state
Note: try to get insights from a field expert for these steps.
For each attribute, note:
Name
Type (categorical, int/float, bounded/unbounded, text, structured, etc.)
% of missing values
Noisiness and type of noise (stochastic, outliers, rounding errors, etc.)
Possible usefulness for the task
Type of distribution (Gaussian, uniform, logarithmic, etc.)
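A minimal sketch of how one might tabulate some of these per-attribute statistics with pandas (the helper name summarize_attributes is an illustrative assumption, not from the book):
# Hypothetical helper: summarize type, missing values, and cardinality of each attribute
def summarize_attributes(df):
    return pd.DataFrame({
        "dtype": df.dtypes.astype(str),           # attribute type
        "missing_pct": df.isnull().mean() * 100,  # % of missing values
        "n_unique": df.nunique(),                 # low counts hint at categorical attributes
    })
summarize_attributes(housing)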
Visualizing Geographical Data
# create a copy
housing = strat_train_set.copy()
housing.plot(kind="scatter", x="longitude", y="latitude", alpha=.1)
housing.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4, s=housing["population"]/100, label="population", c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True)
plt.legend()
Looking for Correlations: compute the standard correlation coefficient
corr_matrix = housing.corr()
corr_matrix["median_house_value"].sort_values(ascending=False)
# another way to check for correlation between attributes is to use Pandas' scatter_matrix
from pandas.plotting import scatter_matrix
attributes = ["median_house_value", "median_income", "total_rooms", "housing_median_age"]
scatter_matrix(housing[attributes],figsize=(24,16))
#zoom in
housing.plot(kind="scatter", x="median_income", y="median_house_value", alpha=0.1)
Experimenting with Attribute Combinations: check how the correlation coefficients change compared with the original attributes
#try out various attribute combinations
housing["rooms_per_household"] = housing["total_rooms"]/housing["households"]
housing["bedrooms_per_room"] = housing["total_bedrooms"]/housing["total_rooms"]
housing["population_per_household"] = housing["population"]/housing["households"]
#correlation matrix
corr_matrix = housing.corr()
corr_matrix["median_house_value"].sort_values(ascending=False)
Notes:
Requirement
housing = strat_train_set.drop("median_house_value", axis=1)
housing_labels = strat_train_set["median_house_value"].copy()
# housing.dropna(subset=["total_bedrooms"])    # option 1: drop the districts with missing values
# housing.drop("total_bedrooms", axis=1)       # option 2: drop the whole attribute
# median = housing["total_bedrooms"].median()  # option 3: fill in a value (the median)
# housing["total_bedrooms"].fillna(median)     # option 3
#option 3 use scikit-learn
from sklearn.preprocessing import Imputer
imputer = Imputer(strategy = "median")
housing_num = housing.drop("ocean_proximity", axis=1)
imputer.fit(housing_num)
print(imputer.statistics_)
print(housing_num.median().values)
X = imputer.transform(housing_num)
housing_tr = pd.DataFrame(X, columns = housing_num.columns)
Scikit-Learn's API Design
fit(): learns parameters from the training data
transform(): applies the learned transformation (transformers)
predict(): makes predictions on new data (predictors)
score(): measures the quality of the predictions
Hyperparameters are accessible as public instance variables (e.g. imputer.strategy), and learned parameters get an underscore suffix (e.g. imputer.statistics_).
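A small illustration of these conventions, using StandardScaler as an example transformer (a sketch added here for clarity; any Scikit-Learn transformer follows the same pattern):
from sklearn.preprocessing import StandardScaler
X = np.array([[1.0], [2.0], [3.0]])
scaler = StandardScaler()            # hyperparameters are set in the constructor
scaler.fit(X)                        # fit() learns parameters from the data
print(scaler.mean_, scaler.scale_)   # learned parameters end with an underscore
print(scaler.transform(X))           # transform() applies the learned transformation
print(scaler.fit_transform(X))       # fit_transform() is equivalent to fit() followed by transform()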
Handling Text and Categorical Attributes
# convert these text labels to number
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
housing_cat = housing["ocean_proximity"]
housing_cat_encoded = encoder.fit_transform(housing_cat)
housing_cat_encoded
print(encoder.classes_)
# use OneHotEncoder encoder to convert integer categorical values into one-hot vector
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder()
# fit_transform expects a 2D array, so reshape the 1D array first
housing_cat_1hot = encoder.fit_transform(housing_cat_encoded.reshape(-1, 1))
housing_cat_1hot
housing_cat_1hot.toarray()
# Use LabelBinarizer to do both steps (LabelEncoder + OneHotEncoder) in one shot
from sklearn.preprocessing import LabelBinarizer
encoder = LabelBinarizer()
housing_cat_1hot = encoder.fit_transform(housing_cat)  # pass sparse_output=True to the constructor to get a SciPy sparse matrix
housing_cat_1hot
# custom transformer
from sklearn.base import BaseEstimator, TransformerMixin
rooms_ix, bedrooms_ix, population_ix, household_ix = 3, 4, 5, 6
class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    def __init__(self, add_bedrooms_per_room=True):  # no *args or **kwargs
        self.add_bedrooms_per_room = add_bedrooms_per_room
    def fit(self, X, y=None):
        return self
    def transform(self, X, y=None):
        rooms_per_household = X[:, rooms_ix] / X[:, household_ix]
        population_per_household = X[:, population_ix] / X[:, household_ix]
        if self.add_bedrooms_per_room:
            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[X, rooms_per_household, population_per_household, bedrooms_per_room]
        else:
            return np.c_[X, rooms_per_household, population_per_household]

attr_adder = CombinedAttributesAdder(add_bedrooms_per_room=False)
housing_extra_attribs = attr_adder.transform(housing.values)
housing.values
housing_extra_attribs = pd.DataFrame(housing_extra_attribs, columns=list(housing.columns)+["rooms_per_household", "population_per_household"])
housing_extra_attribs.head()
Feature Scaling: two common approaches are min-max scaling (MinMaxScaler) and standardization (StandardScaler).
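A minimal sketch contrasting the two scalers on one numerical column (illustrative only; the pipeline below uses StandardScaler):
from sklearn.preprocessing import MinMaxScaler, StandardScaler
income = housing_num[["median_income"]].values
income_minmax = MinMaxScaler().fit_transform(income)  # rescales values into the [0, 1] range
income_std = StandardScaler().fit_transform(income)   # zero mean, unit variance; less affected by outliers
print(income_minmax.min(), income_minmax.max())       # ~0.0 and 1.0
print(income_std.mean(), income_std.std())            # ~0.0 and 1.0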
Transformation Pipelines: run a sequence of transformations
# Use a Pipeline to call fit_transform() on a sequence of estimators
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
num_pipeline = Pipeline([
    ('imputer', Imputer(strategy='median')),
    ('attribs_adder', CombinedAttributesAdder()),
    ('std_scaler', StandardScaler()),
])
# three steps, each passing its output to the next: fill missing values -> combine attributes -> standardize
# housing_num contains only the numerical attributes (ocean_proximity dropped)
housing_num_tr = num_pipeline.fit_transform(housing_num)
housing_num_tr[0:5]
# Merge the transformed numerical columns with the categorical (text) column
from sklearn.pipeline import FeatureUnion
class DataFrameSelector(BaseEstimator, TransformerMixin):
    def __init__(self, attribute_names):
        self.attribute_names = attribute_names
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X[self.attribute_names].values
# apply the LabelBinarizer on the categorical values
num_attribs = list(housing_num)
cat_attribs = ["ocean_proximity"]
num_pipeline = Pipeline([
    ('selector', DataFrameSelector(num_attribs)),
    ('imputer', Imputer(strategy='median')),
    ('attribs_adder', CombinedAttributesAdder()),
    ('std_scaler', StandardScaler()),
])
cat_pipeline = Pipeline([
    ('selector', DataFrameSelector(cat_attribs)),
    ('label_binarizer', LabelBinarizer()),
])
full_pipeline = FeatureUnion(transformer_list=[
    ("num_pipeline", num_pipeline),
    ("cat_pipeline", cat_pipeline),
])
# The same FeatureUnion again, under the name preparation_pipeline (reused by the pipelines further below)
preparation_pipeline = FeatureUnion(transformer_list=[
    ("num_pipeline", num_pipeline),
    ("cat_pipeline", cat_pipeline),
])
housing_prepared = full_pipeline.fit_transform(housing)
housing_prepared
housing_prepared.shape
Notes: If the data is huge, you may want to sample smaller training sets so you can train many different models in a reasonable time (be aware that this penalizes complex models such as large neural nets or Random Forests). Once again, try to automate these steps as much as possible.
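For example (a hedged illustration, not in the original notes; the variable names below are hypothetical), one could subsample the prepared training set before comparing many models:
# Hypothetical subsampling step: experiment on a random 20% of the prepared training data
sample_idx = np.random.RandomState(42).permutation(len(housing_prepared))[:len(housing_prepared) // 5]
housing_prepared_small = housing_prepared[sample_idx]
housing_labels_small = housing_labels.iloc[sample_idx]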
# Linear Regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(housing_prepared, housing_labels)
#prediction
some_data = housing.iloc[:5]
some_labels = housing_labels.iloc[:5]
some_data_prepared = full_pipeline.transform(some_data)
print("Prediation:\t", lin_reg.predict(some_data_prepared))
print("Labels:\t\t", list(some_labels))
#RMSE
from sklearn.metrics import mean_squared_error
housing_predictions = lin_reg.predict(housing_prepared)
lin_mse = mean_squared_error(housing_labels, housing_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse
# Decision Tree
from sklearn.tree import DecisionTreeRegressor
tree_reg = DecisionTreeRegressor()
tree_reg.fit(housing_prepared, housing_labels)
housing_predictions = tree_reg.predict(housing_prepared)
tree_mse = mean_squared_error(housing_labels, housing_predictions)
tree_rmse = np.sqrt(tree_mse)
tree_rmse # overfitting
# cross validation
from sklearn.model_selection import cross_val_score
scores = cross_val_score(tree_reg, housing_prepared, housing_labels, scoring="neg_mean_squared_error", cv=10)
tree_rmse_scores = np.sqrt(-scores)
def display_scores(scores):
    print("Scores:\t", scores)
    print("Mean:\t", scores.mean())
    print("Standard deviation:", scores.std())

display_scores(tree_rmse_scores)
scores = cross_val_score(lin_reg, housing_prepared, housing_labels, scoring="neg_mean_squared_error", cv=10)
lin_rmse_scores = np.sqrt(-scores)
display_scores(lin_rmse_scores)
The Decision Tree is overfitting, while the Linear Regression is underfitting.
# Random Forests work by training many Decision Trees on random subsets of the features
from sklearn.ensemble import RandomForestRegressor
forest_reg = RandomForestRegressor()
forest_reg.fit(housing_prepared, housing_labels)
forest_scores = cross_val_score(forest_reg, housing_prepared, housing_labels, scoring="neg_mean_squared_error", cv=10)
forest_rmse_scores = np.sqrt(-forest_scores)
display_scores(forest_rmse_scores)
from sklearn.svm import SVR
svm_reg = SVR(kernel="linear")
svm_reg.fit(housing_prepared, housing_labels)
housing_predictions = svm_reg.predict(housing_prepared)
svm_mse = mean_squared_error(housing_labels, housing_predictions)
svm_rmse = np.sqrt(svm_mse)
svm_rmse
Notes: You will want to use as much data as possible for this step, especially as you move toward the end of fine-tuning. As always automate what you can.
Tell it which hyperparameters you want to experiment with and what values to try out, and it will evaluate all the possible combinations of hyperparameter values using cross-validation.
# Grid search
from sklearn.model_selection import GridSearchCV
param_grid = [
    {'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
]
forest_reg = RandomForestRegressor()
grid_search = GridSearchCV(forest_reg, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(housing_prepared, housing_labels)
grid_search.best_params_
grid_search.best_estimator_
cvres = grid_search.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(np.sqrt(-mean_score), params)
pd.DataFrame(grid_search.cv_results_)
# Randomized search
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
param_distribs = {
    'n_estimators': randint(low=1, high=200),
    'max_features': randint(low=1, high=8),
}
forest_reg = RandomForestRegressor()
rnd_search = RandomizedSearchCV(forest_reg, param_distributions=param_distribs,
n_iter=10, cv=5, scoring='neg_mean_squared_error')
rnd_search.fit(housing_prepared, housing_labels)
cvres = rnd_search.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(np.sqrt(-mean_score), params)
Analyze the Best Models and Their Errors
feature_importances = grid_search.best_estimator_.feature_importances_
feature_importances
extra_attribs = ["rooms_per_household", "population_per_household", "bedrooms_per_room"]
cat_one_hot_attribs = list(encoder.classes_)
attributes = num_attribs + extra_attribs + cat_one_hot_attribs
sorted(zip(feature_importances, attributes), reverse=True)
Evaluate Your System on the Test Set
final_model = grid_search.best_estimator_
X_test = strat_test_set.drop("median_house_value", axis=1)
y_test = strat_test_set["median_house_value"].copy()
X_test_transformed = full_pipeline.transform(X_test)
final_predictions = final_model.predict(X_test_transformed)
final_mse = mean_squared_error(y_test, final_predictions)
final_rmse = np.sqrt(final_mse)
final_rmse
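Optionally (an added sketch, not part of the original notes), one can estimate a 95% confidence interval for this generalization RMSE from the per-instance squared errors:
from scipy import stats
confidence = 0.95
squared_errors = (final_predictions - y_test) ** 2
interval = np.sqrt(stats.t.interval(confidence, len(squared_errors) - 1,
                                    loc=squared_errors.mean(),
                                    scale=stats.sem(squared_errors)))
print(interval)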
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
param_grid = [{"kernel" : ["linear"], "C" : [10., 50.]},
{"kernel" : ['rbf'], "C" : [300., 600.], 'gamma' : [.001]}]
svr_reg = SVR()
svr_search = GridSearchCV(svr_reg, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=4, verbose=2)
svr_search.fit(housing_prepared, housing_labels)
svres = svr_search.cv_results_
for mean_score, params in zip(svres["mean_test_score"], svres["params"]):
    print(np.sqrt(-mean_score), params)
svr_reg.get_params()
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import expon, reciprocal
# see https://docs.scipy.org/doc/scipy-0.19.0/reference/stats.html
# for `expon()` and `reciprocal()` documentation and more probability distribution functions.
# Note: gamma is ignored when kernel is "linear"
param_distribs = {
    'kernel': ['linear', 'rbf'],
    'C': reciprocal(20, 200),  # handson-ml answers 20000
    'gamma': expon(scale=1.0),
}
svm_reg = SVR()
rnd_search = RandomizedSearchCV(svm_reg, param_distributions=param_distribs,
n_iter=10, cv=5, scoring='neg_mean_squared_error', verbose=2, n_jobs=4)
rnd_search.fit(housing_prepared, housing_labels)
negative_mse = rnd_search.best_score_
rmse = np.sqrt(-negative_mse)
rmse
rnd_search.best_params_
expon_distrib = expon(scale=1.)
samples = expon_distrib.rvs(10000)
plt.figure(figsize=(10, 4))
plt.subplot(121)
plt.title("Exponential distribution (scale=1.0)")
plt.hist(samples, bins=50)
plt.subplot(122)
plt.title("Log of this distribution")
plt.hist(np.log(samples), bins=50)
plt.show()
Feature selection: assumes you have already computed the feature importances.
from sklearn.base import BaseEstimator, TransformerMixin
def indices_of_top_k(arr, k):
    return np.sort(np.argpartition(np.array(arr), -k)[-k:])

class TopFeatureSelector(BaseEstimator, TransformerMixin):
    def __init__(self, feature_importances, k):
        self.feature_importances = feature_importances
        self.k = k
    def fit(self, X, y=None):
        self.feature_indices_ = indices_of_top_k(self.feature_importances, self.k)
        return self
    def transform(self, X, y=None):
        return X[:, self.feature_indices_]
#define k
k = 5
# look at the selected features
top_k_feature_indices = indices_of_top_k(feature_importances, k)
print(top_k_feature_indices)
print(np.array(attributes)[top_k_feature_indices])
sorted(zip(feature_importances, attributes), reverse=True)[:k]
#pipeline
preparation_and_feature_selection_pipeline = Pipeline([
    ('preparation', full_pipeline),
    ('feature_selection', TopFeatureSelector(feature_importances, k))
])
#fit_transform
housing_prepared_top_k_features = preparation_and_feature_selection_pipeline.fit_transform(housing)
housing_prepared_top_k_features
Note: be sure to replace LabelBinarizer with a supervision-friendly version!
class SupervisionFriendlyLabelBinarizer(LabelBinarizer):
    def fit_transform(self, X, y=None):
        return super(SupervisionFriendlyLabelBinarizer, self).fit_transform(X)
# Replace the LabelBinarizer with a SupervisionFriendlyLabelBinarizer
cat_pipeline.steps[1] = ("label_binarizer", SupervisionFriendlyLabelBinarizer())
# Now you can create a full pipeline with a supervised predictor at the end.
full_pipeline_with_predictor = Pipeline([
    ("preparation", preparation_pipeline),
    ("linear", LinearRegression())
])
full_pipeline_with_predictor.fit(housing, housing_labels)
full_pipeline_with_predictor.predict(some_data)
prepare_select_and_predict_pipeline = Pipeline([
    ('preparation', preparation_pipeline),
    ('feature_selection', TopFeatureSelector(feature_importances, k)),
    ('svr_reg', SVR(C=122659.12862707644, gamma=0.22653313890837068, kernel='rbf')),
])
prepare_select_and_predict_pipeline.fit(housing, housing_labels)
Finally found the cause of the error: the LabelBinarizer had not been replaced with the supervision-friendly version!
param_grid = [
    {'preparation__num_pipeline__imputer__strategy': ['mean', 'median', 'most_frequent'],
     'feature_selection__k': [3, 4, 5, 6, 7]}
]
grid_search_prep = GridSearchCV(prepare_select_and_predict_pipeline, param_grid, cv=5,
scoring='neg_mean_squared_error', verbose=2, n_jobs=4)
grid_search_prep.fit(housing, housing_labels)
grid_search_prep.best_params_
housing.shape