【百度飛槳AI Studio】二、感性快速體驗深度學習的線性回歸預測房價

時間 2019-11-12

標籤：百度飛槳AI Studio、感性快速體驗深度學習、線性預測房價　欄目：應用數學　简体版

原文：原文鏈接

百度學習原文地址： https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/basics/fit_a_line/README.cn.html

from __future__ import print_function

import math
import sys

import numpy
import paddle
import paddle.fluid as fluid
# Number of samples grouped into each mini-batch by the readers below.
BATCH_SIZE = 20

# Training reader: shuffle the UCI housing training split through a
# 500-sample buffer, then group the samples into mini-batches.
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.uci_housing.train(), buf_size=500),
    batch_size=BATCH_SIZE)

# Test reader: the same pipeline applied to the UCI housing test split.
test_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.uci_housing.test(), buf_size=500),
    batch_size=BATCH_SIZE)

x = fluid.layers.data(name='x', shape=[13], dtype='float32')  # shape and dtype of the input
y = fluid.layers.data(name='y', shape=[1], dtype='float32')  # shape and dtype of the label
y_predict = fluid.layers.fc(input=x, size=1, act=None)  # fully connected layer linking input to output

main_program = fluid.default_main_program()  # default/global main program
startup_program = fluid.default_startup_program()  # default/global startup program

cost = fluid.layers.square_error_cost(input=y_predict, label=y)  # squared error between prediction and label
avg_loss = fluid.layers.mean(cost)  # mean of the squared error, i.e. the average loss

# Clone main_program to obtain test_program.
# Some operators behave differently between training and testing (for
# example batch_norm); for_test=True marks the clone as a test program.
# clone() does not remove any operator, so it has to run BEFORE the
# backward/optimization ops are appended by minimize() below. Cloning
# afterwards would leave the optimizer ops inside test_program, so an
# evaluation pass would also run them on the test data.
test_program = main_program.clone(for_test=True)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_loss)

use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()  # device the executor runs on

# An executor accepts a program and, based on the feed map (input mapping)
# and the fetch list (results to retrieve), adds the data-feeding and
# result-fetching operators to it. Call run(...) to execute the program and
# close() to shut the executor down.
exe = fluid.Executor(place)


# Upper bound on the number of passes over the training data.
num_epochs = 100

def train_test(executor, program, reader, feeder, fetch_list):
    """Run ``program`` on every batch from ``reader`` and average the results.

    Args:
        executor: Object whose ``run(program=..., feed=..., fetch_list=...)``
            returns one indexable result per entry of ``fetch_list``.
        program: The program handed to ``executor.run``.
        reader: Zero-argument callable returning an iterable of batches.
        feeder: Object whose ``feed(batch)`` converts a batch into the feed
            argument expected by ``executor.run``.
        fetch_list: Targets to fetch; one averaged value is returned per
            target.

    Returns:
        A list with one entry per fetch target: the first element of that
        target's result, averaged over the number of batches.

    Raises:
        ZeroDivisionError: If ``reader`` yields no batches while
            ``fetch_list`` is non-empty.
    """
    # One running total per fetch target. A fixed single-slot accumulator
    # would make zip() silently drop every target after the first.
    totals = [0] * len(fetch_list)
    batch_count = 0
    for batch in reader():
        outs = executor.run(program=program,
                            feed=feeder.feed(batch),
                            fetch_list=fetch_list)
        # Add the first element of each fetched result to its running total.
        totals = [total + out[0] for total, out in zip(totals, outs)]
        batch_count += 1  # counts batches, not individual samples
    return [total / batch_count for total in totals]  # per-batch average
# `%matplotlib inline` is an IPython magic, not Python syntax. Request the
# inline backend only when running under IPython/Jupyter (where
# get_ipython is defined) so this file still parses as a plain script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

params_dirname = "fit_a_line.inference.model"
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe.run(startup_program)
train_prompt = "train cost"
test_prompt = "test cost"
from paddle.utils.plot import Ploter
plot_prompt = Ploter(train_prompt, test_prompt)
step = 0

exe_test = fluid.Executor(place)

# Set once the test cost drops below the target so that BOTH loops stop;
# a bare `break` would only leave the inner batch loop and training would
# carry on with the next epoch.
target_reached = False

for pass_id in range(num_epochs):
    for data_train in train_reader():
        avg_loss_value, = exe.run(main_program,
                                  feed=feeder.feed(data_train),
                                  fetch_list=[avg_loss])

        # Abort on a diverged loss before spending time on plots/evaluation.
        if math.isnan(float(avg_loss_value[0])):
            sys.exit("got NaN loss, training failed.")

        if step % 10 == 0:  # record and print the training loss every 10 batches
            plot_prompt.append(train_prompt, step, avg_loss_value[0])
            plot_prompt.plot()
            print("%s, Step %d, Cost %f" %
                  (train_prompt, step, avg_loss_value[0]))
        if step % 100 == 0:  # record and print the test loss every 100 batches
            test_metrics = train_test(executor=exe_test,
                                      program=test_program,
                                      reader=test_reader,
                                      fetch_list=[avg_loss.name],
                                      feeder=feeder)
            plot_prompt.append(test_prompt, step, test_metrics[0])
            plot_prompt.plot()
            print("%s, Step %d, Cost %f" %
                  (test_prompt, step, test_metrics[0]))
            if test_metrics[0] < 10.0:  # test cost is low enough: stop training
                target_reached = True
                break

        step += 1

    if target_reached:
        break

# Save the trained parameters to the path chosen above. This runs once after
# training finishes instead of rewriting the model on every batch.
if params_dirname is not None:
    fluid.io.save_inference_model(params_dirname, ['x'], [y_predict], exe)

# Separate executor and scope used by the inference section of the script.
infer_exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()


def save_result(points1, points2):
    """Plot predictions against ground truth and save the figure as a PNG.

    The figure is written to ``./image/prediction_gt.png``; the ``image``
    directory is created first when it does not exist, because ``savefig``
    does not create missing parent directories.

    Args:
        points1: Sequence of predicted values, drawn in red and labelled
            'predictions'.
        points2: Sequence of ground-truth values, drawn in green and
            labelled 'GT'. It is plotted against the indices of
            ``points1``.
    """
    import os
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    output_path = './image/prediction_gt.png'
    output_dir = os.path.dirname(output_path)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    x1 = list(range(len(points1)))
    # Dashed lines carry the legend labels; the third call overlays the
    # same series with markers.
    plt.plot(x1, points1, 'r--', label='predictions')
    plt.plot(x1, points2, 'g--', label='GT')
    plt.plot(x1, points1, 'ro-', x1, points2, 'g+-')
    plt.title('predictions VS GT')
    plt.legend()
    plt.savefig(output_path)
    
# Run inference inside its own scope using the model saved during training.
with fluid.scope_guard(inference_scope):
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(params_dirname, infer_exe)  # load the saved model
    batch_size = 10

    infer_reader = paddle.batch(
        paddle.dataset.uci_housing.test(), batch_size=batch_size)  # prepare the test set

    # Only the first batch of the test set is used for the demonstration.
    infer_data = next(infer_reader())
    infer_feat = numpy.array(
        [data[0] for data in infer_data]).astype("float32")  # features of the test samples
    infer_label = numpy.array(
        [data[1] for data in infer_data]).astype("float32")  # labels of the test samples

    assert feed_target_names[0] == 'x'
    # infer_feat is already a float32 ndarray, so it is fed as-is.
    results = infer_exe.run(inference_program,
                            feed={feed_target_names[0]: infer_feat},
                            fetch_list=fetch_targets)  # run the prediction

    # Flatten to plain scalars before formatting: applying "%.2f" to a
    # one-element array relies on an implicit array-to-scalar conversion
    # that NumPy has deprecated.
    # NOTE(review): assumes one value per sample for both predictions and
    # labels (fc size=1, label shape=[1]) - confirm if the model changes.
    predictions = numpy.asarray(results[0]).reshape(-1)
    labels = infer_label.reshape(-1)

    # Print the predictions and the labels, then visualise them.
    print("infer results: (House Price)")
    for idx, val in enumerate(predictions):
        print("%d: %.2f" % (idx, val))  # predicted value

    print("\nground truth:")
    for idx, val in enumerate(labels):
        print("%d: %.2f" % (idx, val))  # label value

    save_result(results[0], infer_label)  # save the comparison plot

把代碼複製進去。