梯度下降算法,參考 Edwin K. P. Chong《最優化導論》8.2 章節,算法採用 Go 語言實現。
此處算法仍然存在疑惑,主要是梯度下降時如何確定步長:即使採用割線法獲取最優步長,割線法的初始值又該如何確定?
下面程序中雖然採用了牛頓法獲取極值,但非常依賴初始取值範圍!
/***************************************** * FileName : grad.go * Author : fredric * Date : 2017.09.01 * Note : 梯度算法 * History : *****************************************/ package grad import( "fmt" "math" ) //沒法採用牛頓方法求得極值,主要緣由在於沒法肯定初始值,形成導數誤差很大 func _get_argmin_newton(x1, x2, x3, grad_x1, grad_x2, grad_x3 float64) float64 { fmt.Printf("_get_argmin input value %f,%f,%f,%f,%f,%f\n", x1, x2, x3, grad_x1, grad_x2, grad_x3) //f(x - a*delta) = (x1 - a * grad_x1 - 4)^4 + (x2 - a * grad_x2 - 3)^2 + 4 * (x3 - a*grad_x3 + 5)^4 //f'(x - a*delta) = 4 * grad_x1 * (x1 - a * grad_x1 - 4)^3 // + 2 * grad_x2 * (x2 - a * grad_x2 - 3) // + 16* grad_x3 * (x3 - a*grad_x3 + 5)^3 //f''(x - a*delta)= 12 * grad_x1^2 * (x1 - a * grad_x1 - 4)^2 // + 2 * grad_x2^2 * a // + 48 * grad_x3^2 * (x3 - a*grad_x3 + 5)^2 //採用牛頓法求取f(a)的最小值 //此處的初始值仍是比較疑惑,由於初始值取不對,結果差太遠 var a0 float64 = 0.0002 var a1 float64 = 0.0005 delta := 0.0005 for math.Abs(a1 - a0) > delta { a0 = a1 //fmt.Printf("a0: %f\n" , a0) //fmt.Printf("grad_x2: %f\n" , grad_x2) //fmt.Printf("grad_x2 * a0: %f\n" , grad_x2 * a0) //fmt.Printf("grad_x2 * 0.2: %f\n" , grad_x2 * 0.2) f_1_v := 4 * grad_x1 * (x1 - a0 * grad_x1 - 4)* (x1 - a0 * grad_x1 - 4)* (x1 - a0 * grad_x1 - 4) + 2 * grad_x2 * (x2 - a0 * grad_x2 - 3) + 16* grad_x3 * (x3 - a0 * grad_x3 + 5)* (x3 - a0 * grad_x3 + 5) * (x3 - a0 * grad_x3 + 5) f_2_v := 12 * grad_x1 * grad_x1 * (x1 - a1 * grad_x1 - 4)* (x1 - a1 * grad_x1 - 4) + 2 * grad_x2* grad_x2 * a1 + 48 * grad_x3* grad_x3 * (x3 - a1 * grad_x3 + 5)* (x3 - a1 * grad_x3 + 5) a1 = a0 - f_1_v / f_2_v //fmt.Printf("----------abs = %f\n", math.Abs(a1 - a0)) fmt.Printf("step value = %f f_1_v = %f, f_2_v = %f\n", (a0 + a1)/2, f_1_v, f_2_v) } return (a0 + a1)/2 } //採用常量方式求極值 func _get_argmin_const(x1, x2, x3, grad_x1, grad_x2, grad_x3 float64) float64{ /* * 不是很搞的清楚,當採用快速降低算法時如何肯定固定步長,網上有一個說法實踐是正確的 * 即知足李普希茲條件存在L>0使得|f(x1)-f(x2)|<=L|x1-x2|,步長取1/L * 下面這個例子因爲存在x3這個高階,因此若是步長取大的話,徹底沒有辦法計算 */ return 0.0004 } func DoGradAlgorithm(){ 
//計算f(x1,x2,x3) = (x1 - 4)^4 + (x2 - 3)^2 + 4*(x3 + 5)^4 //所謂梯度本質上也是導數,只是針對多維度上,取了各個維度偏導數,組成向量; //最速降低法就是在每次迭代時取當前負梯度方向的能獲取的函數數最小值 //初始值x0 = [4, 2, -1] x1 := 4.0 x2 := 2.0 x3 := -1.0 //取三次迭代 for i := 0; i < 4; i++ { grad_x1 := 4 * (x1 - 4)*(x1 - 4)*(x1 - 4) grad_x2 := 2 * (x2 - 3) grad_x3 := 16 * (x3 + 5)* (x3 + 5)* (x3 + 5) a := _get_argmin_newton(x1,x2,x3, grad_x1, grad_x2, grad_x3) fmt.Printf("grad_x1 = %f, grad_x2 = %f, grad_x3 = %f\n", grad_x1, grad_x2, grad_x3) x1 = x1 - a * grad_x1 x2 = x2 - a * grad_x2 x3 = x3 - a * grad_x3 fmt.Printf("x1 = %f, x2 = %f, x3 = %f\n", x1, x2, x3) } }