ps_1_1,ps_1_2,ps_1_3,ps_1_4算法
Instruction Set | |||||||
版本 | 指令槽 | 1_1 | 1_2 | 1_3 | 1_4 | ||
ps | 版本號 | 0 | x | x | x | x | |
常數指令 | 1_1 | 1_2 | 1_3 | 1_4 | |||
def - ps | 定義常數 | 0 | x | x | x | x | |
相位指令 | 1_1 | 1_2 | 1_3 | 1_4 | |||
phase - ps | 在相位1與相位2之間轉換 | 0 | x | ||||
算法指令 | 1_1 | 1_2 | 1_3 | 1_4 | |||
add - ps | 兩個向量相加 | 1 | x | x | x | x | |
bem - ps | 使用一個假的 bump environment-map 變換 | 2 | x | ||||
cmp - ps | 以0爲比較賦值 | 1¹ | x | x | x | ||
cnd - ps | 以0.5爲比較賦值 | 1 | x | x | x | x | |
dp3 - ps | 三個份量點積 | 1 | x | x | x | x | |
dp4 - ps | 四個份量點積 | 1¹ | x | x | x | ||
lrp - ps | 線性插值 | 1 | x | x | x | x | |
mad - ps | 每一個份量乘完了加 | 1 | x | x | x | x | |
mov - ps | 賦值 | 1 | x | x | x | x | |
mul - ps | 乘法 | 1 | x | x | x | x | |
nop - ps | 無運算 | 0 | x | x | x | x | |
sub - ps | 減法 | 1 | x | x | x | x | |
圖片指令 | 1_1 | 1_2 | 1_3 | 1_4 | |||
tex - ps | 對圖片採樣 | 1 | x | x | x | ||
texbem - ps | 使用一個假的 bump environment-map 變換 | 1 | x | x | x | ||
texbeml - ps | 使用一個通過亮度校訂的假的 bump environment-map 變換 | 1+1² | x | x | x | ||
texcoord - ps | 返回圖片座標爲顏色 | 1 | x | x | x | ||
texcrd - ps | 複製圖片座標爲顏色 | 1 | x | ||||
texdepth - ps | 計算深度值 | 1 | x | ||||
texdp3 - ps | 貼圖數據與貼圖座標之間的三個份量的點積 | 1 | x | x | |||
texdp3tex - ps | 三個份量點積而且查找1D圖片 | 1 | x | x | |||
texkill - ps | 基於一個比較來取消像素的渲染 | 1 | x | x | x | x | |
texld - ps_1_4 | 對圖片採樣 | 1 | x | ||||
texm3x2depth - ps | 逐像素計算深度值用於深度測試 | 1 | x | ||||
texm3x2pad - ps | 第一行與一個二行矩陣進行矩陣乘法 | 1 | x | x | x | ||
texm3x2tex - ps | 最後一行與一個二行矩陣進行矩陣乘法 | 1 | x | x | x | ||
texm3x3 - ps | 3x3矩陣相乘 | 1 | x | x | |||
texm3x3pad - ps | 第一行或者第二行與一個三行矩陣進行矩陣乘法,須要與 texm3x3 - ps, texm3x3spec - ps, texm3x3vspec - ps, 或 texm3x3tex - ps結合使用 | 1 | x | x | x | ||
texm3x3spec - ps | 最後一行與一個三行矩陣進行矩陣乘法使用計算結果進行圖片查找,能夠用於鏡面反射與環境貼圖 | 1 | x | x | x | ||
texm3x3tex - ps | 經過3x3 矩陣乘積的結果來查找圖片 | 1 | x | x | x | ||
texm3x3vspec - ps | 用一個3x3矩陣乘法的計算結果做爲法向量,與一個非常量的視線方向向量進行圖片查找,能夠用於鏡面反射與環境貼圖 | 1 | x | x | x | ||
texreg2ar - ps | 經過r和a通道做爲uv來採樣圖片 | 1 | x | x | x | ||
texreg2gb - ps | 經過g和b通道做爲uv來採樣圖片 | 1 | x | x | x | ||
texreg2rgb - ps | 經過r、g和b通道來採樣圖片 | 1 | x | x | |||
語法
bem dst.rg, src0, src1
算法
(Given n == dest register #)
dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r
+D3DTSS_BUMPENVMAT10(stage n) * src1.g
dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r
+D3DTSS_BUMPENVMAT11(stage n) * src1.g
語法
cmp dst, src0, src1, src2
若是src0>=0返回src1不然src2
算法
ps_1_4
def c0, -0.6, 0.6, 0, 0.6
def c1 0,0,0,0
def c2 1,1,1,1
mov r1, c1
mov r2, c2
cmp r0, c0, r1, r2 // r0 is assigned 1,0,0,0 based on the following:
// r0.x = c2.x because c0.x < 0
// r0.y = c1.y because c0.y >= 0
// r0.z = c1.z because c0.z >= 0
// r0.w = c1.w because c0.w >= 0
語法
cnd dst, src0, src1, src2
若是src0>0.5 返回src1不然src2
算法
在1_1到1_3版本,src0必須爲r0.a(單通道)
// Version 1_1 to 1_3
if (r0.a > 0.5)
dest = src1
else
dest = src2
在1_4版本就能夠每一個通道分別比較值
for each component in src0
{
if (src0.component > 0.5)
dest.component = src1.component
else
dest.component = src2.component
}
示例
ps_1_4
def c0, -0.5, 0.5, 0, 0.6
def c1, 0,0,0,0
def c2, 1,1,1,1
cnd r1, c0, c1, c2 // r1 contains 1,1,1,0 because
// r1.x = c2.x because c0.x <= 0.5
// r1.y = c2.y because c0.y <= 0.5
// r1.z = c2.z because c0.z <= 0.5
// r1.w = c1.w because c0.w > 0.5
語法
dp3 dst, src0, src1
計算三個份量的點積
算法
dest.x = dest.y = dest.z = dest.w =
(src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z);
語法
dp4 dst, src0, src1
計算四個份量的點積
算法
dest.x = dest.y = dest.z = dest.w =
(src0.x * src1.x) + (src0.y * src1.y) +
(src0.z * src1.z) + (src0.w * src1.w);
語法
lrp dst, src0, src1, src2
基於src0對src1與src2作線性插值運算
算法
dest = src0 * src1 + (1-src0) * src2
// which is the same as
dest = src2 + src0 * (src1 - src2)
語法
mad dst, src0, src1, src2
作(src0 * src1) + src2處理
算法
dest.x = src0.x * src1.x + src2.x;
dest.y = src0.y * src1.y + src2.y;
dest.z = src0.z * src1.z + src2.z;
dest.w = src0.w * src1.w + src2.w;
語法
mov dst, src
轉移值處理
語法
mul dst, src0, src1
乘法
算法
dest.x = src0.x * src1.x;
dest.y = src0.y * src1.y;
dest.z = src0.z * src1.z;
dest.w = src0.w * src1.w;
語法
nop
執行無運算
語法
sub dst, src0, src1
減法運算
算法
dest = src0 - src1
vs1
Instruction Set | |||||||
Name | Description | Instruction slots | Setup | Arithmetic | New | ||
add - vs | 兩個向量加法運算 | 1 | x | x | |||
dcl_usage input (sm1, sm2, sm3 - vs asm) | 聲明輸入向量寄存器 (see Registers - vs_1_1) | 0 | x | x | |||
def - vs | 定義常量 | 0 | x | x | |||
dp3 - vs | 三個份量的點積運算 | 1 | x | x | |||
dp4 - vs | 四個份量的點積運算 | 1 | x | x | |||
dst - vs | 計算距離向量 | 1 | x | x | |||
exp - vs | 全精度的2的x次方計算 | 10 | x | x | |||
expp - vs | 半精度的2的x次方計算 | 1 | x | x | |||
frc - vs | 小數部分 | 3 | x | x | |||
lit - vs | 局部光計算 | 1 | x | x | |||
log - vs | 全精度的 log₂(x)計算 | 10 | x | x | |||
logp - vs | 半精度的 log₂(x)計算 | 1 | x | x | |||
m3x2 - vs | 3x2 乘法 | 2 | x | x | |||
m3x3 - vs | 3x3 乘法 | 3 | x | x | |||
m3x4 - vs | 3x4 乘法 | 4 | x | x | |||
m4x3 - vs | 4x3 乘法 | 3 | x | x | |||
m4x4 - vs | 4x4 乘法 | 4 | x | x | |||
mad - vs | 每一個份量乘完了加 | 1 | x | x | |||
max - vs | 求最大值 | 1 | x | x | |||
min - vs | 求最小值 | 1 | x | x | |||
mov - vs | 賦值 | 1 | x | x | |||
mul - vs | 乘法 | 1 | x | x | |||
nop - vs | 無運算 | 1 | x | x | |||
rcp - vs | 倒數 | 1 | x | x | |||
rsq - vs | 平方根以後的倒數 | 1 | x | x | |||
sge - vs | 大於或等於比較,返回1或0 | 1 | x | x | |||
slt - vs | 小於比較,返回1或0 | 1 | x | x | |||
sub - vs | 減法 | 1 | x | x | |||
vs | 版本 | 0 | x | x | |||
dst
語法
dst dest, src0, src1
計算距離向量
src0爲(ignored, d*d, d*d,ignored)
src1爲(ignored, 1/d,ignored, 1/d)
最終獲得的結果爲(1, d, d*d, 1/d)
算法
dest.x = 1;
dest.y = src0.y * src1.y;
dest.z = src0.z;
dest.w = src1.w;
語法
exp dst, src
算法
dest.x = dest.y = dest.z = dest.w = (float)pow(2, src.replicateSwizzleComponent);
語法
frc dst, src
算法
dest.x = src.x - (float)floor(src.x);
dest.y = src.y - (float)floor(src.y);
dest.z = src.z - (float)floor(src.z);
dest.w = src.w - (float)floor(src.w);
語法
lit dst, src
src的各部分爲
src.x = N*L ; The dot product between normal and direction to light
src.y = N*H ; The dot product between normal and half vector
src.z = ignored ; This value is ignored
src.w = exponent ; The value must be between -128.0 and 128.0
算法
dest.x = 1;
dest.y = 0;
dest.z = 0;
dest.w = 1;
float power = src.w;
const float MAXPOWER = 127.9961f;
if (power < -MAXPOWER)
power = -MAXPOWER; // Fits into 8.8 fixed point format
else if (power > MAXPOWER)
power = MAXPOWER; // Fits into 8.8 fixed point format
if (src.x > 0)
{
dest.y = src.x;//diffuse
if (src.y > 0)
{
// Allowed approximation is EXP(power * LOG(src.y))
dest.z = (float)(pow(src.y, power));//specular
}
}
語法
log dst, src
算法
float v = abs(src);
if (v != 0)
{
dest.x = dest.y = dest.z = dest.w =
(float)(log(v)/log(2));
}
else
{
dest.x = dest.y = dest.z = dest.w = -FLT_MAX;
}
語法
logp dst, src
算法
float f = abs(src);
if (f != 0)
dest.x = dest.y = dest.z = dest.w = (float)(log(f)/log(2));
else
dest.x = dest.y = dest.z = dest.w = -FLT_MAX;
語法
m3x2 dst, src0, src1
算法
dest.x = (src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z);
dest.y = (src0.x * src2.x) + (src0.y * src2.y) + (src0.z * src2.z);
語法
m3x3 dst,src0, src1
算法
dest.x = (src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z);
dest.y = (src0.x * src2.x) + (src0.y * src2.y) + (src0.z * src2.z);
dest.z = (src0.x * src3.x) + (src0.y * src3.y) + (src0.z * src3.z);
語法
m3x4 dst, src0, src1
算法
dest.x = (src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z);
dest.y = (src0.x * src2.x) + (src0.y * src2.y) + (src0.z * src2.z);
dest.z = (src0.x * src3.x) + (src0.y * src3.y) + (src0.z * src3.z);
dest.w = (src0.x * src4.x) + (src0.y * src4.y) + (src0.z * src4.z);
語法
m4x3 dst, src0, src1
算法
dest.x = (src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z) + (src0.w * src1.w);
dest.y = (src0.x * src2.x) + (src0.y * src2.y) + (src0.z * src2.z) + (src0.w * src2.w);
dest.z = (src0.x * src3.x) + (src0.y * src3.y) + (src0.z * src3.z) + (src0.w * src3.w);
語法
m4x4 dst, src0, src1
算法
dest.x = (src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z) +
(src0.w * src1.w);
dest.y = (src0.x * src2.x) + (src0.y * src2.y) + (src0.z * src2.z) +
(src0.w * src2.w);
dest.z = (src0.x * src3.x) + (src0.y * src3.y) + (src0.z * src3.z) +
(src0.w * src3.w);
dest.w = (src0.x * src4.x) + (src0.y * src4.y) + (src0.z * src4.z) +
(src0.w * src4.w);
語法
max dst, src0, src1
算法
dest.x=(src0.x >= src1.x) ? src0.x : src1.x;
dest.y=(src0.y >= src1.y) ? src0.y : src1.y;
dest.z=(src0.z >= src1.z) ? src0.z : src1.z;
dest.w=(src0.w >= src1.w) ? src0.w : src1.w;
語法
min dst, src0, src1
算法
dest.x=(src0.x < src1.x) ? src0.x : src1.x;
dest.y=(src0.y < src1.y) ? src0.y : src1.y;
dest.z=(src0.z < src1.z) ? src0.z : src1.z;
dest.w=(src0.w < src1.w) ? src0.w : src1.w;
語法
rcp dst, src
算法
float f = src0;
if(f == 0.0f)
{
f = FLT_MAX;
}
else
{
if(f != 1.0)
{
f = 1/f;
}
}
dest = f;
語法
rsq dst, src
算法
float f = abs(src0);
if (f == 0)
f = FLT_MAX;
else
{
if (f != 1.0)
f = 1.0/(float)sqrt(f);
}
dest.x = dest.y = dest.z = dest.w = f;
語法
sge dst, src0, src1
src0大於等於 src1返回1不然爲0
算法
dest.x = (src0.x >= src1.x) ? 1.0f : 0.0f;
dest.y = (src0.y >= src1.y) ? 1.0f : 0.0f;
dest.z = (src0.z >= src1.z) ? 1.0f : 0.0f;
dest.w = (src0.w >= src1.w) ? 1.0f : 0.0f;
語法
slt dst, src0, src1
src0小於 src1返回1不然爲0
算法
dest.x = (src0.x < src1.x) ? 1.0f : 0.0f;
dest.y = (src0.y < src1.y) ? 1.0f : 0.0f;
dest.z = (src0.z < src1.z) ? 1.0f : 0.0f;
dest.w = (src0.w < src1.w) ? 1.0f : 0.0f;
庫: https://msdn.microsoft.com/en-us/library/bb219840(v=vs.85).aspx
--wolf96 2017/1/1