战魂小筑

讨论群:309800774 知乎关注:http://zhihu.com/people/sunicdavy 开源项目:https://github.com/davyxu

   :: 首页 :: 新随笔 :: 联系 :: 聚合  :: 管理 ::
  252 随笔 :: 0 文章 :: 506 评论 :: 0 Trackbacks

传统的蒙皮骨骼动画混合方法易于理解,但是在SM 2.0的256常量限制下,骨骼数保守计算最多50根骨头,因此对美术的工作流程以及模型渲染方法造成了很大的障碍

// Bone matrix palette indexed by the per-vertex bone indices in vs_main.
float4x4 matBoneArray[40]; //  This is the transfer bottleneck
 
// Vertex shader: blends up to four bone matrices from matBoneArray by the
// per-vertex weights, skins the position with the blended matrix and then
// transforms it by matViewProj. The texture coordinate is passed through.
VS_OUTPUT vs_main( SkinnedVS_INPUT In )
{
    // Zero-initialise so any output field not written below stays zero.
    VS_OUTPUT result = (VS_OUTPUT)0;

    // Weighted sum of the four influencing bone matrices.
    float4x4 blendedBone = 0;
    blendedBone += matBoneArray[In.BoneIndices.x] * In.BoneWeights.x;
    blendedBone += matBoneArray[In.BoneIndices.y] * In.BoneWeights.y;
    blendedBone += matBoneArray[In.BoneIndices.z] * In.BoneWeights.z;
    blendedBone += matBoneArray[In.BoneIndices.w] * In.BoneWeights.w;

    // Row-vector convention: position * bone blend, then * view-projection.
    float4 skinnedPos = mul( In.Position, blendedBone );

    result.Position = mul( skinnedPos, matViewProj );
    result.TexCoord = In.TexCoord;

    return result;
}

matBoneArray 这个数组中的每个矩阵,都是由骨骼的 LocalRot 和 LocalTranslation 通过以下函数构建出来的:

    // Overwrites this matrix with the transform described by a rotation
    // quaternion and a translation vector, then returns *this for chaining.
    // Layout: rows 0-2 hold the rotation terms, row 3 holds the translation,
    // and the last column is (0, 0, 0, 1).
    // NOTE(review): the formula presumably expects a unit quaternion - confirm with callers.
    Matrix4& Matrix4::FromTranslationRotation( const Vector3& translation, const Quaternion& rotation )
    {
        const Quaternion& q = rotation;

        // Doubled products of quaternion components.
        float xx = q.x * q.x * 2.0f;
        float yy = q.y * q.y * 2.0f;
        float zz = q.z * q.z * 2.0f;
        float xy = q.x * q.y * 2.0f;
        float xz = q.x * q.z * 2.0f;
        float yz = q.y * q.z * 2.0f;
        float xw = q.x * q.w * 2.0f;
        float yw = q.y * q.w * 2.0f;
        float zw = q.z * q.w * 2.0f;

        // Row 0
        m[0][0] = 1.0f - yy - zz;
        m[0][1] = xy + zw;
        m[0][2] = xz - yw;
        m[0][3] = 0.0f;

        // Row 1
        m[1][0] = xy - zw;
        m[1][1] = 1.0f - xx - zz;
        m[1][2] = yz + xw;
        m[1][3] = 0.0f;

        // Row 2
        m[2][0] = xz + yw;
        m[2][1] = yz - xw;
        m[2][2] = 1.0f - xx - yy;
        m[2][3] = 0.0f;

        // Row 3: translation
        m[3][0] = translation.x;
        m[3][1] = translation.y;
        m[3][2] = translation.z;
        m[3][3] = 1.0f;

        return *this;
    }

从这里你可以发现, 本来每根骨头只需要2个float4 传递变换信息的,现在却需要4个float4,也就是一个矩阵来传递,矩阵中还有很多不使用的变量也被传输到GPU中,这里就是优化的点.

重新调整后的Shader代码:

// Rebuilds a bone's 4x4 transform on the GPU from its translation and its
// rotation quaternion. Rows 0-2 hold the rotation terms, row 3 holds the
// translation; only translation.xyz is read (translation.w is ignored).
float4x4 BuildFromTransRot( float4 translation, float4 rot )
{
    // Doubled products of quaternion components.
    float xx = rot.x * rot.x * 2.0f;
    float yy = rot.y * rot.y * 2.0f;
    float zz = rot.z * rot.z * 2.0f;
    float xy = rot.x * rot.y * 2.0f;
    float xz = rot.x * rot.z * 2.0f;
    float yz = rot.y * rot.z * 2.0f;
    float xw = rot.x * rot.w * 2.0f;
    float yw = rot.y * rot.w * 2.0f;
    float zw = rot.z * rot.w * 2.0f;

    float4x4 boneMatrix = {
        { 1.0f - yy - zz, xy + zw,        xz - yw,        0 },
        { xy - zw,        1.0f - xx - zz, yz + xw,        0 },
        { xz + yw,        yz - xw,        1.0f - xx - yy, 0 },
        { translation.x,  translation.y,  translation.z,  1 }
    };

    return boneMatrix;
}
 
// Fetches the translation and rotation stored for bone `index` and expands
// them into that bone's 4x4 matrix via BuildFromTransRot.
float4x4 GetBoneElement( float index )
{
    float4 boneTranslation = vecBoneLocalTrans[index];
    float4 boneRotation    = vecBoneLocalRot[index];

    return BuildFromTransRot( boneTranslation, boneRotation );
}

// Vertex shader: rebuilds each influencing bone's matrix with GetBoneElement,
// blends the four matrices by the per-vertex weights, skins the position with
// the blend and then transforms it by matViewProj. The texture coordinate is
// passed through.
VS_OUTPUT vs_main( SkinnedVS_INPUT In )
{
    // Zero-initialise so any output field not written below stays zero.
    VS_OUTPUT result = (VS_OUTPUT)0;

    // Weighted sum of the four reconstructed bone matrices.
    float4x4 blendedBone = 0;
    blendedBone += GetBoneElement(In.BoneIndices.x) * In.BoneWeights.x;
    blendedBone += GetBoneElement(In.BoneIndices.y) * In.BoneWeights.y;
    blendedBone += GetBoneElement(In.BoneIndices.z) * In.BoneWeights.z;
    blendedBone += GetBoneElement(In.BoneIndices.w) * In.BoneWeights.w;

    // Row-vector convention: position * bone blend, then * view-projection.
    float4 skinnedPos = mul( In.Position, blendedBone );

    result.Position = mul( skinnedPos, matViewProj );
    result.TexCoord = In.TexCoord;

    return result;
}

我们将骨头的local旋转及偏移传递至GPU,然后在GPU内重组出矩阵。虽然这会增加一部分GPU计算开销,但是每根骨头只占用2个float4常量,骨骼数量就能保守提高到100个.

posted on 2010-04-26 13:31 战魂小筑 阅读(3388) 评论(7)  编辑 收藏 引用 所属分类: 游戏开发技术渲染 Shader 引擎

评论

# re: [原创]提高Shader Model 2.0 蒙皮骨骼动画的骨骼限制 2010-04-26 18:17 Bill Hsu
谢谢分享,好文

可是我看到某个shader代码里有这样一句:
uniform mat4 boneMat[96];

那个最大50个骨骼的限制是怎么算出来的呢?

还有这种方法没有考虑scale,所以需要3个float4才可以。。。  回复  更多评论
  

# re: [原创]提高Shader Model 2.0 蒙皮骨骼动画的骨骼限制 2010-04-26 19:50 Davy.xu
这里是在SM2.0的256个常量限制下计算的
1个矩阵占用4个常量,那么最多可以传入256/4=64个矩阵,由于转换矩阵及其他的数据还需要占用常量,保守就是用50作为限制
可以缩放的骨骼非常少见,为了这个很小的特性而提高工作流复杂度,不值得  回复  更多评论
  

# re: [原创]提高Shader Model 2.0 蒙皮骨骼动画的骨骼限制 2010-04-27 22:08 陈昱(CY)
位移只有float3,应该能又节约一些吧?
uniform vec4 g_allBonesQuaRot[50];
uniform vec3 g_allBonesTran[50];  回复  更多评论
  

# re: [原创]提高Shader Model 2.0 蒙皮骨骼动画的骨骼限制 2010-04-28 09:03 Davy.xu
在D3D下无论如何都是float4对齐吧  回复  更多评论
  

# re: [原创]提高Shader Model 2.0 蒙皮骨骼动画的骨骼限制 2010-04-28 11:37 Bill Hsu
@陈昱(CY)
好像float3也会被转换成float4  回复  更多评论
  

# re: [原创]提高Shader Model 2.0 蒙皮骨骼动画的骨骼限制 2010-04-28 15:29 Davy.xu
GLSL里默认不是float4  回复  更多评论
  

# re: [原创]提高Shader Model 2.0 蒙皮骨骼动画的骨骼限制 2010-05-14 16:26 Bill Hsu
我试了楼主的代码,
会报错:error X4004: Program too complex..
为什么呢?


我的完整shader:

// Matrix applied to the skinned position in main (the name suggests a combined
// world-view-projection matrix - confirm on the application side).
matrix WVPMatrix;
// Per-bone rotation, passed as the quaternion argument of BuildFromTransRot.
float4 quat[100];
// Per-bone translation, passed as the translation argument of BuildFromTransRot.
float3 tran[100];

// Per-vertex input of the skinning vertex shader `main`.
struct VS_INPUT
{
// Position to be skinned (three components only).
float3 Position : POSITION;
// Vertex normal; not read by main.
float3 Normal : NORMAL0;
// Texture coordinate, copied unchanged to the output.
float2 TexCoord0 : TEXCOORD0;
// Blend weights of the four influencing bones.
float4 Weights:TEXCOORD1;
// Indices of the four influencing bones (used to index quat[] / tran[]).
float4 Indices:TEXCOORD2;
};


// Output of the skinning vertex shader `main`.
struct VS_OUTPUT
{
// Skinned position after multiplication by WVPMatrix.
float4 Pos : POSITION;
// Vertex colour; main never writes it, so it keeps its zero initial value.
float3 Diffuse : COLOR;
// Texture coordinate copied from the input.
float2 Tex0 : TEXCOORD0;
};

// Rebuilds a bone's 4x4 transform from its translation and its rotation
// quaternion. Rows 0-2 hold the rotation terms, row 3 holds the translation.
float4x4 BuildFromTransRot( float3 translation, float4 rot )
{
    // Doubled products of quaternion components.
    float xx = rot.x * rot.x * 2.0f;
    float yy = rot.y * rot.y * 2.0f;
    float zz = rot.z * rot.z * 2.0f;
    float xy = rot.x * rot.y * 2.0f;
    float xz = rot.x * rot.z * 2.0f;
    float yz = rot.y * rot.z * 2.0f;
    float xw = rot.x * rot.w * 2.0f;
    float yw = rot.y * rot.w * 2.0f;
    float zw = rot.z * rot.w * 2.0f;

    float4x4 boneMatrix = {
        { 1.0f - yy - zz, xy + zw,        xz - yw,        0 },
        { xy - zw,        1.0f - xx - zz, yz + xw,        0 },
        { xz + yw,        yz - xw,        1.0f - xx - yy, 0 },
        { translation.x,  translation.y,  translation.z,  1 }
    };

    return boneMatrix;
}



// Fetches the translation and rotation stored for bone `index` and expands
// them into that bone's 4x4 matrix via BuildFromTransRot.
float4x4 GetBoneElement( float index )
{
    float3 boneTranslation = tran[index];
    float4 boneRotation    = quat[index];

    return BuildFromTransRot( boneTranslation, boneRotation );
}


// Vertex shader: rebuilds the four influencing bone matrices from quat[] and
// tran[], blends them by the per-vertex weights, skins the position with the
// blend and then transforms it by WVPMatrix. The texture coordinate is passed
// through. input.Normal is not used and Out.Diffuse keeps its zero value.
VS_OUTPUT main(VS_INPUT input)
{
    // Zero-initialise so output fields not written below stay zero.
    VS_OUTPUT Out = (VS_OUTPUT)0;

    // Weighted sum of the four reconstructed bone matrices.
    float4x4 skinTransform = 0;
    skinTransform += GetBoneElement(input.Indices.x) * input.Weights.x;
    skinTransform += GetBoneElement(input.Indices.y) * input.Weights.y;
    skinTransform += GetBoneElement(input.Indices.z) * input.Weights.z;
    skinTransform += GetBoneElement(input.Indices.w) * input.Weights.w;

    // input.Position is a float3. Multiplying it directly by the float4x4
    // cannot reach row 3 of the matrix, so the bone translation would be lost.
    // Promote it to a homogeneous point (w = 1) so the translation row of
    // skinTransform - and later of WVPMatrix - is applied.
    float4 localpos = mul( float4( input.Position, 1.0f ), skinTransform );

    Out.Pos = mul( localpos, WVPMatrix );
    Out.Tex0 = input.TexCoord0;

    return Out;
}
  回复  更多评论
  


只有注册用户登录后才能发表评论。
【推荐】超50万行VC++源码: 大型组态工控、电力仿真CAD与GIS源码库
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理