On one of my draw calls, I got an extremely large value, as follows, on an HD5450 with PerfStudio 2.1.
VSVerticesIn:41263
VertexMemFetched: 136032768
That is about 3 KB per vertex (136032768 / 41263 ≈ 3297 bytes).
Are other kinds of traffic included in this counter?
Does the value always appear high like this, or is it just occasionally? We've found that having Aero mode enabled in Windows can cause inconsistencies in some of the counters.
Generally, this counter is based on the number of fetch requests received for data that is stored in a vertex-related format. There is a possibility that an inefficient index buffer could cause a single vertex to be fetched multiple times. I'm not entirely sure whether accessing vertex attributes or performing texture fetches in the vertex shader will contribute to this value. I will look into this.
We have ongoing research to identify better counters related to index buffer quality / vertex cache reuse and this counter may be adjusted in the process.
It varies draw call by draw call. For the frame I studied, the counter values look reasonable for most draw calls except this one. Aero mode was disabled during the profiling. The primitive count for that draw call was about 33973, so there shouldn't be much index buffer access. This is one frame from 3DMark Vantage GT1 at the Performance setting.
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.19.949.2111
//
//
// Buffer Definitions:
//
// cbuffer view_changes_every_frame
// {
//
// row_major float4x4 world_to_view_clip_matrix;// Offset: 0 Size: 64
// row_major float4x4 view_to_world_matrix;// Offset: 64 Size: 64 [unused]
// float3 camera_in_world; // Offset: 128 Size: 12 [unused]
// row_major float4x4 frustum_planes; // Offset: 144 Size: 64 [unused]
// row_major float4x4 inverse_projection_matrix;// Offset: 208 Size: 64 [unused]
//
// }
//
// cbuffer xsi_uniforms
// {
//
// float wave_intensity; // Offset: 0 Size: 4
// float foam_height_effect; // Offset: 4 Size: 4
// float foam_saturate_low_threshold; // Offset: 8 Size: 4
// float foam_saturate_high_threshold;// Offset: 12 Size: 4
// float3 water_diffuse_color; // Offset: 16 Size: 12 [unused]
// float3 specular_color; // Offset: 32 Size: 12 [unused]
// float specular_exponent; // Offset: 44 Size: 4 [unused]
// float specular_intensity; // Offset: 48 Size: 4 [unused]
// float foam_tiling_factor_x; // Offset: 52 Size: 4 [unused]
// float foam_tiling_factor_y; // Offset: 56 Size: 4 [unused]
// float foam_visibility_low_threshold;// Offset: 60 Size: 4 [unused]
// float foam_visibility_high_threshold;// Offset: 64 Size: 4 [unused]
// float reflection_displacement; // Offset: 68 Size: 4 [unused]
// float foam_distort_factor; // Offset: 72 Size: 4 [unused]
// float refraction_map_intensity; // Offset: 76 Size: 4 [unused]
// float water_diffuse_scattered; // Offset: 80 Size: 4 [unused]
//
// }
//
// cbuffer changes_every_call
// {
//
// row_major float4x4 object_to_world_matrix;// Offset: 0 Size: 64
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// height_texture texture float4 2d 0 1
// view_changes_every_frame cbuffer NA NA 0 1
// xsi_uniforms cbuffer NA NA 1 1
// changes_every_call cbuffer NA NA 2 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------ ------
// SV_Position 0 xyzw 0 NONE float xyzw
// TEXCOORD 0 xyz 1 NONE float xyz
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------ ------
// SV_Position 0 xyzw 0 POS float xyzw
// POSITION 1 xyz 1 NONE float xyz
// TEXCOORD 0 xyzw 2 NONE float xyz
//
// Vertex shader (Shader Model 4.0): displaces the input position along Y
// using one texel read from height_texture (t0), then transforms it by
// object_to_world_matrix (cb2) and world_to_view_clip_matrix (cb0).
//
// Register/slot mapping, per the Resource Bindings and Buffer Definitions
// listed in the compiler header:
//   cb0 = view_changes_every_frame   cb0[0..3] = world_to_view_clip_matrix
//   cb1 = xsi_uniforms               cb1[0].x  = wave_intensity
//                                    cb1[0].y  = foam_height_effect
//                                    cb1[0].z  = foam_saturate_low_threshold
//                                    cb1[0].w  = foam_saturate_high_threshold
//   cb2 = changes_every_call         cb2[0..3] = object_to_world_matrix
//   t0  = height_texture
//   v0  = SV_Position input, v1 = TEXCOORD0 input
vs_4_0
dcl_input v0.xyzw
dcl_input v1.xyz
dcl_output_siv o0.xyzw, position
dcl_output o1.xyz
dcl_output o2.xyz
dcl_constantbuffer cb0[4], immediateIndexed
dcl_constantbuffer cb1[1], immediateIndexed
dcl_constantbuffer cb2[4], immediateIndexed
dcl_resource_texture2d (float,float,float,float) t0
dcl_temps 2
// r0.x = 1 / (foam_saturate_high_threshold - foam_saturate_low_threshold)
add r0.x, -cb1[0].z, cb1[0].w
div r0.x, l(1.000000, 1.000000, 1.000000, 1.000000), r0.x
// Query the mip 0 dimensions of t0 and scale TEXCOORD0.xy by them to get
// texel coordinates, then truncate to integers for the load below.
resinfo r1.xyzw, l(0), t0.xyzw
mul r0.yz, r1.xxyx, v1.xxyx
ftoi r1.xy, r0.yzyy
// Remaining address components are zeroed, so the load reads mip level 0.
mov r1.zw, l(0,0,0,0)
// Unfiltered texel load: r1 = height_texture[r1.xy]. Only r1.x and r1.y
// are consumed afterwards.
ld r1.xyzw, r1.xyzw, t0.xyzw
// t = saturate((texel.y - low_threshold) / (high_threshold - low_threshold))
add r0.y, r1.y, -cb1[0].z
mul_sat r0.x, r0.x, r0.y
// r0.x = t*t*(3 - 2*t), i.e. the smoothstep polynomial applied to t
mad r0.y, r0.x, l(-2.000000), l(3.000000)
mul r0.x, r0.x, r0.x
mul r0.x, r0.y, r0.x
// r0.x = texel.y * smoothstep * foam_height_effect
mul r0.x, r1.y, r0.x
mul r0.x, r0.x, cb1[0].y
// r0.x = texel.x * wave_intensity + (foam term above): total Y offset
mad r0.x, r1.x, cb1[0].x, r0.x
// Build the displaced object-space position in r0: only Y is offset,
// X/Z/W are copied from the input position unchanged.
add r0.y, r0.x, v0.y
mov r0.xzw, v0.xxzw
// r1 = displaced position multiplied by object_to_world_matrix (cb2),
// one dp4 per matrix row; the w component is computed first.
dp4 r1.w, cb2[3].xyzw, r0.xyzw
dp4 r1.x, cb2[0].xyzw, r0.xyzw
dp4 r1.y, cb2[1].xyzw, r0.xyzw
dp4 r1.z, cb2[2].xyzw, r0.xyzw
// o0 (SV_Position) = r1 multiplied by world_to_view_clip_matrix (cb0)
dp4 o0.x, cb0[0].xyzw, r1.xyzw
dp4 o0.y, cb0[1].xyzw, r1.xyzw
dp4 o0.z, cb0[2].xyzw, r1.xyzw
dp4 o0.w, cb0[3].xyzw, r1.xyzw
// o1 (POSITION1) = xyz of the object_to_world-transformed position
mov o1.xyz, r1.xyzx
// o2 (TEXCOORD0) = input TEXCOORD0.xyz passed through unchanged
mov o2.xyz, v1.xyzx
ret
// Approximately 28 instruction slots used
API | DrawIndexed |
Draw_Call | 445 |
CBMemRead | 0 |
CBMemWritten | 3941952 |
ClippedPrims | 0 |
CulledPrims | 23527 |
DepthStencilTestBusy | 0.448353 |
GPUBusy | 99.99967 |
GPUTime | 31.43322 |
GSALUBusy | 0 |
GSALUEfficiency | 0 |
GSALUInstCount | 0 |
GSALUTexRatio | 0 |
GSExportPct | 0 |
GSPrimsIn | 0 |
GSTexBusy | 0 |
GSTexInstCount | 0 |
GSVerticesOut | 0 |
HiZReject | 20.19346 |
HiZTrivialAccept | 0 |
PAStalledOnRasterizer | 98.16735 |
PSALUBusy | 72.58314 |
PSALUEfficiency | 62.37532 |
PSALUInstCount | 406.9916 |
PSALUTexRatio | 6.154698 |
PSExportStalls | 0 |
PSPixelsIn | 374372 |
PSPixelsOut | 374372 |
PSTexBusy | 47.17455 |
PSTexInstCount | 66.12528 |
Pct128SlowTexels | 0 |
Pct64SlowTexels | 3.772305 |
PctCompressedTexels | 3.715805 |
PctDepthTexels | 31.16592 |
PctInterlacedTexels | 0 |
PctTex1D | 27.811 |
PctTex1DArray | 15.52234 |
PctTex2D | 0 |
PctTex2DArray | 56.57877 |
PctTex2DMSAA | 0 |
PctTex2DMSAAArray | 0 |
PctTex3D | 0 |
PctTexCube | 0 |
PctTexCubeArray | 0 |
PctUncompressedTexels | 37.33273 |
PctVertex128SlowTexels | 0 |
PctVertex64SlowTexels | 0 |
PctVertexTexels | 27.56327 |
PostZSamplesFailingS | 0 |
PostZSamplesFailingZ | 0 |
PostZSamplesPassing | 0 |
PreZSamplesFailingS | 0 |
PreZSamplesFailingZ | 37709 |
PreZSamplesPassing | 366785 |
PrimitiveAssemblyBusy | 99.47789 |
PrimitivesIn | 33973 |
ShaderBusy | 99.39033 |
ShaderBusyGS | 0 |
ShaderBusyPS | 99.44472 |
ShaderBusyVS | 0.555278 |
TexAveAnisotropy | 0.973451 |
TexCacheStalled | 0.457929 |
TexCostOfFiltering | 110.9878 |
TexMemBytesRead | 25280000 |
TexMissRate | 0.20409 |
TexTriFilteringPct | 2.574058 |
TexUnitBusy | 38.02385 |
TexVolFilteringPct | 0 |
TexelFetchCount | 61786368 |
VSALUBusy | 0.264587 |
VSALUEfficiency | 53.33333 |
VSALUInstCount | 21 |
VSALUTexRatio | 10.5 |
VSTexBusy | 0.001008 |
VSTexInstCount | 2 |
VSVerticesIn | 41263 |
VertexMemFetched | 1.36E+08 |
VertexMemFetchedCost | 20.78471 |
ZUnitStalled | 0 |