I have a problem with array indexing on GPU in Compute Shader and I'm stuck with it for weeks.
I'm trying to use the x value of SV_DispatchThreadID as the index of my particle array (as it is shown in some example on the web).
It is working... But threadID variable (in main function) is always returning 0,3,6,9,12,15... not 0,1,2,3,4,...
My dispatch call on CPU side is: Dispatch(64, 1, 1);
I've tried many configuration of dispatching (32,16,1), (128,1,1),... with many configuration of numtheards (1,1,1), (32,32,1), (16,16,1)... but always the same result... threadID is NEVER well ordered.
How can I get ordered index? :(... always getting index like 0,3,6,9,...
Any suggestion?
Many Thanks.
Here is my CS kernel and my C# source:
#pragma kernel CSMain
float dt;
float time;
float pi;
uint maxParticles = 1024;
float maxAge;
struct Particle
{
int index;
float3 position;
float3 velocity;
float size;
float age;
float normAge;
int type;
};
RWStructuredBuffer <Particle> particles;
[numthreads( 1, 1, 1 )]
void CSMain ( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
uint index = DTid.x;
if (index < maxParticles)
{
Particle p = particles[index];
p.position.y = p.index; //just check if the index is correct by giving a Y position
particles[index] = p;
}
}
C# code to create the ComputeBuffer and other stuff:
using UnityEngine;
using System.Collections;
using System.Runtime.InteropServices;
public class SimpleEmitter : MonoBehaviour
{
struct Particle
{
public int index;
public Vector3 position;
public Vector3 velocity;
public float size;
public float age;
public float normAge;
public int type;
}
public ComputeShader computeShader;
public Material material;
public int maxParticles = 1000000;
public float maxAge = 3.0f;
public float particleSize = 0.5f;
private ComputeBuffer particles;
private int particleSizeOf;
void Start ()
{
Particle p = new Particle();
particleSizeOf = Marshal.SizeOf(p);
Particle[] pInitBuffer = new Particle[maxParticles];
for (int i = 0; i < maxParticles; i++)
{
p = new Particle();
p.index = i;
p.type = 0;
p.age = 0;
p.normAge = 0.1f;
p.size = particleSize * 0.5f + Random.value * particleSize;
p.velocity = new Vector3(0, 0, 0);
pInitBuffer[i] = p;
}
particles = new ComputeBuffer(maxParticles, particleSizeOf, ComputeBufferType.Default);
particles.SetData(pInitBuffer);
computeShader.SetBuffer(0, "particles", particles);
}
void Update()
{
computeShader.SetFloat("dt", Time.deltaTime);
computeShader.SetFloat("time", Time.time);
computeShader.SetFloat("pi", Mathf.PI);
computeShader.SetInt("maxParticles", maxParticles);
computeShader.SetFloat("maxAge", maxAge);
computeShader.Dispatch(0, 64, 1, 1);
}
public void OnPostRender()
{
material.SetPass(0);
material.SetFloat("maxAge", maxAge);
material.SetBuffer("particles", particles);
Graphics.DrawProcedural(MeshTopology.Triangles, maxParticles, 0);
}
void OnDisable()
{
particles.Release();
}
}
Here the Vertex, Geom and Pixel Shader :
Shader "Custom/SimpleRS"
{
Properties
{
_ParticleTexture ("Diffuse Tex", 2D) = "white" {}
_Ramp1Texture ("G_Ramp1", 2D) = "white" {}
}
SubShader
{
Pass
{
Tags { "Queue"="Transparent" "IgnoreProjector"="True" "RenderType"="Transparent" }
Blend OneMinusDstColor One
Cull Off
Lighting Off
ZWrite Off
Fog { Color (0,0,0,0) }
CGPROGRAM
#pragma target 5.0
#pragma vertex VSMAIN
#pragma fragment PSMAIN
#pragma geometry GSMAIN
#include "UnityCG.cginc"
struct Particle
{
int index;
float3 position;
float3 velocity;
float size;
float age;
float normAge;
int type;
};
StructuredBuffer<Particle> particles;
Texture2D _ParticleTexture;
SamplerState sampler_ParticleTexture;
Texture2D _Ramp1Texture;
SamplerState sampler_Ramp1Texture;
float maxAge;
float maxRad;
struct VS_INPUT
{
uint vertexid : SV_VertexID;
};
//--------------------------------------------------------------------------------
struct GS_INPUT
{
float4 position : SV_POSITION;
float size : TEXCOORD0;
float age : TEXCOORD1;
float type : TEXCOORD2;
};
//--------------------------------------------------------------------------------
struct PS_INPUT
{
float4 position : SV_POSITION;
float2 texcoords : TEXCOORD0;
float size : TEXCOORD1;
float age : TEXCOORD2;
float type : TEXCOORD3;
};
//--------------------------------------------------------------------------------
GS_INPUT VSMAIN( in VS_INPUT input )
{
GS_INPUT output;
output.position.xyz = particles[input.vertexid].position;
output.position.w = 1.0;
output.age = particles[input.vertexid].normAge;
output.size = particles[input.vertexid].size;
output.type = particles[input.vertexid].type;
return output;
}
//--------------------------------------------------------------------------------
[maxvertexcount(4)]
void GSMAIN( point GS_INPUT p[1], inout TriangleStream<PS_INPUT> triStream )
{
float4 pos = mul(UNITY_MATRIX_MVP, p[0].position);
float halfS = p[0].size * 0.5f;
float4 offset = mul(UNITY_MATRIX_P, float4(halfS, halfS, 0, 1));
float4 v[4];
v[0] = pos + float4(offset.x, offset.y, 0, 1);
v[1] = pos + float4(offset.x, -offset.y, 0, 1);
v[2] = pos + float4(-offset.x, offset.y, 0, 1);
v[3] = pos + float4(-offset.x, -offset.y, 0, 1);
PS_INPUT pIn;
pIn.position = v[0];
pIn.texcoords = float2(1.0f, 0.0f);
pIn.size = p[0].size;
pIn.age = p[0].age;
pIn.type = p[0].type;
triStream.Append(pIn);
pIn.position = v[1];
pIn.texcoords = float2(1.0f, 1.0f);
triStream.Append(pIn);
pIn.position = v[2];
pIn.texcoords = float2(0.0f, 0.0f);
triStream.Append(pIn);
pIn.position = v[3];
pIn.texcoords = float2(0.0f, 1.0f);
triStream.Append(pIn);
}
//--------------------------------------------------------------------------------
float4 PSMAIN( in PS_INPUT input ) : COLOR
{
float4 color = _ParticleTexture.Sample( sampler_ParticleTexture, input.texcoords );
float4 tint = _Ramp1Texture.Sample(sampler_Ramp1Texture, float2(min(1.0, input.age),0));
color *= tint;
if (input.age == 0) discard;
return color;
}
//--------------------------------------------------------------------------------
ENDCG
}
}
}
You should be drawing with MeshTopology.Points not Triangles.
Since the geometry shader is passed only a point, that's your topology, the GS then handles expanding this out into triangles as per your shader code.
This explains why only every 3rd particle is showing up. They're being passed to the GS in groups of 3 as per your requested topology, you're then discarding the 2nd and 3rd input transparently and the next vertex the GS sees is Vertex 3, then 6, then 9 etc.