// Source texture that both pixel shaders in this file average over (read through TheSampler).
texture TheTexture : register(t0);
// Sampler for TheTexture. Only the texture binding is specified here; no filter or
// addressing states are set in this file.
sampler TheSampler : register(s0) = sampler_state
{
	Texture = <TheTexture>;
};

// Mask texture read by BranchingPixelShaderFunction: its red channel decides whether
// the 5x5 average is computed (red == 0) or the shader returns 0.
texture PseudoStencilTexture : register(t1);
// Sampler for PseudoStencilTexture. Only the texture binding is specified here.
sampler PseudoStencilSampler : register(s1) = sampler_state
{
	Texture = <PseudoStencilTexture>;
};

// Spacing, in texture coordinates, between neighbouring taps of the 5x5 kernel used by
// both pixel shaders: each tap is offset by an integer in [-2, 2] per axis, times SCALE.
// The taps are deliberately spread over widely different spots to increase texture
// cache thrashing.
#define SCALE float2(1.0 / 128, 1.0 / 72)

// Averages a 5x5 grid of taps from TheSampler centred on texCoords.
//   texCoords : interpolated texture coordinate of the pixel being shaded.
//   returns   : the mean of the 25 taps in rgb, with alpha forced to 0.5.
// Neighbouring taps are SCALE apart in texture-coordinate space, and every tap is
// fetched with tex2Dlod using an explicit LOD of 0.
float4 PixelShaderFunction(in float2 texCoords : TEXCOORD0) : COLOR0
{
	float4 total = 0;

	// Visit the integer offsets -2..2 on both axes, x in the outer loop.
	for (int col = -2; col <= 2; col++)
	{
		for (int row = -2; row <= 2; row++)
		{
			float2 tapCoords = texCoords + (float2(col, row) * SCALE);
			total += tex2Dlod(TheSampler, float4(tapCoords, 0, 0));
		}
	}

	// 25 taps were summed; the averaged alpha is discarded in favour of a constant.
	return float4(total.rgb / 25, 0.5);
}

// Masked variant of the 5x5 averaging shader.
//   texCoords : interpolated texture coordinate of the pixel being shaded.
//   returns   : where the red channel of PseudoStencilSampler at texCoords is exactly 0,
//               the mean of 25 taps from TheSampler in rgb with alpha 0.5;
//               everywhere else, float4(0, 0, 0, 0).
// The [branch] attribute asks the compiler for a real dynamic branch around the 25
// texture fetches instead of evaluating both sides.
float4 BranchingPixelShaderFunction(in float2 texCoords : TEXCOORD0) : COLOR0
{
	// Result for masked-out pixels; overwritten below when the mask lets the pixel through.
	float4 result = 0;

	float stencilRed = tex2D(PseudoStencilSampler, texCoords).r;

	[branch]
	if (stencilRed == 0.0)
	{
		float4 total = 0;

		// Visit the integer offsets -2..2 on both axes, x in the outer loop.
		for (int col = -2; col <= 2; col++)
		{
			for (int row = -2; row <= 2; row++)
			{
				float2 tapCoords = texCoords + (float2(col, row) * SCALE);
				// Explicit LOD 0 fetch for every tap.
				total += tex2Dlod(TheSampler, float4(tapCoords, 0, 0));
			}
		}

		// 25 taps were summed; alpha is replaced by a constant.
		result = float4(total.rgb / 25, 0.5);
	}

	return result;
}

// Per-axis factor that VertexShaderFunction multiplies into position.xy.
// NOTE(review): presumably (1 / viewport width, 1 / viewport height), supplied by the
// application — confirm against the code that sets this parameter.
float2 InverseViewPort;
// Remaps the incoming vertex position in place:
//   x' = position.x * InverseViewPort.x * 2 - 1
//   y' = 1 - position.y * InverseViewPort.y * 2
// position.zw and texCoords pass through unchanged.
// NOTE(review): this looks like a pixel-space (top-left origin, y down) to clip-space
// conversion, assuming InverseViewPort holds 1 / viewport size — confirm with the caller.
void VertexShaderFunction(inout float4 position : POSITION0, inout float2 texCoords : TEXCOORD0)
{
	float2 scaled = position.xy * InverseViewPort * 2;
	// Shift x so that 0 maps to -1; flip y so that 0 maps to +1.
	position.xy = float2(scaled.x - 1, 1 - scaled.y);
}

// Unconditional path: every pixel runs PixelShaderFunction and takes all 25 taps.
technique Standard
{
    pass Pass1
    {
		// Both stages are compiled for shader model 3.0.
		VertexShader = compile vs_3_0 VertexShaderFunction();
        PixelShader = compile ps_3_0 PixelShaderFunction();
    }
}

// Masked path: same vertex stage as Standard, but the pixel stage only takes the
// 25 taps where the PseudoStencilTexture red channel is 0 and returns 0 elsewhere.
technique Branching
{
    pass Pass1
    {
		// Both stages are compiled for shader model 3.0.
		VertexShader = compile vs_3_0 VertexShaderFunction();
        PixelShader = compile ps_3_0 BranchingPixelShaderFunction();
    }
}
