//-------------------------------------------------------------------------------------------------
// Matrox10bitUnpackBaseOnly.fx
//
// Copyright (c) LWKS Software Ltd.  All Rights Reserved
//-------------------------------------------------------------------------------------------------

// Source surface read by ps_main through SourceImageSampler. Per the notes in ps_main it is
// expected to be an A8R8G8B8 surface wrapping Matrox-packed 10-bit YCrCb 4:2:2 data.
texture gSourceImage; // The source image

// Sampler used by ps_main to fetch one packed DWORD (one texel) from gSourceImage.
// All filters are POINT: ps_main addresses an exact texel centre and the texel holds packed
// component bytes, so the fetched value must not be blended with neighbouring texels.
sampler SourceImageSampler = 
sampler_state
{
   Texture = <gSourceImage>;
   MipFilter = POINT;
   MinFilter = POINT;
   MagFilter = POINT; // Other filter names kept from the original source for reference: GAUSSIANQUAD, LINEAR
   AddressU  = ClampToEdge;   // Clamp coordinates that fall outside the surface horizontally
   AddressV  = ClampToEdge;   // ...and vertically
};

// Width of the source surface in DWORDs (one DWORD per texel). ps_main divides the computed
// DWORD offset by this value to obtain a normalised texture x-coordinate.
// NOTE(review): "Total" presumably means the width counts both base and extras DWORDs - confirm with the caller.
float pSourceImageTotalWidthInDWORDs;

float4 ps_main( float2 uv1 : TEXCOORD1 ) : COLOR0
{
   // Base-only unpack of Matrox 10-bit YCrCb 4:2:2 data.
   //
   // Input : the source surface must be A8R8G8B8, wrapping the Matrox-format data, so each texel
   //         carries one DWORD (four 8-bit component values).
   // Output: the rendertarget is expected to be X16R16G16B16. The result is 16-bit YCrCb422 data
   //         with values normalised to the range 0.0->1.0.
   //
   // Only the base block is read here; the extras DWORDs are skipped over, never sampled.
   //
   // Matrox 10 bit YCrCb 4:2:2 packing:
   //
   //               |------------------------------------------ BASE BLOCK --------------------||---------------- EXTRAS BLOCK ----------------|
   // DWORD offset  |----------- 0 ---------||---------- 1 ---------|.. |---------- 7 ---------||---------- 8 ---------||---------- 9 ---------|
   // Byte Offset   0      1     2     3     4     5     6     7     .. 28    29    30    31    32    33    34    35    36    37    38    39
   // Value         Y0     Cb0   Y1    Cr0   Y2    Cb2   Y3    Cr2   .. Y14   Cb14  Y15   Cr14  Packed high-order 2 bits of preceding pixels, order TBC
   //
   // uv1: y = output row in 0.0->1.0 format. x = output column in 0 - ( SourceImageWidth - 1 ) format.
   // NOTE(review): the original note named TEXCOORD0, but the parameter is bound to TEXCOORD1 - confirm.

   const int   kComponentsPerBlock  = 32;                                  // 8-bit values in one base block
   const int   kBaseDWORDsPerBlock  = kComponentsPerBlock / 4;             // 8
   const int   kExtraDWORDsPerBlock = kComponentsPerBlock / 16;            // 2

   const float kBaseStride   = (float)kBaseDWORDsPerBlock;                            // 8.0
   const float kPackedStride = (float)( kBaseDWORDsPerBlock + kExtraDWORDsPerBlock ); // 10.0

   // Split the output column into a block number and a DWORD position inside that block.
   float blockIndex    = floor( uv1.x / kBaseStride );
   float offsetInBlock = fmod( uv1.x, kBaseStride );

   // Blocks are kPackedStride DWORDs apart in the source because every base block is followed
   // by its extras DWORDs; this is the source DWORD holding the four 8-bit values we want.
   float srcDWORD = floor( ( blockIndex * kPackedStride ) + offsetInBlock );

   // Address the centre of that texel (+0.5) in normalised coordinates; the row is passed through.
   float2 srcUV = float2( ( srcDWORD + 0.5 ) / pSourceImageTotalWidthInDWORDs, uv1.y );

   float4 texel = tex2D( SourceImageSampler, srcUV );

   // Rescale from 8-bit to 16-bit normalisation: x255 recovers the byte value, x256 moves it
   // into the upper byte of a 16-bit word, /65535 normalises back to 0..1.
   return texel * 255.0f * 256.0f / 65535.0f;
   // The original author noted approximately 16 instruction slots used (1 texture, 15 arithmetic)
};

// Single technique with a single pass that runs ps_main as the pixel shader.
// PROFILE is not defined in this file - presumably the host build supplies the shader profile; confirm.
technique T1 { pass P1 { PixelShader = compile PROFILE ps_main(); } }
