View Single Post
Old 20th December 2014, 09:43   #83  |  Link
madshi
Registered Developer
 
Join Date: Sep 2006
Posts: 9,140
Quote:
Originally Posted by Orf View Post
Can you please share the PS and DirectCompute versions of nnedi3?
The DirectCompute kernel is shipping with madVR. The PS version for upscaling in X direction is here:

Code:
    // HLSL pixel-shader source, stored as adjacent string literals, for the
    // NNEDI3 pass that upscales in the X direction.
    //
    // Outline of what the shader text below does:
    //   1. Reads a 4 (x) by 8 (y) neighbourhood around Tex from SourceSampler,
    //      reduces each sample to component 0 of rgbToHd * sample, and stores
    //      the 32 values in input[] (x-major: 8 y-samples per x column).
    //   2. Computes mean (mstd0), standard deviation (mstd1) and its reciprocal
    //      (mstd2) of those 32 values.
    //   3. For each of `nns` neurons, forms two 32-term dot products with
    //      weights fetched from WeightSampler and combines them into a
    //      weighted prediction.
    //   4. Returns mean + 5 * (vsum / wsum) * stddev, rescaled to the 16-235
    //      range.
    //
    // NOTE(review): `nns`, `packedSumWeights1Array` and `packedSumWeights2Array`
    // are not declared in this snippet; presumably they are #defined in text
    // prepended to this source before compilation -- confirm against the caller.

    // --- Resources and constants -------------------------------------------
    // s0 = source image, s2 = neuron weight texture, c0.xy = size of one source
    // pixel in texture coordinates.
    "sampler SourceSampler : register(s0);\n"
    "sampler WeightSampler : register(s2);\n"
    "float4 floatConsts1 : register(c0);\n"
    "#define pixSizeX (floatConsts1[0])\n"
    "#define pixSizeY (floatConsts1[1])\n"
    // Per-neuron additive terms (used as biases for the two dot products).
    "static float1 SumWeights1[nns] = (float1[nns]) packedSumWeights1Array;\n"
    "static float1 SumWeights2[nns] = (float1[nns]) packedSumWeights2Array;\n"
    // Colour matrix; only its first row (0.2126, 0.7152, 0.0722 -- the BT.709
    // luma weights) affects the result, because only sample[0] is used below.
    "static float4x4 rgbToHd = {+0.2126000000000000, +0.7152000000000000, +0.0722000000000000, 0,\n"
    "                           -0.1145721060573400, -0.3854278939426600, +0.5000000000000000, 0,\n"
    "                           +0.5000000000000000, -0.4541529083058166, -0.0458470916941834, 0, 0, 0, 0, 0};\n"
    "\n"
    "float4 main(float2 Tex : TEXCOORD0) : COLOR0\n"
    "{\n"
    "  float input[32];\n"
    "  float mstd0, mstd1, mstd2;\n"
    // --- Stage 1: gather the 4x8 window and its statistics ------------------
    "  {\n"
    "    float sum = 0;\n"
    "    float sumsq = 0;\n"
    "    int index = 0;\n"
    // Window spans x offsets -1..+2 and y offsets -3..+4 (in source pixels).
    "    float xpos = Tex.x - 1.0 * pixSizeX;\n"
    "    for (int ix = 0; ix < 4; ix++)\n"
    "    {\n"
    "      float ypos = Tex.y - 3.0 * pixSizeY;\n"
    "      for (int iy = 0; iy < 8; iy++)\n"
    "      {\n"
    "        float4 sample = tex2Dlod(SourceSampler, float4(xpos, ypos, 0, 0));\n"
    "        sample = (sample - 16.0f / 255.0f) / (219.0f / 255.0f);\n"   // d3d9Float8 16-235 -> 0-255
    // The line above yields a normalised value; the multiply by 255.0 on the
    // next line is what brings it to the 0-255 scale.
    "	       sample = mul(rgbToHd, sample) * 255.0;\n"
    "        ypos += pixSizeY;\n"
    "        input[index++] = sample[0];\n"
    "        sum += sample[0];\n"
    "        sumsq += sample[0] * sample[0];\n"
    "      }\n"
    "      xpos += pixSizeX;\n"
    "    }\n"
    // mstd0 = mean, mstd1 = variance then standard deviation, mstd2 = 1/stddev.
    // A variance at or below 1.19209290e-07 (single-precision epsilon) is
    // treated as zero, which also forces mstd2 to 0 and avoids a division by 0.
    "    mstd0 = sum / 32.0;\n"
    "    mstd1 = sumsq / 32.0 - mstd0 * mstd0;\n"
    "    mstd1 = (mstd1 <= 1.19209290e-07) ? 0.0 : sqrt(mstd1);\n"
    "    mstd2 = (mstd1 > 0) ? (1.0 / mstd1) : 0.0;\n"
    "  }\n"
    // --- Stage 2: evaluate the nns neurons -----------------------------------
    "  float vsum = 0;\n"
    "  float wsum = 0;\n"
    "  {\n"
    // Coordinates start at half a texel and advance by one texel, so the weight
    // texture is addressed as 16 texels wide by nns texels high (one row per
    // neuron) -- layout inferred from the stepping below; confirm against the
    // code that uploads the texture.
    "    float ypos = 0.5 / nns;\n"
    "    for (int i1 = 0; i1 < nns; i1++)\n"
    "    {\n"
    "      float xpos = 0.5 / 16.0;\n"
    "      float sum1 = 0;\n"
    "      float sum2 = 0;\n"
    "      int index = 0;\n"
    "      for (int i2 = 0; i2 < 16; i2++)\n"
    "      {\n"
    // NOTE(review): tex1Dlod is given a coordinate whose y component (ypos)
    // selects the neuron row; a 1D lookup is not documented to honour y. This
    // looks like it may be intended to be tex2Dlod -- confirm before relying
    // on it.
    "        float4 weights = tex1Dlod(WeightSampler, float4(xpos, ypos, 0, 0));\n"
    "        xpos += 1.0 / 16.0;\n"
    // Each texel packs weights for two consecutive inputs: components 0/1 are
    // the sum1/sum2 weights of the first input, 2/3 those of the second.
    "        float sample = input[index++];\n"
    "        sum1 += sample * weights[0];\n"
    "        sum2 += sample * weights[1];\n"
    "        sample = input[index++];\n"
    "        sum1 += sample * weights[2];\n"
    "        sum2 += sample * weights[3];\n"
    "      }\n"
    "      ypos += 1.0 / nns;\n"
    // Dot products are scaled by 1/stddev and offset by the per-neuron terms.
    "      float temp1 = sum1 * mstd2 + SumWeights1[i1];\n"
    "      float temp2 = sum2 * mstd2 + SumWeights2[i1];\n"
    // temp1 becomes a positive weight (exponent clamped to +/-80 to keep exp()
    // finite); temp2 is squashed into (-1, 1) via t / (1 + |t|).
    "      temp1 = exp(clamp(temp1, -80.0, +80.0));\n"
    "      vsum += temp1 * (temp2 / (1.0 + abs(temp2)));\n"
    "      wsum += temp1;\n"
    "    }\n"
    "  }\n"
    // --- Stage 3: combine and convert back to the output range --------------
    // Weighted average of the squashed values, scaled by 5 and by the window's
    // standard deviation, added to the window mean; falls back to the mean
    // alone when wsum is not above 1e-10.
    "  float result = (mstd0 + ((wsum > 1e-10) ? (((5.0 * vsum) / wsum) * mstd1) : 0.0)) / 255.0;\n"
    "  return result * (219.0f / 255.0f) + 16.0f / 255.0f;\n"   // d3d9Float8 0-255 -> 16-235
    "}";
This kernel needs the NNEDI3 weight "database" uploaded to the "WeightSampler" texture and the "SumWeights1/2" constants supplied in the right order, though, and unfortunately I don't have that information in a form that is easy to share.
madshi is offline   Reply With Quote