changeset 2495:d437b8e8ba62

Add xBRZ and bandlimit pixel footprint shaders ported by hunterk
author Michael Pavone <pavone@retrodev.com>
date Sun, 28 Apr 2024 23:22:37 -0700
parents b62580dc6f30
children 187bc857a76a
files shaders/bandlimit_pixel.f.glsl shaders/xBRZ.f.glsl
diffstat 2 files changed, 446 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shaders/bandlimit_pixel.f.glsl	Sun Apr 28 23:22:37 2024 -0700
@@ -0,0 +1,149 @@
+/*
+ * Bandlimited pixel footprint shader.
+ * Author: Themaister
+ * License: MIT
+ * Adapted from: https://github.com/Themaister/Granite/blob/master/assets/shaders/inc/bandlimited_pixel_filter.h
+ * ported to blastem shader format by hunterk
+ */
+
+// Filter width control: the pixel footprint is multiplied by (SMOOTHNESS + 0.5). Sensible values between 0.0 and 5.0.
+#define SMOOTHNESS 0.5
+ 
+uniform sampler2D textures[2]; // only textures[0] is sampled by this shader
+uniform highp vec2 texsize; // assumed to be the source size in texels (it scales UVs into texel space) -- TODO confirm against host
+
+varying highp vec2 texcoord; // UV at which this fragment samples textures[0]
+
+// The cosine filter convolved with rect has a support of 0.5 + d pixels.
+// We can sample 4x4 regions, so we can deal with 2.0 pixel range in our filter,
+// and the maximum extent value we can have is 1.5.
+const highp float maximum_support_extent = 1.5;
+
+struct BandlimitedPixelInfo // sampling plan for one fragment, filled in by compute_pixel_weights
+{
+	highp vec2 uv0; // tap position paired with weights.x (the only tap fetched when weights.x is 1.0)
+	highp vec2 uv1; // tap position paired with weights.y
+	highp vec2 uv2; // tap position paired with weights.z
+	highp vec2 uv3; // tap position paired with weights.w
+	mediump vec4 weights; // multipliers applied to the four texture fetches
+	mediump float l; // blend factor: 0.0 = plain texture sample only, 1.0 = fully bandlimited result
+};
+
+// Our Taylor approximation is not exact, normalize so the peak is 1.
+const highp float taylor_pi_half = 1.00452485553; // value of p - p^3/6 + p^5/120 at p = PI / 2
+const highp float taylor_normalization = 1.0 / taylor_pi_half; // scale applied inside taylor_sin
+const highp float PI = 3.14159265359;
+const highp float PI_half = 0.5 * PI;
+// GLSL has no templates: stamp out taylor_sin (5th-order sine series, normalized, clamped to [-1, 1]) once per type.
+#define gen_taylor(T) \
+mediump T taylor_sin(mediump T p) \
+{ \
+	mediump T squared = p * p; \
+	mediump T cubed = p * squared; \
+	mediump T fifth = squared * cubed; \
+	mediump T series = p - cubed * (1.0 / 6.0) + fifth * (1.0 / 120.0); \
+	return clamp(taylor_normalization * series, -1.0, 1.0); \
+}
+gen_taylor(float)
+gen_taylor(vec2)
+gen_taylor(vec3)
+gen_taylor(vec4)
+
+// Collapse a separable 2x2 weight set into one bilinear fetch: returns (x offset, y offset, total weight).
+// Weights are expected to be non-negative; the 0.001 floor guards the division when a pair sums to nearly zero.
+mediump vec3 compute_uv_phase_weight(mediump vec2 weights_u, mediump vec2 weights_v)
+{
+	// A bilinear fetch always has a combined weight of 1, so the real weight sum is handed back in .z.
+	mediump float total = dot(weights_u.xyxy, weights_v.xxyy);
+	mediump float sum_u = max(weights_u.x + weights_u.y, 0.001);
+	mediump float sum_v = max(weights_v.x + weights_v.y, 0.001);
+	mediump vec2 offset = vec2(weights_u.y / sum_u, weights_v.y / sum_v);
+	return vec3(offset, total);
+}
+// Builds the tap positions, tap weights and blend factor for one fragment; main() passes texsize as size and 1.0 / texsize as inv_size.
+BandlimitedPixelInfo compute_pixel_weights(vec2 uv, vec2 size, vec2 inv_size)
+{
+	// Get derivatives in texel space, widened by (SMOOTHNESS + 0.5).
+	// Need a non-zero derivative, hence the 1/256 floor: inv_extent below divides by it.
+	vec2 extent = max(fwidth(uv) * size * (SMOOTHNESS + 0.5), 1.0 / 256.0);
+
+	// Get base pixel and phase, range [0, 1).
+	vec2 pixel = uv * size - 0.5;
+	vec2 base_pixel = floor(pixel);
+	vec2 phase = pixel - base_pixel;
+
+	BandlimitedPixelInfo info;
+
+	mediump vec2 inv_extent = 1.0 / extent;
+	if (any(greaterThan(extent, vec2(maximum_support_extent))))
+	{
+		// We need to just do regular minimization filtering: l = 0.0 makes the sampler skip every weighted tap.
+		info = BandlimitedPixelInfo(vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0),
+		                            vec4(0.0, 0.0, 0.0, 0.0), 0.0);
+	}
+	else if (all(lessThanEqual(extent, vec2(0.5))))
+	{
+		// We can resolve the filter by just sampling a single 2x2 block: one bilinear tap (uv0) with weight 1.0.
+		mediump vec2 shift = 0.5 + 0.5 * taylor_sin(PI_half * clamp(inv_extent * (phase - 0.5), -1.0, 1.0));
+		info = BandlimitedPixelInfo((base_pixel + 0.5 + shift) * inv_size, vec2(0.0), vec2(0.0), vec2(0.0),
+		                            vec4(1.0, 0.0, 0.0, 0.0), 1.0);
+	}
+	else
+	{
+		// Full 4x4 sampling.
+
+		// Fade between bandlimited and normal sampling.
+		// Fully use bandlimited filter at LOD 0, normal filtering at approx. LOD -0.5.
+		mediump float max_extent = max(extent.x, extent.y);
+		mediump float l = clamp(1.0 - (max_extent - 1.0) / (maximum_support_extent - 1.0), 0.0, 1.0); // 1.0 up to extent 1.0, falling linearly to 0.0 at maximum_support_extent
+
+		mediump vec4 sine_phases_x = PI_half * clamp(inv_extent.x * (phase.x + vec4(1.5, 0.5, -0.5, -1.5)), -1.0, 1.0);
+		mediump vec4 sines_x = taylor_sin(sine_phases_x);
+
+		mediump vec4 sine_phases_y = PI_half * clamp(inv_extent.y * (phase.y + vec4(1.5, 0.5, -0.5, -1.5)), -1.0, 1.0);
+		mediump vec4 sines_y = taylor_sin(sine_phases_y);
+
+		mediump vec2 sine_phases_end = PI_half * clamp(inv_extent * (phase - 2.5), -1.0, 1.0);
+		mediump vec2 sines_end = taylor_sin(sine_phases_end);
+
+		mediump vec4 weights_x = 0.5 * (sines_x - vec4(sines_x.yzw, sines_end.x));
+		mediump vec4 weights_y = 0.5 * (sines_y - vec4(sines_y.yzw, sines_end.y));
+		// Pair the four per-axis weights into 2x2 groups; each group becomes one bilinear tap (offset in .xy, weight in .z).
+		mediump vec3 w0 = compute_uv_phase_weight(weights_x.xy, weights_y.xy);
+		mediump vec3 w1 = compute_uv_phase_weight(weights_x.zw, weights_y.xy);
+		mediump vec3 w2 = compute_uv_phase_weight(weights_x.xy, weights_y.zw);
+		mediump vec3 w3 = compute_uv_phase_weight(weights_x.zw, weights_y.zw);
+
+		info = BandlimitedPixelInfo((base_pixel - 0.5 + w0.xy) * inv_size,
+									(base_pixel + vec2(1.5, -0.5) + w1.xy) * inv_size,
+									(base_pixel + vec2(-0.5, 1.5) + w2.xy) * inv_size,
+									(base_pixel + 1.5 + w3.xy) * inv_size,
+									vec4(w0.z, w1.z, w2.z, w3.z), l);
+	}
+
+	return info;
+}
+// Returns the filtered colour in linear light: every tap is gamma-decoded with pow(x, 2.2). lod is passed as the texture2D bias argument.
+mediump vec4 sample_bandlimited_pixel(sampler2D samp, vec2 uv, BandlimitedPixelInfo info, float lod)
+{
+	mediump vec4 color = pow(texture2D(samp, uv), vec4(2.2)); // plain tap, decoded like the others so l < 1.0 never mixes encodings
+	if (info.l > 0.0)
+	{
+		mediump vec4 bandlimited = info.weights.x * pow(texture2D(samp, info.uv0, lod), vec4(2.2));
+		if (info.weights.x < 1.0) // weights.x == 1.0 is the single-tap case, where the remaining weights are 0.0
+		{
+			bandlimited += info.weights.y * pow(texture2D(samp, info.uv1, lod), vec4(2.2));
+			bandlimited += info.weights.z * pow(texture2D(samp, info.uv2, lod), vec4(2.2));
+			bandlimited += info.weights.w * pow(texture2D(samp, info.uv3, lod), vec4(2.2));
+		}
+		color = mix(color, bandlimited, info.l);
+	}
+	return color;
+}
+// Entry point: filter textures[0] at texcoord, clamp to [0, 1], re-encode with sqrt() and write an opaque fragment.
+void main()
+{
+	BandlimitedPixelInfo footprint = compute_pixel_weights(texcoord, texsize.xy, 1.0 / texsize.xy);
+	mediump vec4 filtered = sample_bandlimited_pixel(textures[0], texcoord, footprint, 0.0);
+	gl_FragColor = vec4(sqrt(clamp(filtered.rgb, 0.0, 1.0)), 1.0);
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shaders/xBRZ.f.glsl	Sun Apr 28 23:22:37 2024 -0700
@@ -0,0 +1,297 @@
+// xBRZ freescale
+// based on :
+
+/*
+   Hyllian's xBR-vertex code and texel mapping
+   
+   Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is 
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+*/ 
+
+// This shader also uses code and/or concepts from xBRZ as it appears
+// in the Desmume source code. The license for which is as follows:
+
+// ****************************************************************************
+// * This file is part of the HqMAME project. It is distributed under         *
+// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0          *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the MAME library (or with modified *
+// * versions of MAME that use the same license as MAME), and distribute      *
+// * linked combinations including the two. You must obey the GNU General     *
+// * Public License in all respects for all of the code used other than MAME. *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+// ported to blastem shader format by hunterk
+
+uniform sampler2D textures[2]; // only textures[0] is sampled by this shader
+uniform mediump vec2 texsize; // assumed to be the source size in texels (1.0 / texsize is used as a one-texel step) -- TODO confirm
+varying mediump vec2 texcoord;
+// Per-corner blend classifications stored in blendResult, followed by the xBRZ tuning constants.
+#define BLEND_NONE 0
+#define BLEND_NORMAL 1
+#define BLEND_DOMINANT 2
+#define LUMINANCE_WEIGHT 1.0
+#define EQUAL_COLOR_TOLERANCE (30.0 / 255.0)
+#define STEEP_DIRECTION_THRESHOLD 2.2
+#define DOMINANT_DIRECTION_THRESHOLD 3.6
+
+// Fixed stand-in for the output scale; this is usually handled automatically but blastem doesn't expose output size?
+#define scale vec2(10.0, 10.0)
+// Distance between two RGB colours: length of their difference expressed as (weighted Y, Cb, Cr).
+mediump float DistYCbCr(vec3 pixA, vec3 pixB)
+{
+  const vec3 lumaCoeff = vec3(0.2627, 0.6780, 0.0593); // R, G, B contributions to luma
+  const float cbScale = 0.5 / (1.0 - lumaCoeff.b);
+  const float crScale = 0.5 / (1.0 - lumaCoeff.r);
+  vec3 delta = pixA - pixB;
+  float lumaDiff = dot(delta.rgb, lumaCoeff);
+  float cbDiff = cbScale * (delta.b - lumaDiff);
+  float crDiff = crScale * (delta.r - lumaDiff);
+  float weightedLuma = LUMINANCE_WEIGHT * lumaDiff;
+  return sqrt((weightedLuma * weightedLuma) + (cbDiff * cbDiff) + (crDiff * crDiff));
+}
+// True when the two colours are closer than EQUAL_COLOR_TOLERANCE as measured by DistYCbCr.
+bool IsPixEqual(const vec3 pixA, const vec3 pixB)
+{
+  return EQUAL_COLOR_TOLERANCE > DistYCbCr(pixA, pixB);
+}
+// Soft side-of-line test for the line through origin along direction: 0.0 far on one side, 1.0 far on the other.
+mediump float get_left_ratio(vec2 center, vec2 origin, vec2 direction, vec2 scale_)
+{
+  vec2 rel = center - origin;
+  vec2 normal = vec2(-direction.y, direction.x);
+  // Part of rel perpendicular to the line, found by removing its projection onto direction.
+  vec2 along = direction * (dot(rel, direction) / dot(direction, direction));
+  vec2 across = rel - along;
+  float signedDist = sign(dot(rel, normal)) * length(across * scale_);
+  // A smooth ramp of width sqrt(2) in scaled units is used rather than a hard step at zero.
+  float halfDiag = sqrt(2.0) / 2.0;
+  return smoothstep(-halfDiag, halfDiag, signedDist);
+}
+// Exact equality helpers, plus P(x,y): the texel x columns and y rows away from E (expects a local `coord` in scope).
+#define eq(a,b)  ((a) == (b))
+#define neq(a,b) ((a) != (b))
+
+#define P(x,y) texture2D(textures[0], coord + (1.0 / texsize) * vec2(x, y)).rgb
+
+void main()
+{
+  // Pass 1 classifies each corner of the centre texel E as BLEND_NONE / BLEND_NORMAL / BLEND_DOMINANT.
+  // Pass 2 blends E toward the closer of two neighbours across a soft edge for every flagged corner.
+  //---------------------------------------
+  // Input Pixel Mapping:  -|x|x|x|-
+  //                       x|A|B|C|x
+  //                       x|D|E|F|x
+  //                       x|G|H|I|x
+  //                       -|x|x|x|-
+
+  vec2 pos = fract(texcoord * texsize.xy) - vec2(0.5, 0.5); // offset from the centre of texel E, in texels
+  vec2 coord = texcoord - pos * (1.0 / texsize.xy); // texcoord snapped to the centre of texel E
+
+  vec3 A = P(-1,-1);
+  vec3 B = P( 0,-1);
+  vec3 C = P( 1,-1);
+  vec3 D = P(-1, 0);
+  vec3 E = P( 0, 0);
+  vec3 F = P( 1, 0);
+  vec3 G = P(-1, 1);
+  vec3 H = P( 0, 1);
+  vec3 I = P( 1, 1);
+
+  // blendResult Mapping: x|y|
+  //                      w|z|
+  ivec4 blendResult = ivec4(BLEND_NONE,BLEND_NONE,BLEND_NONE,BLEND_NONE);
+
+  // Preprocess corners
+  // Pixel Tap Mapping: -|-|-|-|-
+  //                    -|-|B|C|-
+  //                    -|D|E|F|x
+  //                    -|G|H|I|x
+  //                    -|-|x|x|-
+  if (!((eq(E,F) && eq(H,I)) || (eq(E,H) && eq(F,I))))
+  {
+    float dist_H_F = DistYCbCr(G, E) + DistYCbCr(E, C) + DistYCbCr(P(0,2), I) + DistYCbCr(I, P(2,0)) + (4.0 * DistYCbCr(H, F));
+    float dist_E_I = DistYCbCr(D, H) + DistYCbCr(H, P(1,2)) + DistYCbCr(B, F) + DistYCbCr(F, P(2,1)) + (4.0 * DistYCbCr(E, I));
+    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_H_F) < dist_E_I;
+    blendResult.z = ((dist_H_F < dist_E_I) && neq(E,F) && neq(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+  }
+
+
+  // Pixel Tap Mapping: -|-|-|-|-
+  //                    -|A|B|-|-
+  //                    x|D|E|F|-
+  //                    x|G|H|I|-
+  //                    -|x|x|-|-
+  if (!((eq(D,E) && eq(G,H)) || (eq(D,G) && eq(E,H))))
+  {
+    float dist_G_E = DistYCbCr(P(-2,1)  , D) + DistYCbCr(D, B) + DistYCbCr(P(-1,2), H) + DistYCbCr(H, F) + (4.0 * DistYCbCr(G, E));
+    float dist_D_H = DistYCbCr(P(-2,0)  , G) + DistYCbCr(G, P(0,2)) + DistYCbCr(A, E) + DistYCbCr(E, I) + (4.0 * DistYCbCr(D, H));
+    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_H) < dist_G_E;
+    blendResult.w = ((dist_G_E > dist_D_H) && neq(E,D) && neq(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+  }
+
+  // Pixel Tap Mapping: -|-|x|x|-
+  //                    -|A|B|C|x
+  //                    -|D|E|F|x
+  //                    -|-|H|I|-
+  //                    -|-|-|-|-
+  if (!((eq(B,C) && eq(E,F)) || (eq(B,E) && eq(C,F))))
+  {
+    float dist_E_C = DistYCbCr(D, B) + DistYCbCr(B, P(1,-2)) + DistYCbCr(H, F) + DistYCbCr(F, P(2,-1)) + (4.0 * DistYCbCr(E, C));
+    float dist_B_F = DistYCbCr(A, E) + DistYCbCr(E, I) + DistYCbCr(P(0,-2), C) + DistYCbCr(C, P(2,0)) + (4.0 * DistYCbCr(B, F));
+    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_B_F) < dist_E_C;
+    blendResult.y = ((dist_E_C > dist_B_F) && neq(E,B) && neq(E,F)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+  }
+
+  // Pixel Tap Mapping: -|x|x|-|-
+  //                    x|A|B|C|-
+  //                    x|D|E|F|-
+  //                    -|G|H|-|-
+  //                    -|-|-|-|-
+  if (!((eq(A,B) && eq(D,E)) || (eq(A,D) && eq(B,E))))
+  {
+    float dist_D_B = DistYCbCr(P(-2,0), A) + DistYCbCr(A, P(0,-2)) + DistYCbCr(G, E) + DistYCbCr(E, C) + (4.0 * DistYCbCr(D, B));
+    float dist_A_E = DistYCbCr(P(-2,-1), D) + DistYCbCr(D, H) + DistYCbCr(P(-1,-2), B) + DistYCbCr(B, F) + (4.0 * DistYCbCr(A, E));
+    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_B) < dist_A_E;
+    blendResult.x = ((dist_D_B < dist_A_E) && neq(E,D) && neq(E,B)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+  }
+
+  vec3 res = E;
+
+  // Pixel Tap Mapping: -|-|-|-|-
+  //                    -|-|B|C|-
+  //                    -|D|E|F|x
+  //                    -|G|H|I|x
+  //                    -|-|x|x|-
+  if(blendResult.z != BLEND_NONE)
+  {
+    float dist_F_G = DistYCbCr(F, G);
+    float dist_H_C = DistYCbCr(H, C);
+    bool doLineBlend = (blendResult.z == BLEND_DOMINANT ||
+                !((blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) || (blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) ||
+                  (IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) && IsPixEqual(F, C) && !IsPixEqual(E, I))));
+
+    vec2 origin = vec2(0.0, 1.0 / sqrt(2.0));
+    vec2 direction = vec2(1.0, -1.0);
+    if(doLineBlend)
+    {
+      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && neq(E,G) && neq(D,G);
+      bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && neq(E,C) && neq(B,C);
+      origin = haveShallowLine? vec2(0.0, 0.25) : vec2(0.0, 0.5);
+      direction.x += haveShallowLine? 1.0: 0.0;
+      direction.y -= haveSteepLine? 1.0: 0.0;
+    }
+
+    vec3 blendPix = mix(H,F, step(DistYCbCr(E, F), DistYCbCr(E, H)));
+    res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
+  }
+
+  // Pixel Tap Mapping: -|-|-|-|-
+  //                    -|A|B|-|-
+  //                    x|D|E|F|-
+  //                    x|G|H|I|-
+  //                    -|x|x|-|-
+  if(blendResult.w != BLEND_NONE)
+  {
+    float dist_H_A = DistYCbCr(H, A);
+    float dist_D_I = DistYCbCr(D, I);
+    bool doLineBlend = (blendResult.w == BLEND_DOMINANT ||
+                !((blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) || (blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) ||
+                  (IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) && IsPixEqual(H, I) && !IsPixEqual(E, G))));
+
+    vec2 origin = vec2(-1.0 / sqrt(2.0), 0.0);
+    vec2 direction = vec2(1.0, 1.0);
+    if(doLineBlend)
+    {
+      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && neq(E,A) && neq(B,A);
+      bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && neq(E,I) && neq(F,I);
+      origin = haveShallowLine? vec2(-0.25, 0.0) : vec2(-0.5, 0.0);
+      direction.y += haveShallowLine? 1.0: 0.0;
+      direction.x += haveSteepLine? 1.0: 0.0;
+    }
+
+    vec3 blendPix = mix(H,D, step(DistYCbCr(E, D), DistYCbCr(E, H)));
+    res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
+  }
+
+  // Pixel Tap Mapping: -|-|x|x|-
+  //                    -|A|B|C|x
+  //                    -|D|E|F|x
+  //                    -|-|H|I|-
+  //                    -|-|-|-|-
+  if(blendResult.y != BLEND_NONE)
+  {
+    float dist_B_I = DistYCbCr(B, I);
+    float dist_F_A = DistYCbCr(F, A);
+    bool doLineBlend = (blendResult.y == BLEND_DOMINANT ||
+                !((blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) || (blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) ||
+                  (IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) && IsPixEqual(B, A) && !IsPixEqual(E, C))));
+
+    vec2 origin = vec2(1.0 / sqrt(2.0), 0.0);
+    vec2 direction = vec2(-1.0, -1.0);
+
+    if(doLineBlend)
+    {
+      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && neq(E,I) && neq(H,I);
+      bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && neq(E,A) && neq(D,A);
+      origin = haveShallowLine? vec2(0.25, 0.0) : vec2(0.5, 0.0);
+      direction.y -= haveShallowLine? 1.0: 0.0;
+      direction.x -= haveSteepLine? 1.0: 0.0;
+    }
+
+    vec3 blendPix = mix(F,B, step(DistYCbCr(E, B), DistYCbCr(E, F)));
+    res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
+  }
+
+  // Pixel Tap Mapping: -|x|x|-|-
+  //                    x|A|B|C|-
+  //                    x|D|E|F|-
+  //                    -|G|H|-|-
+  //                    -|-|-|-|-
+  if(blendResult.x != BLEND_NONE)
+  {
+    float dist_D_C = DistYCbCr(D, C);
+    float dist_B_G = DistYCbCr(B, G);
+    bool doLineBlend = (blendResult.x == BLEND_DOMINANT ||
+                !((blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) || (blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) ||
+                  (IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) && IsPixEqual(D, G) && !IsPixEqual(E, A))));
+
+    vec2 origin = vec2(0.0, -1.0 / sqrt(2.0));
+    vec2 direction = vec2(-1.0, 1.0);
+    if(doLineBlend)
+    {
+      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && neq(E,C) && neq(F,C);
+      bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && neq(E,G) && neq(H,G);
+      origin = haveShallowLine? vec2(0.0, -0.25) : vec2(0.0, -0.5);
+      direction.x -= haveShallowLine? 1.0: 0.0;
+      direction.y += haveSteepLine? 1.0: 0.0;
+    }
+
+    vec3 blendPix = mix(D,B, step(DistYCbCr(E, B), DistYCbCr(E, D)));
+    res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
+  }
+
+  gl_FragColor = vec4(res, 1.0);
+}
\ No newline at end of file