# HG changeset patch
# User Michael Pavone
# Date 1714371757 25200
# Node ID d437b8e8ba626b00a3eb4a533728ea0b815fa4ad
# Parent b62580dc6f30e025319bd29de4409570d21b6d2d
Add xBRZ and bandlimit pixel footprint shaders ported by hunterk

diff -r b62580dc6f30 -r d437b8e8ba62 shaders/bandlimit_pixel.f.glsl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/shaders/bandlimit_pixel.f.glsl Sun Apr 28 23:22:37 2024 -0700
@@ -0,0 +1,149 @@
+/*
+ * Bandlimited pixel footprint shader.
+ * Author: Themaister
+ * License: MIT
+ * Adapted from: https://github.com/Themaister/Granite/blob/master/assets/shaders/inc/bandlimited_pixel_filter.h
+ * ported to blastem shader format by hunterk
+ */
+
+// GLSL ES only (GL_ES is predefined there, so desktop GLSL skips this block):
+//  - fwidth(), used by compute_pixel_weights(), is provided by the
+//    OES_standard_derivatives extension in GLSL ES 1.00 and must be enabled.
+//  - fragment shaders have no default float precision, and several locals and
+//    parameters in this file are declared without a qualifier. highp matches
+//    the qualifier already used for texsize/texcoord below.
+// NOTE(review): harmless if the host already injects equivalent lines; confirm
+// against the shader loader.
+#ifdef GL_ES
+#extension GL_OES_standard_derivatives : enable
+precision highp float;
+#endif
+
+// Scales the filter footprint in compute_pixel_weights(): the footprint is
+// multiplied by (SMOOTHNESS + 0.5).
+// sensible values between 0.0 and 5.0
+#define SMOOTHNESS 0.5
+
+// textures[0] is the image sampled by main(); texsize is its size in texels
+// (main() uses 1.0 / texsize as the size of one texel in UV units).
+uniform sampler2D textures[2];
+uniform highp vec2 texsize;
+
+varying highp vec2 texcoord;
+
+// The cosine filter convolved with rect has a support of 0.5 + d pixels.
+// We can sample 4x4 regions, so we can deal with 2.0 pixel range in our filter,
+// and the maximum extent value we can have is 1.5.
+const highp float maximum_support_extent = 1.5;
+
+// Sampling plan produced by compute_pixel_weights():
+//   uv0..uv3 - texture coordinates of up to four bilinear taps
+//   weights  - weight applied to each tap (x -> uv0, y -> uv1, z -> uv2, w -> uv3)
+//   l        - blend factor between the plain sample (0.0) and the weighted
+//              taps (1.0), consumed by sample_bandlimited_pixel()
+struct BandlimitedPixelInfo
+{
+    highp vec2 uv0;
+    highp vec2 uv1;
+    highp vec2 uv2;
+    highp vec2 uv3;
+    mediump vec4 weights;
+    mediump float l;
+};
+
+// Our Taylor approximation is not exact, normalize so the peak is 1.
+// taylor_pi_half is the value of the polynomial used by taylor_sin()
+// (p - p^3/6 + p^5/120) at p = PI / 2.
+const highp float taylor_pi_half = 1.00452485553;
+const highp float taylor_normalization = 1.0 / taylor_pi_half;
+const highp float PI = 3.14159265359;
+const highp float PI_half = 0.5 * PI;
+
+// Defines taylor_sin(T p): a 5th-order Taylor approximation of sin(p), scaled
+// by taylor_normalization and clamped to [-1, 1]. Every caller in this file
+// passes arguments already limited to [-PI_half, PI_half].
+#define gen_taylor(T) \
+mediump T taylor_sin(mediump T p) \
+{ \
+    mediump T p2 = p * p; \
+    mediump T p3 = p * p2; \
+    mediump T p5 = p2 * p3; \
+    return clamp(taylor_normalization * (p - p3 * (1.0 / 6.0) + p5 * (1.0 / 120.0)), -1.0, 1.0); \
+}
+// No templates in GLSL. Stamp out macros.
+gen_taylor(float)
+gen_taylor(vec2)
+gen_taylor(vec3)
+gen_taylor(vec4)
+
+// Given weights, compute a bilinear filter which implements the weight.
+// All weights are known to be non-negative, and separable.
+//
+// weights_u / weights_v hold the two horizontal and the two vertical weights
+// of one 2x2 texel group. Returns vec3(x, y, w):
+//   x, y - fractional position inside the group at which one bilinear fetch
+//          reproduces the relative weights (second weight / pair sum)
+//   w    - total weight of the group, (u.x + u.y) * (v.x + v.y)
+mediump vec3 compute_uv_phase_weight(mediump vec2 weights_u, mediump vec2 weights_v)
+{
+    // The sum of a bilinear sample has combined weight of 1, we will need to adjust the resulting sample
+    // to match our actual weight sum.
+    mediump float w = dot(weights_u.xyxy, weights_v.xxyy);
+    // max(..., 0.001) keeps the divisions finite when a weight pair sums to zero.
+    mediump float x = weights_u.y / max(weights_u.x + weights_u.y, 0.001);
+    mediump float y = weights_v.y / max(weights_v.x + weights_v.y, 0.001);
+    return vec3(x, y, w);
+}
+
+// Builds the sampling plan for one fragment.
+//   uv       - texture coordinate being shaded
+//   size     - texture size in texels
+//   inv_size - 1.0 / size
+// Depending on the screen-space footprint of a texel (extent) the result is
+// one of three plans:
+//   extent > maximum_support_extent on any axis : no taps, l = 0 (plain sample only)
+//   extent <= 0.5 on both axes                  : a single tap with weight 1, l = 1
+//   otherwise                                   : four taps covering a 4x4 texel area
+BandlimitedPixelInfo compute_pixel_weights(vec2 uv, vec2 size, vec2 inv_size)
+{
+    // Get derivatives in texel space.
+    // Need a non-zero derivative.
+    vec2 extent = max(fwidth(uv) * size * (SMOOTHNESS + 0.5), 1.0 / 256.0);
+
+    // Get base pixel and phase, range [0, 1).
+    vec2 pixel = uv * size - 0.5;
+    vec2 base_pixel = floor(pixel);
+    vec2 phase = pixel - base_pixel;
+
+    BandlimitedPixelInfo info;
+
+    mediump vec2 inv_extent = 1.0 / extent;
+    if (any(greaterThan(extent, vec2(maximum_support_extent))))
+    {
+        // We need to just do regular minimization filtering.
+        info = BandlimitedPixelInfo(vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0),
+                                    vec4(0.0, 0.0, 0.0, 0.0), 0.0);
+    }
+    else if (all(lessThanEqual(extent, vec2(0.5))))
+    {
+        // We can resolve the filter by just sampling a single 2x2 block.
+        mediump vec2 shift = 0.5 + 0.5 * taylor_sin(PI_half * clamp(inv_extent * (phase - 0.5), -1.0, 1.0));
+        info = BandlimitedPixelInfo((base_pixel + 0.5 + shift) * inv_size, vec2(0.0), vec2(0.0), vec2(0.0),
+                                    vec4(1.0, 0.0, 0.0, 0.0), 1.0);
+    }
+    else
+    {
+        // Full 4x4 sampling.
+
+        // Fade between bandlimited and normal sampling.
+        // Fully use bandlimited filter at LOD 0, normal filtering at approx. LOD -0.5.
+        mediump float max_extent = max(extent.x, extent.y);
+        mediump float l = clamp(1.0 - (max_extent - 1.0) / (maximum_support_extent - 1.0), 0.0, 1.0);
+
+        // Filter response evaluated at the five texel edges of the 4-texel
+        // span on each axis (four here, the last one in sines_end).
+        mediump vec4 sine_phases_x = PI_half * clamp(inv_extent.x * (phase.x + vec4(1.5, 0.5, -0.5, -1.5)), -1.0, 1.0);
+        mediump vec4 sines_x = taylor_sin(sine_phases_x);
+
+        mediump vec4 sine_phases_y = PI_half * clamp(inv_extent.y * (phase.y + vec4(1.5, 0.5, -0.5, -1.5)), -1.0, 1.0);
+        mediump vec4 sines_y = taylor_sin(sine_phases_y);
+
+        mediump vec2 sine_phases_end = PI_half * clamp(inv_extent * (phase - 2.5), -1.0, 1.0);
+        mediump vec2 sines_end = taylor_sin(sine_phases_end);
+
+        // Per-texel weight = half the difference between adjacent edge values.
+        mediump vec4 weights_x = 0.5 * (sines_x - vec4(sines_x.yzw, sines_end.x));
+        mediump vec4 weights_y = 0.5 * (sines_y - vec4(sines_y.yzw, sines_end.y));
+
+        // Collapse each 2x2 quadrant of the 4x4 area into one bilinear tap.
+        mediump vec3 w0 = compute_uv_phase_weight(weights_x.xy, weights_y.xy);
+        mediump vec3 w1 = compute_uv_phase_weight(weights_x.zw, weights_y.xy);
+        mediump vec3 w2 = compute_uv_phase_weight(weights_x.xy, weights_y.zw);
+        mediump vec3 w3 = compute_uv_phase_weight(weights_x.zw, weights_y.zw);
+
+        info = BandlimitedPixelInfo((base_pixel - 0.5 + w0.xy) * inv_size,
+                                    (base_pixel + vec2(1.5, -0.5) + w1.xy) * inv_size,
+                                    (base_pixel + vec2(-0.5, 1.5) + w2.xy) * inv_size,
+                                    (base_pixel + 1.5 + w3.xy) * inv_size,
+                                    vec4(w0.z, w1.z, w2.z, w3.z), l);
+    }
+
+    return info;
+}
+
+// Evaluates a sampling plan and returns the colour in decoded (pow 2.2) space.
+//   samp - texture to read
+//   uv   - coordinate of the plain (non-bandlimited) sample
+//   info - plan from compute_pixel_weights()
+//   lod  - forwarded as the optional third argument of texture2D for the
+//          weighted taps (main() passes 0.0)
+mediump vec4 sample_bandlimited_pixel(sampler2D samp, vec2 uv, BandlimitedPixelInfo info, float lod)
+{
+    // Decode the plain sample with the same pow(2.2) as the weighted taps.
+    // main() applies one output curve to whatever is returned here, and the
+    // mix() below blends the two, so both must be in the same space; leaving
+    // this sample undecoded brightened every path with info.l < 1.0.
+    mediump vec4 color = pow(texture2D(samp, uv), vec4(2.2));
+    if (info.l > 0.0)
+    {
+        mediump vec4 bandlimited = info.weights.x * pow(texture2D(samp, info.uv0, lod), vec4(2.2));
+        // weights.x is exactly 1.0 for the single-tap plan; skip the unused taps.
+        if (info.weights.x < 1.0)
+        {
+            bandlimited += info.weights.y * pow(texture2D(samp, info.uv1, lod), vec4(2.2));
+            bandlimited += info.weights.z * pow(texture2D(samp, info.uv2, lod), vec4(2.2));
+            bandlimited += info.weights.w * pow(texture2D(samp, info.uv3, lod), vec4(2.2));
+        }
+        color = mix(color, bandlimited, info.l);
+    }
+    return color;
+}
+
+// Entry point: filter textures[0] at texcoord and write an opaque colour.
+void main()
+{
+    BandlimitedPixelInfo info = compute_pixel_weights(texcoord, texsize.xy, 1.0 / texsize.xy);
+    mediump vec3 result = sample_bandlimited_pixel(textures[0], texcoord, info, 0.0).rgb;
+    // sqrt re-encodes the decoded colour (a 2.0 curve against the 2.2 used for decoding).
+    gl_FragColor = vec4(sqrt(clamp(result, 0.0, 1.0)), 1.0);
+}
\ No newline at end of file
diff -r b62580dc6f30 -r d437b8e8ba62 shaders/xBRZ.f.glsl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/shaders/xBRZ.f.glsl Sun Apr 28 23:22:37 2024 -0700
@@ -0,0 +1,297 @@
+// xBRZ freescale
+// based on :
+
+/*
+   Hyllian's xBR-vertex code and texel mapping
+
+   Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+*/
+
+// This shader also uses code and/or concepts from xBRZ as it appears
+// in the Desmume source code. The license for which is as follows:
+
+// ****************************************************************************
+// * This file is part of the HqMAME project.
+// * It is distributed under                                                  *
+// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0          *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the MAME library (or with modified *
+// * versions of MAME that use the same license as MAME), and distribute      *
+// * linked combinations including the two. You must obey the GNU General     *
+// * Public License in all respects for all of the code used other than MAME. *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+// ported to blastem shader format by hunterk
+
+// GLSL ES fragment shaders have no default float precision, and most floats in
+// this file are declared without a qualifier. GL_ES is predefined only by
+// GLSL ES, so desktop GLSL skips this. mediump matches the qualifier already
+// used for texsize/texcoord below.
+// NOTE(review): harmless if the host already injects an equivalent line;
+// confirm against the shader loader.
+#ifdef GL_ES
+precision mediump float;
+#endif
+
+// textures[0] is the image being scaled; texsize is its size in texels.
+uniform sampler2D textures[2];
+uniform mediump vec2 texsize;
+varying mediump vec2 texcoord;
+
+// Per-corner blend decision stored in blendResult by main().
+#define BLEND_NONE 0
+#define BLEND_NORMAL 1
+#define BLEND_DOMINANT 2
+// Weight of the luma term in DistYCbCr().
+#define LUMINANCE_WEIGHT 1.0
+// Colour distance below which IsPixEqual() treats two pixels as equal.
+// Parenthesised so the macro expands as a single value in any expression.
+#define EQUAL_COLOR_TOLERANCE (30.0/255.0)
+// Ratio between the two diagonal distances required to classify an edge as a
+// shallow or steep line.
+#define STEEP_DIRECTION_THRESHOLD 2.2
+// Ratio between the two gradient sums required to mark a corner BLEND_DOMINANT.
+#define DOMINANT_DIRECTION_THRESHOLD 3.6
+
+// this is usually handled automatically but blastem doesn't expose output size?
+#define scale vec2(10.0, 10.0)
+
+// Distance between two RGB colours, measured on their difference converted to
+// a luma/chroma (Y, Cb, Cr) triple. The luma weights are the ITU-R BT.2020
+// coefficients; the luma term is scaled by LUMINANCE_WEIGHT.
+mediump float DistYCbCr(vec3 pixA, vec3 pixB)
+{
+    const vec3 w = vec3(0.2627, 0.6780, 0.0593);
+    const float scaleB = 0.5 / (1.0 - w.b);
+    const float scaleR = 0.5 / (1.0 - w.r);
+    vec3 diff = pixA - pixB;
+    float Y = dot(diff.rgb, w);
+    float Cb = scaleB * (diff.b - Y);
+    float Cr = scaleR * (diff.r - Y);
+
+    return sqrt(((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr));
+}
+
+// True when the two colours are closer than EQUAL_COLOR_TOLERANCE.
+bool IsPixEqual(const vec3 pixA, const vec3 pixB)
+{
+    return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE);
+}
+
+// Blend ratio in [0, 1] for a point relative to an edge line.
+//   center    - point being shaded, relative to the texel centre
+//   origin    - a point on the edge line
+//   direction - direction of the edge line
+//   scale_    - factor applied to the perpendicular offset before measuring it
+// The signed, scaled distance of center from the line is passed through a
+// smoothstep spanning -sqrt(2)/2 .. +sqrt(2)/2.
+mediump float get_left_ratio(vec2 center, vec2 origin, vec2 direction, vec2 scale_)
+{
+    vec2 P0 = center - origin;
+    vec2 proj = direction * (dot(P0, direction) / dot(direction, direction));
+    vec2 distv = P0 - proj;
+    vec2 orth = vec2(-direction.y, direction.x);
+    float side = sign(dot(P0, orth));
+    float v = side * length(distv * scale_);
+
+//  return step(0, v);
+    return smoothstep(-sqrt(2.0)/2.0, sqrt(2.0)/2.0, v);
+}
+
+// Exact (component-wise) colour comparisons.
+#define eq(a,b)  (a == b)
+#define neq(a,b) (a != b)
+
+// Fetch the texel at integer offset (x, y) from the current texel centre.
+// Relies on a local `coord` being in scope at the point of use.
+#define P(x,y) texture2D(textures[0], coord + (1.0 / texsize) * vec2(x, y)).rgb
+
+// Entry point. Reads the 3x3 neighbourhood around the current texel, decides
+// for each of its four corners whether an edge crosses it, and blends the
+// centre colour towards a neighbour according to the fragment's position
+// relative to each detected edge line.
+void main()
+{
+    //---------------------------------------
+    // Input Pixel Mapping:  -|x|x|x|-
+    //                       x|A|B|C|x
+    //                       x|D|E|F|x
+    //                       x|G|H|I|x
+    //                       -|x|x|x|-
+
+    // pos: fragment position inside the current texel, in [-0.5, 0.5).
+    // coord: texture coordinate of the current texel's centre.
+    vec2 pos = fract(texcoord * texsize.xy) - vec2(0.5, 0.5);
+    vec2 coord = texcoord - pos * (1.0 / texsize.xy);
+
+    vec3 A = P(-1,-1);
+    vec3 B = P( 0,-1);
+    vec3 C = P( 1,-1);
+    vec3 D = P(-1, 0);
+    vec3 E = P( 0, 0);
+    vec3 F = P( 1, 0);
+    vec3 G = P(-1, 1);
+    vec3 H = P( 0, 1);
+    vec3 I = P( 1, 1);
+
+    // blendResult Mapping: x|y|
+    //                      w|z|
+    ivec4 blendResult = ivec4(BLEND_NONE,BLEND_NONE,BLEND_NONE,BLEND_NONE);
+
+    // Preprocess corners
+    // Pixel Tap Mapping: -|-|-|-|-
+    //                    -|-|B|C|-
+    //                    -|D|E|F|x
+    //                    -|G|H|I|x
+    //                    -|-|x|x|-
+    if (!((eq(E,F) && eq(H,I)) || (eq(E,H) && eq(F,I))))
+    {
+        float dist_H_F = DistYCbCr(G, E) + DistYCbCr(E, C) + DistYCbCr(P(0,2), I) + DistYCbCr(I, P(2,0)) + (4.0 * DistYCbCr(H, F));
+        float dist_E_I = DistYCbCr(D, H) + DistYCbCr(H, P(1,2)) + DistYCbCr(B, F) + DistYCbCr(F, P(2,1)) + (4.0 * DistYCbCr(E, I));
+        bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_H_F) < dist_E_I;
+        blendResult.z = ((dist_H_F < dist_E_I) && neq(E,F) && neq(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+    }
+
+
+    // Pixel Tap Mapping: -|-|-|-|-
+    //                    -|A|B|-|-
+    //                    x|D|E|F|-
+    //                    x|G|H|I|-
+    //                    -|x|x|-|-
+    if (!((eq(D,E) && eq(G,H)) || (eq(D,G) && eq(E,H))))
+    {
+        float dist_G_E = DistYCbCr(P(-2,1)  , D) + DistYCbCr(D, B) + DistYCbCr(P(-1,2), H) + DistYCbCr(H, F) + (4.0 * DistYCbCr(G, E));
+        float dist_D_H = DistYCbCr(P(-2,0)  , G) + DistYCbCr(G, P(0,2)) + DistYCbCr(A, E) + DistYCbCr(E, I) + (4.0 * DistYCbCr(D, H));
+        bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_H) < dist_G_E;
+        blendResult.w = ((dist_G_E > dist_D_H) && neq(E,D) && neq(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+    }
+
+    // Pixel Tap Mapping: -|-|x|x|-
+    //                    -|A|B|C|x
+    //                    -|D|E|F|x
+    //                    -|-|H|I|-
+    //                    -|-|-|-|-
+    if (!((eq(B,C) && eq(E,F)) || (eq(B,E) && eq(C,F))))
+    {
+        float dist_E_C = DistYCbCr(D, B) + DistYCbCr(B, P(1,-2)) + DistYCbCr(H, F) + DistYCbCr(F, P(2,-1)) + (4.0 * DistYCbCr(E, C));
+        float dist_B_F = DistYCbCr(A, E) + DistYCbCr(E, I) + DistYCbCr(P(0,-2), C) + DistYCbCr(C, P(2,0)) + (4.0 * DistYCbCr(B, F));
+        bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_B_F) < dist_E_C;
+        blendResult.y = ((dist_E_C > dist_B_F) && neq(E,B) && neq(E,F)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+    }
+
+    // Pixel Tap Mapping: -|x|x|-|-
+    //                    x|A|B|C|-
+    //                    x|D|E|F|-
+    //                    -|G|H|-|-
+    //                    -|-|-|-|-
+    if (!((eq(A,B) && eq(D,E)) || (eq(A,D) && eq(B,E))))
+    {
+        float dist_D_B = DistYCbCr(P(-2,0), A) + DistYCbCr(A, P(0,-2)) + DistYCbCr(G, E) + DistYCbCr(E, C) + (4.0 * DistYCbCr(D, B));
+        float dist_A_E = DistYCbCr(P(-2,-1), D) + DistYCbCr(D, H) + DistYCbCr(P(-1,-2), B) + DistYCbCr(B, F) + (4.0 * DistYCbCr(A, E));
+        bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_B) < dist_A_E;
+        blendResult.x = ((dist_D_B < dist_A_E) && neq(E,D) && neq(E,B)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+    }
+
+    // Start from the centre texel; each corner pass below may blend it
+    // towards one of its neighbours.
+    vec3 res = E;
+
+    // Pixel Tap Mapping: -|-|-|-|-
+    //                    -|-|B|C|-
+    //                    -|D|E|F|x
+    //                    -|G|H|I|x
+    //                    -|-|x|x|-
+    if(blendResult.z != BLEND_NONE)
+    {
+        float dist_F_G = DistYCbCr(F, G);
+        float dist_H_C = DistYCbCr(H, C);
+        bool doLineBlend = (blendResult.z == BLEND_DOMINANT ||
+                    !((blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) || (blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) ||
+                      (IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) && IsPixEqual(F, C) && !IsPixEqual(E, I))));
+
+        // Default edge: a 45-degree line; adjusted below when a shallow or
+        // steep line is detected.
+        vec2 origin = vec2(0.0, 1.0 / sqrt(2.0));
+        vec2 direction = vec2(1.0, -1.0);
+        if(doLineBlend)
+        {
+            bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && neq(E,G) && neq(D,G);
+            bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && neq(E,C) && neq(B,C);
+            origin = haveShallowLine? vec2(0.0, 0.25) : vec2(0.0, 0.5);
+            direction.x += haveShallowLine? 1.0: 0.0;
+            direction.y -= haveSteepLine? 1.0: 0.0;
+        }
+
+        // Blend towards whichever of the two edge neighbours is closer in colour to E.
+        vec3 blendPix = mix(H,F, step(DistYCbCr(E, F), DistYCbCr(E, H)));
+        res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
+    }
+
+    // Pixel Tap Mapping: -|-|-|-|-
+    //                    -|A|B|-|-
+    //                    x|D|E|F|-
+    //                    x|G|H|I|-
+    //                    -|x|x|-|-
+    if(blendResult.w != BLEND_NONE)
+    {
+        float dist_H_A = DistYCbCr(H, A);
+        float dist_D_I = DistYCbCr(D, I);
+        bool doLineBlend = (blendResult.w == BLEND_DOMINANT ||
+                    !((blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) || (blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) ||
+                      (IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) && IsPixEqual(H, I) && !IsPixEqual(E, G))));
+
+        vec2 origin = vec2(-1.0 / sqrt(2.0), 0.0);
+        vec2 direction = vec2(1.0, 1.0);
+        if(doLineBlend)
+        {
+            bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && neq(E,A) && neq(B,A);
+            bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && neq(E,I) && neq(F,I);
+            origin = haveShallowLine? vec2(-0.25, 0.0) : vec2(-0.5, 0.0);
+            direction.y += haveShallowLine? 1.0: 0.0;
+            direction.x += haveSteepLine? 1.0: 0.0;
+        }
+
+        vec3 blendPix = mix(H,D, step(DistYCbCr(E, D), DistYCbCr(E, H)));
+        res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
+    }
+
+    // Pixel Tap Mapping: -|-|x|x|-
+    //                    -|A|B|C|x
+    //                    -|D|E|F|x
+    //                    -|-|H|I|-
+    //                    -|-|-|-|-
+    if(blendResult.y != BLEND_NONE)
+    {
+        float dist_B_I = DistYCbCr(B, I);
+        float dist_F_A = DistYCbCr(F, A);
+        bool doLineBlend = (blendResult.y == BLEND_DOMINANT ||
+                    !((blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) || (blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) ||
+                      (IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) && IsPixEqual(B, A) && !IsPixEqual(E, C))));
+
+        vec2 origin = vec2(1.0 / sqrt(2.0), 0.0);
+        vec2 direction = vec2(-1.0, -1.0);
+
+        if(doLineBlend)
+        {
+            bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && neq(E,I) && neq(H,I);
+            bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && neq(E,A) && neq(D,A);
+            origin = haveShallowLine? vec2(0.25, 0.0) : vec2(0.5, 0.0);
+            direction.y -= haveShallowLine? 1.0: 0.0;
+            direction.x -= haveSteepLine? 1.0: 0.0;
+        }
+
+        vec3 blendPix = mix(F,B, step(DistYCbCr(E, B), DistYCbCr(E, F)));
+        res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
+    }
+
+    // Pixel Tap Mapping: -|x|x|-|-
+    //                    x|A|B|C|-
+    //                    x|D|E|F|-
+    //                    -|G|H|-|-
+    //                    -|-|-|-|-
+    if(blendResult.x != BLEND_NONE)
+    {
+        float dist_D_C = DistYCbCr(D, C);
+        float dist_B_G = DistYCbCr(B, G);
+        bool doLineBlend = (blendResult.x == BLEND_DOMINANT ||
+                    !((blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) || (blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) ||
+                      (IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) && IsPixEqual(D, G) && !IsPixEqual(E, A))));
+
+        vec2 origin = vec2(0.0, -1.0 / sqrt(2.0));
+        vec2 direction = vec2(-1.0, 1.0);
+        if(doLineBlend)
+        {
+            bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && neq(E,C) && neq(F,C);
+            bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && neq(E,G) && neq(H,G);
+            origin = haveShallowLine? vec2(0.0, -0.25) : vec2(0.0, -0.5);
+            direction.x -= haveShallowLine? 1.0: 0.0;
+            direction.y += haveSteepLine? 1.0: 0.0;
+        }
+
+        vec3 blendPix = mix(D,B, step(DistYCbCr(E, B), DistYCbCr(E, D)));
+        res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
+    }
+
+    gl_FragColor = vec4(res, 1.0);
+}
\ No newline at end of file