Skip to content

Commit f305490

Browse files
committed
branchless float saturation
1 parent 8aed39f commit f305490

File tree

1 file changed

+26
-12
lines changed

1 file changed

+26
-12
lines changed

src/external/rlsw.h

+26-12
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,27 @@ static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_ma
444444
}
445445
}
446446

447+
static inline float sw_saturate(float x)
{
    // Clamp x to the range [0, 1].
    //
    // This uses plain comparisons instead of the arithmetic identities
    //     max(0, x) = 0.5 * (x + |x|)
    //     min(1, y) = y - 0.5 * ((y - 1) + |y - 1|)
    // because those identities are only exact while (y - 1.0f) is
    // representable as a float. From roughly 2^25 upward (y - 1.0f)
    // rounds back to y and the result collapses to 0 instead of 1;
    // near FLT_MAX (x + |x|) overflows to infinity and the result
    // becomes NaN.
    //
    // NOTE: Whether this form compiles to branch-free min/max
    // instructions depends on the compiler and target;
    // comparisons will need to be made per platform.

    // The outer test is written as (x > 0) so that NaN takes the
    // "else" path and yields 0: callers convert the result to an
    // integer type, and that conversion is undefined for NaN.
    return (x > 0.0f) ? ((x < 1.0f) ? x : 1.0f) : 0.0f;
}
467+
447468
static inline float sw_lerp(float a, float b, float t)
448469
{
449470
return a + t * (b - a);
@@ -700,8 +721,7 @@ static inline void sw_map_repeat(int* out, float in, int max)
700721

701722
static inline void sw_map_clamp_to_edge(int* out, float in, int max)
{
    // Saturate the input coordinate to [0, 1], scale it by (max - 1),
    // and add 0.5 so that the truncating int conversion rounds to the
    // nearest index for the non-negative values produced here.
    const float texel = sw_saturate(in) * (max - 1) + 0.5f;

    *out = (int)texel;
}
706726

707727
static inline void sw_map_mirrored_repeat(int* out, float in, int max)
@@ -991,23 +1011,17 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start,
9911011
\
9921012
/* Interpolate the color and modulate by the texture color */ \
9931013
for (int i = 0; i < 4; i++) { \
994-
float lerp = start->color[i] + t * dcol[i]; \
995-
float finalColor = texColor[i] * lerp; \
996-
/* Inline clamp to keep the value between 0 and 1 */ \
997-
/* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \
998-
finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); \
999-
dst[i] = (uint8_t)(finalColor * 255.0f); \
1014+
float finalColor = texColor[i]; \
1015+
finalColor *= start->color[i] + t * dcol[i]; \
1016+
dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \
10001017
} \
10011018
} \
10021019
else \
10031020
{ \
10041021
/* Interpolate the color */ \
10051022
for (int i = 0; i < 4; i++) { \
10061023
float finalColor = start->color[i] + t * dcol[i]; \
1007-
/* Inline clamp to keep the value between 0 and 1 */ \
1008-
/* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \
1009-
finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); \
1010-
dst[i] = (uint8_t)(finalColor * 255.0f); \
1024+
dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \
10111025
} \
10121026
} \
10131027
\

0 commit comments

Comments
 (0)