From 88091cf106b6a647d40d865522894bd2b448b9fa Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Wed, 3 Sep 2025 09:10:41 +0100 Subject: [PATCH] Use idiomatic CLZ implementation --- Source/astcenc_mathlib_softfloat.cpp | 70 ++++------------------------ 1 file changed, 10 insertions(+), 60 deletions(-) diff --git a/Source/astcenc_mathlib_softfloat.cpp b/Source/astcenc_mathlib_softfloat.cpp index 83d0dc38..9594450c 100644 --- a/Source/astcenc_mathlib_softfloat.cpp +++ b/Source/astcenc_mathlib_softfloat.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2021 Arm Limited +// Copyright 2011-2021,2025 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -30,69 +30,19 @@ typedef uint16_t sf16; typedef uint32_t sf32; /****************************************** - helper functions and their lookup tables + helper functions ******************************************/ -/* count leading zeros functions. Only used when the input is nonzero. */ - -#if defined(__GNUC__) && (defined(__i386) || defined(__amd64)) -#elif defined(__arm__) && defined(__ARMCC_VERSION) -#elif defined(__arm__) && defined(__GNUC__) -#else - /* table used for the slow default versions. 
*/ - static const uint8_t clz_table[256] = - { - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; -#endif -/* - 32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */ +/* Idiomatic count-leading-zeros loop; compiles to a native instruction on modern compilers. */ static uint32_t clz32(uint32_t inp) { - #if defined(__GNUC__) && (defined(__i386) || defined(__amd64)) - uint32_t bsr; - __asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1)); - return 31 - bsr; - #else - #if defined(__arm__) && defined(__ARMCC_VERSION) - return __clz(inp); /* armcc builtin */ - #else - #if defined(__arm__) && defined(__GNUC__) - uint32_t lz; - __asm__("clz %0, %1": "=r"(lz):"r"(inp)); - return lz; - #else - /* slow default version */ - uint32_t summa = 24; - if (inp >= UINT32_C(0x10000)) - { - inp >>= 16; - summa -= 16; - } - if (inp >= UINT32_C(0x100)) - { - inp >>= 8; - summa -= 8; - } - return summa + clz_table[inp]; - #endif - #endif - #endif + uint32_t count = 32; + while (inp) + { + inp >>= 1; + count--; + } + return count; } /* the five rounding modes that IEEE-754r defines */