-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc][math] Optimize copysign{,f,f16} and fabs{,f,f16} with builtins when available #99037
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc][math] Optimize copysign{,f,f16} and fabs{,f,f16} with builtins when available #99037
Conversation
…able Remove support for __builtin_frexpf16 as it decreases performance.
@llvm/pr-subscribers-libc Author: OverMighty (overmighty) Changes: Patch is 26.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/99037.diff 21 Files Affected:
diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a6d793d495c45..2bab968f901eb 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -5,6 +5,7 @@
set(
ALL_COMPILER_FEATURES
"builtin_ceil_floor_rint_trunc"
+ "builtin_fmax_fmin"
"builtin_round"
"builtin_roundeven"
"float16"
@@ -82,6 +83,8 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
+ elseif(${feature} STREQUAL "builtin_fmax_fmin")
+ set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
elseif(${feature} STREQUAL "builtin_round")
set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 253da4ae890e5..ead578f95ac72 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -6,6 +6,7 @@ function(_get_compile_options_from_flags output_var)
endif()
check_flag(ADD_ROUND_OPT_FLAG ${ROUND_OPT_FLAG} ${ARGN})
check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${ARGN})
+ check_flag(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG ${MISC_MATH_BASIC_OPS_OPT_FLAG} ${ARGN})
if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
if(ADD_FMA_FLAG)
@@ -37,6 +38,12 @@ function(_get_compile_options_from_flags output_var)
if(ADD_EXPLICIT_SIMD_OPT_FLAG)
list(APPEND compile_options "-D__LIBC_EXPLICIT_SIMD_OPT")
endif()
+ if(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG)
+ list(APPEND compile_options "-D__LIBC_MISC_MATH_BASIC_OPS_OPT")
+ if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
+ list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
+ endif()
+ endif()
elseif(MSVC)
if(ADD_FMA_FLAG)
list(APPEND compile_options "/arch:AVX2")
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index eca7ba8d183e6..4398fe55db5aa 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -263,6 +263,9 @@ set(FMA_OPT_FLAG "FMA_OPT")
set(ROUND_OPT_FLAG "ROUND_OPT")
# This flag controls whether we use explicit SIMD instructions or not.
set(EXPLICIT_SIMD_OPT_FLAG "EXPLICIT_SIMD_OPT")
+# This flag controls whether we use compiler builtin functions to implement
+# various basic math operations or not.
+set(MISC_MATH_BASIC_OPS_OPT_FLAG "MISC_MATH_BASIC_OPS_OPT")
# Skip FMA_OPT flag for targets that don't support fma.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
new file mode 100644
index 0000000000000..a962df33e31c4
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
@@ -0,0 +1,14 @@
+_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
+ return __builtin_fmaxf16(x, y);
+}
+_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
+ return __builtin_fminf16(x, y);
+}
+
+float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
+float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }
+
+double try_builtin_fmax(double x, double y) { return __builtin_fmax(x, y); }
+double try_builtin_fmin(double x, double y) { return __builtin_fmin(x, y); }
+
+extern "C" void _start() {}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c2f58fb1a4f71..3d713368251f6 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -400,6 +400,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -412,6 +414,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -437,6 +441,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -1406,6 +1412,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -1418,6 +1426,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -1443,6 +1453,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2202,6 +2214,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2214,6 +2228,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2239,6 +2255,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2450,6 +2468,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2462,6 +2482,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2487,6 +2509,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/copysign.cpp b/libc/src/math/generic/copysign.cpp
index 149d725af08e2..186bb2c5983f4 100644
--- a/libc/src/math/generic/copysign.cpp
+++ b/libc/src/math/generic/copysign.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(double, copysign, (double x, double y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_copysign(x, y);
+#else
return fputil::copysign(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/copysignf.cpp b/libc/src/math/generic/copysignf.cpp
index 17cd70d37c308..c79e50b61ebda 100644
--- a/libc/src/math/generic/copysignf.cpp
+++ b/libc/src/math/generic/copysignf.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float, copysignf, (float x, float y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_copysignf(x, y);
+#else
return fputil::copysign(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/copysignf16.cpp b/libc/src/math/generic/copysignf16.cpp
index 42695b3b4a6de..546622f049ebe 100644
--- a/libc/src/math/generic/copysignf16.cpp
+++ b/libc/src/math/generic/copysignf16.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, copysignf16, (float16 x, float16 y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_copysignf16(x, y);
+#else
return fputil::copysign(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabs.cpp b/libc/src/math/generic/fabs.cpp
index 472297aecb2f7..55fa958cd7c00 100644
--- a/libc/src/math/generic/fabs.cpp
+++ b/libc/src/math/generic/fabs.cpp
@@ -13,6 +13,12 @@
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(double, fabs, (double x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(double, fabs, (double x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_fabs(x);
+#else
+ return fputil::abs(x);
+#endif
+}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsf.cpp b/libc/src/math/generic/fabsf.cpp
index ad4fcb30c795d..2ba18d09bbd5b 100644
--- a/libc/src/math/generic/fabsf.cpp
+++ b/libc/src/math/generic/fabsf.cpp
@@ -13,6 +13,12 @@
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(float, fabsf, (float x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(float, fabsf, (float x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_fabsf(x);
+#else
+ return fputil::abs(x);
+#endif
+}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsf16.cpp b/libc/src/math/generic/fabsf16.cpp
index 57671fb6067e2..2f982517614c4 100644
--- a/libc/src/math/generic/fabsf16.cpp
+++ b/libc/src/math/generic/fabsf16.cpp
@@ -13,6 +13,12 @@
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_fabsf16(x);
+#else
+ return fputil::abs(x);
+#endif
+}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_num.cpp b/libc/src/math/generic/fmaximum_num.cpp
index 33df7daa380df..1bfc1514393ee 100644
--- a/libc/src/math/generic/fmaximum_num.cpp
+++ b/libc/src/math/generic/fmaximum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_num function----------------------------===//
+//===-- Implementation of fmaximum_num function ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(double, fmaximum_num, (double x, double y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fmax(x, y);
+#else
return fputil::fmaximum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf.cpp b/libc/src/math/generic/fmaximum_numf.cpp
index 1577080ba2c25..f8c69fa78be3d 100644
--- a/libc/src/math/generic/fmaximum_numf.cpp
+++ b/libc/src/math/generic/fmaximum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_numf function---------------------------===//
+//===-- Implementation of fmaximum_numf function --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float, fmaximum_numf, (float x, float y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fmaxf(x, y);
+#else
return fputil::fmaximum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
index 394ce8b5fe4f3..6a012d38abea4 100644
--- a/libc/src/math/generic/fmaximum_numf16.cpp
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fmaxf16(x, y);
+#else
return fputil::fmaximum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_num.cpp b/libc/src/math/generic/fminimum_num.cpp
index 3ff79def58075..5b9c426ca50c2 100644
--- a/libc/src/math/generic/fminimum_num.cpp
+++ b/libc/src/math/generic/fminimum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_num function----------------------------===//
+//===-- Implementation of fminimum_num function ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(double, fminimum_num, (double x, double y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fmin(x, y);
+#else
return fputil::fminimum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf.cpp b/libc/src/math/generic/fminimum_numf.cpp
index c7ac99b14bd5a..6b6f905e63de3 100644
--- a/libc/src/math/generic/fminimum_numf.cpp
+++ b/libc/src/math/generic/fminimum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_numf function---------------------------===//
+//===-- Implementation of fminimum_numf function --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float, fminimum_numf, (float x, float y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fminf(x, y);
+#else
return fputil::fminimum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
index 0af7205713c10..8e48aaf27070f 100644
--- a/libc/src/math/generic/fminimum_numf16.cpp
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fminf16(x, y);
+#else
return fputil::fminimum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 995e41ba84b03..1ab0afbc9cbe8 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -10,6 +10,7 @@
#include "src/__support/macros/config.h"
#include "test/src/math/performance_testing/Timer.h"
+#include <algorithm>
#include <cstddef>
#include <fstream>
@@ -28,11 +29,11 @@ template <typename T> class BinaryOpSingleOutputPerf {
static void run_perf_in_range(Func myFunc, Func otherFunc,
StorageType startingBit, StorageType endingBit,
size_t N, size_t rounds, std::ofstream &log) {
- if (endingBit - startingBit < N)
- N = endingBit - startingBit;
+ if (sizeof(StorageType) <= sizeof(size_t))
+ N = std::min(N, static_cast<size_t>(endingBit - startingBit));
auto runner = [=](Func func) {
- volatile T result;
+ [[maybe_unused]] volatile T result;
if (endingBit < startingBit) {
return;
}
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index a75becba04d07..a4059c8ff4dd8 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -402,3 +402,23 @@ add_perf_binary(
LINK_LIBRARIES
LibcFPTestHelpers
)
+
+add_perf_binary(
+ misc_basic_ops_perf
+ SRCS
+ misc_basic_ops_perf.cpp
+ DEPENDS
+ .binary_op_single_output_diff
+ .single_input_single_output_diff
+ libc.src.math.copysignf
+ libc.src.math.copysignf16
+ libc.src.math.fabsf
+ libc.src.math.fabsf16
+ libc.src.math.fmaximum_numf
+ libc.src.math.fmaximum_numf16
+ libc.src.math.fminimum_numf
+ libc.src.math.fminimum_numf16
+ libc.src.math.frexpf16
+ COMPILE_OPTIONS
+ -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
index 48ae43d6315e3..e0beb729cb9f5 100644
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
@@ -10,6 +10,7 @@
#include "src/__support/macros/config.h"
#include "test/src/math/performance_testing/Timer.h"
+#include <algorithm>
#include <fstream>
namespace LIBC_NAMESPACE_DECL {
@@ -26,16 +27,21 @@ template <typename T> class SingleInputSingleOutputPerf {
static void runPerfInRange(Func myFunc, Func otherFunc,
StorageType startingBit, StorageType endingBit,
- std::ofstream &log) {
+ size_t rounds, std::ofstream &log) {
+ size_t n = 10'010'001;
+ if (sizeof(StorageType) <= sizeof(size_t))
+ n = std::min(n, static_cast<size_t>(endingBit - startingBit));
+
auto runner = [=](Func func) {
- constexpr StorageType N = 10'010'001;
- StorageType step = (endingBit - startingBit) / N;
+ StorageType step = (endingBit - startingBit) / n;
if (step == 0)
step = 1;
- volatile T result;
- for (StorageType bits = startingBit; bits < endingBit; bits += step) {
- T x = FPBits(bits).get_val();
- result = func(x);
+ [[maybe_unused]] volatile T result;
+ for (size_t i = 0; i < rounds; i++) {
+ for (StorageType bits = startingBit; bits < endingBit; bits += step) {
+ T x = FPBits(bits).get_val();
+ result = func(x);
+ }
}
};
@@ -44,8 +50,7 @@ template <typename T> class SingleInputSingleOutputPerf {
runner(myFunc);
timer.stop();
- StorageType numberOfRuns = endingBit - startingBit + 1;
- double myAverage = static_cast<double>(timer.nanoseconds()) / numberOfRuns;
+ double myAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
log << "-- My function --\n";
log << " Total time : " << timer.nanoseconds() << " ns \n";
log << " Average runtime : " << myAverage << " ns/op \n";
@@ -56,8 +61,7 @@ template <typename T> class SingleInputSingleOutputPerf {
runner(otherFunc);
timer.stop();
- double otherAverage =
- static_cast<double>(timer.nanoseconds()) / numberOfRuns;
+ double otherAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
log << "-- Other function --\n";
log << " Total time : " << timer.nanoseconds() << " ns \n";
log << " Average runtime : " << otherAverage << " ns/op \n";
@@ -68,15 +72,18 @@ template <typename T> class SingleInputSingleOutputPerf {
log << " Mine / Other's : " << myAverage / otherAverage << " \n";
}
- static void runPerf(Func myFunc, Func otherFunc, const char *logFile) {
+ static void runPerf(Func myFunc, Func otherFunc, size_t rounds,
+ const char *logFile) {
std::ofstream log(logFile);
log << " Performance tests with inputs in denormal range:\n";
runPerfInRange(myFunc, otherFunc, /* startingBit= */ StorageType(0),
- /* endingBit= */ FPBits::max_subnormal().uintval(), log);
+ /* endingBit= */ FPBits::max_subnormal().uintval(), rounds,
+ log);
log << "\n Performance tests with inputs in normal range:\n";
runPerfInRange(myFunc, otherFunc,
/* startingBit= */ FPBits::min_normal().uintval(),
- /* endingBit= */ FPBits::max_normal().uintval(), log);
+ /* endingBit= */ FPBits::max_normal().uintval(), rounds,
+ log);
}
};
@@ -86,6 +93,13 @@ template <typename T> class SingleInputSingleOutputPerf {
#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename) \
int main() { \
LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf( \
- &myFunc, &otherFunc, filename); \
+ &myFunc, &otherFunc, 1, filename); \
return 0; \
}
+
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds, \
+ filename) \
+ { \
+ LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf( \
+ &myFunc, &otherFunc, rounds, filename); \
+ }
diff --git a/libc/test/src/m...
[truncated]
|
Before:
After:
|
libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
Outdated
Show resolved
Hide resolved
Just FYI, if we get this working in general we can probably remove a lot of GPU implementations that are just |
…able Split checks for __builtin_{fmax,fmin}f16 into separate feature.
For some reason, on x86,
>>> f"{2141192192:>032b}"
'01111111101000000000000000000000'
>>>
Code generated by Clang without [option name missing from this capture]:
0000000000004dc0 <__llvm_libc_19_0_0_git::fmaximum_numf(float, float)>:
4dc0: 55 push rbp
4dc1: 48 89 e5 mov rbp, rsp
4dc4: 0f 28 d0 movaps xmm2, xmm0
4dc7: f3 0f c2 d0 03 cmpunordss xmm2, xmm0
4dcc: 0f 28 da movaps xmm3, xmm2
4dcf: 0f 54 d9 andps xmm3, xmm1
4dd2: f3 0f 5f c8 maxss xmm1, xmm0
4dd6: 0f 55 d1 andnps xmm2, xmm1
4dd9: 0f 56 d3 orps xmm2, xmm3
4ddc: 0f 28 c2 movaps xmm0, xmm2
4ddf: 5d pop rbp
4de0: c3 ret
Code generated by Clang with [option name missing from this capture]:
0000000000004d50 <__llvm_libc_19_0_0_git::fmaximum_numf(float, float)>:
4d50: 55 push rbp
4d51: 48 89 e5 mov rbp, rsp
4d54: c5 f2 5f d0 vmaxss xmm2, xmm1, xmm0
4d58: c5 fa c2 c0 03 vcmpunordss xmm0, xmm0, xmm0
4d5d: c4 e3 69 4a c1 00 vblendvps xmm0, xmm2, xmm1, xmm0
4d63: 5d pop rbp
4d64: c3 ret |
The problem is that |
Let's skip the builtin / hardware instructions for these functions. |
…able Revert "Split checks for __builtin_{fmax,fmin}f16 into separate feature."
…able Remove support for __builtin_{fmax,fmin}* due to incorrect results on x86-64 and AArch64.
…able Remove benchmark for frexpf16.
…able Disable __builtin_fabsf16 on GCC for x86.
libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
Outdated
Show resolved
Hide resolved
libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with a few nits.
… when available (#99037) Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251098
No description provided.