diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 253da4ae890e5..c1b3ab6831dab 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -6,6 +6,7 @@ function(_get_compile_options_from_flags output_var) endif() check_flag(ADD_ROUND_OPT_FLAG ${ROUND_OPT_FLAG} ${ARGN}) check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${ARGN}) + check_flag(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG ${MISC_MATH_BASIC_OPS_OPT_FLAG} ${ARGN}) if(LLVM_COMPILER_IS_GCC_COMPATIBLE) if(ADD_FMA_FLAG) @@ -37,6 +38,9 @@ function(_get_compile_options_from_flags output_var) if(ADD_EXPLICIT_SIMD_OPT_FLAG) list(APPEND compile_options "-D__LIBC_EXPLICIT_SIMD_OPT") endif() + if(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG) + list(APPEND compile_options "-D__LIBC_MISC_MATH_BASIC_OPS_OPT") + endif() elseif(MSVC) if(ADD_FMA_FLAG) list(APPEND compile_options "/arch:AVX2") diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake index eca7ba8d183e6..4398fe55db5aa 100644 --- a/libc/cmake/modules/LLVMLibCFlagRules.cmake +++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake @@ -263,6 +263,9 @@ set(FMA_OPT_FLAG "FMA_OPT") set(ROUND_OPT_FLAG "ROUND_OPT") # This flag controls whether we use explicit SIMD instructions or not. set(EXPLICIT_SIMD_OPT_FLAG "EXPLICIT_SIMD_OPT") +# This flag controls whether we use compiler builtin functions to implement +# various basic math operations or not. +set(MISC_MATH_BASIC_OPS_OPT_FLAG "MISC_MATH_BASIC_OPS_OPT") # Skip FMA_OPT flag for targets that don't support fma. 
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index c2f58fb1a4f71..c5cce32793060 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -399,7 +399,9 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O2 + -O3 + FLAGS + MISC_MATH_BASIC_OPS_OPT ) add_entrypoint_object( @@ -411,7 +413,9 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O2 + -O3 + FLAGS + MISC_MATH_BASIC_OPS_OPT ) add_entrypoint_object( @@ -423,7 +427,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O2 + -O3 ) add_entrypoint_object( @@ -435,8 +439,12 @@ add_entrypoint_object( DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations + libc.src.__support.macros.properties.architectures + libc.src.__support.macros.properties.compiler COMPILE_OPTIONS -O3 + FLAGS + MISC_MATH_BASIC_OPS_OPT ) add_entrypoint_object( @@ -1406,6 +1414,8 @@ add_entrypoint_object( libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 + FLAGS + MISC_MATH_BASIC_OPS_OPT ) add_entrypoint_object( @@ -1418,6 +1428,8 @@ add_entrypoint_object( libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 + FLAGS + MISC_MATH_BASIC_OPS_OPT ) add_entrypoint_object( @@ -1443,6 +1455,8 @@ add_entrypoint_object( libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 + FLAGS + MISC_MATH_BASIC_OPS_OPT ) add_entrypoint_object( diff --git a/libc/src/math/generic/copysign.cpp b/libc/src/math/generic/copysign.cpp index 149d725af08e2..186bb2c5983f4 100644 --- a/libc/src/math/generic/copysign.cpp +++ b/libc/src/math/generic/copysign.cpp @@ -14,7 +14,11 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(double, copysign, (double x, double y)) { +#ifdef 
__LIBC_MISC_MATH_BASIC_OPS_OPT + return __builtin_copysign(x, y); +#else return fputil::copysign(x, y); +#endif } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/copysignf.cpp b/libc/src/math/generic/copysignf.cpp index 17cd70d37c308..c79e50b61ebda 100644 --- a/libc/src/math/generic/copysignf.cpp +++ b/libc/src/math/generic/copysignf.cpp @@ -14,7 +14,11 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(float, copysignf, (float x, float y)) { +#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT + return __builtin_copysignf(x, y); +#else return fputil::copysign(x, y); +#endif } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/copysignf16.cpp b/libc/src/math/generic/copysignf16.cpp index 42695b3b4a6de..546622f049ebe 100644 --- a/libc/src/math/generic/copysignf16.cpp +++ b/libc/src/math/generic/copysignf16.cpp @@ -14,7 +14,11 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(float16, copysignf16, (float16 x, float16 y)) { +#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT + return __builtin_copysignf16(x, y); +#else return fputil::copysign(x, y); +#endif } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fabs.cpp b/libc/src/math/generic/fabs.cpp index 472297aecb2f7..55fa958cd7c00 100644 --- a/libc/src/math/generic/fabs.cpp +++ b/libc/src/math/generic/fabs.cpp @@ -13,6 +13,12 @@ namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(double, fabs, (double x)) { return fputil::abs(x); } +LLVM_LIBC_FUNCTION(double, fabs, (double x)) { +#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT + return __builtin_fabs(x); +#else + return fputil::abs(x); +#endif +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fabsf.cpp b/libc/src/math/generic/fabsf.cpp index ad4fcb30c795d..2ba18d09bbd5b 100644 --- a/libc/src/math/generic/fabsf.cpp +++ b/libc/src/math/generic/fabsf.cpp @@ -13,6 +13,12 @@ namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float, fabsf, (float x)) { return fputil::abs(x); } +LLVM_LIBC_FUNCTION(float, fabsf, (float 
x)) { +#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT + return __builtin_fabsf(x); +#else + return fputil::abs(x); +#endif +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fabsf16.cpp b/libc/src/math/generic/fabsf16.cpp index 57671fb6067e2..02e11330db718 100644 --- a/libc/src/math/generic/fabsf16.cpp +++ b/libc/src/math/generic/fabsf16.cpp @@ -10,9 +10,20 @@ #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) { return fputil::abs(x); } +LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) { + // For x86, GCC generates better code from the generic implementation. + // https://godbolt.org/z/K9orM4hTa +#if defined(__LIBC_MISC_MATH_BASIC_OPS_OPT) && \ + !(defined(LIBC_TARGET_ARCH_IS_X86) && defined(LIBC_COMPILER_IS_GCC)) + return __builtin_fabsf16(x); +#else + return fputil::abs(x); +#endif +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h index 995e41ba84b03..63d9768e21899 100644 --- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h +++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/algorithm.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/macros/config.h" #include "test/src/math/performance_testing/Timer.h" @@ -28,11 +29,11 @@ template <typename T> class BinaryOpSingleOutputPerf { static void run_perf_in_range(Func myFunc, Func otherFunc, StorageType startingBit, StorageType endingBit, size_t N, size_t rounds, std::ofstream &log) { - if (endingBit - startingBit < 
N) - N = endingBit - startingBit; + if (sizeof(StorageType) <= sizeof(size_t)) + N = cpp::min(N, static_cast<size_t>(endingBit - startingBit)); auto runner = [=](Func func) { - volatile T result; + [[maybe_unused]] volatile T result; if (endingBit < startingBit) { return; } diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt index a75becba04d07..be55419c087df 100644 --- a/libc/test/src/math/performance_testing/CMakeLists.txt +++ b/libc/test/src/math/performance_testing/CMakeLists.txt @@ -95,6 +95,9 @@ add_header_library( single_input_single_output_diff HDRS SingleInputSingleOutputPerf.h + DEPENDS + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fp_bits ) add_header_library( @@ -102,6 +105,7 @@ add_header_library( HDRS BinaryOpSingleOutputPerf.h DEPENDS + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -402,3 +406,18 @@ add_perf_binary( LINK_LIBRARIES LibcFPTestHelpers ) + +add_perf_binary( + misc_basic_ops_perf + SRCS + misc_basic_ops_perf.cpp + DEPENDS + .binary_op_single_output_diff + .single_input_single_output_diff + libc.src.math.copysignf + libc.src.math.copysignf16 + libc.src.math.fabsf + libc.src.math.fabsf16 + COMPILE_OPTIONS + -fno-builtin +) diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h index 48ae43d6315e3..efad1259d6bf1 100644 --- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h +++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/algorithm.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/macros/config.h" #include "test/src/math/performance_testing/Timer.h" @@ -26,16 +27,21 @@ template <typename T> class SingleInputSingleOutputPerf { static void runPerfInRange(Func myFunc, 
Func otherFunc, StorageType startingBit, StorageType endingBit, - std::ofstream &log) { + size_t rounds, std::ofstream &log) { + size_t n = 10'010'001; + if (sizeof(StorageType) <= sizeof(size_t)) + n = cpp::min(n, static_cast<size_t>(endingBit - startingBit)); + auto runner = [=](Func func) { - constexpr StorageType N = 10'010'001; - StorageType step = (endingBit - startingBit) / N; + StorageType step = (endingBit - startingBit) / n; if (step == 0) step = 1; - volatile T result; - for (StorageType bits = startingBit; bits < endingBit; bits += step) { - T x = FPBits(bits).get_val(); - result = func(x); + [[maybe_unused]] volatile T result; + for (size_t i = 0; i < rounds; i++) { + for (StorageType bits = startingBit; bits < endingBit; bits += step) { + T x = FPBits(bits).get_val(); + result = func(x); + } } }; @@ -44,8 +50,7 @@ template <typename T> class SingleInputSingleOutputPerf { runner(myFunc); timer.stop(); - StorageType numberOfRuns = endingBit - startingBit + 1; - double myAverage = static_cast<double>(timer.nanoseconds()) / numberOfRuns; + double myAverage = static_cast<double>(timer.nanoseconds()) / n / rounds; log << "-- My function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << myAverage << " ns/op \n"; @@ -56,8 +61,7 @@ template <typename T> class SingleInputSingleOutputPerf { runner(otherFunc); timer.stop(); - double otherAverage = - static_cast<double>(timer.nanoseconds()) / numberOfRuns; + double otherAverage = static_cast<double>(timer.nanoseconds()) / n / rounds; log << "-- Other function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << otherAverage << " ns/op \n"; @@ -68,15 +72,18 @@ template <typename T> class SingleInputSingleOutputPerf { log << " Mine / Other's : " << myAverage / otherAverage << " \n"; } - static void runPerf(Func myFunc, Func otherFunc, const char *logFile) { + static void runPerf(Func myFunc, Func otherFunc, size_t rounds, + const char *logFile) { std::ofstream log(logFile); log << " Performance tests 
with inputs in denormal range:\n"; runPerfInRange(myFunc, otherFunc, /* startingBit= */ StorageType(0), - /* endingBit= */ FPBits::max_subnormal().uintval(), log); + /* endingBit= */ FPBits::max_subnormal().uintval(), rounds, + log); log << "\n Performance tests with inputs in normal range:\n"; runPerfInRange(myFunc, otherFunc, /* startingBit= */ FPBits::min_normal().uintval(), - /* endingBit= */ FPBits::max_normal().uintval(), log); + /* endingBit= */ FPBits::max_normal().uintval(), rounds, + log); } }; @@ -86,6 +93,13 @@ template <typename T> class SingleInputSingleOutputPerf { #define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename) \ int main() { \ LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf( \ - &myFunc, &otherFunc, filename); \ + &myFunc, &otherFunc, 1, filename); \ return 0; \ } + +#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds, \ + filename) \ + { \ + LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf( \ + &myFunc, &otherFunc, rounds, filename); \ + } diff --git a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp new file mode 100644 index 0000000000000..ace1d21c62c32 --- /dev/null +++ b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp @@ -0,0 +1,41 @@ +//===-- Performance test for miscellaneous basic operations ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BinaryOpSingleOutputPerf.h" +#include "SingleInputSingleOutputPerf.h" +#include "src/math/copysignf.h" +#include "src/math/copysignf16.h" +#include "src/math/fabsf.h" +#include "src/math/fabsf16.h" + +#include <math.h> + +static constexpr size_t FLOAT16_ROUNDS = 20'000; +static constexpr size_t FLOAT_ROUNDS = 40; + +// LLVM libc might be the only libc implementation with support for float16 math +// functions currently. We can't compare our float16 functions against the +// system libc, so we compare them against this placeholder function. +float16 placeholder_unaryf16(float16 x) { return x; } +float16 placeholder_binaryf16(float16 x, float16 y) { return x; } + +int main() { + SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fabsf16, + placeholder_unaryf16, FLOAT16_ROUNDS, + "fabsf16_perf.log") + BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::copysignf16, + placeholder_binaryf16, FLOAT16_ROUNDS, + "copysignf16_perf.log") + + SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fabsf, fabsf, + FLOAT_ROUNDS, "fabsf_perf.log") + BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::copysignf, copysignf, + FLOAT_ROUNDS, "copysignf_perf.log") + + return 0; +}