/***********************************************************************
Compare the CPU times of several loops, one doing normal floating-point
operations, and one doing similar operations, but scaled to produce
underflow to zero, underflow to subnormals, NaN, and Infinity, to assess
the cost of exceptional floating-point numbers on the current system.

Several versions of this program can be prepared by defining certain
symbols at compile time:

	FP_T_FLOAT		IEEE 754 32-bit precision
	FP_T_DOUBLE		IEEE 754 64-bit precision
	FP_T_LONG_DOUBLE	IEEE 754 80-bit or 128-bit precision

Some systems abort on overflow, or when NaN or Infinity are generated;
these symbols suppress tests:

	SKIP_OFL		skip overflow test
	SKIP_NAN		skip NaN test
	SKIP_INF		skip Inf test

[10-Feb-2002]
***********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#if defined(__STDC__) || defined(__cplusplus)
#define STDC
#define ARGS(parenthesized_list) parenthesized_list
#define VOID_ARG	void
#else
#define ARGS(parenthesized_list) ()
#define VOID_ARG	
#endif

#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS 0
#endif

typedef double fp_time;

#if defined(FP_T_FLOAT)
#define HAVE_FP_T
typedef float fp_t;
#endif

#if defined(FP_T_DOUBLE)
#define HAVE_FP_T
typedef double fp_t;
#endif

#if defined(FP_T_LONG_DOUBLE)
#define HAVE_FP_T
typedef long double fp_t;
#endif

#if !defined(HAVE_FP_T)
#define HAVE_FP_T
#define FP_T_DOUBLE
typedef double fp_t;
#endif

extern fp_time second ARGS((void));
static int	Print_Time = 0;

fp_t	f ARGS((fp_t));
fp_t	inf ARGS((void));
fp_t	nan ARGS((void));
fp_t	one ARGS((void));
fp_t	store ARGS((fp_t));
fp_t	zero ARGS((void));

fp_time	time_loop ARGS((long n, fp_t scale));

int	main ARGS((int argc, char* argv[]));

#if defined(STDC)
int
main(int argc, char* argv[])
#else
int
main(argc, argv)
int argc;
char* argv[];
#endif
{
    long ntimes = (argc < 2) ? 1000000 : atoi(argv[1]);
    fp_time t_normal, t_ufl_to_zero, t_ufl_to_subnormal, t_overflow, t_NaN, t_Inf;

    (void)printf("\n\n------------------------------------------------------------------------\n");
    (void)printf("sizeof(fp_t) = %u\n", sizeof(fp_t));

    Print_Time = 0;
    t_normal = time_loop(ntimes, 1.0);

    /* Adjust loop count to ensure reasonable timing for the normal loop */
    while (t_normal < 1.0)
    {
	ntimes += ntimes;
	t_normal = time_loop(ntimes, 1.0);
    }

    (void)printf("Adjusted loop count = %ld\n", ntimes);

    Print_Time = 1;
    t_normal		= time_loop(ntimes, 1.0);

#if defined(FP_T_FLOAT)
    t_ufl_to_subnormal	= time_loop(ntimes, pow(2.0,-134.0 / 2)); /* subnormals in [2**(-126),2**(-149)) */
    t_ufl_to_zero	= time_loop(ntimes, pow(2.0,-160.0 / 2));
#if !defined(SKIP_OFL)
    t_overflow		= time_loop(ntimes, pow(2.0, 140.0 / 2)); /* maxnormal = (1 - 2**(-24))* 2**128 */
#endif
#endif

#if defined(FP_T_DOUBLE)
    t_ufl_to_subnormal	= time_loop(ntimes, pow(2.0,-1050.0 / 2)); /* subnormals in [2**(-1074),2**(-1022)) */
    t_ufl_to_zero	= time_loop(ntimes, pow(2.0,-1100.0 / 2));
#if !defined(SKIP_OFL)
    t_overflow		= time_loop(ntimes, pow(2.0, 1030.0 / 2)); /* maxnormal = (1 - 2**(-53))* 2**1024 */
#endif
#endif

#if defined(FP_T_LONG_DOUBLE)

#if (LDBL_MANT_DIG == DBL_MANT_DIG)		/* long double masquerading as double */
    t_ufl_to_subnormal	= time_loop(ntimes, pow(2.0,-1050.0 / 2)); /* subnormals in [2**(-1074),2**(-1022)) */
    t_ufl_to_zero	= time_loop(ntimes, pow(2.0,-1100.0 / 2));
#if !defined(SKIP_OFL)
    t_overflow		= time_loop(ntimes, pow(2.0, 1030.0 / 2)); /* maxnormal = (1 - 2**(-53))* 2**1024 */
#endif
#endif

#if (LDBL_MANT_DIG == 64)		/* 80-bit Intel */
    t_ufl_to_subnormal	= time_loop(ntimes, pow(2.0,-16400.0 / 2)); /* subnormals in [2**(-16382),2**(-16446)) */
    t_ufl_to_zero	= time_loop(ntimes, pow(2.0,-16500.0 / 2));
#if !defined(SKIP_OFL)
    t_overflow		= time_loop(ntimes, pow(2.0, 16400.0 / 2)); /* maxnormal = (1 - 2**(-64))* 2**16384 */
#endif
#endif

#if (LDBL_MANT_DIG > 64)		/* 128-bit RISC */
    t_ufl_to_subnormal	= time_loop(ntimes, pow(2.0,-1050.0 / 2)); /* subnormals in [2**(-1074),2**(-1022)) */
    t_ufl_to_zero	= time_loop(ntimes, pow(2.0,-1100.0 / 2));
#if !defined(SKIP_OFL)
    t_overflow		= time_loop(ntimes, pow(2.0, 1030.0 / 2)); /* maxnormal = (1 - 2**(-53))* 2**1024 */
#endif
#endif

#endif /* defined(FP_T_LONG_DOUBLE) */


#if !defined(SKIP_NAN)
    t_NaN		= time_loop(ntimes, nan());
#endif

#if !defined(SKIP_INF)
    t_Inf		= time_loop(ntimes, inf());
#endif

    (void)printf("t_ufl_to_subnormal / t_normal = %8.3f\n", (double)(t_ufl_to_subnormal / t_normal));
    (void)printf("t_ufl_to_zero      / t_normal = %8.3f\n", (double)(t_ufl_to_zero      / t_normal));

#if !defined(SKIP_OFL)
    (void)printf("t_overflow         / t_normal = %8.3f\n", (double)(t_overflow         / t_normal));
#endif

#if !defined(SKIP_NAN)
    (void)printf("t_NaN              / t_normal = %8.3f\n", (double)(t_NaN              / t_normal));
#endif

#if !defined(SKIP_INF)
    (void)printf("t_Inf              / t_normal = %8.3f\n", (double)(t_Inf              / t_normal));
#endif

    (void)printf("------------------------------------------------------------------------\n\n");
    return (EXIT_SUCCESS);
}

#if defined(STDC)
fp_t
f(fp_t x)
#else
fp_t
f(x)
fp_t x;
#endif
{
    return (x);
}

fp_t
inf(VOID_ARG)
{
    return (one()/zero());
}

fp_t
nan(VOID_ARG)
{
    return (zero()/zero());
}

fp_t
one(VOID_ARG)
{
    return ((fp_t)1);
}

#if defined(STDC)
fp_time
time_loop(long ntimes, fp_t scale)
#else
fp_time
time_loop(ntimes, scale)
long ntimes;
fp_time scale;
#endif
{
    fp_time t_start, t_end, t_loop;
    long k;

    t_start = second();
    for (k = 1; k <= ntimes; ++k)
	(void)store(store(f(scale))*store(f(scale)));
    t_end = second();
    t_loop = t_end - t_start;
    if (Print_Time)
    {
	(void)printf("Time = %ld nsec\t\tscale = %.5g\t\tscale**2 = %.5g\n",
		     (long)(1.0e9 * t_loop/((fp_time)(ntimes))),
		     (double)store(scale),
		     (double)store((store(scale) * store(scale))));
    }
    return(t_loop);
}

fp_t
zero(VOID_ARG)
{
    return ((fp_t)0);
}
