/* $Id: agnhlps.c $
** C Helper Routines
** See Copyright notices in this file, and - where not indicated - in agena.h
** initiated Alexander Walz, July 20, 2007
**
** A note on Raspberry Pi: According to https://raspberry-projects.com/pi/programming-in-c/memory/variables#comment-79888,
** floating point numbers of 80 bits or more are supported on neither 32-bit nor 64-bit platforms. On Raspberries, long doubles
** always have just 8 bytes, which is equal to C doubles.
*/

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <fcntl.h>
#include <stdarg.h>
#include <limits.h>     /* for PATH_MAX length, cast_num */
#include <sys/stat.h>
#include <math.h>       /* for pow, floor, fabs, etc. */
#include <locale.h>     /* for localeconv */
#include <errno.h>      /* for my_ioerror */
#include <time.h>
#include <float.h>      /* for sincos, rem_pio2, DBL_MAX_EXP */
#include <sys/timeb.h>  /* ftime, for milliseconds */

#include "prepdefs.h"   /* preprocessor aliases, repositioned 2.14.5 */

#ifdef PROPCMPLX
#include "lcomplex.h"   /* agnCmplx_create for complex agnc_* functions */
#endif

#ifdef __CPUID
#include <cpuid.h>
#endif

#ifndef PROPCMPLX
#include <complex.h>
#endif

#ifdef _WIN32
#include <sys/locking.h>
#include <windows.h>   /* free memory */
#include <ntdef.h>     /* for GetProcAddress, etc */
#endif

#if defined(__unix__) || defined(__APPLE__)  /* for getch */
#include <termios.h>
#include <unistd.h>
#endif

#if defined(_WIN32) || defined(__DJGPP__)
#include <conio.h>   /* getch; a UNIX version is included in agnhlps.c */
#endif

#ifdef __APPLE__
  #include <crt_externs.h>
  #define environ (*_NSGetEnviron())
#elif defined(__unix__)
  extern char **environ;
#endif

#if defined(__OS2__)
#define INCL_DOSPROCESS
#define INCL_DOSERRORS
#include <os2.h>  /* for DosGetInfoBlocks, DosQueryModuleName */
#include <unistd.h>
#include <conio.h>  /* for kbhit */
#include <os2emx.h>
#endif

#ifdef LUA_DOS
#include <io.h>     /* for _flush_disk_cache */
#endif


#define agnhlps_c
#define LUA_LIB

#include "agncfg.h"    /* read in endianness */
#include "agnconf.h"
#include "agncmpt.h"   /* for O_BINARY and FP_* */
#include "agnhlps.h"   /* for INLINE alias */
#include "cephes.h"    /* for powi, cephes_gamma */
#include "sofa.h"      /* for iauJd2cal */
#include "lucase.def"  /* for tools_lowercase */
#include "lmathlib.h"  /* for seeds m_z, m_w */
#include "lstrlib.h"   /* for MatchState, match */

/* instead of time.h: */
#include "agnt64.h"

#define BASE_MOVELEN  16384L     /* number of bytes in index section to be moved in a write or delete operation */
#define BASE_OFFSET     256L     /* first position of an index to a key */
#define MAXNRECORDS     104L     /* position of Maximum Records allowed */
#define BASECOMMENT     129L     /* position of comment */

#ifndef MAX_PATH
#define MAX_PATH PATH_MAX
#endif


/* See: https://www.greenend.org.uk/rjk/tech/inline.html */
#if defined __GNUC__ && !defined __GNUC_STDC_INLINE__ && !defined __GNUC_GNU_INLINE__
#define __GNUC_GNU_INLINE__ 1
#endif


#define AGN_LNFACTSIZE 512
#define AGN_LOG2LNFACTSIZE 9

static INLINE int tools_nextprime_isprime (unsigned long long int x);
static FORCE_INLINE int tools_isfracwords (int32_t hx, uint32_t lx);
#ifndef __ARMCPU  /* 2.37.1 */
static long double __polevll (long double x, const long double *P, int n);
static long double __p1evll (long double x, const long double *P, int n);
#endif


/* Upper incomplete gamma function, written by CRBond:
   (C) 1993, C. Bond.  All rights reserved.
   taken from and modified by awalz: http://www.crbond.com/downloads/incog.cpp */
/* Upper incomplete gamma function GAMMA(a, x), i.e. the integral of t^(a - 1)*exp(-t) from x to infinity.
   Returns AGN_NAN if a or x is negative, expn(1, x) for a = 0, tools_gamma(a) for x = 0, and HUGE_VAL
   if -x + a*ln(x) > 700 or a > 170.
   For x <= 1 + a the LOWER incomplete gamma function is summed as a power series (at most 60 terms) and
   subtracted from gamma(a); for larger x a 60-level continued fraction yields the upper function directly. */
LUALIB_API double tools_upperincompletegamma (double a, double x) {  /* 3.1.4 */
  double xam, r, s, t0;
  int k;
  if (a < 0.0 || x < 0.0) return AGN_NAN;
  if (a == 0) return expn(1, x);
  if (x == 0.0) return tools_gamma(a);
  xam = -x + a*sun_log(x);
  if ((xam > 700) || (a > 170.0)) return HUGE_VAL;
  if (x <= 1.0 + a) {
    s = 1.0/a;
    r = s;
    for (k=1; k <= 60; k++) {
      r *= x/(a + k);
      s += r;
      if (fabs(r/s) < 1e-15) break;  /* series has converged */
    }
    /* exp(xam)*s is the lower incomplete gamma function (`gin` in Bond's incog); the upper function is its
       complement with respect to gamma(a) (`gim = ga - gin`). Returning the bare series value made the
       function jump at x = 1 + a. */
    return tools_gamma(a) - sun_exp(xam)*s;
  } else {
    t0 = 0.0;
    for (k=60; k >= 1 ; k--) {  /* evaluate the continued fraction from the innermost level outwards */
      t0 = (k - a)/(1.0 + k/(x + t0));
    }
    return sun_exp(xam)/(x + t0);
  }
}

/* Lower incomplete gamma function, computed as tools_gamma(a) - tools_upperincompletegamma(a, x).
   For a = 0 the result is 0 if x = 0 and HUGE_VAL otherwise. */
LUALIB_API double tools_lowerincompletegamma (double a, double x) {  /* 3.1.4, works like Maple V Release 4 */
  if (a != 0)
    return tools_gamma(a) - tools_upperincompletegamma(a, x);
  if (x == 0) return 0;
  return HUGE_VAL;
}


/*							chbevl.c
 *
 *	Evaluate Chebyshev series
 *
 *
 *
 * SYNOPSIS:
 *
 * int N;
 * double x, y, coef[N], chebevl();
 *
 * y = chbevl( x, coef, N );
 *
 *
 *
 * DESCRIPTION:
 *
 * Evaluates the series
 *
 *        N-1
 *         - '
 *  y  =   >   coef[i] T (x/2)
 *         -            i
 *        i=0
 *
 * of Chebyshev polynomials Ti at argument x/2.
 *
 * Coefficients are stored in reverse order, i.e. the zero
 * order term is last in the array.  Note N is the number of
 * coefficients, not the order.
 *
 * If coefficients are for the interval a to b, x must
 * have been transformed to x -> 2(2x - b - a)/(b-a) before
 * entering the routine.  This maps x from (a, b) to (-1, 1),
 * over which the Chebyshev polynomials are defined.
 *
 * If the coefficients are for the inverted interval, in
 * which (a, b) is mapped to (1/b, 1/a), the transformation
 * required is x -> 2(2ab/x - b - a)/(b-a).  If b is infinity,
 * this becomes x -> 4a/x - 1.
 *
 *
 *
 * SPEED:
 *
 * Taking advantage of the recurrence properties of the
 * Chebyshev polynomials, the routine requires one more
 * addition per loop than evaluating a nested polynomial of
 * the same degree.
 *
 */
/*							chbevl.c	*/

/*
Cephes Math Library Release 2.0:  April, 1987
Copyright 1985, 1987 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

/* static double chbevl (double, double array[], int n) {
  double b0, b1, b2, *p;
  int i;
  p = array;
  b0 = *p++;
  b1 = 0.0;
  i = n - 1;
  do {
    b2 = b1;
    b1 = b0;
    b0 = x*b1 - b2 + *p++;
  } while(--i);
  return 0.5*(b0 - b2);
} */

/*						rgamma.c
 *
 *	Reciprocal gamma function
 *
 *
 *
 * SYNOPSIS:
 *
 * double x, y, rgamma();
 *
 * y = rgamma( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Returns one divided by the gamma function of the argument.
 *
 * The function is approximated by a Chebyshev expansion in
 * the interval [0,1].  Range reduction is by recurrence
 * for arguments between -34.034 and +34.84425627277176174.
 * 1/MAXNUM is returned for positive arguments outside this
 * range.  For arguments less than -34.034 the cosecant
 * reflection formula is applied; logarithms are employed
 * to avoid unnecessary overflow.
 *
 * The reciprocal gamma function has no singularities,
 * but overflow and underflow may occur for large arguments.
 * These conditions return either MAXNUM or 1/MAXNUM with
 * appropriate sign.
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    DEC      -30,+30       4000       1.2e-16     1.8e-17
 *    IEEE     -30,+30      30000       1.1e-15     2.0e-16
 * For arguments less than -34.034 the peak error is on the
 * order of 5e-15 (DEC), excepting overflow or underflow.
 */

/*
Cephes Math Library Release 2.8:  June, 2000
Copyright 1985, 1987, 2000 by Stephen L. Moshier
*/

/* Chebyshev coefficients for reciprocal gamma function
 * in interval 0 to 1.  Function is 1/(x gamma(x)) - 1
 */

/* 16 coefficients; slm_rgamma passes this table to chbevl() with n = 16 and argument 4*w - 2. */
static double RGAM[] = {
 3.13173458231230000000E-17,
-6.70718606477908000000E-16,
 2.20039078172259550000E-15,
 2.47691630348254132600E-13,
-6.60074100411295197440E-12,
 5.13850186324226978840E-11,
 1.08965386454418662084E-9,
-3.33964630686836942556E-8,
 2.68975996440595483619E-7,
 2.96001177518801696639E-6,
-8.04814124978471142852E-5,
 4.16609138709688864714E-4,
 5.06579864028608725080E-3,
-6.41925436109158228810E-2,
-4.98558728684003594785E-3,
 1.27546015610523951063E-1
};

#define RGAMMAXLOG 7.08396418532264106224E2      /* log 2**1022 */
#define RGAMMINLOG -7.08396418532264106224E2     /* log(2**-1022) */
#define RGAMMAXNUM 1.79769313486231570815E308    /* 2**1024*(1-MACHEP) */

/* Reciprocal gamma function 1/gamma(x).
   - x > 34.84425627277176174: returns 1/RGAMMAXNUM.
   - x a non-positive integer: returns AGN_NAN.
   - x < -34.034: reflection formula evaluated with logarithms; the result is clamped to
     +/-1/RGAMMAXNUM or +/-RGAMMAXNUM if its logarithm leaves [-RGAMMAXLOG, RGAMMAXLOG].
   - otherwise: the argument is reduced into [0, 1] by recurrence and the Chebyshev expansion RGAM is applied. */
LUALIB_API double slm_rgamma (double x) {
  double arg, result, factor;
  int sign;
  if (x > 34.84425627277176174)
    return 1.0/RGAMMAXNUM;
  if (x <= 0 && tools_isint(x))
    return AGN_NAN;  /* added */
  if (x < -34.034) {
    arg = -x;
    factor = sun_sin(PI*arg);
    if (factor == 0.0) return 0.0;
    sign = (factor < 0.0) ? 1 : -1;
    if (sign == 1) factor = -factor;  /* continue with the magnitude only */
    result = sun_log(arg*factor) - sun_log(PI) + sun_lgamma(arg);
    if (result < -RGAMMAXLOG)
      return sign*1.0/RGAMMAXNUM;
    if (result > RGAMMAXLOG)
      return sign*RGAMMAXNUM;
    return sign*sun_exp(result);
  }
  factor = 1.0;
  for (arg = x; arg > 1.0; ) {  /* downward recurrence */
    arg -= 1.0;
    factor *= arg;
  }
  for ( ; arg < 0.0; arg += 1.0) {  /* upward recurrence */
    factor /= arg;
  }
  if (arg == 0.0)  /* non-positive integer */
    return 0.0;
  if (arg == 1.0)  /* other integer */
    return 1.0/factor;
  result = arg*(1.0 + chbevl(4.0*arg-2.0, RGAM, 16))/factor;
  return result;
}


/* forward declarations for Raspberry Pi */
LUALIB_API double sun_acosh (double x);

/* Constants used by Sun's functions */

/* NOTE(review): declared volatile, presumably so that the compiler does not constant-fold expressions
   built from these values - confirm against the Sun routines that use them. */
volatile double
pi_lo    = 1.2246467991473531772E-16,       /* 0x3CA1A626, 0x33145C07 */
tiny     = 1.0e-300,                        /* very small positive value */
twom1000 = 9.33263618503218878990e-302;     /* 2**-1000=0x01700000,0*/


/* Recursive quicksort for file offsets: sorts v[left] .. v[right] (both borders inclusive) in ascending
   order, in place. The middle element is used as the pivot.
   The indices are signed like the borders: with unsigned indices `last - 1` wrapped around to UINT32_MAX
   whenever the pivot ended up at index 0, and the loop condition compared signed with unsigned values. */
LUALIB_API void tools_quicksort (off64_t v[], int32_t left, int32_t right) {  /* 2.11.0 RC2 change */
  register int32_t i, last;
  register off64_t temp;
  if (left >= right) {  /* fewer than two elements: nothing to do */
    return;
  }
  /* park the pivot at the left border; the midpoint is computed without adding the two borders */
  SWAP(v[left], v[left + (right - left)/2], temp);
  last = left;
  for (i=left + 1; i <= right; i++) {
    if (v[i] < v[left]) {  /* collect everything smaller than the pivot right behind it */
      last++;
      SWAP(v[last], v[i], temp);
    }
  }
  SWAP(v[left], v[last], temp);  /* the pivot now sits at its final position */
  tools_quicksort(v, left, last - 1);
  tools_quicksort(v, last + 1, right);
}


/* Binary search: returns 1 if z occurs in the array a of cnt elements, and 0 otherwise.
   The array must be sorted in ascending order. The midpoint is determined by tools_midpoint to prevent overflow, see:
   https://stackoverflow.com/questions/17358806/fixing-binary-search-bug-from-bentleys-book-programming-pearls-writing-correc */
LUALIB_API int tools_binsearch (off64_t *a, off64_t cnt, off64_t z) {  /* 2.11.0 RC 2 */
  off64_t lo, hi, mid;
  for (lo = 0, hi = cnt - 1; lo <= hi; ) {
    mid = tools_midpoint(lo, hi);  /* 2.38.2 patch */
    if (a[mid] == z)
      return 1;
    if (a[mid] < z)
      lo = mid + 1;  /* continue in the upper half */
    else
      hi = mid - 1;  /* continue in the lower half */
  }
  return 0;
}


/* Recursive quicksort for doubles: sorts v[left] .. v[right] in ascending order, in place.
   left: left border, right: right border of array (both inclusive). */
LUALIB_API void tools_dquicksort (double v[], int32_t left, int32_t right) {
  register long int scan, store;
  register double temp;
  if (left >= right) return;  /* fewer than two elements */
  SWAP(v[left], v[(left + right)/2], temp);  /* park the pivot (middle element) at the left border */
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (v[scan] < v[left]) {  /* collect everything smaller than the pivot right behind it */
      store++;
      SWAP(v[store], v[scan], temp);
    }
    scan++;
  }
  SWAP(v[left], v[store], temp);  /* the pivot now sits at its final position */
  tools_dquicksort(v, left, store - 1);
  tools_dquicksort(v, store + 1, right);
}


/*** IntroSort ************************************************************************************************************** */

/* Based on the heapsort algorithm as published in Niklaus Wirth's `Algorithms and Data Structures` (for Oberon),
   extended to optionally sort only a portion of an array; 2.3.0

   Taken from: https://cs.stackexchange.com/questions/24446/why-does-introsort-use-heapsort-rather-than-mergesort

   "Introsort is a hybrid sorting algorithm that provides both fast average performance and (asymptotically) optimal
   worst-case performance. It begins with quicksort and switches to heapsort when the recursion depth exceeds a level
   based on (the logarithm of) the number of elements being sorted. (Wikipedia, retrieved 2014-May-06.)"

   Do NOT change type of left and right borders to uint32 or size_t, otherwise MinGW crashes.

   There is no benefit when integrating `dsift` into dheapsort, inlined 2.16.13 */

/* Sift-down step of heapsort: lets a[L] sink into the heap that ends at a[R]. The children of index i
   are located at 2*i + 1 - offset and the slot following it. */
FORCE_INLINE void dsift (double *a, int32_t L, int32_t R, long int offset) {
  register int32_t hole, child;
  register double item;
  hole = L;
  item = a[hole];
  child = 2*hole + 1 - offset;
  for (;;) {
    if (child < R && a[child] < a[child + 1]) child++;  /* prefer the larger of the two children */
    if (child > R || !(item < a[child])) break;        /* no child left, or heap property restored */
    a[hole] = a[child];
    hole = child;
    child = 2*child + 1 - offset;
  }
  a[hole] = item;
}

/* Heapsort for doubles: sorts a[l] .. a[r] (both borders inclusive) in ascending order, in place. */
LUALIB_API void tools_dheapsort (double *a, int32_t l, int32_t r) {  /* inlined 2.18.1 */
  register int32_t top, end;
  register double temp;
  /* phase 1: build the heap, sifting from the middle of the range down to its left border */
  for (top = l + (r - l + 1)/2; top > l; ) {
    top--;
    dsift(a, top, r, l);
  }
  /* phase 2: move the current maximum behind the shrinking heap and repair the heap */
  for (end = r; end > l; ) {
    SWAP(a[l], a[end], temp);  /* 2.15.3 change */
    end--;
    dsift(a, top, end, l);
  }
}

/* Introsort for doubles: quicksort on v[left] .. v[right] (inclusive, ascending, in place) that hands the
   current range over to tools_dheapsort once the recursion depth reaches `threshold`.
   left: left border, right: right border of array, depth initially is 0, threshold is 2*log(#entries)/log(2) */
LUALIB_API void tools_dintrosort (double *v, int32_t left, int32_t right, size_t depth, size_t threshold) {  /* 2.3.0/2.21.1 */
  register int32_t scan, store;
  register double temp;
  if (left >= right) return;
  if (++depth == threshold) {  /* recursion got too deep: finish this range with heapsort */
    tools_dheapsort(v, left, right);
    return;
  }
  SWAP(v[left], v[(left + right)/2], temp);  /* park the pivot (middle element) at the left border */
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (v[scan] < v[left]) {
      store++;
      SWAP(v[store], v[scan], temp);
    }
    scan++;
  }
  SWAP(v[left], v[store], temp);  /* the pivot now sits at its final position */
  tools_dintrosort(v, left, store - 1, depth, threshold);
  tools_dintrosort(v, store + 1, right, depth, threshold);
}


/*** IntroSort for UCHARS **************************************************************************************************** */

/* Do NOT change type of left and right borders to uint32 or size_t, otherwise "MinGW" crashes */

/* Sift-down step of heapsort for unsigned chars: lets a[L] sink into the heap that ends at a[R]. The
   children of index i are located at 2*i + 1 - offset and the slot following it. */
FORCE_INLINE void dsift_uchar (unsigned char *a, int32_t L, int32_t R, long int offset) {
  register int32_t hole, child;
  register unsigned char item;
  hole = L;
  item = a[hole];
  child = 2*hole + 1 - offset;
  for (;;) {
    if (child < R && a[child] < a[child + 1]) child++;  /* prefer the larger of the two children */
    if (child > R || !(item < a[child])) break;        /* no child left, or heap property restored */
    a[hole] = a[child];
    hole = child;
    child = 2*child + 1 - offset;
  }
  a[hole] = item;
}

/* Heapsort for unsigned chars: sorts a[l] .. a[r] (both borders inclusive) in ascending order, in place. */
LUALIB_API void tools_dheapsort_uchar (unsigned char *a, int32_t l, int32_t r) {  /* inlined 2.18.1 */
  register int32_t top, end;
  register unsigned char temp;
  /* phase 1: build the heap, sifting from the middle of the range down to its left border */
  for (top = l + (r - l + 1)/2; top > l; ) {
    top--;
    dsift_uchar(a, top, r, l);
  }
  /* phase 2: move the current maximum behind the shrinking heap and repair the heap */
  for (end = r; end > l; ) {
    SWAP(a[l], a[end], temp);  /* 2.15.3 change */
    end--;
    dsift_uchar(a, top, end, l);
  }
}

/* Introsort for unsigned chars: quicksort on v[left] .. v[right] (inclusive, ascending, in place) that hands
   the current range over to tools_dheapsort_uchar once the recursion depth reaches `threshold`.
   left: left border, right: right border of array, depth initially is 0, threshold is 2*log(#entries)/log(2) */
LUALIB_API void tools_dintrosort_uchar (unsigned char *v, int32_t left, int32_t right, size_t depth, size_t threshold) {  /* 2.3.0/2.21.1 */
  register int32_t scan, store;
  register unsigned char temp;
  if (left >= right) return;
  if (++depth == threshold) {  /* recursion got too deep: finish this range with heapsort */
    tools_dheapsort_uchar(v, left, right);
    return;
  }
  SWAP(v[left], v[(left + right)/2], temp);  /* park the pivot (middle element) at the left border */
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (v[scan] < v[left]) {
      store++;
      SWAP(v[store], v[scan], temp);
    }
    scan++;
  }
  SWAP(v[left], v[store], temp);  /* the pivot now sits at its final position */
  tools_dintrosort_uchar(v, left, store - 1, depth, threshold);
  tools_dintrosort_uchar(v, store + 1, right, depth, threshold);
}


/*** IntroSort for USHORTS **************************************************************************************************** */

/* Do NOT change type of left and right borders to uint32 or size_t, otherwise "MinGW" crashes */

/* Sift-down step of heapsort for uint16_t values: lets a[L] sink into the heap that ends at a[R]. The
   children of index i are located at 2*i + 1 - offset and the slot following it. */
FORCE_INLINE void dsift_ushort (uint16_t *a, int32_t L, int32_t R, long int offset) {
  register int32_t hole, child;
  register uint16_t item;
  hole = L;
  item = a[hole];
  child = 2*hole + 1 - offset;
  for (;;) {
    if (child < R && a[child] < a[child + 1]) child++;  /* prefer the larger of the two children */
    if (child > R || !(item < a[child])) break;        /* no child left, or heap property restored */
    a[hole] = a[child];
    hole = child;
    child = 2*child + 1 - offset;
  }
  a[hole] = item;
}

/* Heapsort for uint16_t values: sorts a[l] .. a[r] (both borders inclusive) in ascending order, in place. */
LUALIB_API void tools_dheapsort_ushort (uint16_t *a, int32_t l, int32_t r) {  /* inlined 2.18.1 */
  register int32_t top, end;
  register uint16_t temp;
  /* phase 1: build the heap, sifting from the middle of the range down to its left border */
  for (top = l + (r - l + 1)/2; top > l; ) {
    top--;
    dsift_ushort(a, top, r, l);
  }
  /* phase 2: move the current maximum behind the shrinking heap and repair the heap */
  for (end = r; end > l; ) {
    SWAP(a[l], a[end], temp);  /* 2.15.3 change */
    end--;
    dsift_ushort(a, top, end, l);
  }
}

/* Introsort for uint16_t values: quicksort on v[left] .. v[right] (inclusive, ascending, in place) that hands
   the current range over to tools_dheapsort_ushort once the recursion depth reaches `threshold`.
   left: left border, right: right border of array, depth initially is 0, threshold is 2*log(#entries)/log(2) */
LUALIB_API void tools_dintrosort_ushort (uint16_t *v, int32_t left, int32_t right, size_t depth, size_t threshold) {  /* 2.3.0/2.21.1 */
  register int32_t scan, store;
  register uint16_t temp;
  if (left >= right) return;
  if (++depth == threshold) {  /* recursion got too deep: finish this range with heapsort */
    tools_dheapsort_ushort(v, left, right);
    return;
  }
  SWAP(v[left], v[(left + right)/2], temp);  /* park the pivot (middle element) at the left border */
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (v[scan] < v[left]) {
      store++;
      SWAP(v[store], v[scan], temp);
    }
    scan++;
  }
  SWAP(v[left], v[store], temp);  /* the pivot now sits at its final position */
  tools_dintrosort_ushort(v, left, store - 1, depth, threshold);
  tools_dintrosort_ushort(v, store + 1, right, depth, threshold);
}


/*** IntroSort for UINT32 *********************************************************************************************** */

/* Do NOT change type of left and right borders to uint32 or size_t, otherwise "MinGW" crashes */

/* Sift-down step of heapsort for uint32_t values: lets a[L] sink into the heap that ends at a[R]. The
   children of index i are located at 2*i + 1 - offset and the slot following it. */
FORCE_INLINE void dsift_uint32 (uint32_t *a, int32_t L, int32_t R, long int offset) {
  register int32_t hole, child;
  register uint32_t item;
  hole = L;
  item = a[hole];
  child = 2*hole + 1 - offset;
  for (;;) {
    if (child < R && a[child] < a[child + 1]) child++;  /* prefer the larger of the two children */
    if (child > R || !(item < a[child])) break;        /* no child left, or heap property restored */
    a[hole] = a[child];
    hole = child;
    child = 2*child + 1 - offset;
  }
  a[hole] = item;
}

/* Heapsort for uint32_t values: sorts a[l] .. a[r] (both borders inclusive) in ascending order, in place. */
LUALIB_API void tools_dheapsort_uint32 (uint32_t *a, int32_t l, int32_t r) {  /* inlined 2.18.1 */
  register int32_t top, end;
  register uint32_t temp;
  /* phase 1: build the heap, sifting from the middle of the range down to its left border */
  for (top = l + (r - l + 1)/2; top > l; ) {
    top--;
    dsift_uint32(a, top, r, l);
  }
  /* phase 2: move the current maximum behind the shrinking heap and repair the heap */
  for (end = r; end > l; ) {
    SWAP(a[l], a[end], temp);  /* 2.15.3 change */
    end--;
    dsift_uint32(a, top, end, l);
  }
}

/* Introsort for uint32_t values: quicksort on v[left] .. v[right] (inclusive, ascending, in place) that hands
   the current range over to tools_dheapsort_uint32 once the recursion depth reaches `threshold`.
   left: left border, right: right border of array, depth initially is 0, threshold is 2*log(#entries)/log(2) */
LUALIB_API void tools_dintrosort_uint32 (uint32_t *v, int32_t left, int32_t right, size_t depth, size_t threshold) {  /* 2.3.0/2.21.1 */
  register int32_t scan, store;
  register uint32_t temp;
  if (left >= right) return;
  if (++depth == threshold) {  /* recursion got too deep: finish this range with heapsort */
    tools_dheapsort_uint32(v, left, right);
    return;
  }
  SWAP(v[left], v[(left + right)/2], temp);  /* park the pivot (middle element) at the left border */
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (v[scan] < v[left]) {
      store++;
      SWAP(v[store], v[scan], temp);
    }
    scan++;
  }
  SWAP(v[left], v[store], temp);  /* the pivot now sits at its final position */
  tools_dintrosort_uint32(v, left, store - 1, depth, threshold);
  tools_dintrosort_uint32(v, store + 1, right, depth, threshold);
}


/*** IntroSort for INT32 ************************************************************************************************ */

/* Do NOT change type of left and right borders to uint32 or size_t, otherwise "MinGW" crashes */

/* Sift-down step of heapsort for int32_t values: lets a[L] sink into the heap that ends at a[R]. The
   children of index i are located at 2*i + 1 - offset and the slot following it. */
FORCE_INLINE void dsift_int32 (int32_t *a, int32_t L, int32_t R, long int offset) {
  register int32_t hole, child;
  register int32_t item;
  hole = L;
  item = a[hole];
  child = 2*hole + 1 - offset;
  for (;;) {
    if (child < R && a[child] < a[child + 1]) child++;  /* prefer the larger of the two children */
    if (child > R || !(item < a[child])) break;        /* no child left, or heap property restored */
    a[hole] = a[child];
    hole = child;
    child = 2*child + 1 - offset;
  }
  a[hole] = item;
}

/* Heapsort for int32_t values: sorts a[l] .. a[r] (both borders inclusive) in ascending order, in place. */
LUALIB_API void tools_dheapsort_int32 (int32_t *a, int32_t l, int32_t r) {  /* inlined 2.18.1 */
  register int32_t top, end;
  register int32_t temp;
  /* phase 1: build the heap, sifting from the middle of the range down to its left border */
  for (top = l + (r - l + 1)/2; top > l; ) {
    top--;
    dsift_int32(a, top, r, l);
  }
  /* phase 2: move the current maximum behind the shrinking heap and repair the heap */
  for (end = r; end > l; ) {
    SWAP(a[l], a[end], temp);  /* 2.15.3 change */
    end--;
    dsift_int32(a, top, end, l);
  }
}

/* Introsort for int32_t values: quicksort on v[left] .. v[right] (inclusive, ascending, in place) that hands
   the current range over to tools_dheapsort_int32 once the recursion depth reaches `threshold`.
   left: left border, right: right border of array, depth initially is 0, threshold is 2*log(#entries)/log(2) */
LUALIB_API void tools_dintrosort_int32 (int32_t *v, int32_t left, int32_t right, size_t depth, size_t threshold) {  /* 2.3.0/2.21.1 */
  register int32_t scan, store;
  register int32_t temp;
  if (left >= right) return;
  if (++depth == threshold) {  /* recursion got too deep: finish this range with heapsort */
    tools_dheapsort_int32(v, left, right);
    return;
  }
  SWAP(v[left], v[(left + right)/2], temp);  /* park the pivot (middle element) at the left border */
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (v[scan] < v[left]) {
      store++;
      SWAP(v[store], v[scan], temp);
    }
    scan++;
  }
  SWAP(v[left], v[store], temp);  /* the pivot now sits at its final position */
  tools_dintrosort_int32(v, left, store - 1, depth, threshold);
  tools_dintrosort_int32(v, store + 1, right, depth, threshold);
}


/*** IntroSort for LONG DOUBLES ******************************************************************************************* */

/* Do NOT change type of left and right borders to uint32 or size_t, otherwise "MinGW" crashes */

#ifndef __ARMCPU  /* 2.37.1 */
/* Sift-down step of heapsort for long doubles: lets a[L] sink into the heap that ends at a[R]. The
   children of index i are located at 2*i + 1 - offset and the slot following it. */
FORCE_INLINE void dsiftl (long double *a, int32_t L, int32_t R, long int offset) {
  register int32_t hole, child;
  register long double item;
  hole = L;
  item = a[hole];
  child = 2*hole + 1 - offset;
  for (;;) {
    if (child < R && a[child] < a[child + 1]) child++;  /* prefer the larger of the two children */
    if (child > R || !(item < a[child])) break;        /* no child left, or heap property restored */
    a[hole] = a[child];
    hole = child;
    child = 2*child + 1 - offset;
  }
  a[hole] = item;
}

/* Heapsort for long doubles: sorts a[l] .. a[r] (both borders inclusive) in ascending order, in place. */
LUALIB_API void tools_dheapsortl (long double *a, int32_t l, int32_t r) {  /* inlined 2.18.1 */
  register int32_t top, end;
  register long double temp;
  /* phase 1: build the heap, sifting from the middle of the range down to its left border */
  for (top = l + (r - l + 1)/2; top > l; ) {
    top--;
    dsiftl(a, top, r, l);
  }
  /* phase 2: move the current maximum behind the shrinking heap and repair the heap */
  for (end = r; end > l; ) {
    SWAP(a[l], a[end], temp);  /* 2.15.3 change */
    end--;
    dsiftl(a, top, end, l);
  }
}

/* Introsort for long doubles: quicksort on v[left] .. v[right] (inclusive, ascending, in place) that hands
   the current range over to tools_dheapsortl once the recursion depth reaches `threshold`.
   left: left border, right: right border of array, depth initially is 0, threshold is 2*log(#entries)/log(2) */
LUALIB_API void tools_dintrosortl (long double *v, int32_t left, int32_t right, size_t depth, size_t threshold) {
  register int32_t scan, store;
  register long double temp;
  if (left >= right) return;
  if (++depth == threshold) {  /* recursion got too deep: finish this range with heapsort */
    tools_dheapsortl(v, left, right);
    return;
  }
  SWAP(v[left], v[(left + right)/2], temp);  /* park the pivot (middle element) at the left border */
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (v[scan] < v[left]) {
      store++;
      SWAP(v[store], v[scan], temp);
    }
    scan++;
  }
  SWAP(v[left], v[store], temp);  /* the pivot now sits at its final position */
  tools_dintrosortl(v, left, store - 1, depth, threshold);
  tools_dintrosortl(v, store + 1, right, depth, threshold);
}
#endif

/** END of INTROSORTs ******************************************************************************************/

/*

Function: pixel_qsort

Old and supposedly optimised quicksort algorithm.

Function : pixel_qsort()

    In : pixel array, size of the array
    Out : int
    Job : sort out the array of pixels
    Note : optimized implementation.

References PIX_STACK_SIZE, and PIX_SWAP.

Taken from https://github.com/wme7/aero-shock/blob/master/mycpp/test/benchmed.c,
written by Nicolas Devillard <ndevilla@free.fr> August 1998. This code in public domain.

The function _can_ be 15 % faster on older systems than the recursive quicksort implementation for
lists of numbers in random order. Only if the list is already sorted in descending order, pixel sort
is much slower.

See also: http://www.eso.org/projects/dfs/papers/jitter99/node27.html

Modified by Alexander Walz to return a success code: 0 - failure, 1 - success, and to dynamically
increase the internal stack instead of throwing an error if the stack size has become too small.

*/

#define PIX_SWAP(a,b) { double temp=(a);(a)=(b);(b)=temp; }
#define PIX_STACK_SIZE 50


/* Non-recursive quicksort with an explicit index stack; sorts the npix elements of pix_arr in ascending
   order, in place. Internally the array is addressed with 1-based indices, hence the `- 1` offsets.
   Ranges of fewer than 8 elements are finished by insertion sort.
   Returns 1 on success and 0 if memory for the index stack could not be allocated; in the latter case
   pix_arr may be left partially sorted.
   The index stack holds ints and is therefore sized in units of int; it only ever grows, and a failing
   realloc no longer leaks the previously allocated stack. */
LUALIB_API int pixel_qsort (double *pix_arr, int npix) {  /* changed from void to int by Alexander Walz */
  int i, ir, j, k, l, j_stack, capacity;
  int *i_stack, *grown;
  double  a;
  ir = npix;
  l = 1;
  j_stack = 0;
  capacity = PIX_STACK_SIZE;
  i_stack = malloc(capacity*sizeof(*i_stack));
  if (i_stack == NULL) return 0;  /* 2.3.0 */
  for (;;) {
    if (ir - l < 7) {  /* small range: straight insertion */
      for (j=l + 1; j <= ir; j++) {
        a = pix_arr[j - 1];
        for (i=j - 1; i >= 1; i--) {
          if (pix_arr[i - 1] <= a) break;
          pix_arr[i] = pix_arr[i - 1];
        }
        pix_arr[i] = a;
      }
      if (j_stack == 0) break;  /* no pending range left: done */
      ir = i_stack[j_stack-- -1];
      l  = i_stack[j_stack-- -1];
    } else {
      /* median-of-three: arrange pix_arr[l] <= pix_arr[l - 1] <= pix_arr[ir - 1]; pix_arr[l - 1] is the pivot */
      k = (l+ir) >> 1;
      PIX_SWAP(pix_arr[k - 1], pix_arr[l])
      if (pix_arr[l] > pix_arr[ir - 1]) {
        PIX_SWAP(pix_arr[l], pix_arr[ir - 1])
      }
      if (pix_arr[l - 1] > pix_arr[ir - 1]) {
        PIX_SWAP(pix_arr[l - 1], pix_arr[ir - 1])
      }
      if (pix_arr[l] > pix_arr[l - 1]) {
        PIX_SWAP(pix_arr[l], pix_arr[l - 1])
      }
      i = l + 1;
      j = ir;
      a = pix_arr[l - 1];
      for (;;) {
        do i++; while (pix_arr[i - 1] < a);
        do j--; while (pix_arr[j - 1] > a);
        if (j < i) break;
        PIX_SWAP(pix_arr[i - 1], pix_arr[j - 1]);
      }
      pix_arr[l - 1] = pix_arr[j - 1];
      pix_arr[j - 1] = a;
      j_stack += 2;
      if (j_stack > capacity) {  /* enlarge the stack instead of aborting; changed by Alexander Walz */
        capacity = j_stack + PIX_STACK_SIZE;
        grown = realloc(i_stack, capacity*sizeof(*i_stack));
        if (grown == NULL) {  /* realloc left the old block untouched, so release it here */
          xfree(i_stack);
          return 0;
        }
        i_stack = grown;
      }
      /* stack the larger range and continue with the smaller one */
      if (ir-i+1 >= j - l) {
        i_stack[j_stack - 1] = ir;
        i_stack[j_stack - 2] = i;
        ir = j- 1;
      } else {
        i_stack[j_stack - 1] = j - 1;
        i_stack[j_stack - 2] = l;
        l = i;
      }
    }
  }
  xfree(i_stack);
  return 1;
}


/*---------------------------------------------------------------------------

   Algorithm from N. Wirth's book, implementation by N. Devillard.
   This code in public domain.

   Function :   kth_smallest()
   In       :   array of elements, # of elements in the array, rank k
   Out      :   one element
   Job      :   find the kth smallest element in the array
   [Notice  :   use the median() macro defined below to get the median.]
   Notice   :   works on both sorted as well as unsorted data

                Reference:

                  Author: Wirth, Niklaus
                   Title: Algorithms + data structures = programs
               Publisher: Englewood Cliffs: Prentice-Hall, 1976
    Physical description: 366 p.
                  Series: Prentice-Hall Series in Automatic Computation

 ---------------------------------------------------------------------------*/

/* #define ELEM_SWAP(a,b) { register double t = (a); (a) = (b); (b) = t; } */

/* Returns the k-th smallest element of the n-element array a, with k counting from 0. The array need not be
   sorted; it is partially reordered in place.
   Returns AGN_NAN if the array is empty (n <= 0) or if k does not denote a valid index (k < 0 or k >= n);
   formerly an out-of-range k led to an access outside the array. */
LUALIB_API double tools_kth_smallest (double a[], long int n, long int k) {
  register long int i, j, l, m;
  register double x, temp;
  if (n <= 0 || k < 0 || k >= n) return AGN_NAN;  /* 2.4.4 */
  l = 0; m = n - 1;
  while (l < m) {
    x = a[k];  /* current candidate */
    i = l;
    j = m;
    do {  /* partition a[l] .. a[m] around x */
      while (a[i] < x) i++;
      while (x < a[j]) j--;
      if (i <= j) {
        SWAP(a[i], a[j], temp);
        i++; j--;
      }
    } while (i <= j);
    /* keep only the side that still contains index k */
    if (j < k) l = i;
    if (k < i) m = j;
  }
  return a[k];
}


/* sorts numbers in ascending order */

#define SWAP(a,b,t) { (t) = (a); (a) = (b); (b) = (t); }

/* see: http://www.ethoberon.ethz.ch/WirthPubl/AD.pdf
   `Algorithms and Data Structures` by Niklaus Wirth 1985, Oberon version: August 2004 */

/* PROCEDURE NonRecursiveQuickSort;
  CONST M = 12;
  VAR i, j, L, R, s: INTEGER; x, w: Item;
  low, high: ARRAY M OF INTEGER; (*index stack*)
  BEGIN
    s := 0; low[0] := 0; high[0] := n-1;
    REPEAT (*take top request from stack*)
      L := low[s]; R := high[s]; DEC(s);
      REPEAT (*partition a[L] ... a[R]*)
        i := L; j := R; x := a[(L+R) DIV 2];
        REPEAT
          WHILE a[i] < x DO INC(i) END ;
          WHILE x < a[j] DO DEC(j) END ;
          IF i <= j THEN
            w := a[i]; a[i] := a[j]; a[j] := w; i := i + 1; j := j - 1
          END
        UNTIL i > j;
        //IF i < R THEN (*stack request to sort right partition*)
        //  INC(s); low[s] := i; high[s] := R
        //END ;
        IF j - L < R - i THEN
          IF i < R THEN (*stack request for sorting right partition*)
            INC(s); low[s] := i; high[s] := R
          END ;
          R := j (*continue sorting left partition*)
        ELSE
          IF L < j THEN (*stack request for sorting left parition*)
            INC(s); low[s] := L; high[s] := j
          END;
        L := i (*continue sorting right partition*)
        END
        R := j (*now L and R delimit the left partition*)
      UNTIL L >= R
    UNTIL s = 0
  END NonRecursiveQuickSort */

/* Non-recursive quicksort after N. Wirth: sorts the n elements of a in ascending order, in place, using two
   heap-allocated index stacks of 2*log2(n) + 1 entries each. Stacked ranges of at most 6 elements are
   sorted by a simple exchange sort.
   Returns 0 on success and 1 on failure (a is NULL or memory for the index stacks could not be allocated).
   Arrays with fewer than two elements are already sorted and are accepted without any allocation; formerly
   n = 0 fed log2(0) into the stack size computation. */
LUALIB_API int tools_dnonrecursivequicksort (double *a, size_t n) {
  long int i, j, L, R, M, nsmall, s, *low, *high;
  double x, t;
  if (a == NULL) return 1;
  if (n < 2) return 0;
  M = 2*sun_log2(n) + 1;
  nsmall = 6;
  /* index stack */
  low = malloc(M*sizeof(long int));
  if (low == NULL) return 1;
  high = malloc(M*sizeof(long int));
  if (high == NULL) {
    xfree(low);
    return 1;
  }
  /* Go ! */
  s = 0;
  low[0] = 0; high[0] = n - 1;
  do {  /* take top request from stack */
    L = low[s]; R = high[s]; s--;
    if (R - L + 1 > nsmall) {
      do {  /* partition a[L] ... a[R] */
        i = L; j = R; x = a[tools_midpoint(L, R)];
        do {
          while (a[i] < x) i++;
          while (x < a[j]) j--;
          if (i <= j) {
            SWAP(a[i], a[j], t); i++; j--;
          }
        } while (i <= j);
        if (j - L < R - i) {
          if (i < R) {  /* stack request for sorting right partition */
            s++; low[s] = i; high[s] = R;
          }
          R = j;  /* continue sorting left partition */
        } else {
          if (L < j) {  /* stack request for sorting left partition */
            s++; low[s] = L; high[s] = j;
          }
          L = i;  /* continue sorting right partition */
        }
      } while (L < R);
    } else {  /* small range: plain exchange sort */
      for (i=L; i < R; i++) {
        for (j=i; j <= R; j++) {
          if (a[i] > a[j]) SWAP(a[i], a[j], t);
        }
      }
    }
  } while (s >= 0);
  xfreeall(low, high);  /* 2.9.8 */
  return 0;
}


/* Calls one of the following sorting functions in a standardised fashion; mode:
   0 = recursive quicksort
   1 = pixel quicksort
   2 = heapsort
   3 = introsort (default)
   4 = non-recursive quicksort ala Niklaus Wirth

   size = size of the array

   Returns 0 on success and 1 on error (a is NULL, size < 2, unknown mode, or the chosen function failed). */
LUALIB_API int tools_sort (double *a, size_t size, int mode) {
  if (a == NULL || size < 2) return 1;  /* error */
  if (mode == 0) {  /* use recursive quicksort implementation */
    tools_dquicksort(a, 0, size - 1);
    return 0;
  }
  if (mode == 1) {
    /* 1.11.6, use pixel sort function: this _can_ be 15 % faster on older systems than the recursive
       quicksort implementation for lists of numbers in random order. Only if the list is already sorted
       in descending order, pixel sort is much slower. pixel_qsort returns 0 on failure. */
    return pixel_qsort(a, size) == 0;
  }
  if (mode == 2) {  /* use heapsort, 2.3.0 */
    tools_dheapsort(a, 0, size - 1);
    return 0;
  }
  if (mode == 3) {  /* use introsort (default), 2.3.0 */
    tools_dintrosort(a, 0, size - 1, 0, 2*sun_log2(size));
    return 0;
  }
  if (mode == 4)  /* use non-recursive quicksort, 2.3.0 */
    return tools_dnonrecursivequicksort(a, size);
  return 1;  /* unknown mode */
}


/* Dr. F.H.Toor's database C functions */

/* Opens an existing file for reading and writing in binary mode and returns its descriptor, or -1 on failure.
   Descriptors 0 to 2 are not accepted: should open() deliver one of them, it is closed again instead of
   being leaked, and -1 is returned. */
LUALIB_API int my_open (const char *file) {
  int hnd;
  hnd = open(file, O_BINARY | O_RDWR);
  if (hnd >= 3) return hnd;
  if (hnd >= 0) close(hnd);  /* valid but rejected descriptor: do not leak it */
  return -1;
}


/* Opens a file as a binary stream: with mode "a+b" if `append` is non-zero, otherwise with mode "rb+",
   i.e. for update (both input and output), in which case the file must exist. */
LUALIB_API FILE *my_fopen (const char *file, int append) {  /* 2.11.5, returns NULL on failure */
  const char *mode = (append) ? "a+b" : "rb+";
  return fopen64(file, mode);
}


/* Read-only open: opens an existing file for reading in binary mode and returns its descriptor, or -1 on
   failure. Descriptors 0 to 2 are not accepted: should open() deliver one of them, it is closed again
   instead of being leaked, and -1 is returned. */
LUALIB_API int my_roopen (const char *file) {
  int hnd;
  hnd = open(file, O_BINARY | O_RDONLY); /*|_S_IWRITE); */
  if (hnd >= 3) return hnd;
  if (hnd >= 0) close(hnd);  /* valid but rejected descriptor: do not leak it */
  return -1;
}


/* Opens a file as a binary stream for read-only operations (mode "rb"). The file must exist. */
LUALIB_API FILE *my_froopen (const char *file) {  /* 2.11.5, returns NULL on failure */
  return fopen64(file, "rb");
}


/* Creates a file (or truncates an existing one) and opens it for reading and writing in binary mode.
   Returns the descriptor, or -1 on failure. Descriptors 0 to 2 are not accepted: should open() deliver
   one of them, it is closed again instead of being leaked, and -1 is returned. */
LUALIB_API int my_create (const char *file) {
  int hnd;
  hnd = open(file, O_BINARY | O_RDWR | O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);  /* GCC 4.3.2 needs a 3rd arg */
  if (hnd >= 3) return hnd;
  if (hnd >= 0) close(hnd);  /* valid but rejected descriptor: do not leak it */
  return -1;
}


/* Creates a binary stream with mode "wb+" (write/update): an empty file is created and opened for update
   (both for input and output). If a file with the same name already exists, its contents are DISCARDED and
   the file is treated as a new empty file.
   Source: http://www.cplusplus.com/reference/cstdio/fopen */
LUALIB_API FILE *my_fcreate (const char *file) {  /* 2.11.5, returns NULL on failure */
  return fopen64(file, "wb+");
}


/* Closes the file descriptor hnd and passes on the result of close(). */
LUALIB_API int my_close (int hnd) {
  int rc = close(hnd);
  return rc;
}


/* Sets the file position of hnd to the absolute offset pos. Returns 1 on success and -1 on failure;
   the new position itself is not returned. */
LUALIB_API off64_t my_seek (int hnd, off64_t pos) {  /* 2.11.0 fix */
  if (lseek64(hnd, pos, SEEK_SET) == -1)
    return -1;
  return 1;
}


/* Length of file: seeks to the end of hnd and returns the resulting offset. On failure a message is
   written to stderr and -1 is returned. The file position is left at the end of the file. */
LUALIB_API off64_t my_lof (int hnd) {  /* length of file 2.11.0 fix */
  off64_t len = lseek64(hnd, 0L, SEEK_END);
  if (len != -1) return len;
  fprintf(stderr, "Agena IO subsystem: length-of-file error\n");
  return len;
}


/* Returns the current file position of hnd, June 28, 2007; extended 0.32.5, 2.37.6. You have to flush
   beforehand to get correct results. On failure a message is written to stderr and -1 is returned. */
LUALIB_API off64_t my_fpos (int hnd) {
  off64_t pos = lseek64(hnd, 0L, SEEK_CUR);
  if (pos != -1) return pos;
  fprintf(stderr, "Agena IO subsystem: position error\n");
  return pos;
}


/* Reads exactly `size` bytes from hnd into data. If fewer bytes are delivered or the read fails, a message
   is written to stderr; the caller receives no error indication. */
LUALIB_API void my_read (int hnd, void *data, size_t size) {
  ssize_t got = read(hnd, data, size);
  if (got == (ssize_t)size) return;
  fprintf(stderr, "Agena IO subsystem: read error\n");
}


/* Reads `size` bytes from hnd into data. Returns 1 if exactly `size` bytes were read and 0 otherwise;
   nothing is printed on failure. */
LUALIB_API size_t sec_read (int hnd, void *data, size_t size) {
  ssize_t got = read(hnd, data, size);
  return (got == (ssize_t)size);
}


/* Reads a 32-bit signed integer from hnd; the bytes are swapped unless the platform is Big Endian.
   *success is set to 1 only if all sizeof(int32_t) bytes could be read, otherwise to 0, in which case -1 is
   returned. Since -1 is also a legitimate value, callers must inspect *success.
   Formerly a short read of 1 to 3 bytes counted as success and returned partly uninitialised data. */
LUALIB_API int32_t sec_readl (int hnd, ssize_t *success) {
  int32_t data = 0;
  *success = (read(hnd, &data, sizeof(int32_t)) == (ssize_t)sizeof(int32_t));
  if (*success == 0) return -1;
#if BYTE_ORDER != BIG_ENDIAN
  tools_swapint32_t(&data);
#endif
  return data;
}


/* Reads one 32-bit unsigned integer, stored in Big Endian notation, from hnd without printing anything.
   *success is set to 1 if all four bytes could be read and to 0 otherwise. On failure the function returns
   (uint32_t)-1, i.e. UINT32_MAX; as this is also a valid payload, always check *success.
   A short read is treated as a failure. The former `data < 0` test has been removed: it can never be true for
   an unsigned value and read data before it was known to be valid. */
LUALIB_API uint32_t sec_readul (int hnd, ssize_t *success) {
  uint32_t data = 0;
  *success = (read(hnd, &data, sizeof(uint32_t)) == (ssize_t)sizeof(uint32_t));
  if (*success == 0) return (uint32_t)-1;
#if BYTE_ORDER != BIG_ENDIAN
  tools_swapuint32_t(&data);  /* convert from Big Endian file order to host order */
#endif
  return data;
}


/* longs are always stored in Big Endian notation */

/* Reads one 32-bit signed integer from hnd and converts it to host byte order. Read errors are reported by
   my_read on stderr only; data is zero-initialised so that a failed read yields 0 instead of an
   indeterminate value. */
LUALIB_API int32_t my_readl (int hnd) {
  int32_t data = 0;
  my_read(hnd, &data, sizeof(int32_t));
#if BYTE_ORDER != BIG_ENDIAN
  tools_swapint32_t(&data);
#endif
  return data;
}


/* Reads a single byte from hnd. Read errors are reported by my_read on stderr only; data is zero-initialised
   so that a failed read yields '\0' instead of an indeterminate value. */
LUALIB_API char my_readc (int hnd) {
  char data = '\0';
  my_read(hnd, &data, sizeof(char));
  return data;
}


/* Writes size bytes from data to hnd. If not all bytes could be written, a message is printed to stderr;
   the caller is not informed otherwise. */
LUALIB_API void my_write (int hnd, void *data, size_t size) {
  ssize_t written = write(hnd, data, size);
  if (written == (ssize_t)size) return;
  fprintf(stderr, "Agena IO subsystem: write error\n");
}


/* Forces data written to descriptor hnd out to the storage device; 2.17.8.
   Returns 0 on success and a non-zero value on failure. */
LUALIB_API int tools_fsync (int hnd) {
#ifdef _WIN32
  /* https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-flushfilebuffers */
  if (!FlushFileBuffers((HANDLE)_get_osfhandle(hnd))) return 1;  /* returns nonzero on success; 2.37.8 */
  return _commit(hnd);
#else
  int status = fsync(hnd);
#if defined(LUA_DOS)
  _flush_disk_cache();  /* 2.17.2 */
#endif
  return status;
#endif
}


/* Checks whether the file cursor is at end of file, returning 1 if it is, 0 if it is not and -1 if an error occurred. 4.3.0 */
LUALIB_API int tools_eof (FILE *f) {
  int hnd, cur, end;
  hnd = fileno(f);
  cur = lseek64(hnd, 0L, SEEK_CUR);
  if (cur < 0) return -1;
  end = lseek64(hnd, 0L, SEEK_END);
  if (end < 0) return -1;
  if (lseek64(hnd, cur, SEEK_SET) < 0) return -1;  /* reset curdor back to original position */
  set_errno(0);  /* reset, better be sure than sorry */
  return cur == end;
}


#ifndef ENOFILE
#define ENOFILE 2
#endif

/* Translates the errno value en into a lower-case, human-readable message; 2.6.1.
   For the error numbers listed below a string literal is returned. For any other number, the text delivered by
   strerror is taken, with its first character mapped through tools_lowercase; in this case the result is
   whatever str_concat returns, or NULL if str_substr signalled an error.
   NOTE(review): str_substr and str_concat presumably return heap memory; if so, `rest` is never released here
   and callers cannot tell a literal from an allocated result - confirm against their definitions. */
LUALIB_API const char *my_ioerror (int en) {  /* 2.6.1 */
  switch (en) {
    case EAGAIN:  return "write operation was not complete";
    case EBADF:   return "file descriptor is invalid or not open";
    case EFBIG:   return "size of the file becomes too large";
    case EINTR:   return "operation interrupted by a signal";
    case EIO:     return "hardware error encountered";
    case ENOSPC:  return "device is full";
    case EPIPE:   return "pipe or FIFO is not open or broken";
    case EINVAL:  return "invalid file handle, or position/size not aligned to block size";  /* 4.10.7 change */
    /* added 2.10.4 */
    case ENOFILE: return "file or directory does not exist";
    case ENXIO:   return "device or address does not exist";
    case ENOMEM:  return "not enough space available";
    case EACCES:  return "permission denied";
    case EEXIST:  return "file or directory already exists";
    case ENODEV:  return "device does not exist";
    case ENOTDIR: return "target is not a directory";
    case EISDIR:  return "target is a directory";
    case ENFILE:  return "there are too many open files in system";
    case EMFILE:  return "there are too many open files";
    case ENOTTY:  return "I/O control operation is inappropriate";
    case EROFS:   return "file system is read-only";
    case EMLINK:  return "there are too many links";
    case ENAMETOOLONG:  return "filename is too long";
    case ENOLCK:  return "locks are not available";
    case ENOTEMPTY:  return "directory is not empty";
#ifndef LUA_DOS
    case EILSEQ:  return "byte sequence is illegal";
    case ERANGE:  return "result is too large";
#endif
    case EDOM:    return "domain error";
    case ESPIPE:  return "seek is invalid (on a pipe ?)";
    case EXDEV:   return "link is improper";
    case EBUSY:   return "strerror reports \'Resource device\'";
    case EFAULT:  return "bad address";
    case EPERM:   return "operation is not permitted";
    case ESRCH:   return "process not found";
    case E2BIG:   return "argument list is too long";
    case ENOEXEC: return "exec format is invalid";
    case ECHILD:  return "child process does not exist";
    /* ELOOP, ENOENT and ETXTBSY usually clash with values queried above on most platforms */
    default: {  /* 2.10.4 improvement */
      int err;
      char firstletter[2];
      char *rest, *errmsg;
      errmsg = strerror(en);
      /* build a one-character string holding the lower-cased first letter of the system message */
      firstletter[0] = tools_lowercase[uchar(errmsg[0])];
      firstletter[1] = '\0';
      /* everything after the first character of the system message */
      rest = str_substr(errmsg, 1, tools_strlen(errmsg) - 1, &err);  /* 2.17.8 tweak */
      if (err == -1) return NULL;
      return str_concat(firstletter, rest, NULL);
    }
  }
}


/* longs are always stored in Big Endian notation */

/* Writes the 32-bit signed integer data to hnd, converting it from host byte order to Big Endian first.
   Write errors are reported by my_write on stderr only. */
LUALIB_API void my_writel (int hnd, int32_t data) {
  int32_t out = data;
#if BYTE_ORDER != BIG_ENDIAN
  tools_swapint32_t(&out);
#endif
  my_write(hnd, &out, sizeof out);
}


/* Writes the single byte data to hnd. Write errors are reported by my_write on stderr only. */
LUALIB_API void my_writec (int hnd, char data) {
  char out = data;
  my_write(hnd, &out, sizeof out);
}


/* Puts a non-blocking, exclusive lock on size bytes of the file hnd, beginning at position start.
   In UNIX and Windows, if size == 0 then the entire file from start onwards is locked.
   Returns 0 on success and -1 on failure. On platforms other than Windows, UNIX and Mac OS X nothing is done
   and 0 is returned.

   Windows: _locking works relative to the file cursor and takes the byte count as a C long, so the cursor is
   temporarily moved to start and the count is limited to LONG_MAX (the former (off64_t)pow(2, 63) does not fit
   into a signed 64-bit integer). The cursor is restored afterwards.
   UNIX: the lock type must be exactly one of F_RDLCK, F_WRLCK or F_UNLCK; or-ing F_WRLCK and F_RDLCK yields an
   unrelated value on systems where these constants are not bit flags, so F_WRLCK (exclusive) is used alone. */
LUALIB_API int my_lock (int hnd, off64_t start, off64_t size) {
  #ifdef _WIN32
  /* see: http://msdn.microsoft.com/en-us/library/8054ew2f.aspx, topic `_locking` */
  int r;
  off64_t oldpos, cursor;
  if (size == 0 || size > LONG_MAX) size = LONG_MAX;  /* largest region _locking can express */
  /* reset file cursor to `start' */
  oldpos = my_fpos(hnd);
  if (oldpos == -1) return -1;
  cursor = lseek64(hnd, start, SEEK_SET);
  if (cursor == -1) return -1;
  /* lock the file */
  r = _locking(hnd, _LK_NBLCK, (long)size);
  /* set file cursor to original file position */
  cursor = lseek64(hnd, oldpos, SEEK_SET);
  if (cursor == -1 || r != 0) return -1;
  #elif defined(__unix__) || defined(__APPLE__)
  struct flock file;
  memset(&file, 0, sizeof file);  /* do not hand uninitialised members to the kernel */
  file.l_type = F_WRLCK;         /* exclusive lock, keeps out both readers and writers */
  file.l_whence = SEEK_SET;      /* offset is relative to the beginning of the file */
  file.l_start = (off_t)start;   /* locking from file position `start` */
  file.l_len = (off_t)size;      /* 0 means: up to the end of the file, however large it grows */
  if (fcntl(hnd, F_SETLK, &file) == -1)
    return -1;
  #endif
  return 0;
}


/* Removes a lock from size bytes of the file hnd, beginning at position start; the counterpart of my_lock.
   If size == 0 the region from start to the end of the file is unlocked.
   Returns 0 on success and -1 on failure. On platforms other than Windows, UNIX and Mac OS X nothing is done
   and 0 is returned.

   UNIX: a lock is released by issuing the F_SETLK command with the lock type set to F_UNLCK. F_UNLCK is a lock
   type, not an fcntl command; passing it as the command (as done formerly) executes an unrelated fcntl
   operation and leaves the lock in place.
   Windows: see my_lock for the cursor handling and the LONG_MAX limit. */
LUALIB_API int my_unlock (int hnd, off64_t start, off64_t size) {
  #ifdef _WIN32
  /* see: http://msdn.microsoft.com/en-us/library/8054ew2f.aspx, topic `_locking` */
  int r;
  off64_t oldpos, cursor;
  if (size == 0 || size > LONG_MAX) size = LONG_MAX;  /* largest region _locking can express */
  /* reset file cursor to `start' */
  oldpos = my_fpos(hnd);
  if (oldpos == -1) return -1;
  cursor = lseek64(hnd, start, SEEK_SET);
  if (cursor == -1) return -1;
  /* unlock the file */
  r = _locking(hnd, LK_UNLCK, (long)size);
  /* set file cursor to original file position */
  cursor = lseek64(hnd, oldpos, SEEK_SET);
  if (cursor == -1 || r != 0) return -1;
  #elif defined(__unix__) || defined(__APPLE__)
  struct flock file;
  memset(&file, 0, sizeof file);  /* do not hand uninitialised members to the kernel */
  file.l_type = F_UNLCK;         /* release the lock */
  file.l_whence = SEEK_SET;      /* offset is relative to the beginning of the file */
  file.l_start = (off_t)start;
  file.l_len = (off_t)size;      /* 0 means: up to the end of the file */
  if (fcntl(hnd, F_SETLK, &file) == -1)
    return -1;
  #endif
  return 0;
}


/* Moves a region of the file hnd to another position within the same file; 2.11.0 fix.
   Example: my_move(hnd, low*4L+24L, low*4L+28L, cnt*4L+24L);

   fpos: position to move from, tpos: position to move to. Note that `size` is used as the END offset of the
   source region: the number of bytes moved is size - fpos. Nothing is done if that number is less than 1.
   The region is copied in chunks of BASE_MOVELEN bytes; when moving towards the beginning of the file the
   chunks are processed front to back, when moving towards the end back to front, so that overlapping regions
   are not overwritten before they have been read.
   Negative positions are rejected with a message on stderr; formerly the copy was attempted anyway, with
   seeks that could only fail. The file cursor is not restored. */
LUALIB_API void my_move (int hnd, off64_t fpos, off64_t tpos, off64_t size) {  /* 2.11.0 fix */
  off64_t segment;
  char buff[BASE_MOVELEN];
  segment = size - fpos;
  if (segment < 1)
    return;
  if (fpos < 0 || tpos < 0) {
    fprintf(stderr, "Agena IO subsystem: move error\n");
    return;
  }
  if (fpos > tpos) {  /* shift towards the beginning: copy front to back */
    while (segment >= BASE_MOVELEN) {
      my_seek(hnd, fpos);
      my_read(hnd, buff, BASE_MOVELEN);
      my_seek(hnd, tpos);
      my_write(hnd, buff, BASE_MOVELEN);
      fpos += BASE_MOVELEN;
      tpos += BASE_MOVELEN;
      segment -= BASE_MOVELEN;
    }
    if (segment > 0) {  /* remainder at the end of the region */
      my_seek(hnd, fpos);
      my_read(hnd, buff, segment);
      my_seek(hnd, tpos);
      my_write(hnd, buff, segment);
    }
  } else if (fpos < tpos) {  /* shift towards the end: copy back to front */
    while (segment >= BASE_MOVELEN) {
      my_seek(hnd, fpos + segment - BASE_MOVELEN);
      my_read(hnd, buff, BASE_MOVELEN);
      my_seek(hnd, tpos + segment - BASE_MOVELEN);
      my_write(hnd, buff, BASE_MOVELEN);
      segment -= BASE_MOVELEN;
    }
    if (segment > 0) {  /* remainder at the start of the region */
      my_seek(hnd, fpos);
      my_read(hnd, buff, segment);
      my_seek(hnd, tpos);
      my_write(hnd, buff, segment);
    }
  }
}


/* my_expand: core functionality for expanding a database by a) reshifting all records, thus
   b) adding more index entries.
   Assumes that the file is open and locked when invoked.
   Restores the file cursor to the position before it was called.

   written by Alexander Walz on June 28, 2007

   Arguments:
      hnd   - the file handle
      mrc   - maximum number of records currently allowed
      cnt   - current number of actual records
      count - number of records to be added
      error - output: set to 1 if the work buffer could not be allocated, and to 0 on success

   Every index entry is a 4-byte integer, so the data section is shifted towards the end of the file
   by count*4 bytes.

   NOTE(review): if the buffer allocation fails, the function returns after count zero words have already
   been appended to the file and without restoring the file cursor.
   NOTE(review): all positions are held in int32_t although my_fpos and my_lof deliver off64_t values.
   NOTE(review): with count == 0, the shifting loop below does not advance (bufsize is 0) - callers presumably
   always pass count > 0; confirm. */

LUALIB_API void my_expand (int hnd, int mrc, int cnt, int count, int *error) {
  int32_t dsbegin, dsend, offset, j, index, bufsize, cfpos, commentpos, commentlen, newindex;
  int i;
  char *buffer;
  cfpos = my_fpos(hnd);           /* save the current file position for later restoration */
  /* read the position of the comment; 0 means that there is no comment */
  my_seek(hnd, BASECOMMENT);
  commentpos = my_readl(hnd);
  commentlen = 0;
  if (commentpos != 0) {
    my_seek(hnd, commentpos);
    commentlen = my_readl(hnd) + 4L;  /* value stored at the comment position plus 4 bytes */
  }
  dsbegin = mrc*4L + BASE_OFFSET;   /* start of current dataset section */
  dsend = my_lof(hnd) - 1;          /* end of current dataset section */
  /* write 0L at end of file count times */
  my_seek(hnd, dsend + 1);
  for (i=0; i < count; i++) {
    my_writel(hnd, 0L);          /* write zeros */
  }
  offset = count*4L;             /* number of bytes the data section is shifted by */
  buffer = malloc(offset*sizeof(char));
  if (buffer == NULL) {
    *error = 1;
    return;
  }
  /* beginning with the last data set move data sets to new eof */
  bufsize = offset;
  j = dsend + 1;                 /* j: end (exclusive) of the part that still has to be moved */
  while (j > dsbegin) {
    if ((j - dsbegin) >= offset) {  /* a full chunk of offset bytes is left */
      bufsize = offset;
      my_seek(hnd, j - bufsize);
      my_read(hnd, buffer, bufsize);
      my_seek(hnd, j);
      my_write(hnd, buffer, bufsize);
    } else {  /* last few bytes need special treatment */
      my_seek(hnd, dsbegin);
      my_read(hnd, buffer, j - dsbegin);
      my_seek(hnd, j + bufsize - (j - dsbegin));
      my_write(hnd, buffer, j - dsbegin);
    }
    j -= bufsize;
  }
  /* zero start of old data section, 2.11.0 RC2 fix */
  my_seek(hnd, dsbegin);
  for (i=0; i < count; i++)
    my_writel(hnd, 0L);
  /* update indices */
  for (j=0; j < cnt; j++) {
    my_seek(hnd, j*4L + BASE_OFFSET);
    index = my_readl(hnd);
    my_seek(hnd, j*4L + BASE_OFFSET);  /* go back to overwrite the entry just read */
    /* entries greater than the comment position are additionally advanced by commentlen */
    newindex = (index > commentpos) ? index + offset + commentlen : index + offset;
    my_writel(hnd, newindex);
  }
  /* add new indices to end of index section */
  for (j=0; j < count; j++) {
    my_writel(hnd, 0L);
  }
  /* update maximum number of entries field */
  my_seek(hnd, MAXNRECORDS);
  my_writel(hnd, mrc + count);
  /* update position of comment if present */
  if (commentpos != 0) {
    my_seek(hnd, BASECOMMENT);
    my_writel(hnd, commentpos + offset);
  }
  my_seek(hnd, cfpos);  /* reset cursor to position before the function was called */
  xfree(buffer);
  *error = 0;
}


#if (defined(__unix__) && !defined(LUA_DOS)) || defined(__APPLE__)

static struct termios new_io, old_io;

/* Taken from `C von A bis Z` by Juergen Wolf, 2nd Ed., pp. 452.
   Switches the terminal attached to fd into cbreak mode: the local flags ECHO and ICANON are cleared, and
   the control characters are set so that a read delivers after one byte (VMIN = 1) without a timeout
   (VTIME = 0). The previous settings are saved in old_io so that they can be restored later.
   Returns 1 on success and -1 on failure. */
LUALIB_API int cbreak (int fd) {
  /* save the current terminal settings */
  if (tcgetattr(fd, &old_io) == -1) return -1;
  /* derive the cbreak settings from them */
  new_io = old_io;
  new_io.c_lflag &= ~(ECHO | ICANON);
  new_io.c_cc[VMIN] = 1;
  new_io.c_cc[VTIME]= 0;
  /* now activate cbreak mode */
  return (tcsetattr(fd, TCSAFLUSH, &new_io) == -1) ? -1 : 1;
}


/* Reads one character from stdin without waiting for the Enter key and without echoing it.
   The terminal is put into cbreak mode for the duration of the read and restored from old_io afterwards.
   If cbreak mode cannot be entered, a message is printed to stderr and the process is terminated. */
LUALIB_API int getch (void) {
  int key;
  if (cbreak(STDIN_FILENO) == -1) {
    fprintf(stderr, "Agena IO subsystem: error in function cbreak (called by getch).\n");
    exit(EXIT_FAILURE);
  }
  key = getchar();
  tcsetattr(STDIN_FILENO, TCSANOW, &old_io);  /* restore the previous terminal mode */
  return key;
}

#endif

/* Copyright (c) 2008    Johnathon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

Source: http://code.google.com/p/linux-kbhit/linux_kbhit.tar.gz

*/

#if (defined(__unix__) && !defined(LUA_DOS))  /* 2.3.4, use DJGPP's built-in kbhit function instead to avoid stdin corruption */

/* Checks whether a key has been pressed; does not work on Mac.
   The terminal is temporarily switched to non-canonical mode without echo, with VMIN = 0 and VTIME = 1, and
   one character is requested with getchar(); note that this character is consumed from stdin.
   Returns 1 if a character was read, 0 if not, and -1 if the terminal settings could not be queried or
   changed. */
LUALIB_API int kbhit (void) {
  struct termios saved, probe;
  int key;
  if (tcgetattr(STDIN_FILENO, &saved) == -1) return -1;
  probe = saved;
  probe.c_lflag &= ~(ICANON | ECHO);
  probe.c_cc[VMIN]  = 0;
  probe.c_cc[VTIME] = 1;
  if (tcsetattr(STDIN_FILENO, TCSANOW, &probe) == -1) return -1;
  key = getchar();  /* key == -1 means key not pressed */
  if (tcsetattr(STDIN_FILENO, TCSANOW, &saved) == -1) return -1;
  return (key != -1);
}

#endif


#if defined(__APPLE__)
#include <sys/select.h>

/* Mac OS X version of kbhit; found on: http://stackoverflow.com/questions/448944/c-non-blocking-keyboard-input,
   originally written by Alnitak.
   Polls stdin with select() and a zero timeout while the terminal is in non-canonical mode without echo.
   Returns -1 if an error has occurred, 0 if there is no input, 1 if a key has been pressed.
   XXX this implementation most of the times still echoes the key being pressed at stdin */

LUALIB_API int kbhit (void) {
  struct termios saved, raw;
  struct timeval nowait = {0L, 0L};  /* zero timeout: select returns immediately */
  fd_set readable;
  int ready, restored;
  if (tcgetattr(STDIN_FILENO, &saved) == -1) return -1;  /* save current terminal mode */
  raw = saved;
  /* cfmakeraw(&raw); */  /* see the GNU C manual for the more info on this function */
  raw.c_lflag &= ~(ICANON | ECHO);
  raw.c_cc[VMIN]  = 1;  /* default: 1 */
  raw.c_cc[VTIME] = 0;  /* default: 0 */
  if (tcsetattr(STDIN_FILENO, TCSANOW, &raw) == -1) return -1;  /* change terminal mode */
  FD_ZERO(&readable);
  FD_SET(STDIN_FILENO, &readable);
  ready = select(FD_SETSIZE, &readable, NULL, NULL, &nowait);  /* is anything waiting on stdin ? */
  restored = tcsetattr(STDIN_FILENO, TCSANOW, &saved);  /* reset terminal mode */
  FD_CLR(STDIN_FILENO, &readable);
  if (restored == -1) return -1;
  return ready;
}

#endif


#ifdef __OS2__  /* 2.3.4 */

/* OS/2 version of kbhit: peeks at the keyboard via KbdPeek and returns 1 if any of the bits 0xe0 of the
   status field fbStatus is set, and 0 otherwise. */
LUALIB_API int kbhit (void) {
  KBDKEYINFO info;
  KbdPeek(&info, 0);
  if ((info.fbStatus & 0xe0) == 0) return 0;
  return 1;
}

#endif

/* Stores the current working directory in buffer, which must already have been allocated by the caller;
   getcwd is told that PATH_MAX bytes are available. Returns 0 on success and -1 otherwise.
   In case of failure the function releases buffer itself with xfree, so the caller must not free it again.
   Agena 1.0.4, Agena 1.5.0, Agena 1.6.0 Valgrind */
LUALIB_API int tools_cwd (char *buffer) {
  if (buffer != NULL) {
    if (getcwd(buffer, PATH_MAX) == buffer) return 0;
    xfree(buffer);
  }
  return -1;
}


#define NANO_SECOND_MULTIPLIER  1000000000L  /* 1 second = 1,000,000,000 nanoseconds */
#define MICRO_SECOND_MULTIPLIER 1000000L     /* 1 second = 1,000,000 microseconds */
/* Suspends execution for x seconds, where x may have a fractional part; 2.4.0.
   Returns 0 if x is negative, if the platform is not supported, or if the underlying sleep call reported
   a failure; returns 1 otherwise. */
LUALIB_API int tools_wait (double x) {
  if (x < 0) return 0;
#ifdef _WIN32
  Sleep(x*1000);  /* Sleep expects milliseconds */
  return 1;
#elif defined (LUA_DOS) || (defined(__SOLARIS))
  return usleep(x*MICRO_SECOND_MULTIPLIER) == 0;
#elif defined(__unix__) || defined(__OS2__) || defined(__APPLE__)
  {
    struct timespec pause;
    pause.tv_sec  = (time_t)x;  /* whole seconds */
    pause.tv_nsec = (long int)(luai_numfrac(x)*(lua_Number)NANO_SECOND_MULTIPLIER);  /* fractional part */
    return nanosleep(&pause, NULL) == 0;  /* non-busy wait */
  }
#elif defined(__HAIKU__)
  if (tools_isfrac(x)) x = (int)(sun_trunc(x) + 1);  /* only whole seconds available: round up */
  sleep(x);
  return 1;
#else
  return 0;
#endif
}


static int ndays[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

/* Checks whether the given date and time are valid, returning 1 if so and 0 otherwise; changed 2.10.0.
   Months run from 1 to 12, hours from 0 to 24, minutes and seconds from 0 to 59, milliseconds from 0 to 999.
   Hour 24 is accepted only together with minute 0 and second 0 (midnight). The day must not exceed the length
   of the month, with February 29 accepted in leap years only. The year itself is not range-checked. */
LUALIB_API int tools_checkdatetime (int year, int month, int day, int hour, int minute, int second, int mseconds) {
  int lastday;
  if (month < 1 || month > 12 || day < 1) return 0;
  if (hour < 0 || hour > 24) return 0;
  if (minute < 0 || minute > 59) return 0;
  if (second < 0 || second > 59) return 0;
  if (mseconds < 0 || mseconds > 999) return 0;
  if (hour == 24 && (minute != 0 || second != 0)) return 0;  /* not midnight ? */
  /* now check for February 29 */
  lastday = ndays[month - 1] + (month == 2)*tools_isleapyear(year);
  return day <= lastday;
}


/* Converts a broken-down local date and time into the value delivered by mktime64; Agena 1.8.0, added 2.9.8.
   Returns -2 if the date or time is invalid according to tools_checkdatetime (1.9.1), and -1 if mktime64
   could not convert it. *alternative is always reset to 0 first; if mktime64 failed for a date before
   1970, or within the first hour of 1970/1/1, it receives a day count computed from the Julian date instead
   (2.16.2 fix for the MinGW mktime bug with such dates, 2.16.1 patch for buggy GCC mktime[64]).
   2415020.5 = 1900/1/1 = LSD Day 1; alternative is HUGE_VAL if undeterminable. */
LUALIB_API Time64_T tools_maketime (int year, int month, int day, int hour, int minute, int second, double *alternative) {
  struct TM parts;
  Time64_T stamp;
  *alternative = 0;
  if (!tools_checkdatetime(year, month, day, hour, minute, second, 0)) return -2;
  parts.tm_sec = second;
  parts.tm_min = minute;
  parts.tm_hour = hour;
  parts.tm_mday = day;
  parts.tm_mon = month - 1;     /* months are zero-based here */
  parts.tm_year = year - 1900;  /* years are counted from 1900 here */
  parts.tm_isdst = -1;
  stamp = mktime64(&parts);
  if (stamp != -1) return stamp;
  if (year < 1970 || (year == 1970 && month == 1 && day == 1 && hour == 0)) {
    *alternative = iauJuliandate(year, month, day, hour, minute, second) - 2415020.5 + 1 + ((year == 1900 && month > 2) || year > 1900);
  }
  return -1;
}


/* Returns the current time as delivered by tools_ftime, in seconds with the milliseconds added as the
   fractional part, or -1 if the time could not be determined. 2.12.4, patched 2.12.6, changed 2.14.0.
   On platforms other than Windows, OS/2, UNIX and Mac OS X the result is always -1. */
LUALIB_API double tools_time (void) {
  time_t now = (time_t)(-1);
  int32_t millis = 0;  /* 2.16.5 patch for Mac OS X */
#if defined(_WIN32) || defined(__OS2__)
  struct timeb tp;
  tools_ftime(&tp);  /* UTC, 2.12.6 patch */
  now = tp.time;
  millis = tp.millitm;
#elif defined(__unix__) || defined(__APPLE__)
  struct timeb tp;
  if (tools_ftime(&tp) == 0) { /* set milliseconds only when query has been successful */
    now = tp.time;
    millis = tp.millitm;
  }
#endif
  if (now == (time_t)(-1)) return -1;
  return (millis != -1) ? (double)now + 0.001*millis : (double)now;
}


/* Copyright (c) 2016 Phoenix Systems
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   1. Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   SUCH DAMAGE.

   Taken from newlib-4.2.0/newlib/libc/sys/phoenix/time.c */

/* 2.25.5, ftime and stime have been removed with GLIBC 2.31 */
#if (defined(__linux__) && defined(__GNUC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 31)))

#ifndef set_errno
#define set_errno(x) errno = (x)
#endif

/* Replacement for stime, which has been removed from GLIBC 2.31: sets the system's real-time clock to *t
   (seconds since the epoch). Returns 0 on success and -1 on failure.
   On failure errno describes the problem: EINVAL if t is NULL, otherwise the value set by clock_settime.
   On success errno is left as it was before the call.
   The former `errno = -ret` came from a system whose calls return negated error numbers; clock_settime in
   GLIBC returns -1 and sets errno itself, so that assignment always overwrote the real error with 1. */
LUALIB_API int tools_stime (const time_t *t) {
  int old_errno;
  struct timespec tp;
  if (t == NULL) {
    errno = EINVAL;
    return -1;
  }
  old_errno = errno;
  set_errno(0);
  tp.tv_sec = *t;
  tp.tv_nsec = 0;
  if (clock_settime(CLOCK_REALTIME, &tp) != 0)
    return -1;  /* errno has been set by clock_settime */
  set_errno(old_errno);
  return 0;
}

/* Replacement for ftime, which has been removed from GLIBC 2.31: fills *t with the current time.
   t->time receives the seconds and t->millitm the milliseconds (0 .. 999) read from the real-time clock,
   t->timezone is derived from tm_gmtoff of the local time, in minutes, and t->dstflag from tm_isdst.
   Returns 0 on success and -1 on failure; on success errno is left as it was before the call.
   The milliseconds are obtained by integer division: rounding (as done formerly) could deliver 1000 for the
   last half millisecond of a second, which is outside the valid range of millitm.
   NOTE(review): classic ftime reports timezone as minutes WEST of Greenwich, whereas tm_gmtoff counts
   seconds EAST of it - the sign used here should be confirmed against the callers. */
LUALIB_API int tools_ftime (struct timeb *t) {
  int old_errno;
  struct tm tp;
  struct timespec spec;
  old_errno = errno;
  set_errno(0);
  if (t == NULL) {
    errno = EINVAL;
    return -1;
  }
  t->time = 0;      /* 3.3.1: better be sure than sorry */
  t->millitm = 0;   /* ditto */
  t->timezone = 0;  /* ditto */
  t->dstflag = 0;   /* ditto */
  if (clock_gettime(CLOCK_REALTIME, &spec) == -1) return -1;  /* 3.3.1 improvement for Linux: get milliseconds */
  t->time = spec.tv_sec;
  t->millitm = (unsigned short)(spec.tv_nsec/1000000L);  /* truncate, result is always in 0 .. 999 */
  if (localtime_r(&t->time, &tp) == NULL) return -1;
  t->timezone = tp.tm_gmtoff/60;
  t->dstflag  = tp.tm_isdst;
  set_errno(old_errno);
  return 0;
}
#endif


/* Executes the CPUID instruction for leaf `call` and stores the four result registers in *eax, *ebx, *ecx
   and *edx; 2.14.3. The leaf is only queried if it is not greater than the value CPUID leaf 0 delivers in
   eax. Returns 0 on success; returns -1 - leaving the output arguments untouched - if the leaf is above that
   limit or if Agena has been compiled without __CPUID. */
LUALIB_API int tools_cpuid (uint32_t call, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
#ifdef __CPUID
  uint32_t ra, rb, rc, rd;
  __cpuid(0, ra, rb, rc, rd);  /* ra now holds the upper limit for `call` */
  if (call > ra) return -1;
  __cpuid(call, ra, rb, rc, rd);
  *eax = ra;
  *ebx = rb;
  *ecx = rc;
  *edx = rd;
  return 0;
#else
  return -1;
#endif
}


/* Determination of endianness; taken from Juergen Wolf, `C von A bis Z`, 2nd edition, Galileo Computing.
   Looks at the byte stored at the lowest address of a known integer: returns 0 if it is the least
   significant byte (little endian), 1 if it is the byte found at bits 24 to 31 (big endian), and -1 if it
   is neither. */

LUALIB_API char tools_endian (void) {
  const unsigned int probe = 0x22CCDDEE;
  const unsigned char first = *(const unsigned char *)&probe;
  if (first == (probe & 0xFF)) return 0;          /* little endian */
  if (first == ((probe >> 24) & 0xFF)) return 1;  /* big endian */
  return -1;
}


/* Returns the contents of the given environment variable, or NULL if it does not exist, if name is NULL, or
   if memory could not be allocated. To avoid the value from being overwritten in subsequent calls, it is
   duplicated, so FREE it !!! 2.16.11

   UNIX branch: the variable name must match an entry of environ completely. Formerly only as many characters
   as the ENTRY's name has were compared, so that looking up e.g. `PATHEXT` delivered the value of `PATH`.
   Entries without a `=` are skipped now instead of being dereferenced.
   Windows branch: the buffer is released if the second query does not deliver the expected length. */
LUALIB_API char *tools_getenv (const char *name) {
/* tested in Windows, Mac OS X, Solaris 10, Linux, OS/2, DOS */
  if (name == NULL) return NULL;
#if defined(_WIN32)
  /* 2.8.6, taken from lua_sys package written by Nodir Temirkhodjaev, <nodir.temir@gmail.com> */
  unsigned int len;
  len = GetEnvironmentVariableA(name, NULL, 0);  /* get number of characters to be stored in buffer, _including_ terminating \0 */
  if (len) {
    char *buf = tools_stralloc(len - 1);  /* 2.16.12 change */
    if (buf) {
      if (GetEnvironmentVariableA(name, buf, len) == len - 1) return buf;  /* now GetEnvironmentVariableA returns #chars excluding \0 */
      xfree(buf);  /* the variable changed or vanished between the two calls */
    }
  }
  return NULL;
#elif defined(__OS2__)     /* 2.16.11, see: http://www.edm2.com/index.php/DosScanEnv */
  PSZ r = NULL;            /* stores environment variable */
  APIRET rc = NO_ERROR;    /* return code */
  rc = DosScanEnv((PCSZ)name, &r);  /* get contents of the environment variable */
  return (rc != NO_ERROR) ? NULL : tools_strdup((char *)r);
#elif defined(LUA_DOS)
  char *result = getenv(name);
  return (result) ? tools_strdup(result) : NULL;  /* 2.17.0a fix to prevent segfaults with more recent versions of DJGPP if strdup is called with NULL */
#elif defined(__unix__) || defined(__APPLE__)
  /* char *result = getenv(name); */  /* getenv will not work at least in Linux, Solaris */
  const char *nam, *val, *end, **env;  /* taken from os_environ in loslib.c */
  size_t namelen = strlen(name);
  for (env = (const char **)environ; (nam = *env); env++) {
    val = strchr(nam, '=');
    if (val == NULL) continue;  /* malformed entry without a value part */
    if ((size_t)(val - nam) != namelen || tools_strncmp(name, nam, namelen) != 0) continue;  /* other variable */
    val++;  /* the value starts right after the `=` */
    end = strchr(val, '\0');
    {
      /* presumably tools_stralloc(n) reserves n characters plus a terminating \0, as in the Windows branch */
      char *result = tools_stralloc(end - val);
      if (result == NULL) return NULL;
      tools_memcpy(result, val, end - val);
      return result;
    }
  }
  return NULL;
#else
  return NULL;
#endif
}


/*							powi.c
 *
 *	Real raised to integer power
 *
 * SYNOPSIS:
 *
 * double x, y, powi();
 * int n;
 *
 * y = powi( x, n );
 *
 * DESCRIPTION:
 *
 * Returns argument x raised to the nth power.
 * The routine efficiently decomposes n as a sum of powers of
 * two. The desired power is a product of two-to-the-kth
 * powers of x.  Thus to compute the 32767 power of x requires
 * 28 multiplications instead of 32767 multiplications.
 *
 * ACCURACY:
 *                      Relative error:
 * arithmetic   x domain   n domain  # trials      peak         rms
 *    DEC       .04,26     -26,26    100000       2.7e-16     4.3e-17
 *    IEEE      .04,26     -26,26     50000       2.0e-15     3.8e-16
 *    IEEE        1,2    -1022,1023   50000       8.6e-14     1.6e-14
 *
 * Returns MAXNUM on overflow, zero on underflow.
 *
 * NOTE: This function is eight percent faster than equivalent
 * tools_intpow when overflow detection has been switched off.
 *
 * Cephes Math Library Release 2.3:  March, 1995
 * Copyright 1984, 1995 by Stephen L. Moshier */

#define NEGZERO    -0.0

/* Raises x to the integer power nn by repeated squaring (Cephes powi, adapted for Agena).
   Deviations from the Cephes description above that are visible in the code below:
   - nn == INT_MIN delivers AGN_NAN, as -nn cannot be represented;
   - x == 0 with nn <= 0 delivers AGN_NAN; x == 0 with odd positive nn delivers x itself (thus keeping the
     sign of a zero), with even positive nn it delivers 0.0;
   - overflow and underflow are detected in advance only if OVERFLOWDETECTION is defined; in this case
     HUGE_VAL or 0.0 is delivered. */
LUALIB_API double cephes_powi (double x, int nn) {
  int n, sign, asign;  /* n: |nn|; sign: -1 if nn is negative; asign: non-zero if the result is negative */
  double w, y;         /* w: running square of x; y: accumulated result */
#ifdef OVERFLOWDETECTION
  int e, lx;
  double s;
#endif
  /* See pow.c for these tests.  */
  if (nn == INT_MIN) return AGN_NAN;  /* undefined or +/- infinity ? -> protect against an infinite loop;
    n is an int, not a double, so isinf/isnan do not work here. */
  if (x == 0.0) {
    /* if (nn == 0) return 1.0; */
    if (nn <= 0) return AGN_NAN;  /* changed from infinity to undefined */
	  else {
      if (nn & 1) return x;
	    else return 0.0;
    }
  }
  if (nn == 0) return 1.0;
  if (nn == -1) return 1.0/x;
  /* work with |x| and |nn| from now on and remember the signs */
  if (x < 0.0) {
    asign = -1;
    x = -x;
  } else
    asign = 0;
  if (nn < 0) {
    sign = -1;
    n = -nn;
  } else {
    sign = 1;
    n = nn;
  }
  /* Even power will be positive. */
  if ((n & 1) == 0) asign = 0;
  /* Overflow detection; calculate approximate logarithm of answer; we will switch it off for it slows down exponentiation
     significantly. */
#ifdef OVERFLOWDETECTION
  s = sun_frexp(x, &lx);
  e = (lx - 1)*n;
  if (e == 0 || e > 64 || e < -64) {
  	s = (s - 7.0710678118654752e-1)/(s + 7.0710678118654752e-1);
	  s = (2.9142135623730950*s - 0.5 + lx)*nn*LOGE2;
  } else {
    s = LOGE2*e;
	}
  if (s > RGAMMAXLOG) {
    y = HUGE_VAL;
    goto done;
  }
  if (s < RGAMMINLOG) {
    y = 0.0;
    goto done;
  }
  /* Handle tiny denormal answer, but with less accuracy
   * since roundoff error in 1.0/x will be amplified.
   * The precise demarcation should be the gradual underflow threshold.
   */
  if (s < (-RGAMMAXLOG + 2.0) && sign < 0) {
    x = 1.0/x;
    sign = -sign;
  }
#endif
  /* First bit of the power */
  if (n & 1) y = x;
  else y = 1.0;
  w = x;
  n >>= 1;
  while (n) {
    w *= w;  /* arg to the 2-to-the-kth power */
    if (n & 1) y *= w;  /* if that bit is set, then include in product */
    n >>= 1;
  }
  if (sign < 0) y = 1.0/y;  /* negative exponent: take the reciprocal */
#ifdef OVERFLOWDETECTION
done:
#endif
  if (asign) {
    /* odd power of negative number */
    if (y == 0.0) y = NEGZERO;
    else y = -y;
  }
  return y;
}


/* Raises x to the integer power n; use cephes_powi instead, it is eight percent faster.
   Returns AGN_NAN if n is INT_MIN (it cannot be negated), and if x is zero while n is zero or negative
   (0.32.4 patch, 2.5.1 fix). For negative n the reciprocal of x is raised to the power -n.

   Square-and-Multiply, takes log(n) operations instead of n.
   2.29.1 change, see https://stackoverflow.com/questions/213042/how-do-you-do-exponentiation-in-c by user kallikak;
   also see: https://de.wikipedia.org/wiki/Bin%C3%A4re_Exponentiation:
   "This algorithm has been invented in India around 200 BC, and has been written down in the Chandah-sutra." */
LUALIB_API double tools_intpow (double x, int n) {
  double result = 1.0;
  int e = n;
  if (e == INT_MIN) return AGN_NAN;  /* protects against an infinite loop; isinf/isnan do not work with ints */
  if (e <= 0) {
    if (x == 0.0) return AGN_NAN;
    x = 1.0/x;
    e = -e;
  }
  for (; e != 0; e >>= 1) {
    if (e & 1) result *= x;  /* this bit of the exponent is set: include the current square */
    x *= x;
  }
  return result;
}


/* Exponentiation modulo a number: computes (x^n) mod m by Square-and-Multiply, reducing modulo m after every
   multiplication; 3.4.6, see also math.powmod.

   Returns AGN_NAN if
   - m is zero (the remainder is undefined),
   - n is negative, including INT_MIN (the result would require a modular inverse, which need not exist),
   - x and n are both zero.
   The remainder follows the C `%` operator, i.e. for negative x the result may be negative.

   Formerly the function never reduced the result modulo m, stored 1.0/x in an int for negative exponents,
   and overflowed the int range when squaring; all intermediate products are now held in 64 bits, where they
   always fit as both factors are smaller than |m| in magnitude. */
LUALIB_API double tools_intpowmod (int x, int n, int m) {
  int64_t base, result, mod;
  if (m == 0 || n < 0) return AGN_NAN;
  if (n == 0 && x == 0) return AGN_NAN;  /* 0^0 is left undefined, as in tools_intpow */
  mod = m;
  base = (int64_t)x % mod;
  result = 1 % mod;  /* is 0 if |m| == 1 */
  while (n) {
    if (n & 1) result = (result*base) % mod;  /* this bit of the exponent is set */
    base = (base*base) % mod;
    n >>= 1;
  }
  return (double)result;
}


#ifndef __ARMCPU  /* 2.37.1 */
/* long double version of tools_intpow: raises x to the integer power n by Square-and-Multiply.
   Returns AGN_NAN if n is INT_MIN (it cannot be negated), and if x is zero while n is zero or negative.
   For negative n the reciprocal of x is raised to the power -n. */
LUALIB_API long double tools_intpowl (long double x, int n) {
  long double result = 1.0;
  int e = n;
  if (e == INT_MIN) return AGN_NAN;  /* protects against an infinite loop; isinf/isnan do not work with ints */
  if (e <= 0) {
    if (x == 0.0) return AGN_NAN;
    x = 1.0/x;
    e = -e;
  }
  for (; e != 0; e >>= 1) {
    if (e & 1) result *= x;  /* this bit of the exponent is set: include the current square */
    x *= x;
  }
  return result;
}
#endif


/* Returns 1 if x is finite, i.e. neither infinity nor NaN, and 0 otherwise: x is finite unless all exponent
   bits of its high word are set. 2.18.1, slower than GCC's built-in isfinite, but faster than
   ... (const double a) { const double b = a - a; int r = (b == b); return r; } */
LUALIB_API int tools_isfinite (double x) {
  int32_t high;
  GET_HIGH_WORD(high, x);
  high &= 0x7ff00000;  /* isolate the exponent bits */
  return high != 0x7ff00000;
}


/* Returns 1 if x is NaN and 0 otherwise: all exponent bits are set and at least one mantissa bit is set.
   2.18.1 as fast as GCC's built-in isnan, faster than ... (const double x) { int r = (x != x); return r } */
LUALIB_API int tools_isnan (double x) {
  int32_t high;
  uint32_t low;
  EXTRACT_WORDS(high, low, x);
  high &= 0x7fffffff;  /* drop the sign: |x| */
  if (high < 0x7ff00000) return 0;  /* exponent not all ones: finite */
  return ((high - 0x7ff00000) | low) != 0;  /* 3.7.4 patch */
}


/* Returns 1 if x is +infinity or -infinity and 0 otherwise: all exponent bits are set and all mantissa bits
   are zero, i.e. the counterpart of tools_isnan among the non-finite values; 3.7.4 */
LUALIB_API int tools_isinf (double x) {
  int32_t high;
  uint32_t low;
  EXTRACT_WORDS(high, low, x);
  high &= 0x7fffffff;  /* drop the sign: |x| */
  if (high < 0x7ff00000) return 0;  /* exponent not all ones: finite */
  return ((high - 0x7ff00000) | low) == 0;
}


/* Like tools_isinf, but additionally stores 1 in *isnegative if the sign bit of x is set, and 0 otherwise;
   *isnegative is set for every x, not only for infinities. 4.11.5, based on tools_isinf */
LUALIB_API int tools_isinfx (double x, int *isnegative) {
  int32_t high;
  uint32_t low;
  EXTRACT_WORDS(high, low, x);
  *isnegative = high < 0;  /* must be determined before the sign is masked out */
  high &= 0x7fffffff;      /* drop the sign: |x| */
  if (high < 0x7ff00000) return 0;  /* exponent not all ones: finite */
  return ((high - 0x7ff00000) | low) == 0;
}


/* Returns 1 if x is NaN or +/-infinity, and 0 otherwise: all exponent bits of the high word are set.
   This is the negation of tools_isfinite. */
LUALIB_API int tools_isnanorinf (double x) {
  int32_t high;
  GET_HIGH_WORD(high, x);
  high &= 0x7ff00000;  /* isolate the exponent bits */
  return high == 0x7ff00000;
}


/* Returns NaN (not-a-number), produced by dividing zero by zero. */
LUALIB_API double tools_nan (void) {
  double undefined = (0.0/0.0);
  return undefined;
}


/* Returns 1 if the long double e is +0 or -0, and 0 otherwise. On non-ARM CPUs the bit pattern is inspected:
   exponent and both mantissa parts must be zero. On ARM CPUs e is simply compared with zero. */
LUALIB_API int tools_fpiszerol (long double e) {
#ifndef __ARMCPU
  union IEEEl2bits u;
  u.e = e;
  if (u.bits.exp != 0) return 0;
  return (u.bits.manl | u.bits.manh) == 0;
#else
  return e == 0.0L;
#endif
}

/* Returns 1 if the long double e is subnormal, and 0 otherwise. On non-ARM CPUs the bit pattern is inspected:
   the exponent must be zero and the mantissa must not be zero. On ARM CPUs fpclassify is consulted. */
LUALIB_API int tools_fpissubnormall (long double e) {
#ifndef __ARMCPU
  union IEEEl2bits u;
  u.e = e;
  if (u.bits.exp != 0) return 0;
  return (u.bits.manl | u.bits.manh) != 0;
#else
  return fpclassify(e) == FP_SUBNORMAL;
#endif
}


/* Checks whether the long double e is +/-infinity. On non-ARM CPUs the bit pattern is inspected and 1 or 0
   is returned: the exponent must have its maximum value 32767 and the mantissa must be zero. On ARM CPUs the
   result of isinf is passed on. */
LUALIB_API int tools_fpisinfl (long double e) {
#ifndef __ARMCPU
  union IEEEl2bits u;
  u.e = e;
  mask_nbit_l(u);  /* mask normalization bit if applicable */
  if (u.bits.exp != 32767) return 0;
  return (u.bits.manl | u.bits.manh) == 0;
#else
  return isinf(e);
#endif
}

/* Checks whether the long double e is NaN. On non-ARM CPUs the bit pattern is inspected and 1 or 0 is
   returned: the exponent must have its maximum value 32767 and the mantissa must not be zero. On ARM CPUs the
   result of isnan is passed on. */
LUALIB_API int tools_fpisnanl (long double e) {
#ifndef __ARMCPU
  union IEEEl2bits u;
  u.e = e;
  mask_nbit_l(u);  /* mask normalization bit if applicable */
  if (u.bits.exp != 32767) return 0;
  return (u.bits.manl | u.bits.manh) != 0;
#else
  return isnan(e);
#endif
}

/* Returns non-zero if the long double e is a normal number, i.e. neither zero, subnormal, infinite nor NaN.
   The bit-level variant formerly tested only `exp != 32767`, which is the test for finiteness and thus also
   accepted zeros and subnormals, in contrast to the isnormal() based variant below. A normal number has an
   exponent field that is neither all zeros (zero/subnormal) nor all ones (infinity/NaN). */
#ifndef __ARMCPU
LUALIB_API int tools_fpisnormall (long double e) {
  union IEEEl2bits u;
  u.e = e;
  return (u.bits.exp != 0) && (u.bits.exp != 32767);
}
#else
/* __ARMCPU build: relies on the C library classification */
LUALIB_API int tools_fpisnormall (long double e) {
  return isnormal(e);
}
#endif

/* Returns non-zero if the long double e is finite, i.e. its exponent field is not all ones (32767), which
   excludes infinities and NaNs. */
#ifndef __ARMCPU
LUALIB_API int tools_fpisfinitel (long double e) {
  union IEEEl2bits u;
  int allones;
  u.e = e;
  mask_nbit_l(u);  /* mask normalization bit if applicable */
  allones = (u.bits.exp == 32767);
  return !allones;
}
#else
/* __ARMCPU build: relies on the C library classification */
LUALIB_API int tools_fpisfinitel (long double e) {
  return isfinite(e);
}
#endif


#if BYTE_ORDER != BIG_ENDIAN
/* based on SunPro's ieee_double_shape_type, see sunpro.h, 2.25.5 */
/* Overlay of a 64-bit unsigned integer and its two 32-bit halves. This is the variant compiled when
   BYTE_ORDER != BIG_ENDIAN: lx is declared first and therefore overlays the bytes at the lower addresses,
   which hold the low-order half of val on a little-endian target. */
typedef union {
  uint64_t val;     /* the complete 64-bit value */
  struct {
    uint32_t lx;    /* low-order 32 bits */
    uint32_t hx;    /* high-order 32 bits */
  } parts;
} ieee_uint_shape_type;


/* code taken from http://www.dmh2000.com/cpp/dswap.shtml */
/* Little-endian build, 2.14.9: returns a double whose bytes are those of d in reverse order. The reversed
   bytes are collected in a uint64_t and handed back through the double_cast union (members i and f). */
LUALIB_API double tools_tobigendian (double d) {
  double_cast u;
  uint64_t reversed;
  const unsigned char *from = (const unsigned char *)&d + sizeof(double);  /* one past the last byte of d */
  unsigned char *to = (unsigned char *)&reversed;
  size_t left;
  for (left = sizeof(double); left > 0; left--) *to++ = *--from;  /* 2.9.0 */
  u.i = reversed;
  return u.f;
}

/* Little Endian, 2.9.8, used by xbase.writetime: packs two 32-bit integers into one 64-bit word by copying the
   bytes of d into the first four bytes of the result and the bytes of e into the last four, in memory order. */
LUALIB_API uint64_t tools_twoint32touint64 (int32_t d, int32_t e) {
  uint64_t packed;
  unsigned char *out = (unsigned char *)&packed;
  memcpy(out, &d, sizeof(int32_t));
  memcpy(out + sizeof(int32_t), &e, sizeof(int32_t));
  return packed;
}

/* 2.9.8, k=1: first int32_t, k=2: second one; used by xbase.DBFReadTimeStampAttribute, Little Endian.
   Returns the four bytes of d selected by k (in memory order) as an int32_t. Any other k returns 0; formerly
   such a k made the copy loop read beyond the eight bytes of d. */
LUALIB_API int32_t tools_uint64toint32 (uint64_t d, char k) {
  int32_t a = 0;  /* 2.25.5 */
  const unsigned char *src = (const unsigned char *)&d;
  if (k != 1 && k != 2) return 0;  /* only two halves exist */
  memcpy(&a, src + (size_t)(k - 1)*sizeof(int32_t), sizeof(int32_t));
  return a;
}

/* Little Endian, signed 4-byte integer, 2.9.8, used by xbase.DBFWriteAttribute (binary I type).
   In the little-endian build no byte swap takes place: the bytes of d are copied unchanged and returned. */
LUALIB_API int32_t tools_swapint32 (int32_t d) {
  int32_t copy;
  memcpy(&copy, &d, sizeof copy);  /* do nothing */
  return copy;
}

/* Little-endian build: no byte swap takes place, the bytes of d are copied unchanged and returned. */
LUALIB_API float tools_swapfloat (float d) {
  float copy;
  memcpy(&copy, &d, sizeof copy);  /* do nothing */
  return copy;
}

/* Little Endian, 2.9.8, used by xbase.DBFReadAttribute: converts the 32-bit signed integer a to a double.
   In the little-endian build the bytes of a are taken as they are, i.e. without swapping. */
LUALIB_API double tools_sint2double (int32_t a) {
  int32_t copy;
  memcpy(&copy, &a, sizeof copy);  /* do nothing */
  return (double)copy;
}

/* Little Endian: reverses the eight bytes of a and returns them reinterpreted as a double. */
LUALIB_API double tools_uint64todoubleandswap (uint64_t a) {
  double d;
  const unsigned char *from = (const unsigned char *)&a + sizeof(double);  /* one past the last byte of a */
  unsigned char *to = (unsigned char *)&d;
  size_t left;
  for (left = sizeof(double); left > 0; left--) *to++ = *--from;  /* 2.10.0 */
  return d;
}

/* Little Endian: reverses the eight bytes of the double a and returns them as a uint64_t. */
LUALIB_API uint64_t tools_doubletouint64andswap (double a) {
  uint64_t d;
  const unsigned char *from = (const unsigned char *)&a + sizeof(uint64_t);  /* one past the last byte of a */
  unsigned char *to = (unsigned char *)&d;
  size_t left;
  for (left = sizeof(uint64_t); left > 0; left--) *to++ = *--from;
  return d;
}

#else /* BIG ENDIAN */

/* Overlay of a 64-bit unsigned integer and its two 32-bit halves, big-endian variant: hx is declared first
   and therefore overlays the bytes at the lower addresses, which hold the high-order half of val on a
   big-endian target. */
typedef union {  /* 2.25.5 */
  uint64_t val;     /* the complete 64-bit value */
  struct {
    uint32_t hx;    /* high-order 32 bits */
    uint32_t lx;    /* low-order 32 bits */
  } parts;
} ieee_uint_shape_type;


/* Big-endian build, 2.14.9: returns a double whose bytes are those of d in reverse order. The reversed bytes
   are collected in a uint64_t and handed back through the double_cast union (members i and f). */
LUALIB_API double tools_tolittleendian (double d) {
  double_cast u;
  uint64_t reversed;
  const unsigned char *from = (const unsigned char *)&d + sizeof(double);  /* one past the last byte of d */
  unsigned char *to = (unsigned char *)&reversed;
  size_t left;
  for (left = sizeof(double); left > 0; left--) *to++ = *--from;  /* 2.9.0 */
  u.i = reversed;
  return u.f;
}

/* Big-endian build: d is returned unchanged, no bytes are swapped. */
LUALIB_API double tools_tobigendian (double d) {  /* 2.14.9, swap bytes */
  return d;
}

/* Big Endian, used by binio.writenumber: returns the eight bytes of the double d, unchanged and in the same
   order, as a uint64_t (no swap takes place in the big-endian build). */
LUALIB_API uint64_t tools_swapdouble (double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(double));  /* 2.9.0, do nothing */
  return bits;
}

/* Big Endian, used by binio.readnumber, binio.readobjectaux: returns the eight bytes of a, unchanged and in
   the same order, reinterpreted as a double (counterpart of tools_swapdouble). */
LUALIB_API double tools_unswapdouble (uint64_t a) {
  double value;
  memcpy(&value, &a, sizeof(double));  /* 2.9.0 */
  return value;
}

/* Big Endian, used by binio.readnumber, binio.readobjectaux: returns the four bytes of a, unchanged and in the
   same order, reinterpreted as a float. */
LUALIB_API float tools_unswapfloat (uint32_t a) {
  float value;
  memcpy(&value, &a, sizeof(float));  /* 2.9.0 */
  return value;
}

/* Big Endian, 2.9.8: returns a with its four bytes in reverse order. */
LUALIB_API int32_t tools_swapint32 (int32_t a) {
  int32_t d;
  const unsigned char *from = (const unsigned char *)&a + sizeof(int32_t);  /* one past the last byte of a */
  unsigned char *to = (unsigned char *)&d;
  size_t left;
  for (left = sizeof(int32_t); left > 0; left--) *to++ = *--from;
  return d;
}

/* Big Endian, 2.9.8: returns the four bytes of the float a in reverse order, as a uint32_t. */
LUALIB_API uint32_t tools_swapfloat (float a) {
  uint32_t d;
  const unsigned char *from = (const unsigned char *)&a + sizeof(uint32_t);  /* one past the last byte of a */
  unsigned char *to = (unsigned char *)&d;
  size_t left;
  for (left = sizeof(uint32_t); left > 0; left--) *to++ = *--from;
  return d;
}

/* Big Endian, 2.9.8, used by xbase.writetime: packs two 32-bit integers into one 64-bit word. The first four
   bytes of the result are the bytes of d in reverse order, the last four are the bytes of e in reverse order. */
LUALIB_API uint64_t tools_twoint32touint64 (int32_t d, int32_t e) {
  uint64_t packed;
  const size_t width = sizeof(int32_t);
  const unsigned char *first = (const unsigned char *)&d;
  const unsigned char *second = (const unsigned char *)&e;
  unsigned char *out = (unsigned char *)&packed;
  size_t i;
  for (i = 0; i < width; i++) {
    out[i] = first[width - 1 - i];
    out[width + i] = second[width - 1 - i];
  }
  return packed;
}

/* Big Endian, 2.9.8, k=1: first int32_t, k=2, second one, used by xbase.DBFReadTimeStampAttribute.
   Returns the four bytes of d selected by k, in reverse order, as an int32_t: k=1 takes bytes 3, 2, 1, 0 and
   k=2 takes bytes 7, 6, 5, 4 of d. Any other k returns 0; formerly k=0 returned an uninitialised value and
   other invalid values of k read beyond the eight bytes of d. */
LUALIB_API int32_t tools_uint64toint32 (uint64_t d, char k) {
  int32_t a = 0;
  const size_t s = sizeof(int32_t);
  unsigned char *dst = (unsigned char *)&a;
  const unsigned char *src;
  size_t i;
  if (k != 1 && k != 2) return 0;  /* only two halves exist */
  src = (const unsigned char *)&d + (size_t)(k - 1)*s;  /* start of the selected half */
  for (i = 0; i < s; i++) dst[i] = src[s - 1 - i];
  return a;
}

/* Big Endian, used by xbase.DBFReadAttribute: reverses the eight bytes of a and returns them reinterpreted as
   a double. */
LUALIB_API double tools_uint64todoubleandswap (uint64_t a) {
  double d;
  const unsigned char *from = (const unsigned char *)&a + sizeof(double);  /* one past the last byte of a */
  unsigned char *to = (unsigned char *)&d;
  size_t left;
  for (left = sizeof(double); left > 0; left--) *to++ = *--from;  /* 2.10.0 */
  return d;
}

/* Big Endian: returns a with its eight bytes in reverse order.
   NOTE(review): the little-endian definition of this function takes a double, whereas this one takes a
   uint64_t, so a double argument would be converted numerically here instead of being reinterpreted bit by
   bit - confirm against the prototype in the header and the callers. */
LUALIB_API uint64_t tools_doubletouint64andswap (uint64_t a) {
  uint64_t d = 0;
  size_t i;
  for (i = 0; i < sizeof(uint64_t); i++) {
    d = (d << 8) | (a & 0xff);  /* append the lowest remaining byte of a */
    a >>= 8;
  }
  return d;
}

/* Big Endian, 2.9.8, used by xbase.DBFReadAttribute: reverses the four bytes of a and converts the resulting
   32-bit signed integer to a double. */
LUALIB_API double tools_sint2double (int32_t a) {
  int32_t d;
  const unsigned char *from = (const unsigned char *)&a + sizeof(int32_t);  /* one past the last byte of a */
  unsigned char *to = (unsigned char *)&d;
  size_t left;
  for (left = sizeof(int32_t); left > 0; left--) *to++ = *--from;  /* 2.9.0 */
  return (double)d;
}
#endif

/* Returns d with its four bytes in reverse order, on any platform. */
LUALIB_API uint32_t tools_swapuint32 (uint32_t d) {
  return (d >> 24)
       | ((d >> 8) & 0x0000ff00u)
       | ((d << 8) & 0x00ff0000u)
       | (d << 24);
}

/* Splits the 64-bit value d into its two 32-bit halves, 2.25.5: stores the low-order half in *low and returns
   the high-order half. low must not be NULL.
   The halves are computed arithmetically instead of through the ieee_uint_shape_type overlay, so the result
   does not depend on the BYTE_ORDER configuration selecting the matching member layout. */
LUALIB_API uint32_t tools_uint64touint32 (uint64_t d, uint32_t *low) {
  *low = (uint32_t)(d & 0xffffffffu);
  return (uint32_t)(d >> 32);  /* high */
}


/* Reverses the byte order of the 32-bit signed integer *n in place. n must not be NULL.
   The byte array in the union is sized with sizeof, which is a constant expression; a run-time variable as
   array bound inside a union is not standard C, which made a separate __HAIKU__ branch necessary. */
LUALIB_API void tools_swapint32_t (int32_t *n) {
  union {
    int32_t d;
    unsigned char b[sizeof(int32_t)];
  } p, q;
  size_t i;
  p.d = *n;
  for (i = 0; i < sizeof(int32_t); i++) q.b[i] = p.b[sizeof(int32_t) - i - 1];  /* 2.9.0 */
  *n = q.d;
}


/* Reverses the byte order of the 64-bit unsigned integer *n in place. n must not be NULL.
   The byte array in the union is sized with sizeof (a constant expression), so one code path serves all
   platforms; the former __HAIKU__ branch indexed p.b[4 - i - 1] for i up to 7 and thus read outside the
   eight-byte array. */
LUALIB_API void tools_swapuint64_t (uint64_t *n) {
  union {
    uint64_t d;
    unsigned char b[sizeof(uint64_t)];
  } p, q;
  size_t i;
  p.d = *n;
  for (i = 0; i < sizeof(uint64_t); i++) q.b[i] = p.b[sizeof(uint64_t) - i - 1];  /* 2.9.0 */
  *n = q.d;
}


/* Reverses the byte order of the 32-bit unsigned integer *n in place. n must not be NULL.
   The byte array in the union is sized with sizeof, which is a constant expression; a run-time variable as
   array bound inside a union is not standard C, which made a separate __HAIKU__ branch necessary. */
LUALIB_API void tools_swapuint32_t (uint32_t *n) {
  union {
    uint32_t d;
    unsigned char b[sizeof(uint32_t)];
  } p, q;
  size_t i;
  p.d = *n;
  for (i = 0; i < sizeof(uint32_t); i++) q.b[i] = p.b[sizeof(uint32_t) - i - 1];  /* 2.9.0 */
  *n = q.d;
}


/* Reverses the byte order of the 16-bit unsigned integer *n in place, new 2.18.2. n must not be NULL.
   The byte array in the union is sized with sizeof, which is a constant expression; a run-time variable as
   array bound inside a union is not standard C, which made a separate __HAIKU__ branch necessary. */
LUALIB_API void tools_swapuint16_t (uint16_t *n) {
  union {
    uint16_t d;
    unsigned char b[sizeof(uint16_t)];
  } p, q;
  size_t i;
  p.d = *n;
  for (i = 0; i < sizeof(uint16_t); i++) q.b[i] = p.b[sizeof(uint16_t) - i - 1];
  *n = q.d;
}


/* Reverses the byte order of the 16-bit signed integer *n in place, new 2.38.3. n must not be NULL.
   The byte array in the union is sized with sizeof, which is a constant expression; a run-time variable as
   array bound inside a union is not standard C, which made a separate __HAIKU__ branch necessary. */
LUALIB_API void tools_swapint16_t (int16_t *n) {
  union {
    int16_t d;
    unsigned char b[sizeof(int16_t)];
  } p, q;
  size_t i;
  p.d = *n;
  for (i = 0; i < sizeof(int16_t); i++) q.b[i] = p.b[sizeof(int16_t) - i - 1];
  *n = q.d;
}


/* Swaps the lower n bytes of word x; bytes above those will be discarded.
   The lowest byte of x becomes the highest of the n result bytes, and so on. Returns 0 if n is 0 or exceeds
   the number of bytes in a uint32_t. */
LUALIB_API uint32_t tools_swaplower32 (uint32_t x, unsigned int n) {
  uint32_t result;
  unsigned int k;
  if (n < 1 || n > sizeof(uint32_t)) return 0;
  result = 0;
  for (k = 0; k < n; k++) {
    result = (result << CHAR_BIT) | (x & 0xff);  /* append the lowest remaining byte of x */
    x >>= CHAR_BIT;                              /* and move on to the next one */
  }
  return result;
}


/* Swaps the upper n bytes of word x; bytes below those will be discarded.
   The highest byte of x becomes the lowest result byte, the second highest the second lowest, and so on.
   Returns 0 if n is 0 or exceeds the number of bytes in a uint32_t.
   All shifts are done on uint32_t values; shifting a uint8_t, which is promoted to int, by 24 bits would run
   into the sign bit, which is undefined behaviour. */
#define SWAPLEFTSHIFT32  ((sizeof(uint32_t) - 1)*CHAR_BIT)
LUALIB_API uint32_t tools_swapupper32 (uint32_t x, unsigned int n) {
  uint32_t y = 0;
  unsigned int k;
  if (n == 0 || n > sizeof(uint32_t)) return 0;
  for (k = 0; k < n; k++) {
    y |= (x >> SWAPLEFTSHIFT32) << (k*CHAR_BIT);  /* current highest byte of x goes to result byte k */
    x <<= CHAR_BIT;                               /* move the next byte to the top */
  }
  return y;
}


/* Swaps the lower n bytes of the 64-bit word x; bytes above those are discarded. The lowest byte of x becomes
   the highest of the n result bytes, and so on. Returns 0 if n is 0 or exceeds the number of bytes in a
   uint64_t. */
LUALIB_API uint64_t tools_swaplower64 (uint64_t x, unsigned int n) {
  uint64_t result;
  unsigned int k;
  if (n < 1 || n > sizeof(uint64_t)) return 0;
  result = 0;
  for (k = 0; k < n; k++) {
    result = (result << CHAR_BIT) | (x & 0xff);  /* append the lowest remaining byte of x */
    x >>= CHAR_BIT;                              /* and move on to the next one */
  }
  return result;
}


/* Swaps the upper n bytes of the 64-bit word x; bytes below those are discarded. The highest byte of x becomes
   the lowest result byte, the second highest the second lowest, and so on. Returns 0 if n is 0 or exceeds the
   number of bytes in a uint64_t.
   The former implementation isolated the "highest" byte with the 32-bit mask 0xff000000 and then shifted right
   by 56 bits, which always yields 0, so every call returned 0. Here the highest byte is obtained by shifting
   the 64-bit word itself, and all shifts are done on uint64_t values. */
#define SWAPLEFTSHIFT64  ((sizeof(uint64_t) - 1)*CHAR_BIT)
LUALIB_API uint64_t tools_swapupper64 (uint64_t x, unsigned int n) {
  uint64_t y = 0;
  unsigned int k;
  if (n == 0 || n > sizeof(uint64_t)) return 0;
  for (k = 0; k < n; k++) {
    y |= (x >> SWAPLEFTSHIFT64) << (k*CHAR_BIT);  /* current highest byte of x goes to result byte k */
    x <<= CHAR_BIT;                               /* move the next byte to the top */
  }
  return y;
}


/* Determines in which half-plane a!b lies, see `csgn` online help in Maple V Release 4.
   a is the real and b the imaginary part. Returns 1 if a > 0, or a = 0 and b > 0; -1 if a < 0, or a = 0 and
   b < 0; and 0 in all other cases (both parts zero, or no comparison holds, e.g. with NaN). */
LUALIB_API double tools_csgn (double a, double b) {
  if (a > 0.0) return 1.0;
  if (a < 0.0) return -1.0;
  if (a == 0.0) {  /* on the imaginary axis: the sign of b decides */
    if (b > 0.0) return 1.0;
    if (b < 0.0) return -1.0;
  }
  return 0.0;
}


#ifndef __ARMCPU  /* 2.37.1 */
/* long double variant of tools_csgn: a is the real and b the imaginary part. Returns 1 if a > 0, or a = 0 and
   b > 0; -1 if a < 0, or a = 0 and b < 0; and 0 in all other cases. */
LUALIB_API long double tools_csgnl (long double a, long double b) {
  if (a > 0.0L) return 1.0L;
  if (a < 0.0L) return -1.0L;
  if (a == 0.0L) {  /* on the imaginary axis: the sign of b decides */
    if (b > 0.0L) return 1.0L;
    if (b < 0.0L) return -1.0L;
  }
  return 0.0L;
}
#endif


/* Replaces, in place, every occurrence of character from in the null-terminated string s by character to.
   If flag is non-zero and the resulting string has more than one character, a trailing slash or backslash is
   removed, 0.31.4. Returns s.

   Guards against situations the former search loop did not survive: s == NULL (now returns NULL), from == to
   (the same position was found again and again, an endless loop), from == '\0' (the terminator itself was
   overwritten and the search ran off the string), and the empty string (a pointer before the start of the
   buffer was formed). If to is '\0', the string is cut at the first occurrence of from, as before. */
LUALIB_API char *str_charreplace (char *s, char from, char to, int flag) {
  char *p;
  size_t len;
  if (s == NULL) return NULL;
  if (from != '\0' && from != to) {
    for (p = s; (p = strchr(p, from)) != NULL; p++) {
      *p = to;
      if (to == '\0') break;  /* string has been cut here, do not touch what follows */
    }
  }
  len = tools_strlen(s);  /* 2.17.8 tweak */
  /* delete trailing slash if present, only if str has more than one char, 0.31.4 */
  if (flag && len > 1 && (s[len - 1] == '/' || s[len - 1] == '\\')) s[len - 1] = '\0';
  return s;
}


/*
 * str_concat() - allocate memory and safely concatenate strings in portable C
 * (and C++ if you like).
 *
 * This code deals gracefully with potential integer overflows (perhaps when
 * input strings are maliciously long), as well as with input strings changing
 * from under it (perhaps because of misbehavior of another thread).  It does
 * not depend on non-portable functions such as snprintf() and asprintf().
 *
 * Written by Solar Designer <solar at openwall.com> and placed in the
 * public domain.
 *
 * Originally written for and currently maintained as a part of popa3d,
 * a POP3 server:
 *
 *   http://www.openwall.com/popa3d/
 *   file misc.c
 */

/* Concatenates strings and returns a null-terminated string, or NULL if something failed.

   Always pass NULL as the last argument to denote the end of the strings to be concatenated, e.g.
   str_concat(argv[0], argv[1], argv[2], argv[3], NULL).

   You must FREE the memory allocated by concat in case of success. ONLY assign the result to a variable,
   use it in the VM or API, and then finally FREE it !

   Please note that pushing strings onto the stack and then calling `lua_concat` may be 30 % faster in
   standard situations. */

/* Concatenates s1 and all following string arguments, up to the terminating NULL argument, into one newly
   allocated, null-terminated string. Returns NULL if the summed length wraps around or reaches INT_MAX, if the
   allocation fails, or if the lengths found while copying differ from those found while measuring. */
LUALIB_API char *str_concat (const char *s1, ...) {
  va_list args;
  const char *s;
  char *p, *result;
  unsigned long l, m, n;  /* l: length of current string; m: total length; n: length of s1, then bytes copied */
  p = NULL;  /* to suppress compiler warnings */
  m = n = tools_strlen(s1);  /* 2.17.8 tweak */
  /* first pass: add up the lengths of all further arguments */
  va_start(args, s1);
  while ((s = va_arg(args, char *))) {
    l = tools_strlen(s);  /* 2.17.8 tweak */
    if ((m += l) < l) break;  /* unsigned wrap-around: quit with s still non-NULL */
  }
  va_end(args);
  if (s || m >= INT_MAX) return NULL;  /* s is non-NULL only after the wrap-around break above */
  /* presumably reserves m + 1 bytes, as the replaced malloc call quoted below did - TODO confirm */
  result = tools_stralloc(m);  /* 2.16.5, (char *)malloc((m + 1)*sizeof(char)); */
  if (!result) return NULL;
  tools_memcpy(p = result, s1, n);
  p += n;
  /* second pass: copy the further arguments, re-checking that the total does not exceed m */
  va_start(args, s1);
  while ((s = va_arg(args, char *))) {
    l = tools_strlen(s);  /* 2.17.8 tweak */
    if ((n += l) < l || n > m) break;  /* a string grew since the first pass: do not write past the buffer */
    tools_memcpy(p, s, l);
    p += l;
  }
  va_end(args);
  if (s || m != n || p != result + n) {  /* copy aborted, or lengths no longer agree with the first pass */
    xfree(result);
    return NULL;
  }
  *p = 0;
  return result;
}


/* Inserts the null-terminated string what into string str at position pos and returns the result as a newly
   allocated string. FREE IT ! 2.21.9
   pos is first passed through tools_posrelat (presumably resolving positions given relative to the end of str -
   TODO confirm). Returns NULL if str is NULL or empty, if the resulting position lies beyond the end of str, or
   if memory could not be allocated. Position 0 prepends what, position strlen(str) appends it. */
LUALIB_API char *str_insert (const char *str, const char *what, int pos) {
  char *buf;
  size_t lenwhat;
  const size_t lenstr = tools_strlen(str);  /* 2.25.1 tweak */
  pos = tools_posrelat(pos + 1, lenstr) - 1;
  if (str == NULL || lenstr == 0 || pos > lenstr) return NULL;
  if (pos == 0) return str_concat(what, str, NULL);       /* prepend what */
  if (pos == lenstr) return str_concat(str, what, NULL);  /* append what */
  /* insert what: head of str, then what, then tail of str */
  lenwhat = tools_strlen(what);  /* 2.25.1 tweak */
  buf = (char *)malloc((size_t)((lenstr + lenwhat + 1)*sizeof(char)));  /* 2.25.5, try to prevent compiler warnings in GCC 10.3 */
  if (buf == NULL) return NULL;  /* 4.11.5 fix */
  tools_memcpy(buf, str, pos);  /* 2.25.1 tweak */
  tools_memcpy(buf + pos, what, lenwhat);
  tools_memcpy(buf + pos + lenwhat, str + pos, lenstr - pos);
  buf[lenstr + lenwhat] = '\0';
  return buf;
}


/* Taken from: http://c-faq.com/lib/regex.html, maintained by Steve Summit

   Quick little wildcard matcher by Arjan Kenter (Copyright 1995, Arjan Kenter). Processes ? and *
   wildcards and return 0 (false) or 1 (true).

   With this definition, the call strings.glob("aplomb.c", "a*b.c") would return 1. */

/* Matches string str against pattern pat, where `?` stands for exactly one arbitrary character and `*` for
   any, possibly empty, run of characters. Returns 1 if the whole string matches, and 0 otherwise.
   Literal characters and `?` are consumed in a loop; only `*` needs recursion. */
LUALIB_API int str_glob (const char *pat, const char *str) {
  for ( ; ; pat++, str++) {
    if (*pat == '\0') return !*str;  /* pattern exhausted: match only if the string is, too */
    if (*pat == '*')                 /* let `*` match nothing, or swallow one more character */
      return str_glob(pat + 1, str) || (*str && str_glob(pat, str + 1));
    if (*pat == '?') {
      if (!*str) return 0;           /* `?` needs a character */
    } else if (*pat != *str) {
      return 0;                      /* literal mismatch */
    }
  }
}


/* based on oa_substr released with the sources of OpenAxiom 1.4.1, see file open-axiom-1.4.1/src/lib/cfuns-c.c;
   modified by Alexander Walz, 1.10.0.

   Returns a null-terminated substring. Both begin and end are zero-based, inclusive indices in the range
   0 .. strlen()-1, with begin <= end. error will be set to 1 in case of an index-out-of-range error, to -1 in case
   of a failed memory allocation, and 0 in case of success.

   FREE the return after use !

   A light edition of this function without the error parameter and index out-of-range checks is not faster, although
   this may sound strange. */

/* Returns a newly allocated, null-terminated copy of the characters str[begin] .. str[end], both inclusive.
   *error is set to 1 if str is NULL or empty or the indices are out of range (end beyond the last character or
   begin > end), to -1 if memory could not be allocated, and to 0 on success. In both failure cases NULL is
   returned. error must not be NULL. */
LUALIB_API char *str_substr (const char *str, const size_t begin, const size_t end, int *error) {
  char *piece;
  size_t count;
  const size_t total = tools_strlen(str);  /* modified by Alex Walz */ /* 2.17.8 tweak */
  /* begin and end are unsigned, so there is no need to test them for negative values */
  if (str == NULL || total == 0 || begin > total || end >= total || begin > end) {
    *error = 1;     /* index out-of-range; modified by Alex Walz */
    return NULL;
  }
  count = end - begin + 1;  /* number of characters to copy */
  piece = (char*)malloc((count + 1)*sizeof(char));  /* plus terminating \0 */
  if (piece == NULL) {  /* memory allocation failed ?  Modified by Alex Walz */
    *error = -1;
    return NULL;
  }
  tools_memset(piece, '\0', count + 1);
  tools_memcpy(piece, str + begin, count);
  *error = 0;
  return piece;
}


/* ISO 8859/1 Latin-1 alphabetic and upper and lower case bit vector tables.

   Taken from the entropy utility ENT written by John Walker, January 28th, 2008,
   Fourmilab, http://www.fourmilab.ch.

   This software is in the public domain. Permission to use, copy, modify, and distribute this software
   and its documentation for any purpose and without fee is hereby granted, without any conditions or
   restrictions. This software is provided as is without express or implied warranty. */

/* !!! NEEDED by isISO* macros in agnhlps.h !!! */

/* Bit vector of the alphabetic ISO 8859-1 characters: 32 bytes = 256 bits, one per character code.
   Judging from the data, code c corresponds to bit (0x80 >> (c % 8)) of byte c / 8: byte 8 is 127, which leaves
   '@' (64) clear and sets 'A' .. 'G' (65 .. 71). The authoritative lookup is in the isISO* macros in agnhlps.h. */
unsigned char isoalpha[32] = {
    0,   0,   0,   0,   0,   0,   0,   0,  /* codes 0x00 .. 0x3F */
  127, 255, 255, 224, 127, 255, 255, 224,  /* codes 0x40 .. 0x7F */
    0,   0,   0,   0,   0,   0,   0,   0,  /* codes 0x80 .. 0xBF */
  255, 255, 254, 255, 255, 255, 254, 255   /* codes 0xC0 .. 0xFF */
};

/* Bit vector of the upper-case ISO 8859-1 letters: 32 bytes = 256 bits, one per character code.
   Judging from the data, code c corresponds to bit (0x80 >> (c % 8)) of byte c / 8: byte 8 is 127, which leaves
   '@' (64) clear and sets 'A' .. 'G' (65 .. 71). The authoritative lookup is in the isISO* macros in agnhlps.h. */
unsigned char isoupper[32] = {
    0,   0,   0,   0,   0,   0,   0,   0,  /* codes 0x00 .. 0x3F */
  127, 255, 255, 224,   0,   0,   0,   0,  /* codes 0x40 .. 0x7F */
    0,   0,   0,   0,   0,   0,   0,   0,  /* codes 0x80 .. 0xBF */
  255, 255, 254, 254,   0,   0,   0,   0   /* codes 0xC0 .. 0xFF */
};

/* Bit vector of the lower-case ISO 8859-1 letters: 32 bytes = 256 bits, one per character code.
   Judging from the data, code c corresponds to bit (0x80 >> (c % 8)) of byte c / 8: byte 12 is 127, which leaves
   '`' (96) clear and sets 'a' .. 'g' (97 .. 103). The authoritative lookup is in the isISO* macros in agnhlps.h. */
unsigned char isolower[32] = {
    0,   0,   0,   0,   0,   0,   0,   0,  /* codes 0x00 .. 0x3F */
    0,   0,   0,   0, 127, 255, 255, 224,  /* codes 0x40 .. 0x7F */
    0,   0,   0,   0,   0,   0,   0,   0,  /* codes 0x80 .. 0xBF */
    0,   0,   0,   1, 255, 255, 254, 255   /* codes 0xC0 .. 0xFF */
};


/* taken from: http://stackoverflow.com/questions/11258019/conversion-from-iso-8859-15-latin9-to-utf-8
   ISO 8859-15 is ISO8859/1 plus the EUR symbol;
   written by Nominal Animal, http://stackoverflow.com/users/1475978/nominal-animal.

   Creates a dynamically allocated copy of string, changing the encoding from ISO-8859-15 to UTF-8. */

/* Returns a newly allocated UTF-8 copy of the ISO-8859-15 encoded string, or NULL if memory could not be
   allocated. A NULL or empty input yields an empty string. FREE IT !
   A first pass determines the size of the result, a second pass writes it: bytes below 128 are copied, the
   eight code positions in which Latin-9 differs from Latin-1 are mapped individually, and all other bytes become
   the two-byte UTF-8 sequence of the code point with the same number. */
LUALIB_API char *latin9_to_utf8 (const char *string) {
  const unsigned char *in;
  unsigned char *out;
  char *result;
  size_t need = 0;  /* number of UTF-8 bytes, without the terminator */
  size_t padded;
  if (string) {
    for (in = (const unsigned char *)string; *in; in++)
      need += (*in < 128) ? 1 : ((*in == 164) ? 3 : 2);  /* only the euro sign (164) needs three bytes */
  }
  /* Allocate need+1 (to need+7) bytes for the converted string. */
  padded = need | 7;
  result = malloc(sizeof(char)*(padded + 1));
  if (!result) return NULL;
  /* Clear the tail of the string, setting the trailing NUL. */
  tools_bzero(result + padded - 7, 8);
  if (need == 0) return result;
  out = (unsigned char *)result;
  for (in = (const unsigned char *)string; *in; in++) {
    const unsigned char c = *in;
    if (c < 128) {       /* ASCII */
      *out++ = c;
      continue;
    }
    if (c >= 192) {      /* 192 .. 255: lead byte 195 */
      *out++ = 195;
      *out++ = c - 64;
      continue;
    }
    switch (c) {         /* 128 .. 191 */
      case 164: *out++ = 226; *out++ = 130; *out++ = 172; break;
      case 166: *out++ = 197; *out++ = 160; break;
      case 168: *out++ = 197; *out++ = 161; break;
      case 180: *out++ = 197; *out++ = 189; break;
      case 184: *out++ = 197; *out++ = 190; break;
      case 188: *out++ = 197; *out++ = 146; break;
      case 189: *out++ = 197; *out++ = 147; break;
      case 190: *out++ = 197; *out++ = 184; break;
      default:  *out++ = 194; *out++ = c; break;  /* same code point as in Latin-1 */
    }
  }
  /* Done. Remember to free() the resulting string when no longer needed. */
  return result;
}


/* taken from: http://stackoverflow.com/questions/11258019/conversion-from-iso-8859-15-latin9-to-utf-8
   ISO 8859-15 is ISO8859/1 plus the EUR symbol;
   written by Nominal Animal, http://stackoverflow.com/users/1475978/nominal-animal.

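   Creates a dynamically allocated copy of string, changing the encoding from UTF-8 to ISO-8859-1/15.
   Well-formed UTF-8 sequences that have no Latin-9 equivalent are dropped; bytes that do not form a valid sequence
   are copied unchanged (2.12.0 RC 2). FREE the result after use ! */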

/* Returns a newly allocated ISO-8859-15 copy of the UTF-8 encoded string, or NULL if memory could not be
   allocated. A NULL or empty input yields an empty string. The result is null-terminated; free() it after use.
   Per input position: ASCII is copied; the euro sign and the two-byte sequences that have a Latin-9 code are
   converted; any other well-formed multi-byte sequence (lead byte plus the required number of continuation
   bytes) is skipped without output; every remaining byte is copied unchanged. */
LUALIB_API char *utf8_to_latin9 (const char *string) {
  unsigned char *result = NULL;
  size_t size = 0;  /* bytes allocated */
  size_t used = 0;  /* bytes written */
  void *previous;
  if (string) {
    const unsigned char *s = (const unsigned char *)string;
    while (*s) {
      const unsigned char lead = s[0];
      int latin = -1;   /* Latin-9 code of a convertible two-byte sequence, -1 if there is none */
      size_t tail = 0;  /* number of continuation bytes a generic sequence with this lead byte needs */
      size_t k;
      if (used >= size) {  /* make room for further output */
        previous = result;
        size = (used | 255) + 257;
        result = realloc(result, size*sizeof(char));
        if (!result) {
          if (previous) free(previous);
          return NULL;
        }
      }
      if (lead < 128) {  /* ASCII */
        result[used++] = lead;
        s++;
        continue;
      }
      if (lead == 226 && s[1] == 130 && s[2] == 172) {  /* euro sign */
        result[used++] = 164;
        s += 3;
        continue;
      }
      if (lead == 194 && s[1] >= 128 && s[1] <= 191) {
        latin = s[1];
      } else if (lead == 195 && s[1] >= 128 && s[1] <= 191) {
        latin = s[1] + 64;
      } else if (lead == 197) {  /* the characters in which Latin-9 differs from Latin-1 */
        switch (s[1]) {
          case 160: latin = 166; break;
          case 161: latin = 168; break;
          case 189: latin = 180; break;
          case 190: latin = 184; break;
          case 146: latin = 188; break;
          case 147: latin = 189; break;
          case 184: latin = 190; break;
          default: break;
        }
      }
      if (latin >= 0) {
        result[used++] = (unsigned char)latin;
        s += 2;
        continue;
      }
      /* any other well-formed multi-byte sequence is skipped */
      if (lead >= 192 && lead < 224) tail = 1;
      else if (lead >= 224 && lead < 240) tail = 2;
      else if (lead >= 240 && lead < 248) tail = 3;
      else if (lead >= 248 && lead < 252) tail = 4;
      else if (lead >= 252 && lead < 254) tail = 5;
      /* stops at the first byte that is no continuation byte, thus never reads beyond the terminator */
      for (k = 1; k <= tail && s[k] >= 128 && s[k] < 192; k++) { }
      if (tail != 0 && k > tail) {
        s += tail + 1;
        continue;
      }
      /* 2.12.0 RC 2, don't drop char but put it into result */
      result[used++] = *(s++);
    }
  }
  /* shrink (or create) the buffer to its final, padded size and zero the tail, which also terminates the string */
  previous = result;
  size = (used | 7) + 1;
  result = realloc(result, size*sizeof(char));
  if (!result) {
    if (previous) free(previous);
    return NULL;
  }
  tools_bzero(result + used, (size - used)*sizeof(char));
  /* Done. Remember to free() the resulting string when no longer needed. */
  return (char *)result;
}


/* Returns the number of bytes the UTF-8 encoded string would occupy after conversion to Latin-9, without
   converting it, and stores the length in bytes of the UTF-8 input in *utf8len. For a NULL string both are 0.
   Counting rules per input position: ASCII, the euro sign and the two-byte sequences that have a Latin-9 code
   count as one byte; any other well-formed multi-byte sequence (lead byte plus the required number of
   continuation bytes) counts as nothing; every remaining byte counts as one. */
LUALIB_API size_t utf8_to_latin9_len (const char *string, size_t *utf8len) {
  const unsigned char *s;
  size_t used = 0;
  *utf8len = 0;
  if (string == NULL) return 0;
  for (s = (const unsigned char *)string; *s; ) {
    const unsigned char lead = s[0];
    size_t step = 1;  /* input bytes consumed at this position */
    int counted = 1;  /* whether this position produces an output byte */
    if (lead < 128) {
      /* ASCII: one byte in, one byte out */
    } else if (lead == 226 && s[1] == 130 && s[2] == 172) {  /* euro sign */
      step = 3;
    } else if ((lead == 194 || lead == 195) && s[1] >= 128 && s[1] <= 191) {
      step = 2;
    } else if (lead == 197 && (s[1] == 160 || s[1] == 161 || s[1] == 189 || s[1] == 190 ||
                               s[1] == 146 || s[1] == 147 || s[1] == 184)) {
      step = 2;
    } else {
      size_t tail = 0;  /* number of continuation bytes a generic sequence with this lead byte needs */
      size_t k;
      if (lead >= 192 && lead < 224) tail = 1;
      else if (lead >= 224 && lead < 240) tail = 2;
      else if (lead >= 240 && lead < 248) tail = 3;
      else if (lead >= 248 && lead < 252) tail = 4;
      else if (lead >= 252 && lead < 254) tail = 5;
      /* stops at the first byte that is no continuation byte, thus never reads beyond the terminator */
      for (k = 1; k <= tail && s[k] >= 128 && s[k] < 192; k++) { }
      if (tail != 0 && k > tail) {  /* well-formed, but not representable: skipped */
        step = tail + 1;
        counted = 0;
      }
    }
    if (counted) used++;
    s += step;
  }
  *utf8len = s - (const unsigned char *)string;  /* length of UTF-8 string */
  return used;  /* size of string converted to Latin */
}


/* detects that the given string is in UTF-8 encoding;
   written by Christoph, http://stackoverflow.com/users/48015/christoph,
   see: http://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c */

/* Checks whether the null-terminated string consists of valid UTF-8 sequences only.
   Returns 1 if so and sets *pos to 0. Otherwise returns 0 and sets *pos to the one-based position of the first
   offending byte, 2.12.6. For a NULL string, 0 is returned and *pos is left untouched. pos may be NULL if the
   position is of no interest.
   Of the ASCII range, only tab, line feed, carriage return and the characters 0x20 .. 0x7E are accepted.
   The success path formerly executed `pos = 0`, which only cleared the local pointer, so that *pos kept
   whatever value the caller had put there. */
LUALIB_API int is_utf8 (const char *string, size_t *pos) {
  const unsigned char *bytes;
  if (!string) return 0;
  bytes = (const unsigned char *)string;
  while (*bytes) {
    if ( (  /* ASCII use bytes[0] <= 0x7F to allow ASCII control characters */
      bytes[0] == 0x09 ||
      bytes[0] == 0x0A ||
      bytes[0] == 0x0D ||
      (0x20 <= bytes[0] && bytes[0] <= 0x7E)
      )
    ) {
      bytes += 1;
      continue;
    }
    if ( (  /* non-overlong 2-byte */
      (0xC2 <= bytes[0] && bytes[0] <= 0xDF) &&
      (0x80 <= bytes[1] && bytes[1] <= 0xBF)
    )
    ) {
      bytes += 2;
      continue;
    }
    if ( (  /* excluding overlongs */
        bytes[0] == 0xE0 &&
        (0xA0 <= bytes[1] && bytes[1] <= 0xBF) &&
        (0x80 <= bytes[2] && bytes[2] <= 0xBF)
      ) ||
      (  /* straight 3-byte */
        ((0xE1 <= bytes[0] && bytes[0] <= 0xEC) ||
          bytes[0] == 0xEE ||
          bytes[0] == 0xEF) &&
        (0x80 <= bytes[1] && bytes[1] <= 0xBF) &&
        (0x80 <= bytes[2] && bytes[2] <= 0xBF)
      ) ||
      (  /* excluding surrogates */
        bytes[0] == 0xED &&
        (0x80 <= bytes[1] && bytes[1] <= 0x9F) &&
        (0x80 <= bytes[2] && bytes[2] <= 0xBF)
      )
    ) {
      bytes += 3;
      continue;
    }
    if ( (  /* planes 1-3 */
        bytes[0] == 0xF0 &&
        (0x90 <= bytes[1] && bytes[1] <= 0xBF) &&
        (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
        (0x80 <= bytes[3] && bytes[3] <= 0xBF)
      ) ||
      (  /* planes 4-15 */
        (0xF1 <= bytes[0] && bytes[0] <= 0xF3) &&
        (0x80 <= bytes[1] && bytes[1] <= 0xBF) &&
        (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
        (0x80 <= bytes[3] && bytes[3] <= 0xBF)
      ) ||
      (  /* plane 16 */
        bytes[0] == 0xF4 &&
        (0x80 <= bytes[1] && bytes[1] <= 0x8F) &&
        (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
        (0x80 <= bytes[3] && bytes[3] <= 0xBF)
      )
    ) {
      bytes += 4;
      continue;
    }
    /* no valid sequence starts here: report the one-based position of this byte */
    if (pos) *pos = (size_t)(bytes - (const unsigned char *)string) + 1;  /* 2.12.6 */
    return 0;
  }
  if (pos) *pos = 0;  /* no offending byte */
  return 1;
}


/* Assumes a string is UTF-8 encoded and determines its size in characters: every byte is counted except
   continuation bytes, i.e. bytes of the form 10xxxxxx (128 .. 191), so that each multi-byte sequence
   contributes one. str must not be NULL. Written by mpez0, http://stackoverflow.com/users/27898/mpez0 */
LUALIB_API size_t size_utf8 (const char *str) {  /* changed 2.12.6 */
  const unsigned char *p;
  size_t count = 0;
  for (p = (const unsigned char *)str; *p != 0; p++) {
    if ((*p & 0xc0) != 0x80) count++;  /* not in [128, 191] ? */
  }
  return count;
}


/* Returns 1 if the string s looks like a decimal number, and 0 otherwise; 26.08.2012, Agena 1.7.7/1.9.1,
   extended 3.17.1. Accepted are decimal digits, at most one decimal point (the first character of the current
   locale's decimal_point, '.' if localeconv fails), and - only if checksign is non-zero - one leading '+' or
   '-'. The empty string (3.17.2 change) and a string that consists of a single non-digit are rejected. */
LUALIB_API int tools_isnumericstring (const char *s, int checksign) {
  struct lconv *cv = localeconv();
  const char decpoint = (cv ? cv->decimal_point[0] : '.');
  int pointallowed = 1;  /* cleared as soon as the decimal point has been seen */
  size_t len, i;
  len = tools_strlen(s);  /* 2.17.8 tweak */
  if (len == 0) return 0;  /* 3.17.2 change */
  for (i = 0; i < len; i++) {
    const unsigned char c = uchar(s[i]);
    if (c >= '0' && c <= '9') continue;
    if (c == decpoint && pointallowed && len > 1) {
      pointallowed = 0;
    } else if (checksign && i == 0 && len > 1 && (c == '+' || c == '-')) {
      /* first char is a sign, and size string > 1, 3.17.1 extension */
      checksign = 0;
    } else {
      return 0;
    }
  }
  return 1;
}


/* Like strcmp, but compares case-insensitively, 2.16.1, rewritten 2.38.4 with switch to tools_lowercase.
   Both strings are walked in parallel until p ends or the characters, mapped through the tools_lowercase table,
   differ; the result is the difference of the two mapped characters at that position, thus 0 for equal
   strings. */
LUALIB_API int tools_stricmp (const char *p, const char *q) {
  for ( ; *p != '\0'; p++, q++) {
    if (tools_lowercase[uchar(*p)] != tools_lowercase[uchar(*q)]) break;
  }
  return (int)tools_lowercase[uchar(*p)] - (int)tools_lowercase[uchar(*q)];
}


/* Returns 1 if src matches at least one of the other arguments, otherwise returns 0. Last argument must be NULL.
   The candidates are compared with tools_streq from left to right; the search stops at the first match. */
LUALIB_API int tools_streqx (const char *src, ...) {
  va_list candidates;
  const char *candidate;
  int found = 0;
  va_start(candidates, src);
  while (!found && (candidate = va_arg(candidates, const char *)) != NULL) {
    if (tools_streq(src, candidate)) found = 1;
  }
  va_end(candidates);
  return found;
}


/* Reverses a string in-place. Written by Deyan Dobromirov. Modified. 2.22.2
   Taken from: https://stackoverflow.com/questions/784417/reversing-a-string-in-c
   s is the string and n the number of characters to reverse. Nothing happens if s is NULL or n < 2. */
LUALIB_API void tools_strnrev (char *s, int n) {
  char *left, *right, tmp;
  if (s == NULL || n < 2) return;  /* do nothing; 2.25.4 patch to prevent invalid reads */
  left = right = s;
#ifdef IS32BITALIGNED  /* 2.25.3 tweak */
  if (tools_strisaligned(right)) {  /* n is taken at face value: jump straight to the last character */
    right += n - 1;
    n = 0;
  }
#endif
  /* 2.25.3 fix to avoid access to unassigned memory, e.g. when operating on an Agena stack, especially with stack.dumpd. */
  /* otherwise walk to the last character step by step, stopping early at a null character */
  for ( ; *right && n > 1; n--) right++;
  /* swap the outermost characters and move inwards */
  for ( ; right > left; left++, right--) {
    tmp = *left;
    *left = *right;
    *right = tmp;
  }
}


#ifdef IS32BITALIGNED  /* for 4/8-byte aligned data, 2.25.1/2 fix */
/* strlen replacement that returns 0 for a NULL pointer. If p passes the tools_strisaligned check, whole BLOCK_T
   words are skipped as long as NONULL reports them to be free of zero bytes (presumably - see the macro
   definition); the remaining bytes are scanned one by one.
   2 % faster than strlen, fastest implementation tried out yet, 2.17.8; 2.25.0 Mac OS X fix */
LUALIB_API size_t tools_strlen (const char *p) {
  const char *cur;
  if (p == NULL) return 0;  /* 2.29.3 better be sure than sorry */
  cur = p;
  if (tools_strisaligned(cur)) {  /* likely() macro is very slow, so do not use it. */
    BLOCK_T *blk = (BLOCK_T *)cur;
    while (NONULL(*blk)) blk++;
    cur = (char *)blk;  /* the terminator is somewhere in this word */
  }
  while (*cur) cur++;
  return ((unsigned char *)cur) - ((unsigned char *)p);  /* 2.25.5 adaption */
}

/* strcmp == 0 substitute, 2.17.8 - 15 % (Win) / 12 % (Stretch) faster than GCC's strcmp(x) == 0 expression, 2.25.0 Mac OS X fix */
LUALIB_API int tools_streq (const char *p, const char *q) {
  BLOCK_T *x, *y;
  if (tools_stringsarealigned(p, q)) {
    x = (BLOCK_T *)p;
    y = (BLOCK_T *)q;
    /* we have to prevent access to unallocated bytes in a 4-byte `integer` to avoid invalid 4-byte reads, so we have to
       use NONULL before comparing the uint32_t's; combining x and y into a uint64_t and then calling ISNULL only once is
       slower. */
    while (NONULL(*x) && NONULL(*y) && *x == *y) { x++; y++; }  /* do not increment x and y w/i the comparison ! */
    p = (char *)x;
    q = (char *)y;
  }
  /* Do not increment p and q w/i the comparison as with a mismatch, it would compare the wrong chars in the last statement
     and might return false positives. */
  while (*p && *p == *q) { p++; q++; }
  return (*(unsigned char *)p) == (*(unsigned char *)q);
}

LUALIB_API int tools_strcmp (const char *p, const char *q) {  /* 2.25.1, not faster than GNU C's strcmp */
  BLOCK_T *x, *y;
  if (tools_stringsarealigned(p, q)) {
    x = (BLOCK_T *)p;
    y = (BLOCK_T *)q;
    /* we have to prevent access to unallocated bytes in a 4-byte `integer` to avoid invalid 4-byte reads, so we have to
       use NONULL before comparing the uint32_t's; combining x and y into a uint64_t and then calling ISNULL only once is
       slower. */
    while (NONULL(*x) && NONULL(*y) && *x == *y) { x++; y++; }  /* do not increment x and y w/i the comparison ! */
    p = (char *)x;
    q = (char *)y;
  }
  /* Do not increment p and q w/i the comparison as with a mismatch, it would compare the wrong chars in the last statement
     and might return false positives. */
  while (*p && *p == *q) { p++; q++; }
  return (*(unsigned char *)p) - (*(unsigned char *)q);
}

/* strncmp replacement: compares at most n bytes of p and q and returns the difference of the first pair of bytes
   (taken as unsigned char) that do not match, or 0 if the compared ranges are equal. With n == 0 nothing is read
   and 0 is returned, as with strncmp. 2.25.2, 6 percent faster than GNU C's strncmp */
LUALIB_API int tools_strncmp (const char *p, const char *q, size_t n) {
  BLOCK_T *x, *y;
  if (n == 0) return 0;  /* nothing to compare; without this guard the unaligned path compared the first bytes */
  if (tools_stringsarealigned(p, q)) {
    x = (BLOCK_T *)p;
    y = (BLOCK_T *)q;
    /* we have to prevent access to unallocated bytes in a 4-byte `integer` to avoid invalid 4-byte reads, so we have to
       use NONULL before comparing the uint32_t's; combining x and y into a uint64_t and then calling ISNULL only once is
       slower. */
    while (tools_largeenough(n) && NONULL(*x) && NONULL(*y) && *x == *y) { x++; y++; n -= AGN_BLOCKSIZE; }  /* do not increment x and y w/i the comparison ! */
    if (n == 0) return 0;  /* all n bytes have been consumed word-wise and were equal */
    p = (char *)x;
    q = (char *)y;
  }
  /* Do not increment p and q w/i the comparison as with a mismatch, it would compare the wrong chars in the last statement
     and might return false positives. The loop advances at most n - 1 times so that the final statement inspects
     the n-th byte at the latest. */
  while (n-- > 1 && *p && *p == *q) { p++; q++; }
  return (*(unsigned char *)p) - (*(unsigned char *)q);
}
#endif  /* of IS32BITALIGNED */


/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Taken from: https://android.googlesource.com/platform/bionic/+/ics-mr0/libc/string/strnlen.c
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/* Returns the length of str, but inspects at most maxlen bytes: if no terminating '\0' is found within the first
   maxlen bytes, maxlen is returned. */
LUALIB_API size_t tools_strnlen (const char *str, size_t maxlen) {  /* 3.7.8 */
  const char *terminator = memchr(str, '\0', maxlen);
  return (terminator != NULL) ? (size_t)(terminator - str) : maxlen;
}


/* Taken from musl src/string/strverscmp.c; 2.25.4

Copyright  2005-2014 Rich Felker, et al.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* Version-aware string comparison: runs of digits embedded in the strings are compared by value rather than
   byte by byte, so that e.g. "a9" sorts before "a10". Returns a value less than, equal to or greater than zero. */
LUALIB_API int tools_strverscmp (const char *l0, const char *r0) {
  const unsigned char *lhs = (const void *)l0;
  const unsigned char *rhs = (const void *)r0;
  size_t pos = 0;       /* index of the first differing byte */
  size_t digstart = 0;  /* start of the digit run the common prefix ends with */
  size_t k;
  int allzero = 1;      /* that digit run consists of zeros only */
  /* Find maximal matching prefix and track its maximal digit suffix and whether those digits are all zeros. */
  while (lhs[pos] == rhs[pos]) {
    int c = lhs[pos];
    if (c == '\0') return 0;  /* both strings are identical */
    if (!isdigit(c)) {
      digstart = pos + 1;
      allzero = 1;
    } else if (c != '0') {
      allzero = 0;
    }
    pos++;
  }
  if (lhs[digstart] != '0' && rhs[digstart] != '0') {
    /* If we're not looking at a digit sequence that began with a zero, longest digit string is greater. */
    k = pos;
    while (isdigit(lhs[k])) {
      if (!isdigit(rhs[k])) return 1;
      k++;
    }
    if (isdigit(rhs[k])) return -1;
  } else if (allzero && digstart < pos && (isdigit(lhs[pos]) || isdigit(rhs[pos]))) {
    /* Otherwise, if common prefix of digit sequence is all zeros, digits order less than non-digits. */
    return (unsigned char)(lhs[pos] - '0') - (unsigned char)(rhs[pos] - '0');
  }
  return lhs[pos] - rhs[pos];
}


/* Determines the optimal memory allocation size for a string of length l, with l NOT including the terminating
   '\0'. l should be multiplied by CHARSIZE before calling the function.
   One byte for the terminator is added and the sum is rounded up to the next multiple of AGN_BLOCKSIZE,
   regardless of whether strings are aligned on the system or not. The function returns the resulting number of
   bytes and stores the corresponding number of AGN_BLOCKSIZE-sized chunks in `chunks`. Works with empty strings,
   too. See also: tools_stralloc. */
LUALIB_API size_t tools_optstrlen (size_t l, size_t *chunks) {
  size_t total = l + 1;                   /* length including \0 */
  size_t rem = total & AGN_ALIGNMASK;     /* bytes used in the last, partially filled chunk; 2.17.2 optimisation */
  if (rem != 0) total += AGN_BLOCKSIZE - rem;  /* fill up the last chunk */
  *chunks = total / AGN_BLOCKSIZE;
  return total;
}


/* Convert double (or int, etc.) into a string, taken from agnconf.h, safe for doubles

  ! FREE the string after usage ! */

#define MAXNUMBER2STR 32  /* buffer size in bytes; ample room for the output of "%.14g" including sign, point, exponent and \0 */

/* Formats x with "%.14g" into a freshly allocated buffer of MAXNUMBER2STR bytes and returns it, or NULL if the
   allocation failed. The caller has to free the result. */
LUALIB_API char *tools_dtoa (double x) {
  char *out = malloc(MAXNUMBER2STR*sizeof(char));
  if (out != NULL) {  /* 2.16.5 */
    sprintf(out, "%.14g", x);
  }
  return out;
}


/* Changes a float value written with a decimal comma to one with a decimal dot, in-place; 2.3.0 RC 2
   str may consist of the digits 0 to 9 and commas only. If it contains exactly one comma, str_charreplace is
   called to substitute ',' with '.' and *result is set to 1. In all other cases str is left untouched and
   *result is set to 0. */
LUALIB_API void tools_commatodot (char *str, int *result) {
  size_t ncommas = 0;
  char *p;
  *result = 0;
  for (p = str; *p; p++) {
    if (*p == ',') {
      ncommas++;
    } else if (*p < '0' || *p > '9') {
      return;  /* neither digit nor comma: not a candidate */
    }
  }
  if (ncommas != 1) return;
  str_charreplace(str, ',', '.', 0);
  *result = 1;
}


/* Returns 1 if character x is a valid digit in the given base, and 0 otherwise. '0' .. '9' stand for the values
   0 to 9; for bases greater than 10, letters starting at 'A' or 'a' stand for the values from 10 on.
   NOTE(review): the letter ranges are not clipped at 'Z'/'z', so with base > 36 characters following the
   alphabet are accepted as well. 4.7.4 */
LUALIB_API int tools_isdigit (int x, int base) {
  if (x >= '0' && x <= '9') return x < '0' + base;  /* a decimal digit is valid only if it is below the base */
  if (base > 10) {
    int nletters = base - 10;  /* number of letters that serve as digits */
    if (x >= 'A' && x < 'A' + nletters) return 1;
    if (x >= 'a' && x < 'a' + nletters) return 1;
  }
  return 0;
}


/* Returns the substring of s that is enclosed by the first occurrence of p and the first occurrence of q behind
   it. The result points into s (it is not a copy and not terminated at the match) and its length is stored to
   *r_len. If p or q cannot be found, s itself is returned and *r_len is set to s_len. If one of the strings is
   NULL or empty, or if s_len <= p_len + q_len, NULL is returned and *r_len is set to 0. The search is done with
   strstr, so it ends at the first '\0'. 2.27.4 */
LUALIB_API char *tools_between (const char *s, size_t s_len,
                                const char *p, size_t p_len,
                                const char *q, size_t q_len,
                                size_t *r_len) {
  char *start, *stop;
  if (!s || !p || !q || !s_len || !p_len || !q_len || s_len <= p_len + q_len) {
    *r_len = 0;
    return NULL;
  }
  start = strstr(s, p);  /* tools_lmemfind is a little bit slower */
  if (start != NULL) {
    start += p_len;  /* first char behind the opening delimiter */
    stop = strstr(start, q);
    if (stop != NULL) {
      *r_len = stop - start;
      return start;
    }
  }
  *r_len = s_len;  /* at least one delimiter is missing */
  return (char *)s;
}


/* Flag combinations that the entries of the tools_alphadia table are compared with by the functions below;
   __0____ marks a flag position that is not set in the respective combination. */
#define VOWEL     (__ALPHA|__0____|__VOWEL|__0____)
#define CONSONANT (__ALPHA|__0____|__0____|__0____)
#define DIACRIT   (__0____|__DIACR|__0____|__0____)

/* Returns 1 if the tools_alphadia entry of character c is equal to VOWEL, and 0 otherwise. If withy is zero, the
   letters 'y' and 'Y' are never taken as vowels.
   The character is kept in an unsigned char: with a plain (possibly signed) char, codes above 127 became negative,
   which resulted in a negative table index and in an invalid argument to toupper.
   changed 2.10.0, patched and extended 4.11.4 */
LUALIB_API int tools_isvowel (int c, int withy) {
  unsigned char uc = uchar(c);
  if (!withy && toupper(uc) == 'Y') return 0;
  return tools_alphadia[uc + 1] == VOWEL;  /* the table is indexed with the character code plus one; 4.11.6 change */
}


/* Returns 1 if the tools_alphadia entry of character c is equal to CONSONANT, and 0 otherwise. The table is
   indexed with the character code (as unsigned char) plus one. changed 2.10.0, 4.11.4 */
LUALIB_API int tools_isconsonant (int c) {  /* changed 2.10.0, 4.11.4 */
  return tools_alphadia[uchar(c) + 1] == CONSONANT;  /* 4.11.6 change */
}


/* Returns 1 if the tools_alphadia entry of character c has the __ALPHA or the __DIACR flag set, and 0 otherwise.
   The table is indexed with the character code (as unsigned char) plus one. 2.32.1, 4.11.4 */
LUALIB_API int tools_isalphadia (int c) {
  int flags = tools_alphadia[uchar(c) + 1];
  return (flags & (__ALPHA | __DIACR)) != 0;
}


/* Shift operators used by tools_memcpy to assemble destination words from two neighbouring source words; their
   direction depends on the byte order of the platform.
   NOTE(review): the macros are defined for __GNUC__ compilers only. If neither BYTE_ORDER nor BIG_ENDIAN is
   defined at this point, both evaluate to 0 in the #if expression and the big-endian variant is selected -
   confirm that an included header provides them on all supported platforms. */
#ifdef __GNUC__
#if BYTE_ORDER != BIG_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif
#endif  /* of __GNUC__ */

/* Taken from musl-1.2.0, src/string/memcpy.c, MIT licence; 2 % faster than GNU C's memcpy. 2.25.0 Mac OS X fix; 2.25.1 patch */
#ifdef IS32BITALIGNED
/* Copies n bytes from src to dest and returns dest; the memory areas must not overlap.
   Strategy: bytes are copied one by one until src is aligned to a 32-bit word. If dest is aligned then, too, the
   bulk is copied word by word. Otherwise, for n >= 32, each destination word is assembled from two neighbouring
   source words with the LS/RS shifts according to the misalignment of dest (1, 2 or 3 bytes). Whatever is left is
   copied byte by byte.
   All alignment checks use the size of the 32-bit words that are actually read and written instead of
   AGN_BLOCKSIZE: the switch below covers the residues 1 to 3 only, so with a block size of 8 a destination
   misaligned by 4 to 7 bytes would have bypassed both bulk paths and only n mod 32 bytes would have been copied. */
LUALIB_API void *tools_memcpy (void *dest, const void *src, size_t n) {
  unsigned char *d = dest;
  const unsigned char *s = src;
  uint32_t w, x;
  for (; (uintptr_t)s % sizeof(uint32_t) && n; n--) *d++ = *s++;  /* unaligned ?  no benefit when using tools_strisunaligned */
  if ((uintptr_t)d % sizeof(uint32_t) == 0) {  /* aligned ? -> copy words of 4 bytes each at once instead of just single bytes */
    for (; n >= 16; s += 16, d += 16, n -= 16) {
      *(uint32_t *)(d + 0)  = *(uint32_t *)(s + 0);
      *(uint32_t *)(d + 4)  = *(uint32_t *)(s + 4);
      *(uint32_t *)(d + 8)  = *(uint32_t *)(s + 8);
      *(uint32_t *)(d + 12) = *(uint32_t *)(s + 12);
    }
    /* fewer than 16 bytes are left, the bits of n tell which pieces are still to be copied */
    if (n & 8) {
      *(uint32_t *)(d + 0)  = *(uint32_t *)(s + 0);
      *(uint32_t *)(d + 4)  = *(uint32_t *)(s + 4);
      d += 8; s += 8;
    }
    if (n & 4) {
      *(uint32_t *)(d + 0)  = *(uint32_t *)(s + 0);
      d += 4; s += 4;
    }
    if (n & 2) {
      *d++ = *s++; *d++ = *s++;
    }
    if (n & 1) {
      *d = *s;
    }
    return dest;
  }
  if (n >= 32) {
    switch ((uintptr_t)d % sizeof(uint32_t)) {  /* unaligned ? */
      case 1:  /* three single bytes align dest, afterwards src is one byte behind a word boundary */
        w = *(uint32_t *)s;
        *d++ = *s++;
        *d++ = *s++;
        *d++ = *s++;
        n -= 3;
        for (; n >= 17; s += 16, d += 16, n -= 16) {
          x = *(uint32_t *)(s + 1);
          *(uint32_t *)(d + 0) = (w LS 24) | (x RS 8);
          w = *(uint32_t *)(s + 5);
          *(uint32_t *)(d + 4) = (x LS 24) | (w RS 8);
          x = *(uint32_t *)(s + 9);
          *(uint32_t *)(d + 8) = (w LS 24) | (x RS 8);
          w = *(uint32_t *)(s + 13);
          *(uint32_t *)(d + 12) = (x LS 24) | (w RS 8);
        }
        break;
      case 2:  /* two single bytes align dest, afterwards src is two bytes behind a word boundary */
        w = *(uint32_t *)s;
        *d++ = *s++;
        *d++ = *s++;
        n -= 2;
        for (; n >= 18; s += 16, d += 16, n -= 16) {
          x = *(uint32_t *)(s + 2);
          *(uint32_t *)(d + 0) = (w LS 16) | (x RS 16);
          w = *(uint32_t *)(s + 6);
          *(uint32_t *)(d + 4) = (x LS 16) | (w RS 16);
          x = *(uint32_t *)(s + 10);
          *(uint32_t *)(d + 8) = (w LS 16) | (x RS 16);
          w = *(uint32_t *)(s + 14);
          *(uint32_t *)(d + 12) = (x LS 16) | (w RS 16);
        }
        break;
      case 3:  /* one single byte aligns dest, afterwards src is three bytes behind a word boundary */
        w = *(uint32_t *)s;
        *d++ = *s++;
        n -= 1;
        for (; n >= 19; s += 16, d += 16, n -= 16) {
          x = *(uint32_t *)(s + 3);
          *(uint32_t *)(d + 0) = (w LS 8) | (x RS 24);
          w = *(uint32_t *)(s + 7);
          *(uint32_t *)(d + 4) = (x LS 8) | (w RS 24);
          x = *(uint32_t *)(s + 11);
          *(uint32_t *)(d + 8) = (w LS 8) | (x RS 24);
          w = *(uint32_t *)(s + 15);
          *(uint32_t *)(d + 12) = (x LS 8) | (w RS 24);
        }
        break;
    }
  }  /* of n >= 32 */
  /* fewer than 32 bytes are left, copy them byte by byte according to the bits of n */
  if (n & 16) {
    *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
  }
  if (n & 8) {
    *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
  }
  if (n & 4) {
    *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
  }
  if (n & 2) {
    *d++ = *s++; *d++ = *s++;
  }
  if (n & 1) {
    *d = *s;
  }
  return dest;
}
#endif  /* of IS32BITALIGNED */


/* Reads one line of any length from fp, FREE the result ! Rewritten 2.21.5

   The line is read in pieces of at most buffersize - 1 bytes into the work buffer buf and appended to line, which
   is enlarged with realloc whenever more than *maxbufsize bytes are needed (*maxbufsize is updated accordingly).
   A trailing newline is removed and a CR/LF pair at the end is treated like a single newline. The length of the
   line is stored to *linesize.

   *r is set to -1 if end of file has been reached and nothing could be read (*linesize is 0 then), so the caller
   has to check for r == -1 to be sure about EOF; to 1 if memory allocation failed, in which case NULL is returned
   and *maxbufsize is set to 0; and to 0 otherwise. The end-of-file and error indicators of fp are cleared before
   returning.

   NOTE(review): if realloc fails, the previous buffer is neither freed nor returned - confirm how callers deal
   with that. */
LUALIB_API char *tools_getline (FILE *fp, char *buf, char *line, size_t *linesize, int buffersize, size_t *maxbufsize, int *r) {
  size_t bufsize, newbufsize;
  char *q;
  *linesize = 0;
  do {
    if (fgets(buf, buffersize, fp) == NULL) break;  /* 2.34.9 adaption */
    bufsize = tools_strlen(buf);  /* 2.17.8 tweak */
    /* second last char in buf is CR and last one is NL ?  Check these two bytes directly: searching for the first
       CR in buf failed to detect the pair if the line contained another CR further to the left. */
    if (bufsize >= 2 && buf[bufsize - 2] == '\r' && buf[bufsize - 1] == '\n') {
      buf[bufsize - 2] = '\n'; bufsize--;
    }
    q = tools_memchr(buf, '\n', bufsize);  /* remove trailing NL */
    if (q != NULL) { *q = '\0'; bufsize--; }
    newbufsize = sizeof(char)*(bufsize + *linesize + 1);
    if (newbufsize > *maxbufsize) {  /* also mallocs a value if needed */
      line = (void *)realloc(line, newbufsize);
      *maxbufsize = newbufsize;
    }
    if (line == NULL) {  /* memory allocation error ? */
      *r = 1; *maxbufsize = 0;
      return NULL;
    }
    tools_memcpy(line + *linesize, buf, bufsize);  /* 2.21.5 tweak */
    *linesize += bufsize;
    tools_bzero(line + *linesize, 1);  /* only set NULL when something has been read, don't move statement out of the loop */
  } while (q == NULL);  /* end of _line_ has not yet been reached */
  *r = (feof(fp) != 0 && *linesize == 0) ? -1 : 0;  /* EOF reached ? -> return -1 else 0 */
  clearerr(fp);  /* clear eof indicators of stream */
  return line;  /* the (possibly reallocated) buffer is always handed back so that it can be freed later on */
}


/* retrieve information on your platform;
0.12.2, October 12, 2008; extended to correctly identify Vista, Windows 2003, and Windows 2007 Server
on April 08, 2009 - 0.13.4; updated 1.7.9, 07.09.2012. */
#ifdef _WIN32
/* for Windows 7 or later, https://stackoverflow.com/questions/32115255/c-how-to-detect-windows-10, posted by Michael Haephrati;
   returns 6 for Windows 8.1, but 10 for Windows 10. */
/* Queries the Windows version with RtlGetVersion, which is looked up in ntdll at runtime. Returns the major
   version and stores minor version, build number, platform id, product type and service pack numbers to the
   respective parameters; *winver receives major + 0.1*minor, or 11 for a major version of 10 with a build number
   of 22000 or higher.
   If RtlGetVersion is not available, 0 is returned, *winver is set to 0.0, the int results are set to -1 and the
   service pack numbers to 255. (The int results are no longer assigned through the uint8_t ones in one chained
   assignment, which stored 255 instead of -1 into them.) */
LUALIB_API int getSysOpType (int *minorVersion, int *BuildNumber, int *PlatformId, int *ProductType, double *winver, uint8_t *SPmaj, uint8_t *SPmin) {
  int ret = 0;
  NTSTATUS(WINAPI *RtlGetVersion)(LPOSVERSIONINFOEXW);
  OSVERSIONINFOEXW osInfo;
  *(FARPROC*)&RtlGetVersion = GetProcAddress(GetModuleHandleA("ntdll"), "RtlGetVersion");
  *minorVersion = *BuildNumber = *PlatformId = *ProductType = -1;
  *SPmaj = *SPmin = (uint8_t)-1;
  if (RtlGetVersion != NULL) {
    osInfo.dwOSVersionInfoSize = sizeof(osInfo);
    RtlGetVersion(&osInfo);
    ret = osInfo.dwMajorVersion;
    *minorVersion = osInfo.dwMinorVersion;
    *BuildNumber  = osInfo.dwBuildNumber;
    *PlatformId   = osInfo.dwPlatformId;
    *ProductType  = osInfo.wProductType;
    *SPmaj        = osInfo.wServicePackMajor;
    *SPmin        = osInfo.wServicePackMinor;
    *winver       = ret + 0.1*(*minorVersion);
    if (*winver == 10 && *BuildNumber >= 22000) *winver = 11;  /* 2.39.1 */
  } else
    *winver = 0.0;
  return ret;
}

/* Overwrites MinorVersion, BuildNumber, PlatformId and ProductType of the structure `winver` points to with the
   values of the variables minversion, build, platform and product. The macro relies on these names being in
   scope where it is expanded and expands to a brace-enclosed block. */
#define correctWinVerData() { \
  winver->MinorVersion = minversion; \
  winver->BuildNumber  = build; \
  winver->PlatformId   = platform; \
  winver->ProductType  = product; \
}

LUALIB_API int getWindowsVersion (struct WinVer *winver) {
  int result, MajorVersion, MinorVersion, ProductType, PlatformId;
  char *PMaintenance;
  double winversion;
  uint8_t SPmaj, SPmin;
  OSVERSIONINFOEX version;
  version.dwOSVersionInfoSize = sizeof(version);
  GetVersionEx((OSVERSIONINFO *)&version);
  /*  GREP "static const char *versions[]" * when extending this procedure

  dwMajorVersion
  Major version number of the operating system. This member can be one of the following values. Value Meaning
  4 Windows NT 4.0, Windows Me, Windows 98, or Windows 95.
  5 Windows Server 2003, Windows XP, or Windows 2000
  6 Windows Vista, Windows Server 2008, Windows 7, 8, 8.1

  dwMinorVersion
  Minor version number of the operating system. This member can be one of the following values. Value Meaning
  0 Windows 2000, Windows NT 4.0, Windows 95, or Windows Vista
  1 Windows XP, Windows 2008 server
  2 Windows Server 2003, Windows 7, Windows Server 2012
  3 Windows Server 2012 RC 2, Windows 8, 8.1
  10 Windows 98
  90 Windows Me

  dwPlatformId
  Operating system platform. This member can be one of the following values. Value Meaning
  VER_PLATFORM_WIN32_NT
  2 Windows Server 2003, Windows XP, Windows 2000, or Windows NT.
  VER_PLATFORM_WIN32_WINDOWS
  1 Windows Me, Windows 98, or Windows 95.
  VER_PLATFORM_WIN32s
  0 Win32s on Windows 3.1. */
  result = 18;  /* unknown default */
  winver->BuildNumber = (int)version.dwBuildNumber;
  MajorVersion = winver->MajorVersion = (int)version.dwMajorVersion;
  MinorVersion = winver->MinorVersion = (int)version.dwMinorVersion;
  PlatformId = winver->PlatformId = (int)version.dwPlatformId;
  ProductType = winver->ProductType = (char)version.wProductType;
  PMaintenance = (char *)version.szCSDVersion;
  if (!PMaintenance)  /* 2.31.2 fix */
    tools_bzero(winver->Maintenance, szCSDVersionLENGTH);
  else
    strcpy(winver->Maintenance, PMaintenance);
  if (MajorVersion == 4) {
    if (MinorVersion == 0 && PlatformId == 1)
      result = MS_WIN95;      /* 2, Win 95 */
    else if (MinorVersion == 0 && PlatformId == 2)
      result = MS_WINNT4;     /* 3, Win NT 4.0 */
    else if (MinorVersion == 10)
      result = MS_WIN98;      /* 4, Win 98 */
    else if (MinorVersion == 90)
      result = MS_WINME;      /* 5, Win Me, return value changed 1.9.3 */
  } else if (MajorVersion == 5) {
    if (MinorVersion == 0)
      result = MS_WIN2K;      /* 6, Win 2000, return value changed 1.9.3  */
    else if (MinorVersion == 1)
      result = MS_WINXP;      /* 7, Win XP */
    else if (MinorVersion == 2)
      result = MS_WIN2003;    /* 8, Win 2003 */
  } else if (MajorVersion == 6) {
    if (MinorVersion == 0 && ProductType == VER_NT_WORKSTATION)
      result = MS_WINVISTA;   /* 9, Win Vista */
    else if (MinorVersion >= 0 && ProductType != VER_NT_WORKSTATION)
      result = MS_WINS2008;   /* 10, Win Server 2008 */
    else if (MinorVersion == 1 && ProductType == VER_NT_WORKSTATION)
      result = MS_WIN7;       /* 11, Windows 7 */
    else if (MinorVersion == 2 && ProductType == VER_NT_WORKSTATION)
      result = MS_WIN80;      /* 12, Windows 8 */
    else if (MinorVersion == 2 && ProductType != VER_NT_WORKSTATION)
      result = MS_WINS2012;   /* 13, Windows Server 2012 */
  }
  else if (PlatformId == 0)
    result = MS_WIN32S;       /* 1, Win32s under Windows 3.1 */
  else  /* CHANGE result initalisation at the top if you change this line */
    result = MS_WIN10POST;    /* 18, 2.8.5 patch, assume higher Windows version, checked other source files former 0 return. */
  if (result >= MS_WIN80) {   /* DON'T MOVE TO TOP due to unknown behaviour on Vista and earlier!
    2.14.1 fix: we are at least on Windows 8.0 but additionally we have to check for 8.1 and later,
    since on 8.1 and 10, MajorVersion == 6 && MinorVersion == 2 depicting 8.0 */
    int majversion, minversion, build, platform, product;
    majversion = getSysOpType(&minversion, &build, &platform, &product, &winversion, &SPmaj, &SPmin);
    if (majversion == 6 && minversion == 3 && ProductType == VER_NT_WORKSTATION) {
      correctWinVerData();
      result = MS_WIN81;      /* 14, Windows 8.1 */
    } else if (majversion == 6 && minversion == 3 && ProductType != VER_NT_WORKSTATION) {
      correctWinVerData();
      result = MS_WINS2012R2; /* 15, Windows Server 2012 R2 */
    } else if (majversion == 10 && product == 1) {
      correctWinVerData();
      result = MS_WIN10;      /* 16, Windows 10 */
    } else if (majversion == 10 && product != 1) {
      correctWinVerData();
      result = MS_WIN10S;     /* 17, Windows 10 Server */
    }
  }
  return result;
}
#endif


/* Get full path of running application, 2.16.1.
   FREE it !  Note that free() according to the C standard just quits when pointer is NULL.
   See Internet article: "executable - Absolute path to currently executing program - Unix & Linux Stack Exchange"

   Supported are Windows (NT 4.0, 2000 and later), Linux (through /proc/self/exe) and OS/2, where the path is
   returned in lower case and with forward slashes. On all other platforms, and if the path cannot be determined
   or memory cannot be allocated, NULL is returned. The work buffer is released on all paths that do not return it. */
LUALIB_API char *getModuleFileName (void) {
  char *apath;
#if defined(_WIN32)
  int winversion;
  struct WinVer winver;
  winversion = getWindowsVersion(&winver);
  if (winversion < MS_WIN2K && winversion != MS_WINNT4) return NULL;  /* not(NT4, W2K or later) ? , 2.17.2 extension */
#endif
  apath = tools_stralloc(MAX_PATH);  /* 2.16.5 */
  if (apath == NULL) return NULL;
#if defined(_WIN32)
  if (GetModuleFileName(NULL, apath, MAX_PATH + 1)) return apath;
#elif defined(__linux__)
  int nchar;
  nchar = readlink("/proc/self/exe", apath, MAX_PATH + 1);  /* readlink does not terminate the string */
  if (nchar >= 0 && nchar < (MAX_PATH + 1)) {
    apath[nchar] = '\0';
    return apath;
  }
#elif defined(__OS2__)  /* 2.39.12 */
  PPIB ppib;
  char path[MAX_PATH];
  APIRET rc = NO_ERROR;
  /* get process info blocks; http://www.edm2.com/index.php/DosGetInfoBlocks */
  rc = DosGetInfoBlocks(NULL, &ppib);
  if (rc == NO_ERROR) {
    /* get full path of executable; http://www.edm2.com/index.php/DosQueryModuleName */
    rc = DosQueryModuleName(ppib->pib_hmte, sizeof(path), path);
    if (rc == NO_ERROR) {
      /* *strrchr(path, '\\') = 0; */ /* we do not truncate name part including last backslash */
      int i;
      char *p = path;
      for (i=0; *p; i++) {  /* at least in ArcaOS, we get a path in capital letters and with backslashes, so: */
        path[i] = (*p == '\\') ? '/' : tolower(*p);
        p++;
      }
      free(apath);  /* the result is a separate copy, the work buffer is not needed on this platform */
      return tools_strdup(path);
    }
  }
#endif
  free(apath);  /* path could not be determined: do not leak the work buffer */
  return NULL;
}


/* 2.1.3, compares two numbers using Donald Knuth's approximation method, see approx in lbaselib
  sun_frexp(fabs(x) > fabs(y) ? x : y, &exp); return fabs(x - y) <= sun_ldexp(eps, exp); is a bit slower

  Returns 1 if x and y are equal, if both are NaN, or if their distance is at most eps or at most eps times the
  larger of their absolute values; returns 0 otherwise, in particular if the distance is infinite. */
LUALIB_API int tools_approx (double x, double y, double eps) {
  double dist;
  if (unlikely((x == y) || (tools_isnan(x) && tools_isnan(y)))) return 1;  /* 2.5.15 optimisation */
  dist = fabs(x - y);
  if (dist == HUGE_VAL) return 0;  /* patched for +/-infinity 2.31.11 */
  return dist <= eps || dist <= (eps*fMax(fabs(x), fabs(y)));
}


/* Taken from MUSL-1.2.3 src/math/fabsl.c, 2.34.10 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Absolute value of x; long double has the format of a double in this configuration, so fabs is called. */
LUALIB_API long double tools_fabsl (long double x) {
  return fabs(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
/* Absolute value of x for the extended long double formats: the value is put into a union ldshape and the
   field i.se is masked with 0x7fff, which clears its most significant bit (presumably the sign bit, with the
   remaining 15 bits holding the exponent - see the declaration of union ldshape). */
LUALIB_API long double tools_fabsl (long double x) {
  union ldshape u = {x};
  u.i.se &= 0x7fff;
  return u.f;
}
#endif


#ifndef __ARMCPU  /* 2.37.1 */
/* Long double counterpart of tools_approx: returns 1 if x and y are equal, if both are NaN, or if their distance
   is at most eps or at most eps times the larger of their absolute values; returns 0 otherwise, in particular if
   the distance is infinite. */
LUALIB_API int tools_approxl (long double x, long double y, long double eps) {
  long double dist;
  if (unlikely((x == y) || (tools_fpisnanl(x) && tools_fpisnanl(y)))) return 1;
  dist = fabsl(x - y);
  if (dist == HUGE_VAL) return 0;  /* infinitely far apart */
  return dist <= eps || dist <= (eps*fMax(fabsl(x), fabsl(y)));
}
#endif


/* Taken from MUSL-1.2.3 src/math/fmal.c, 2.34.10
 *
 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* just to compile on Raspberry Pi although unused, 2.37.1 */
#ifdef __ARMCPU
#define SPLIT (0x1p32L + 1)

/* Returns x with the sign of y. The work is delegated to the double function copysign; according to the note at
   the top of this file, long doubles have the size of doubles on Raspberry Pi. */
LUALIB_API long double sun_copysignl (long double x, long double y) {
  return copysign(x, y);
}
#endif

#ifndef __ARMCPU  /* 2.37.1 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Fused multiply-add x*y + z for platforms where long double has the format of a double (see the enclosing #if):
   the work is delegated to the double function fma. */
LUALIB_API long double tools_fmal (long double x, long double y, long double z) {
  return fma(x, y, z);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
/* LASTBIT(u) yields the least significant bit of the mantissa word read from union ldshape u (member i.m with a
   64-bit mantissa, i.lo with a 113-bit one). SPLIT is the factor dd_mul multiplies its operands with in order to
   split them into a high and a low part. */
#if LDBL_MANT_DIG == 64
#define LASTBIT(u) (u.i.m & 1)
#define SPLIT (0x1p32L + 1)
#elif LDBL_MANT_DIG == 113
#define LASTBIT(u) (u.i.lo & 1)
#define SPLIT (0x1p57L + 1)
#endif


/* A struct dd represents a floating-point number with twice the precision
 * of a long double.  We maintain the invariant that "hi" stores the high-order
 * bits of the result. */
struct dd {
  long double hi;  /* high-order part, i.e. the rounded result */
  long double lo;  /* low-order part, i.e. what got lost when rounding to hi */
};

/* Compute a+b exactly, returning the exact result in a struct dd.  We assume
 * that both a and b are finite, but make no assumptions about their relative
 * magnitudes.
 * The statements must be kept in exactly this order and form: the low part is
 * recovered from the rounding errors of the individual operations. */
static FORCE_INLINE struct dd dd_add (long double a, long double b) {
  struct dd ret;
  long double s;
  ret.hi = a + b;                         /* rounded sum */
  s = ret.hi - a;                         /* the part of b that made it into the rounded sum */
  ret.lo = (a - (ret.hi - s)) + (b - s);  /* what has been lost of a plus what has been lost of b */
  return (ret);
}

/* Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980. */
static FORCE_INLINE double add_adjusted (long double a, long double b) {
  struct dd sum;
  union ldshape u;
  sum = dd_add(a, b);
  if (sum.lo != 0) {
    u.f = sum.hi;
    if (!LASTBIT(u))
      sum.hi = nextafterl(sum.hi, INFINITY*sum.lo);
  }
  return (sum.hi);
}

/*
 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static FORCE_INLINE long double add_and_denormalize (long double a, long double b, int scale) {
  struct dd sum;
  int bits_lost;
  union ldshape u;
  sum = dd_add(a, b);
  /* If we are losing at least two bits of accuracy to denormalization,
   * then the first lost bit becomes a round bit, and we adjust the
   * lowest bit of sum.hi to make it a sticky bit summarizing all the
   * bits in sum.lo. With the sticky bit adjusted, the hardware will
   * break any ties in the correct direction.
   *
   * If we are losing only one bit to denormalization, however, we must
   * break the ties manually. */
  if (sum.lo != 0) {  /* the sum is inexact */
    u.f = sum.hi;
    bits_lost = -u.i.se - scale + 1;
    if ((bits_lost != 1) ^ LASTBIT(u))
      sum.hi = nextafterl(sum.hi, INFINITY*sum.lo);  /* one step into the direction of the lost bits */
  }
  return scalbnl(sum.hi, scale);
}

/*
 * Compute a*b exactly, returning the exact result in a struct dd.  We assume
 * that both a and b are normalized, so no underflow or overflow will occur.
 * The current rounding mode must be round-to-nearest.
 * The statements must be kept in exactly this order and form.
 */
static FORCE_INLINE struct dd dd_mul (long double a, long double b) {
  struct dd ret;
  long double ha, hb, la, lb, p, q;
  /* split a into a high part ha and a low part la with a == ha + la */
  p = a*SPLIT;
  ha = a - p;
  ha += p;
  la = a - ha;
  /* split b the same way */
  p = b*SPLIT;
  hb = b - p;
  hb += p;
  lb = b - hb;
  /* combine the partial products */
  p = ha*hb;
  q = ha*lb + la*hb;
  ret.hi = p + q;
  ret.lo = p - ret.hi + q + la*lb;
  return ret;
}

/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *      Dekker, T.  A Floating-Point Technique for Extending the
 *      Available Precision.  Numer. Math. 18, 224-242 (1971).
 */

/* GCC 11.4.0 complains without reason about possibly uninitialised ex, ey, ez, so suppress the warning; 3.3.2 */
#ifndef __OS2__  /* ignore `expected [error|warning|ignmored] pragma warnings in OS/2 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif

/* tools_fmal: returns x*y + z for long doubles.
 *   x, y  the two factors
 *   z     the addend
 * Non-finite and zero operands are answered directly by the first four tests. Otherwise x, y and z are split by
 * sun_frexpl() into fractions (xs, ys, zs) and binary exponents (ex, ey, ez); the product xs*ys is formed as a
 * hi/lo pair by dd_mul(), zs is added with dd_add(), and the sum is scaled back by ex + ey. */
LUALIB_API long double tools_fmal (long double x, long double y, long double z) {
	long double xs, ys, zs, adj;
	struct dd xy, r;
	int ex, ey, ez, spread;
	/* Handle special cases. The order of operations and the particular
	 * return values here are crucial in handling special cases involving
	 * infinities, NaNs, overflows, and signed zeroes correctly. */
	if (!isfinite(x) || !isfinite(y)) return (x*y + z);
	if (!isfinite(z)) return (z);
	if (x == 0.0 || y == 0.0) return (x*y + z);
	if (z == 0.0) return (x*y);
	xs = sun_frexpl(x, &ex);
	ys = sun_frexpl(y, &ey);
	zs = sun_frexpl(z, &ez);
	spread = ex + ey - ez;  /* exponent distance between the product x*y and z */
	/* If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially.  Rounding
	 * modes other than FE_TONEAREST are painful. */
	if (spread < -LDBL_MANT_DIG) return (z);
	if (spread <= LDBL_MANT_DIG*2)
		zs = scalbnl(zs, -spread);
	else
		zs = sun_copysignl(LDBL_MIN, zs);  /* z is replaced by LDBL_MIN carrying the sign of z */
	/* Basic approach for round-to-nearest:
	 *
	 *     (xy.hi, xy.lo) = x * y           (exact)
	 *     (r.hi, r.lo)   = xy.hi + z       (exact)
	 *     adj = xy.lo + r.lo               (inexact; low bit is sticky)
	 *     result = r.hi + adj              (correctly rounded) */
	xy = dd_mul(xs, ys);
	r = dd_add(xy.hi, zs);
	spread = ex + ey;  /* from here on: the exponent to scale the result back with */
	if (r.hi == 0.0) {
		/* When the addends cancel to 0, ensure that the result has the correct sign. */
		volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
		return xy.hi + vzs + scalbnl(xy.lo, spread);
	}
	adj = add_adjusted(r.lo, xy.lo);
	if (spread + tools_ilogbl(r.hi) > -16383)
		return scalbnl(r.hi + adj, spread);
	else
		return add_and_denormalize(r.hi, adj, spread);  /* taken when the exponent test above fails */
}

#ifndef __OS2__
#pragma GCC diagnostic pop
#endif

#endif
#endif  /* of __ARMCPU */

/* sets bit 0 or 1 at position pos (always positive) to integer x */
/* Sets or clears the pos-th bit of *x.
 *   x    integer to modify in place
 *   pos  1-based bit position, 1 = least significant bit; must not exceed the bit width of int
 *   bit  new bit value, 0 or 1
 * Does nothing if pos or bit is out of range. The arithmetic is done on unsigned values so that neither a
 * shift by the full width nor a shift into the sign bit is undefined, and a bit value other than 0 or 1 can
 * no longer spill into neighbouring bit positions. */
LUALIB_API void tools_setbit (int *x, int pos, int bit) {  /* 2.3.3 */
  unsigned int mask, v;
  if (pos < 1 || pos > (int)(sizeof(int)*CHAR_BIT) || bit < 0 || bit > 1) return;  /* do nothing */
  mask = 1u << (pos - 1);
  v = (unsigned int)*x;
  v = bit ? (v | mask) : (v & ~mask);
  *x = (int)v;
}


/* retrieves the pos-th bit from an int, positive or negative, with pos > 0 */
/* Returns the pos-th bit (1 = least significant) of the magnitude |x|.
 *   x    integer to inspect, positive or negative
 *   pos  1-based bit position
 * Returns -1 if pos < 1, and 0 for positions beyond the width of int. The magnitude is formed in unsigned
 * arithmetic, so INT_MIN is handled without overflow, and the shift count is always in range. */
LUALIB_API int tools_getbit (int x, int pos) {  /* 2.3.3 */
  unsigned int mag;
  if (pos < 1) return -1;
  if (pos > (int)(sizeof(int)*CHAR_BIT)) return 0;  /* no such bit */
  mag = (x < 0) ? 0u - (unsigned int)x : (unsigned int)x;
  return (int)((mag >> (pos - 1)) & 1u);
}


/* retrieves the pos-th bit from a uint32_t, with pos > 0 */
/* Returns the pos-th bit (1 = least significant, 32 = most significant) of x.
 * Returns -1 if pos < 1, and 0 if pos > 32. The value itself is shifted, so no signed constant is pushed
 * into the sign bit and the shift count never reaches the width of the type. */
LUALIB_API int tools_getuint32bit (uint32_t x, int pos) {  /* 2.17.4 */
  if (pos < 1) return -1;
  if (pos > 32) return 0;  /* no such bit */
  return (int)((x >> (pos - 1)) & 1u);
}


#ifndef PROPCMPLX
/* Complex entier: floors the real and the imaginary part of a and then adds 0, 1 or I, chosen from the
   fractional parts fre and fim of the two components:
     fre + fim < 1                 ->  add 0
     fre + fim >= 1, fre >= fim    ->  add 1
     otherwise                     ->  add I */
LUALIB_API agn_Complex tools_centier (agn_Complex a) {  /* 2.3.3 */
  agn_Complex step;
  double re = creal(a);
  double im = cimag(a);
  double fre = re - sun_floor(re);
  double fim = im - sun_floor(im);
  if (fre + fim < 1) {
    step = 0 + 0*I;
  } else if ((fre + fim >= 1) && (fre >= fim)) {
    step = 1 + 0*I;
  } else {
    step = 1*I;
  }
  return sun_floor(re) + I*sun_floor(im) + step;
}
#else
/* Complex entier for builds without C99 complex numbers.
 *   a, b  real and imaginary part of the argument
 *   x, y  receive the real and imaginary part of the result
 * Both parts are floored; depending on the fractional parts fa and fb, 1 is then added to the real part
 * (fa + fb >= 1 and fa >= fb), to the imaginary part (any other case with fa + fb not below 1), or to neither. */
LUALIB_API void tools_centier (double a, double b, double *x, double *y) {
  double fa = a - sun_floor(a);
  double fb = b - sun_floor(b);
  double dx = 0, dy = 0;
  if (!((fa + fb) < 1)) {
    if ((fa + fb) >= 1 && (fa >= fb))
      dx = 1;
    else
      dy = 1;
  }
  *x = sun_floor(a) + dx;
  *y = sun_floor(b) + dy;
}
#endif


/* Copyright (C) 2002 by  Red Hat, Incorporated. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this software
 * is freely granted, provided that this notice is preserved.
 */
/* Positive difference: x - y if x > y, else 0; AGN_NAN if either argument is NaN. */
LUALIB_API double tools_fdim (double x, double y) {
  if (tools_isnan(x) || tools_isnan(y)) {
    return AGN_NAN;
  }
  if (x > y) {
    return x - y;
  }
  return 0.0;
}


#ifndef __ARMCPU  /* 2.37.1 */
/* Long double positive difference: x - y if x > y, else 0; AGN_NAN if either argument is NaN. */
LUALIB_API long double tools_fdiml (long double x, long double y) {
  if (tools_isnan(x) || tools_isnan(y)) {
    return AGN_NAN;
  }
  if (x > y) {
    return x - y;
  }
  return 0.0;
}
#endif


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* FIXME (?): There are strange situations on some Windows platforms where both this SunPro function and built-in platform
   signbit individually return correct results in Agena, but when both call results with the same argument -0 are compared
   with the (in)equality Agena operator the result differs, probably due to overflows ??? */
/* Returns the sign bit of x, masked out of its high word: zero if the bit is clear, non-zero if it is set.
   The non-zero value is the mask result itself, not a normalised 1. */
LUALIB_API int tools_signbit (double x) {  /* 2.8.4 */
  int32_t high;
  GET_HIGH_WORD(high, x);
  high &= 0x80000000;
  return high;
}


/* Fused multiply-add counterpart, 2.8.4 */
/* pair of doubles holding the high and the low part of a split value */
typedef struct { double hi; double lo; } doublefloat;  /* do not use float for the two components hi and lo ! */

/* Splits a into a high part and a low part, using the factor 2^12 + 1; the low part is computed as a - hi.
   Returns both parts in a doublefloat. */
static FORCE_INLINE doublefloat aux_split (double a) {  /* using volatile double t, hi, lo is not needed in GCC */
  double t = ((1 << 12) + 1)*a;     /* 12 is the best shift for this version to come close to C's fma function */
  double hi = t - (t - a);
  double lo = a - hi;
  return (doublefloat){hi, lo};
}

/* Approximates a*b + c: a and b are split with aux_split() and the four partial products are summed together
   with c, starting with the largest term. Every operation rounds individually, so the result comes close to,
   but is not guaranteed to be equal to, that of a true fused multiply-add. */
LUALIB_API double tools_fma (double a, double b, double c) {
  doublefloat as, bs;
  as = aux_split(a), bs = aux_split(b);
  /* the order of summation is deliberate: hi*hi + c first, then the cross terms, the lo*lo term last */
  return ((as.hi*bs.hi + c) + as.hi*bs.lo + as.lo*bs.hi) + as.lo*bs.lo;
}


/* The macro version is not faster */
/* Squares x in long double arithmetic and returns the square as a pair:
 *   *hi  x*x as rounded by the long double multiplication
 *   *lo  correction term, computed from the split halves xh and xl of x
 * x is split with the constant SPLIT into xh + xl; the locals are volatile so that every intermediate result
 * is actually stored. */
static void sqdbl (long double *hi, long double *lo, double x) {
  volatile long double xh, xl, xc;
  xc = (long double)x*SPLIT;
  xh = (x - xc) + xc;
  xl = x - xh;
  *hi = (long double)x*x;
  *lo = xh*xh - *hi + 2*xh*xl + xl*xl;  /* (xh + xl)^2 expanded, minus the rounded square */
}

/* Cathetus for hypot4, sqrt(a^2 - b^2), 2.11.0 RC2; rewritten 2.35.0
 *   x  hypotenuse, only its magnitude is used
 *   y  known cathetus, only its magnitude is used
 * Returns sqrt(x^2 - y^2), or AGN_NAN if |x| < |y|. Special cases as coded below: 0 if |x| = |y|, |x| if
 * y is zero or x is not finite.
 * If the binary exponents of x and y differ by more than 64, y^2 is far below the last bit of x^2 and the
 * result is |x|. This shortcut also keeps the 2^700 up-scaling safe: without it a tiny y combined with a
 * large x overflowed x to infinity and the function returned NaN. */
LUALIB_API double tools_mhypot (double x, double y) {
  union {double f; uint64_t i;} ux = { x }, uy = { y };
  int ex, ey;
  long double hx, lx, hy, ly, z;
  /* strip the sign bits; there is no real result if |x| < |y| */
  ux.i &= -1ULL >> 1;
  uy.i &= -1ULL >> 1;
  if (ux.i < uy.i) return AGN_NAN;
  /* special cases */
  ex = ux.i >> 52;
  ey = uy.i >> 52;
  x = ux.f;
  y = uy.f;
  if (x == y) return 0;  /* with |x| > 5844, |y| > 5844 the function would return a nonzero result if |x| = |y| */
  if (ey == 0x7ff) return y;
  if (ex == 0x7ff || uy.i == 0) return x;
  /* y is negligible: (y/x)^2 < 2^-128, so sqrt(x^2 - y^2) rounds to x. From here on ex - ey <= 64. */
  if (ex - ey > 64) return x;
  /* precise sqrt argument in nearest rounding mode without overflow; xh*xh must not overflow and xl*xl must not underflow in sq */
  z = 1.0;
  if (ex > 0x3ff + 510) {
    z = 0x1p700;
    x *= 0x1p-700;
    y *= 0x1p-700;
  } else if (ey < 0x3ff - 450) {
    z = 0x1p-700;
    x *= 0x1p700;
    y *= 0x1p700;
  }
  sqdbl(&hx, &lx, x);
  sqdbl(&hy, &ly, y);
  return z*sqrtl(-ly + lx - hy + hx);  /* 3.1.3 fix (change from sqrt to sqrtl) */
}


/* x^2 - y^2, based on tools_mhypot.
 *   x, y  operands, only their magnitudes are used; |x| may be smaller than |y|, the result is then negative
 * Returns x^2 - y^2, evaluated from the hi/lo squares delivered by sqdbl().
 * Special cases: 0 if |x| = |y| (including two infinities); NaN if an argument is NaN; +infinity if only x
 * is infinite, -infinity if only y is infinite.
 * Corrections over the previous version:
 *   - y = 0 returns x^2 (it returned |x|);
 *   - operands scaled by 2^-700 or 2^700 are rescaled by z*z, since the squares carry the factor twice;
 *   - an infinite y yields -infinity (it yielded +infinity);
 *   - the scaling decision uses the larger and the smaller exponent of both operands, and operands whose
 *     exponents differ by more than 64 are answered directly, so that up-scaling can no longer overflow. */
LUALIB_API double tools_mpytha (double x, double y) {
  union {double f; uint64_t i;} ux = { x }, uy = { y };
  int ex, ey, emax, emin;
  long double hx, lx, hy, ly, z;
  /* strip the sign bits */
  ux.i &= -1ULL >> 1;
  uy.i &= -1ULL >> 1;
  ex = ux.i >> 52;
  ey = uy.i >> 52;
  x = ux.f;
  y = uy.f;
  if (x == y) return 0;  /* with |x| > 5844, |y| > 5844 the function would return a nonzero result if |x| = |y| */
  /* inf or NaN involved: |x| - |y| has the required value (NaN, +inf or -inf) and cannot overflow */
  if (ex == 0x7ff || ey == 0x7ff) return x - y;
  if (uy.i == 0) return x*x;
  if (ux.i == 0) return -(y*y);
  /* the smaller square lies more than 128 binary digits below the larger one and is dropped */
  if (ex - ey > 64) return x*x;
  if (ey - ex > 64) return -(y*y);
  /* the exponents are now at most 64 apart, so scaling by 2^700 moves both operands into a range where
     sqdbl() neither overflows nor underflows */
  emax = (ex > ey) ? ex : ey;
  emin = (ex < ey) ? ex : ey;
  z = 1.0;
  if (emax > 0x3ff + 510) {
    z = 0x1p700;
    x *= 0x1p-700;
    y *= 0x1p-700;
  } else if (emin < 0x3ff - 450) {
    z = 0x1p-700;
    x *= 0x1p700;
    y *= 0x1p700;
  }
  sqdbl(&hx, &lx, x);
  sqdbl(&hy, &ly, y);
  return z*(z*(-ly + lx - hy + hx));  /* undo the scaling of both factors of each square */
}


/* portable version of fpclassify which is not available on all platforms */
/* Classifies x from its bit pattern and returns FP_ZERO, FP_SUBNORMAL, FP_INFINITE, FP_NAN or FP_NORMAL. */
LUALIB_API int tools_fpclassify (double x) {
  uint32_t hi, lo, expo, mant;
  EXTRACT_WORDS (hi, lo, x);
  mant = lo | (hi & 0xfffff);  /* non-zero iff any mantissa bit is set */
  expo = hi & 0x7ff00000;      /* exponent field, still in place */
  if (expo == 0)
    return (mant == 0) ? FP_ZERO : FP_SUBNORMAL;
  if (expo == 0x7ff00000)
    return (mant == 0) ? FP_INFINITE : FP_NAN;
  return FP_NORMAL;
}


/*-
 * Copyright (c) 2002, 2003 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 * Taken from source files: lib\libc\ia64\_fpmath.h & lib\libc\gen\fpclassify.c
 */
#ifndef __ARMCPU  /* 2.37.1 */
/* Long double version of tools_fpclassify: inspects exponent and mantissa fields through union IEEEl2bits
   and returns FP_ZERO, FP_SUBNORMAL, FP_INFINITE, FP_NAN or FP_NORMAL. */
LUALIB_API int tools_fpclassifyl (long double e) {
  union IEEEl2bits u;
  u.e = e;
  if (u.bits.exp == 0)
    return ((u.bits.manl | u.bits.manh) == 0) ? FP_ZERO : FP_SUBNORMAL;
  mask_nbit_l(u);  /* mask normalization bit if applicable */
  if (u.bits.exp != 32767)
    return FP_NORMAL;
  return ((u.bits.manl | u.bits.manh) == 0) ? FP_INFINITE : FP_NAN;
}
#endif
/* end of FreeBSD code and its derivatives */


/* based on sun_nextafter & sun_asinh, 4 % faster than luai_numsign macro, 2.17.1, returns -1 if x < 0,
   0 if x = 0 and +1 if x > 0. */
/* Sign of x, read from its bit pattern: AGN_NAN for NaN, 0 for +-0, otherwise -1 or +1 (infinities included). */
LUALIB_API double tools_sign (double x) {
  int32_t hx, ax;
  uint32_t lx;
  EXTRACT_WORDS(hx, lx, x);
  ax = hx & 0x7fffffff;  /* high word of |x| */
  if (unlikely(ax >= 0x7ff00000 && ((ax - 0x7ff00000) | lx) != 0)) {
    return AGN_NAN;  /* x is NaN */
  }
  if ((ax | lx) == 0) {
    return 0;  /* x = +-0 */
  }
  if (hx < 0) {
    return -1;  /* sign bit set */
  }
  return 1;
}


/* based on sun_nextafter & sun_asinh, 4 to 5 % faster than luai_numsignum macro, 2.17.1; returns -1 if x < 0,
   and +1 otherwise. This is exactly how Maple's `signum' function works. */
/* Signum of x, read from its bit pattern: AGN_NAN for NaN, -1 for negative non-zero values (including
   -infinity), +1 for everything else, both zeros included. */
LUALIB_API double tools_signum (double x) {
  int32_t hx, ax;
  uint32_t lx;
  EXTRACT_WORDS(hx, lx, x);
  ax = hx & 0x7fffffff;  /* high word of |x| */
  if (unlikely(ax >= 0x7ff00000 && ((ax - 0x7ff00000) | lx) != 0)) {
    return AGN_NAN;  /* x is NaN */
  }
  if (hx < 0 && (ax | lx) != 0) {
    return -1;  /* sign bit set and x is not -0 */
  }
  return 1;
}


#ifndef __ARMCPU  /* 2.37.1 */
/* Long double signum: AGN_NAN for NaN, 1 for x >= 0, -1 for x < 0. */
LUALIB_API long double tools_signuml (long double x) {
  if (tools_fpclassifyl(x) == FP_NAN) {
    return AGN_NAN;
  }
  return (x >= 0.0L || x == HUGE_VAL) ? 1.0L : -1.0L;
}


/* Long double sign: NaN and zeros are returned unchanged, otherwise 1 for x > 0 and -1 for x < 0. */
LUALIB_API long double tools_signl (long double x) {
  switch (tools_fpclassifyl(x)) {
    case FP_NAN:
    case FP_ZERO:
      return x;
    default:
      return (x >= 0.0L || x == HUGE_VAL) ? 1.0L : -1.0L;
  }
}


/* sun_nextafterl: returns the next representable long double after x in the direction of y.
   One of three variants is compiled, selected by LDBL_MANT_DIG and LDBL_MAX_EXP:
   - 53-bit mantissa: forwards to nextafter();
   - 64-bit and 113-bit mantissa: step the mantissa stored in union ldshape by one unit.
   Both bit-level variants return x + y if either argument is NaN, and y if x == y. */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double sun_nextafterl (long double x, long double y) {
  return nextafter(x, y);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
LUALIB_API long double sun_nextafterl (long double x, long double y) {
  union ldshape ux, uy;
  if (tools_fpisnanl(x) || tools_fpisnanl(y)) return x + y;
  if (x == y) return y;
  ux.f = x;
  if (x == 0) {
    /* leave zero: mantissa 1 with the sign bit of y */
    uy.f = y;
    ux.i.m = 1;
    ux.i.se = uy.i.se & 0x8000;
  } else if ((x < y) == !(ux.i.se & 0x8000)) {
    /* moving away from zero: increment the mantissa, carry into the exponent on wrap-around */
    ux.i.m++;
    if (ux.i.m << 1 == 0) {
      ux.i.m = 1ULL << 63;
      ux.i.se++;
    }
  } else {
    /* moving towards zero: decrement the mantissa, borrow from the exponent if required */
    if (ux.i.m << 1 == 0) {
      ux.i.se--;
      if (ux.i.se) ux.i.m = 0;
    }
    ux.i.m--;
  }
  /* raise overflow if ux is infinite and x is finite */
  if ((ux.i.se & 0x7fff) == 0x7fff) return x + x;
  /* raise underflow if ux is subnormal or zero */
  if ((ux.i.se & 0x7fff) == 0) FORCE_EVAL(x*x + ux.f*ux.f);
  return ux.f;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
LUALIB_API long double sun_nextafterl (long double x, long double y) {
  union ldshape ux, uy;
  if (tools_fpisnanl(x) || tools_fpisnanl(y)) return x + y;
  if (x == y) return y;
  ux.f = x;
  if (x == 0) {
    /* leave zero: lowest mantissa bit set, with the sign bit of y */
    uy.f = y;
    ux.i.lo = 1;
    ux.i.se = uy.i.se & 0x8000;
  } else if ((x < y) == !(ux.i.se & 0x8000)) {
    /* moving away from zero: increment the value seen as two 64-bit words, with carry */
    ux.i2.lo++;
    if (ux.i2.lo == 0) ux.i2.hi++;
  } else {
    /* moving towards zero: decrement with borrow */
    if (ux.i2.lo == 0) ux.i2.hi--;
    ux.i2.lo--;
  }
  /* raise overflow if ux is infinite and x is finite */
  if ((ux.i.se & 0x7fff) == 0x7fff) return x + x;
  /* raise underflow if ux is subnormal or zero */
  if ((ux.i.se & 0x7fff) == 0) FORCE_EVAL(x*x + ux.f*ux.f);
  return ux.f;
}
#endif


/* Taken from MUSL-1.2.3 src/math/frexpl.c, 2.34.10 */
/* sun_frexpl: splits x into a fraction (the return value) and a binary exponent stored in *e.
   With a 53-bit mantissa the call is forwarded to frexp(). The bit-level variant
   - stores 0 in *e and returns x for x = 0,
   - returns x and leaves *e untouched if the exponent field is all ones (infinity, NaN),
   - multiplies a value with zero exponent field by 2^120 first and corrects *e by -120,
   - otherwise replaces the exponent field by 0x3ffe and stores the former exponent minus 0x3ffe in *e. */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double sun_frexpl (long double x, int *e) {
  return frexp(x, e);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
LUALIB_API long double sun_frexpl (long double x, int *e) {
  union ldshape u = {x};
  int ee = u.i.se & 0x7fff;  /* exponent field without the sign bit */
  if (!ee) {
    if (x) {
      x = sun_frexpl(x*0x1p120, e);
      *e -= 120;
    } else *e = 0;
    return x;
  } else if (ee == 0x7fff) return x;
  *e = ee - 0x3ffe;
  u.i.se &= 0x8000;  /* keep the sign ... */
  u.i.se |= 0x3ffe;  /* ... and set the exponent field to 0x3ffe */
  return u.f;
}
#endif


/* sun_fmodl: floating-point remainder of x/y, the result carries the sign of x.
   With a 53-bit mantissa the call is forwarded to fmod(). The bit-level variant works on the mantissas
   with a shift-and-subtract loop:
   - y = 0, y = NaN or an all-ones exponent field of x yield (x*y)/(x*y);
   - |x| < |y| returns x, |x| = |y| returns 0*x;
   - operands with a zero exponent field are multiplied by 2^120 before the loop and the result is scaled
     back afterwards. */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double sun_fmodl (long double x, long double y) {
	return fmod(x, y);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
LUALIB_API long double sun_fmodl (long double x, long double y) {
  union ldshape ux = {x}, uy = {y};
  int ex = ux.i.se & 0x7fff;  /* exponent fields */
  int ey = uy.i.se & 0x7fff;
  int sx = ux.i.se & 0x8000;  /* sign bit of x, re-applied to the result */
  if (y == 0 || tools_fpisnanl(y) || ex == 0x7fff) return (x*y)/(x*y);
  /* clear both sign bits, so that ux.f and uy.f hold |x| and |y| */
  ux.i.se = ex;
  uy.i.se = ey;
  if (ux.f <= uy.f) {
    if (ux.f == uy.f) return 0*x;
    return x;
  }
  /* normalize x and y */
  if (!ex) {
    ux.f *= 0x1p120f;
    ex = ux.i.se - 120;
  }
  if (!ey) {
    uy.f *= 0x1p120f;
    ey = uy.i.se - 120;
  }
	/* x mod y */
#if LDBL_MANT_DIG == 64
  uint64_t i, mx, my;
  mx = ux.i.m;
  my = uy.i.m;
  /* one step per exponent difference: subtract my if possible, then double mx */
  for (; ex > ey; ex--) {
    i = mx - my;
    if (mx >= my) {
      if (i == 0) return 0*x;
      mx = 2*i;
    } else if (2*mx < mx) {
      /* doubling mx wraps around; the subtraction of my is done in the same wrapped arithmetic */
      mx = 2*mx - my;
    } else {
      mx = 2*mx;
    }
  }
  i = mx - my;
  if (mx >= my) {
    if (i == 0) return 0*x;
    mx = i;
  }
  /* shift left until the top mantissa bit is set again */
  for (; mx >> 63 == 0; mx *= 2, ex--);
  ux.i.m = mx;
#elif LDBL_MANT_DIG == 113
  uint64_t hi, lo, xhi, xlo, yhi, ylo;
  /* mantissas as two 64-bit words each, with bit 48 of the high word set explicitly */
  xhi = (ux.i2.hi & -1ULL>>16) | 1ULL<<48;
  yhi = (uy.i2.hi & -1ULL>>16) | 1ULL<<48;
  xlo = ux.i2.lo;
  ylo = uy.i2.lo;
  for (; ex > ey; ex--) {
    hi = xhi - yhi;
    lo = xlo - ylo;
    if (xlo < ylo) hi -= 1;  /* borrow */
    if (hi >> 63 == 0) {
      if ((hi|lo) == 0) return 0*x;
      xhi = 2*hi + (lo>>63);
      xlo = 2*lo;
    } else {
      xhi = 2*xhi + (xlo>>63);
      xlo = 2*xlo;
    }
  }
  hi = xhi - yhi;
  lo = xlo - ylo;
  if (xlo < ylo) hi -= 1;
  if (hi >> 63 == 0) {
    if ((hi|lo) == 0) return 0*x;
    xhi = hi;
    xlo = lo;
  }
  /* shift left until bit 48 of the high word is set again */
  for (; xhi >> 48 == 0; xhi = 2*xhi + (xlo>>63), xlo = 2*xlo, ex--);
  ux.i2.hi = xhi;
  ux.i2.lo = xlo;
#endif
  /* scale result */
  if (ex <= 0) {
    ux.i.se = (ex+120)|sx;
    ux.f *= 0x1p-120f;
  } else
    ux.i.se = ex|sx;
  return ux.f;
}
#endif
#endif  /* of __ARMCPU */

/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_expl.c */
/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 *      Exponential function, long double precision
 *
 *
 * SYNOPSIS:
 *
 * long double x, y, expl();
 *
 * y = expl( x );
 *
 *
 * DESCRIPTION:
 *
 * Returns e (2.71828...) raised to the x power.
 *
 * Range reduction is accomplished by separating the argument
 * into an integer k and fraction f such that
 *
 *     x    k  f
 *    e  = 2  e.
 *
 * A Pade' form of degree 5/6 is used to approximate exp(f) - 1
 * in the basic range [-0.5 ln 2, 0.5 ln 2].
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE      +-10000     50000       1.12e-19    2.81e-20
 *
 *
 * Error amplification in the exponential function can be
 * a serious matter.  The error propagation involves
 * exp( X(1+delta) ) = exp(X) ( 1 + X*delta + ... ),
 * which shows that a 1 lsb error in representing X produces
 * a relative error of X times 1 lsb in the function.
 * While the routine gives an accurate result for arguments
 * that are exactly represented by a long double precision
 * computer number, the result contains amplified roundoff
 * error for large arguments not exactly represented.
 *
 *
 * ERROR MESSAGES:
 *
 *   message         condition      value returned
 * exp underflow    x < MINLOG         0.0
 * exp overflow     x > MAXLOG         MAXNUM
 *
 */

#ifndef __ARMCPU  /* 2.37.1 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Variant for platforms where long double has the format of double: forwards to exp(). */
LUALIB_API long double tools_expl (long double x) {
  return exp(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384

/* coefficients of the polynomials P and Q of the rational approximation used in tools_expl */
static const long double explP[3] = {
 1.2617719307481059087798E-4L,
 3.0299440770744196129956E-2L,
 9.9999999999999999991025E-1L,
};
static const long double explQ[4] = {
 3.0019850513866445504159E-6L,
 2.5244834034968410419224E-3L,
 2.2726554820815502876593E-1L,
 2.0000000000000000000897E0L,
};
/* LN2HI + LN2LO: ln 2 in two pieces; LOG2E: 1/ln 2 */
static const long double
LN2HI = 6.9314575195312500000000E-1L,
LN2LO = 1.4286068203094172321215E-6L,
LOG2E = 1.4426950408889634073599E0L;

/* tools_expl: e^x for long doubles with a 64-bit mantissa.
   Returns x for NaN; arguments beyond the two thresholds below are answered by an expression that
   overflows or underflows. Otherwise x is reduced to x - k*ln 2, the rational approximation is applied
   to the reduced argument, and the result is scaled by 2^k. */
LUALIB_API long double tools_expl (long double x) {
  long double px, xx;
  int k;
  if (tools_fpisnanl(x)) return x;
  if (x > 11356.5234062941439488L) return x * 0x1p16383L; /* x > ln(2^16384 - 0.5) */
  if (x < -11399.4985314888605581L) return -0x1p-16445L/x; /* x < ln(2^-16446) */
  /* Express e**x = e**f 2**k = e**(f + k ln(2)) */
  px = sun_floorl(LOG2E*x + 0.5);
  k = px;
  /* subtract k*ln 2 in two steps, high part first */
  x -= px*LN2HI;
  x -= px*LN2LO;
  /* rational approximation of the fractional part: e**x =  1 + 2x P(x**2)/(Q(x**2) - x P(x**2)) */
  xx = x*x;
#ifndef __ARMCPU
  px = x*__polevll(xx, explP, 2);
  x = px/(__polevll(xx, explQ, 3) - px);
#else
  px = x*polevl(xx, explP, 2);
  x = px/(polevl(xx, explQ, 3) - px);
#endif
  x = 1.0 + 2.0*x;
  return scalbnl(x, k);
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: placeholder for long doubles with a 113-bit mantissa: the value is computed by exp() in double
   precision only. The function is named tools_expl like the variants for the other long double formats, so
   that callers resolve on this platform, too, and the C library's own expl() is not redefined. */
LUALIB_API long double tools_expl (long double x) {
  return exp(x);
}
#endif


/* Taken from MUSL-1.2.3 src/math/expm1l.c, 2.34.10
 *
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 *      Exponential function, minus 1
 *      Long double precision
 *
 *
 * SYNOPSIS:
 *
 * long double x, y, expm1l();
 *
 * y = expm1l( x );
 *
 *
 * DESCRIPTION:
 *
 * Returns e (2.71828...) raised to the x power, minus 1.
 *
 * Range reduction is accomplished by separating the argument
 * into an integer k and fraction f such that
 *
 *     x    k  f
 *    e  = 2  e.
 *
 * An expansion x + .5 x^2 + x^3 R(x) approximates exp(f) - 1
 * in the basic range [-0.5 ln 2, 0.5 ln 2].
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE    -45,+maxarg   200,000     1.2e-19     2.5e-20
 */

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Variant for platforms where long double has the format of double: forwards to expm1(). */
LUALIB_API long double tools_expm1l (long double x) {
  return expm1(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384

/* exp(x) - 1 = x + 0.5 x^2 + x^3 P(x)/Q(x)
   -.5 ln 2  <  x  <  .5 ln 2
   Theoretical peak relative error = 3.4e-22  */
/* expm1lP0..P4 and expm1lQ0..Q4: coefficients of P and Q in the expansion quoted above;
   the leading coefficient of Q is 1 and therefore not stored */
static const long double
expm1lP0 = -1.586135578666346600772998894928250240826E4L,
expm1lP1 =  2.642771505685952966904660652518429479531E3L,
expm1lP2 = -3.423199068835684263987132888286791620673E2L,
expm1lP3 =  1.800826371455042224581246202420972737840E1L,
expm1lP4 = -5.238523121205561042771939008061958820811E-1L,
expm1lQ0 = -9.516813471998079611319047060563358064497E4L,
expm1lQ1 =  3.964866271411091674556850458227710004570E4L,
expm1lQ2 = -7.207678383830091850230366618190187434796E3L,
expm1lQ3 =  7.206038318724600171970199625081491823079E2L,
expm1lQ4 = -4.002027679107076077238836622982900945173E1L,
/* expm1lQ5 = 1.000000000000000000000000000000000000000E0 */
/* C1 + C2 = ln 2 */
expm1lC1 = 6.93145751953125E-1L,
expm1lC2 = 1.428606820309417232121458176568075500134E-6L,
/* ln 2^-65 */
minarg = -4.5054566736396445112120088E1L,
/* ln 2^16384 */
maxarg = 1.1356523406294143949492E4L;

/* tools_expm1l: e^x - 1 for long doubles with a 64-bit mantissa.
   Returns x for NaN and for x = 0, -1 for x < minarg, and an overflowing product for x > maxarg.
   Otherwise x is reduced by a multiple k of ln 2, the rational expansion is applied to the remainder,
   and the result is rebuilt as 2^k*qx + (2^k - 1). */
LUALIB_API long double tools_expm1l (long double x) {
  long double px, qx, xx;
  int k;
  if (tools_fpisnanl(x)) return x;
  if (x > maxarg) return x*0x1p16383L; /* overflow, unless x==inf */
  if (x == 0.0) return x;
  if (x < minarg) return -1.0;
  xx = expm1lC1 + expm1lC2;  /* ln 2 */
  /* Express x = ln 2 (k + remainder), remainder not exceeding 1/2. */
  px = sun_floorl(0.5 + x / xx);
  k = px;
  /* remainder times ln 2 */
  x -= px*expm1lC1;
  x -= px*expm1lC2;
  /* Approximate exp(remainder ln 2).*/
  px = (((( expm1lP4*x + expm1lP3)*x + expm1lP2)*x + expm1lP1)*x + expm1lP0)*x;
  qx = (((( x + expm1lQ4)*x + expm1lQ3)*x + expm1lQ2)*x + expm1lQ1)*x + expm1lQ0;
  xx = x*x;
  qx = x + (0.5*xx + xx*px/qx);
  /* exp(x) = exp(k ln 2) exp(remainder ln 2) = 2^k exp(remainder ln 2).
     We have qx = exp(remainder ln 2) - 1, so
     exp(x) - 1  =  2^k (qx + 1) - 1  =  2^k qx + 2^k - 1. */
  px = scalbnl(1.0, k);
  x = px*qx + (px - 1.0);
  return x;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile: for long doubles with a 113-bit mantissa the value
   is computed by expm1() in double precision only. */
LUALIB_API long double tools_expm1l (long double x) {
  return expm1(x);
}
#endif
#endif  /* of __ARMCPU */


/* 2.17.4, for tools_binomial, returns -1 if x < 0 and odd(entier(x)), and 1 otherwise, i.e. the sign of gamma(). */
/* Returns 1 if x is not negative; for negative x returns -1 if floor(-x) is even and 1 if it is odd. */
LUALIB_API int tools_gammasign (double x) {
  uint64_t whole;
  if (!(x < 0)) return 1;
  whole = (uint64_t)sun_floor(-x);
  return 2*(int)(whole & 1) - 1;  /* no branching: even -> -1, odd -> +1 */
}


/* Long double version: returns 1 if x is not negative; for negative x returns -1 if floor(-x) is even
   and 1 if it is odd. */
LUALIB_API int tools_gammasignl (long double x) {  /* 3.16.4 */
  uint64_t whole;
  if (!(x < 0)) return 1;
  whole = (uint64_t)sun_floorl(-x);
  return 2*(int)(whole & 1) - 1;  /* no branching: even -> -1, odd -> +1 */
}


/* taken from: @(#)s_nextafter.c 5.1 93/09/24
 *
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * IEEE functions
 *   nextafter(x,y)
 *   return the next machine floating-point number of x in the direction toward y.
 *   Special cases:
 *
 * Modification taken from www.koders.com /..GNU/l/libc/libc/math/s_nextafter.c
 * because the original Sun implementation is non-ANSI C and does not work with GCC.
 *
 * This implementation is ten percent faster than the one available in GNU C Lib.
 */

/* sun_nextafter: returns the next representable double after x in the direction of y.
 *   x  start value
 *   y  direction
 * Returns x + y if either argument is NaN, y if x == y, and the smallest subnormal with the sign of y if x
 * is zero. Otherwise the 64-bit pattern of x, held in the words hx and lx, is incremented or decremented
 * by one. */
LUALIB_API double sun_nextafter (double x, double y) {
  int32_t hx, hy, ix, iy;
  uint32_t lx, ly;
  EXTRACT_WORDS(hx, lx, x);
  EXTRACT_WORDS(hy, ly, y);
  ix = hx & 0x7fffffff;     /* |x| */
  iy = hy & 0x7fffffff;     /* |y| */
  if (unlikely(((ix >= 0x7ff00000) && ((ix - 0x7ff00000) | lx) != 0) ||  /* x is nan, 2.5.15 optimisation */
     ((iy >= 0x7ff00000) && ((iy - 0x7ff00000) | ly) != 0)))             /* y is nan */
    return x + y;
  if (x == y) return y;     /* x = y, return y */
  if ((ix | lx) == 0) {     /* x == 0 */
    double u;
    INSERT_WORDS(x, hy & 0x80000000, 1);  /* return +-minsubnormal */
    u = math_opt_barrier(x);
    u = u*u;
    math_force_eval(u);  /* raise underflow flag */
    return x;
  }
  /* step the two-word pattern by one, with carry or borrow between the low word lx and the high word hx */
  if (hx >= 0) {  /* x > 0 */
    if (hx > hy || ((hx == hy) && (lx > ly))) {  /* x > y, x -= ulp */
      if (lx == 0) hx -= 1;
      lx -= 1;
    } else {  /* x < y, x += ulp */
      lx += 1;
      if (lx == 0) hx += 1;
    }
  } else {                  /* x < 0 */
    if (hy >= 0 || hx > hy || ((hx == hy) && (lx > ly))) {  /* x < y, x -= ulp */
      if (lx == 0) hx -= 1;
      lx -= 1;
    } else {                /* x > y, x += ulp */
      lx += 1;
      if (lx == 0) hx += 1;
    }
  }
  hy = hx & 0x7ff00000;  /* hy is reused: exponent field of the result */
  if (unlikely(hy >= 0x7ff00000)) {  /* 2.5.15 optimisation, y is inf or NaN */
    x = x + x;   /* overflow */
    if (FLT_EVAL_METHOD != 0 && FLT_EVAL_METHOD != 1)
      asm ("" : "+m"(x));  /* empty asm with a memory operand, so that x passes through memory */
    return x;   /* overflow */
  }
  if (unlikely(hy < 0x00100000)) {  /* 2.5.15 optimisation */
    double u = x*x;       /* underflow */
    math_force_eval(u);     /* raise underflow flag */
  }
  INSERT_WORDS(x, hx, lx);
  return x;
}


/* Like `|' operator, based on sun_nextafter, 2.29.2; the implementation is 3 % _slower_ than directly comparing
   the numbers with relational operators. */
/* Three-way comparison on the bit patterns: returns 0 if x == y, 1 if x > y, -1 if x < y, and AGN_NAN if
   either argument is NaN or infinite. */
LUALIB_API double sun_compare (double x, double y) {
  int32_t hx, hy;
  uint32_t lx, ly;
  int pattern_gt;
  EXTRACT_WORDS(hx, lx, x);
  EXTRACT_WORDS(hy, ly, y);
  if (unlikely(((hx & 0x7fffffff) >= 0x7ff00000) ||  /* x is nan or not finite */
               ((hy & 0x7fffffff) >= 0x7ff00000)))   /* y is nan or not finite */
    return AGN_NAN;
  if (x == y) return 0.0;
  /* the words of x, compared as (signed high, unsigned low), exceed those of y */
  pattern_gt = hx > hy || ((hx == hy) && (lx > ly));
  if (hx >= 0) {  /* sign bit of x clear: a larger pattern means x > y */
    return pattern_gt ? 1.0 : -1.0;
  }
  /* sign bit of x set: x < y if y has its sign bit clear or x has the larger pattern */
  return (hy >= 0 || pattern_gt) ? -1.0 : 1.0;
}


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * The following two functions are based on Sun's frexp implementation
 * (source file s_frexp.c), taken from the ulibc 0.33.2 library.
 */

/*
 * for non-zero x
 *  x = frexp(arg,&exp);
 * return a double fp quantity x such that 0.5 <= |x| < 1.0
 * and the corresponding binary exponent "exp". That is
 *  arg = x*2^exp.
 * If arg is inf, 0.0, or NaN, then frexp(arg,&exp) returns arg
 * with *exp=0.
 *
 * A little bit modified to return either the exponent (mode = 1) or
 * mantissa (mode = 0).
 */

/* Returns the frexp mantissa of x: x with its exponent field replaced by that of 0.5, sign and mantissa
   bits kept. 0, inf and NaN are returned unchanged; subnormals are multiplied by TWO54 first. */
LUALIB_API double sun_frexp_man (double x) {  /* returns mantissa, 2.10.0 */
  int32_t hx, ax, lx;
  EXTRACT_WORDS(hx, lx, x);
  ax = hx & 0x7fffffff;  /* high word of |x| */
  if (ax >= 0x7ff00000 || ((ax | lx) == 0)) {
    return x;  /* 0, inf, nan */
  }
  if (unlikely(ax < 0x00100000)) {  /* subnormal */
    x *= TWO54;
    GET_HIGH_WORD(hx, x);
  }
  hx &= 0x800fffff;  /* keep sign and mantissa bits */
  hx |= 0x3fe00000;  /* exponent field of 0.5 */
  SET_HIGH_WORD(x, hx);
  return x;
}


/* Returns the frexp exponent e of x, i.e. x = m*2^e with 0.5 <= |m| < 1.
   For 0, inf and NaN the exponent is 0, as documented above and as delivered by sun_xfrexp; the value of x
   itself is no longer converted to int, which is undefined for inf and NaN. */
LUALIB_API int sun_frexp_exp (double x) {  /* returns exponent, 2.10.0 */
  int eptr;
  int32_t hx, ix, lx;
  EXTRACT_WORDS(hx, lx, x);
  ix = 0x7fffffff & hx;  /* absolute value */
  eptr = 0;
  if (ix >= 0x7ff00000 || ((ix | lx) == 0)) return 0;  /* 0, inf, nan */
  if (unlikely(ix < 0x00100000)) {  /* subnormal, 2.5.15 optimisation */
    x *= TWO54;  /* scale into the normal range and compensate in the exponent */
    GET_HIGH_WORD(hx, x);
    ix = hx & 0x7fffffff;
    eptr = -54;
  }
  return eptr + (ix >> 20) - 1022;
}


/* Combined frexp.
 *   x      value to decompose
 *   eptr   receives the binary exponent (0 for 0, inf, nan)
 *   isneg  receives 1 if the sign bit of x is set, else 0
 * Returns the mantissa with the sign bit cleared; for 0, inf and nan returns x with the sign bit cleared. */
LUALIB_API double sun_xfrexp (double x, int *eptr, int *isneg) {  /* returns mantissa, exponent, signbit, 2.10.0 */
  int32_t hx, ax, lx;
  EXTRACT_WORDS(hx, lx, x);
  ax = hx & 0x7fffffff;  /* high word of |x| */
  *eptr = 0;
  *isneg = (hx >> 31) & 1;
  if (ax >= 0x7ff00000 || ((ax | lx) == 0)) {  /* 0, inf, nan */
    SET_HIGH_WORD(x, ax);
    return x;
  }
  if (unlikely(ax < 0x00100000)) {  /* subnormal: scale up and compensate in the exponent */
    x *= TWO54;
    GET_HIGH_WORD(hx, x);
    ax = hx & 0x7fffffff;
    *eptr = -54;
  }
  *eptr += (ax >> 20) - 1022;
  hx = (hx & 0x800fffff) | 0x3fe00000;  /* exponent field of 0.5, sign and mantissa kept */
  hx &= 0x7fffffff;                     /* drop the sign */
  SET_HIGH_WORD(x, hx);
  return x;
}


/* Unbiased exponent: returns math.exponent(x) - 1, except 0 -> -1023; suited for expressions such like 2^52 -> 52;
   based on Sun's `rint`, 2.16.0; see also sun_uexponent */
/* Returns the exponent field of x minus the bias 0x3ff. */
LUALIB_API int32_t sun_exponent (double x) {
  int32_t high;
  GET_HIGH_WORD(high, x);
  high &= 0x7fffffff;  /* drop the sign bit */
  return ((((uint32_t)high) >> 20) & 0x7ff) - 0x3ff;
}


/* Returns base-2 exponent; similar to sun_exponent but returns sign(x)*sun_exponent(x). If x is nan, returns 0x401 = 1025;
   also returns 1024 for x = +infinity, and -1024 for x = -infinity. For 0 and subnormal numbers, returns 0. 2.31.2.
   As fast as lobject.c/luaO_log2() but covers a broader number range. */

#define aux_sign(hx)   ((((hx) >= 0) ? 1 : -1))
LUALIB_API int32_t sun_uexponent (double x) {
  int32_t hx, ix;
  GET_HIGH_WORD(hx, x);
  ix = 0x7fffffff & hx;  /* absolute value */
  if (ix < 0x00100000) return 0;  /* 0, subnormal */
  if (ix >= 0x7ff00000) {  /* exponent field all ones: inf or nan, rare path */
    int32_t lx;
    GET_LOW_WORD(lx, x);
    /* a nan may carry its payload in the low word only, so the high word alone cannot tell it from infinity */
    if (ix > 0x7ff00000 || lx != 0) return 0x401;  /* nan, return decimal 1025 */
  }
  return aux_sign(hx)*((ix >> 20) - 0x3ff);  /* 4 % faster than ((0.0 < x) - (x < 0.0))*((ix >> 20) - 0x3ff) */
}


/* Returns 1 if |x| > 2^53, i.e. if not all integers of that magnitude are representable any longer, and 0
   otherwise; |x| = 2^53 itself yields 0. 2.16.0, changed 2.21.8 */
LUALIB_API int sun_isirregular (double x) {
  int32_t hi, lo;
  GET_HIGH_WORD(hi, x);
  hi &= 0x7fffffff;  /* ignore the sign */
  if (hi > 0x43400000) return 1;  /* high word already beyond that of 2^53 = 9007199254740992.0 */
  if (hi < 0x43400000) return 0;  /* |x| < 2^53 */
  GET_LOW_WORD(lo, x);            /* high word equals that of 2^53: the low word decides */
  return lo != 0;
}


/* Returns 1 if x is subnormal, and 0 for +/-0, normal numbers, infinity and nan; inlined 2.18.1 */
LUALIB_API int sun_issubnormal (double x) {
  int32_t hi, lo;
  EXTRACT_WORDS(hi, lo, x);
  hi &= 0x7fffffff;  /* ignore the sign */
  /* exponent field is zero (which also rules out inf and nan) but the value itself is not zero */
  return hi < 0x00100000 && (hi | lo) != 0;
}


/* Returns 1 if x is a normal number, and 0 for +/-0, subnormal numbers, infinity and nan; 2.27.0 */
LUALIB_API int sun_isnormal (double x) {
  int32_t hi;
  GET_HIGH_WORD(hi, x);
  hi &= 0x7fffffff;  /* ignore the sign */
  /* the exponent field must be neither all zeros (0, subnormal) nor all ones (inf, nan); the high word alone
     decides that */
  return hi >= 0x00100000 && hi < 0x7ff00000;
}


/* Returns x unchanged unless it is subnormal, in which case x scaled by TWO54 is returned. *hx receives the high
   word of the value that is returned. +/-0, infinity and nan are passed through as they are.
   Based on sun_frexp_man, 2.18.1; tweaked 2.30.3 */
LUALIB_API double sun_normalise (double x, uint32_t *hx) {
  int32_t ix, lx;
  EXTRACT_WORDS(*hx, lx, x);
  ix = 0x7fffffff & *hx;  /* absolute value |x| */
  if (ix >= 0x7ff00000 || ((ix | lx) == 0)) {  /* +/-0, inf, nan */
    return x;
  }
  if (unlikely(ix < 0x00100000)) {  /* subnormal, 2.5.15 optimisation */
    x *= TWO54;
    /* re-read the high word of the scaled x; the former SET_HIGH_WORD(*hx, x) had its arguments in the wrong
       roles (integer as destination double, double as word) and left *hx with a meaningless value */
    GET_HIGH_WORD(*hx, x);
  }
  return x;
}


/* Range reduction: maps x into the half-open interval [min, max) by wrapping it around, in the spirit of
   fMod(a, b) = (a) - (b) * floor((a)/(b)). A value already inside the interval is returned as is. */
LUALIB_API double tools_reducerange (double x, double min, double max) {
  double span, offset;
  if (min <= x && x < max) return x;  /* nothing to do, saves computation time */
  /* note that x = max deliberately wraps around to min, so you might add 1 to max to prevent that */
  span = max - min;               /* width of the interval */
  offset = fmod(x - min, span);   /* distance from min, may still be negative */
  return min + fmod(span + offset, span);  /* 2.11 RC4, sun_fmod is 22 % slower ! */
}


/* Range reduction, long double version: maps x into the half-open interval [min, max) by wrapping it around.
   A value already inside the interval is returned as is. */
LUALIB_API long double tools_reducerangel (long double x, long double min, long double max) {
  long double span, offset;
  if (min <= x && x < max) return x;  /* nothing to do, saves computation time */
  span = max - min;                /* width of the interval */
  offset = fmodl(x - min, span);   /* distance from min, may still be negative */
  return min + fmodl(span + offset, span);
}


/* Returns its argument x - a number - if x is non-negative, otherwise returns 0, provided the `direction` d is
   positive.

   With any non-positive d (including 0), returns x if its sign bit is set, otherwise returns 0.

   The result is never -0. Infinities are treated like any other number, i.e. they are either passed through or
   replaced by 0; nan is returned as it is.

   See also independent version math.branch. Used by skycrane.tolerance. */
LUALIB_API double tools_branch (double x, int d) {  /* 2.10.0 */
  /* x the number, d the direction or interval: if d > 0 -> return in [0, infinity] */
  double r;
  int nonneg;
  if (x != x) return x;      /* nan stays nan */
  nonneg = signbit(x) == 0;  /* if d <= 0 -> return in [-infinity, 0] */
  /* select instead of multiplying by a 0/1 mask: 0*infinity would be nan, not the 0 that is wanted here */
  if (d > 0)
    r = nonneg ? x : 0.0;
  else
    r = nonneg ? 0.0 : x;
  if (r == 0.0) r = 0.0;     /* convert -0 to 0 */
  return r;
}


/* Computes a mathematical epsilon value that depends on the magnitude of its argument x. It can be used in
   difference quotients, etc., for it prevents huge precision errors with computations on very small or large
   numbers (i.e. C doubles): for |x| < 1 the constant AGN_EPSILON is returned, otherwise |x| times the square root
   of the gap between |x| and the next larger double.
   See: http://stackoverflow.com/questions/5543783/java-estimate-a-derivative-at-a-point, answered by trashgod */

LUALIB_API double tools_matheps (double x) {
  double ax, gap;
  ax = fabs(x);  /* 2.10.5 patch */
  if (ax < 1.0) return AGN_EPSILON;
  gap = sun_nextafter(ax, HUGE_VAL) - ax;  /* spacing of the doubles at |x| */
  return ax*sqrt(gap);
}


#ifndef __ARMCPU  /* 2.37.1 */
/* Long double version of tools_matheps: returns AGN_HEPSILON for |x| < 1, otherwise |x| times the square root of
   the gap between |x| and the next larger long double. */
LUALIB_API long double tools_mathepsl (long double x) {
  long double ax, gap;
  ax = fabsl(x);
  if (ax < 1.0L) return (long double)AGN_HEPSILON;
  gap = nextafterl(ax, HUGE_VAL) - ax;  /* spacing of the long doubles at |x| */
  return ax*sqrtl(gap);
}
#endif


/* Like tools_matheps, but based on the cube root: returns AGN_EPSILON for |x| < 0.0123927159, otherwise |x| times
   the cube root of the gap between |x| and the next larger double. 4.2.5 */
LUALIB_API double tools_cbrteps (double x) {
  double ax, gap;
  ax = fabs(x);
  if (ax < 0.0123927159) return AGN_EPSILON;
  gap = sun_nextafter(ax, HUGE_VAL) - ax;  /* spacing of the doubles at |x| */
  return ax*cbrt(gap);
}


#ifndef __ARMCPU  /* 4.2.5 */
/* Long double version of tools_cbrteps: returns AGN_EPSILON for |x| < 0.0123927159, otherwise |x| times the cube
   root of the gap between |x| and the next larger long double. */
LUALIB_API long double tools_cbrtepsl (long double x) {
  long double ax, gap;
  ax = fabsl(x);
  if (ax < 0.0123927159L) return AGN_EPSILON;
  gap = nextafterl(ax, HUGE_VAL) - ax;  /* spacing of the long doubles at |x| */
  return ax*cbrtl(gap);
}
#endif


/* Excel Serial Date, 2.9.8, does not handle the DST issue (which Excel does not handle, too).
   Takes a date y-m-d and a time hh:mm:ss and returns the serial day number plus the time of day as a fraction of
   24 hours. Serial 1 is 1 January 1900, 59 is 28 February 1900, 60 is the non-existing 29 February 1900 that
   Lotus and Excel nevertheless count, and 61 is 1 March 1900. */
LUALIB_API double tools_esd (int y, int m, int d, int hh, int mm, int ss) {
  long int md;
  double dh;
  dh = (hh*3600.0 + mm*60.0 + ss)/86400.0;  /* time fraction */
  if (y == 1900 && m == 2 && d == 29)  /* Lotus bug: there is no 29th February 1900 since 1900 is not a leap year */
    return 60 + dh;  /* keep the time of day on the phantom day, too */
  /* calculate Modified `Julian Lotus Serial Date`, do not remove brackets, integer arithmetic ! */
  md = (1461*(y + 4800 + ((m - 14) / 12))) / 4 +
       (367*(m - 2 - 12*((m - 14) / 12))) / 12 -
       (3*(((y + 4900 + ((m - 14) / 12)) / 100))) / 4 +
       d - 2415019 - 32075;
  /* The formula knows no 29 February 1900, so every date before 1 March 1900 comes out one too high. This
     includes 28 February 1900, which is computed as 60, hence `<=` and not `<`. */
  if (md <= 60) md--;
  return md + dh;
}


/* round then trunc x at d decimal digits
   x: number to round
   d: dec digits to round/trunc to: 0 - 14

   taken from: https://bytes.com/topic/c/answers/521405-rounding-nearest-nth-digits, posted by Larry I Smith,
   original function name: `FixDec`.

   "This code dates from 1991. It was used to display financial data on reports. Use at your own risk..."

   Extended by Alexander Walz. Mr. Smith's function prevents round-off errors.

   t = 0: round towards infinity if z is positive, round towards -infinity if x is negative
   t = 4: apply round half up method
   t = 9: always truncate (towards zero).

   d < 0: round fractional digits, d > 0 round integral part.
   If d == AGN_FORNUMADJBAILOUT, then bail out immediately.

   If t is 0 and x is negative, rounds toward 0. */
LUALIB_API double tools_roundf (double x, int d, int t) {
  double scale, whole, digit, sign;
  if (d == AGN_FORNUMADJBAILOUT || x == 0.0) return x;  /* 2.33.3/4 bail out */
  if (x < 0) {                        /* work on |x| and restore the sign at the end */
    sign = -1;
    x = -x;
  } else
    sign = 1;
  if (d > 14) d = 14;                 /* IEEE doubles max fractional precision minus 1 */
  else if (d < -14) d = -14;
  /* the comments trace x = 1234.5678 with d = 2 */
  scale = cephes_powi(10.0, d);       /* scale = 100.0, optimised 2.29.5, tuned 3.16.3 */
  x *= scale;                         /* x = 123456.78 */
  digit = sun_modf(x, &whole);        /* whole = 123456.0, digit = 0.78; 2.11.0 improvement */
  digit *= 10.0;                      /* digit = 7.8 */
  if (t != 0) sun_modf(digit, &digit);  /* digit = 7.0, i.e. just the (d+1)-th digit; 2.11.0 improvement */
  /* with t = 0 any non-zero remainder rounds up, so the untruncated remainder is tested; otherwise the
     (d+1)-th digit is compared with the threshold t, e.g. t = 4 for round half up */
  if ((t == 0 && digit > 0) ||
     ((int)digit > t)) whole += 1.0;  /* whole = 123457.0 */
  return sign*whole/scale;            /* 123457.0 / 100.0 = 1234.57 */
}


#ifndef __ARMCPU  /* 2.37.1 */
/* Long double version of tools_roundf: rounds x at d decimal digits using threshold t (0: any remainder rounds
   the magnitude up, 4: round half up, 9: truncate); d is limited to -18 .. 18. */
LUALIB_API long double tools_roundfl (long double x, int d, int t) {
  long double scale, whole, digit, sign;
  if (d == AGN_FORNUMADJBAILOUT || x == 0.0L) return x;  /* bail out */
  if (x < 0.0L) {                      /* work on |x| and restore the sign at the end */
    sign = -1;
    x = -x;
  } else
    sign = 1;
  if (d > 18) d = 18;                  /* limit the number of digits */
  else if (d < -18) d = -18;
  /* the comments trace x = 1234.5678 with d = 2 */
  scale = tools_intpowl(10.0L, d);     /* scale = 100.0, optimised 2.29.5 */
  x *= scale;                          /* x = 123456.78 */
  digit = sun_modfl(x, &whole);        /* whole = 123456.0, digit = 0.78; 2.11.0 improvement, 3.16.4 tweak */
  digit *= 10.0L;                      /* digit = 7.8 */
  if (t != 0) sun_modfl(digit, &digit);  /* digit = 7.0, i.e. just the (d+1)-th digit; 3.16.4 tweak */
  /* with t = 0 any non-zero remainder rounds up, so the untruncated remainder is tested; otherwise the
     (d+1)-th digit is compared with the threshold t, e.g. t = 4 for round half up */
  if ((t == 0 && digit > 0.0L) ||
     ((int)digit > t)) whole += 1.0L;  /* whole = 123457.0 */
  return sign*whole/scale;             /* 123457.0 / 100.0 = 1234.57 */
}
#endif


LUALIB_API double sun_lgamma (double x);  /* forward declaration */

/* Binomial coefficient of n and k for C doubles, 2.10.4. Fractional arguments are evaluated through the gamma
   function and are meant to work like in Maple; integral arguments are evaluated through lgamma and the result
   is rounded. Negative integral n are mapped to non-negative ones by recursion. */
LUALIB_API double tools_binomial (double n, double k) {  /* 2.10.4 */
  double r;
  if (tools_isfrac(n) || tools_isfrac(k)) {  /* 2.10.4 improvement, work like in Maple */
    if (tools_isint(k)) {
      if (k == 0.0) return 1;  /* 2.17.4 */
      if (k < 0.0) return 0.0;
      /* avoid recursion, so call cephes_gamma() below instead of: "else return (n - k + 1)/k*tools_binomial(n, k - 1);" */
    }
    if (tools_isnegint(n - k)) return 0;  /* 2.17.4 fix, do not return undefined, but 0 like in Maple */
    if (k > 0.0) {  /* with k > 0, this is twelve percent faster than cephes_gamma(), 2.17.4 tuning */
      int sign;
      if (tools_isnegint(n)) return AGN_NAN;
      /* lgamma yields the logarithm of the absolute value only, so the sign is determined separately */
      sign = tools_gammasign(n + 1)*tools_gammasign(k + 1)*tools_gammasign(n - k + 1);
      return sign*sun_exp(sun_lgamma(n + 1) - sun_lgamma(k + 1) - sun_lgamma(n - k + 1));
    }
    r = tools_gamma(n + 1)/(tools_gamma(k + 1)*tools_gamma(n - k + 1));  /* 2.17.5 change */
    if (tools_isnanorinf(r))
      r = sun_exp(tools_lnbinomiall((long double)n, (long double)k));  /* 3.7.3 change to avoid overflow */
    return r;
  }
  /* from here on, n and k are both integral */
  if (n < 0.0) {
    /* To prevent overflows, we use the following recursions, see: https://mathworld.wolfram.com/BinomialCoefficient.html 3.7.3 */
    if (k >= 0.0)
      return sun_pow(-1, k, 1)*tools_binomial(-n + k - 1, k);
    if (k <= n)
      return sun_pow(-1, n - k, 1)*tools_binomial(-k - 1, n - k);
    return 0.0;
  }
  if (n < k) return 0.0;  /* 0.28.3 patch */
  /* non-negative n, with k not exceeding n */
  r = sun_exp(sun_lgamma(n + 1) - sun_lgamma(k + 1) - sun_lgamma(n - k + 1));  /* 2.17.4 tweak */
  return sun_round(r);  /* 2.29.3 improvement */
}


/* Binomial coefficient of n and k for long doubles, 3.16.4. Follows the same case distinction as tools_binomial,
   but the result for integral arguments is not rounded. */
LUALIB_API long double tools_binomiall (long double n, long double k) {  /* 3.16.4 */
  long double r;
  if (tools_isfrac(n) || tools_isfrac(k)) {  /* work like in Maple */
    if (tools_isint(k)) {
      if (k == 0) return 1.0L;
      if (k < 0) return 0.0L;
      /* avoid recursion, so call cephes_gamma() below instead of: "else return (n - k + 1)/k*tools_binomial(n, k - 1);" */
    }
    if (tools_isnegint(n - k)) return 0;  /* 2.17.4 fix, do not return undefined, but 0 like in Maple */
    if (k > 0) {  /* with k > 0, this is twelve percent faster than cephes_gamma() */
      int sign;
      if (tools_isnegint(n)) return AGN_NAN;
      /* lgamma yields the logarithm of the absolute value only, so the sign is determined separately */
      sign = tools_gammasignl(n + 1)*tools_gammasignl(k + 1)*tools_gammasignl(n - k + 1);
      return sign*tools_expl(tools_lgammal(n + 1) - tools_lgammal(k + 1) - tools_lgammal(n - k + 1));
    }
    r = tools_gammal(n + 1)/(tools_gammal(k + 1)*tools_gammal(n - k + 1));  /* 2.17.5 change */
    if (tools_isnanorinf(r))
      r = tools_expl(tools_lnbinomiall(n, k));  /* avoid overflow */
    return r;
  }
  /* from here on, n and k are both integral */
  if (n < 0.0L) {
    /* To prevent overflows, we use the following recursions, see: https://mathworld.wolfram.com/BinomialCoefficient.html */
    if (k >= 0.0L)
      return tools_powl(-1, k)*tools_binomiall(-n + k - 1, k);
    if (k <= n)
      return tools_powl(-1, n - k)*tools_binomiall(-k - 1, n - k);
    return 0.0L;
  }
  if (n < k) return 0.0L;
  /* non-negative n, with k not exceeding n */
  r = tools_expl(tools_lgammal(n + 1) - tools_lgammal(k + 1) - tools_lgammal(n - k + 1));
  return r; /* DO NOT call sun_roundl(r) for it will result in huge round-off errors ! */
}


/* Natural logarithm of the binomial coefficient of n and k, computed from lgamma differences, 3.7.3/4.
   Returns AGN_NAN where no result is available: integral k < 0, n - k a negative integer, or an infinite
   intermediate result. Returns 0 for k = 0. */
LUALIB_API long double tools_lnbinomiall (long double n, long double k) {  /* 3.7.3/4 */
  long double lnr;
  if (tools_isint(k)) {  /* behave like in Maple */
    if (k == 0) return 0;
    if (k < 0) return AGN_NAN;
  }
  if (tools_isnegint(n)) {
    /* To prevent overflows, we use the following recursions, see: https://mathworld.wolfram.com/BinomialCoefficient.html 3.7.3 */
    if (k >= 0)
      return sun_log(sun_pow(-1, k, 1)) + tools_lnbinomiall(-n + k - 1, k);
    if (k <= n)
      return sun_log(sun_pow(-1, n - k, 1)) + tools_lnbinomiall(-k - 1, n - k);
    return AGN_NAN;
  }
  if (tools_isnegint(n - k)) return AGN_NAN;  /* behave like in Maple */
  lnr = tools_lgammal(n + 1) - tools_lgammal(k + 1) - tools_lgammal(n - k + 1);
  if (tools_isinf(lnr)) return AGN_NAN;
  return lnr;
}


/* Double version of tools_lnbinomiall, 3.7.3: the computation itself is done in long double precision and the
   result is converted back to a double. */
LUALIB_API double tools_lnbinomial (double n, double k) {
  long double nl, kl;
  nl = (long double)n;
  kl = (long double)k;
  return tools_lnbinomiall(nl, kl);
}


/* returns exp(x^2) if sign >= 0 and exp(-x^2) if sign < 0.
   Based on file `expx2.c` in the Cephes Math Library
   Release 2.9:  June, 2000, Copyright 2000 by Stephen L. Moshier */
/* NOTE(review): the result is reported as infinity as soon as the exponent exceeds MAXLOG = 170, although
   exp(170) is still far below the largest double; Cephes itself presumably uses log(DBL_MAX), about 709.78,
   here - confirm that 170 is intended. */
#define MAXLOG       +170.0

LUALIB_API double tools_expx2 (double x, int sign) {
  double sq, rest, m, f;
  if (x < 0) x = -x;
  if (sign < 0) x = -x;  /* for exp(-x^2) continue with the negated magnitude */
  /* 2.10.1, treat the infinities up front to avoid numeric exceptions, i.e. the wrong value `undefined`;
     2.5.15 optimisation */
  if (unlikely(x == -HUGE_VAL)) return 0;
  if (unlikely(x == HUGE_VAL)) return HUGE_VAL;
  /* Split x into m, the nearest multiple of 1/128, plus a small residual f, so that m*m is computed without
     rounding error and x^2 = m^2 + 2mf + f^2 can be exponentiated in two pieces. */
  m = 0.0078125*sun_floor(128.0*x + 0.5);
  f = x - m;
  sq = m*m;
  rest = 2*m*f + f*f;
  if (sign < 0) {
    sq = -sq;
    rest = -rest;
  }
  if (unlikely((sq + rest) > MAXLOG))  /* 2.5.15 optimisation */
    return HUGE_VAL;
  return sun_exp(sq)*sun_exp(rest);  /* sq is exact, rest is small */
}

/* Complex counterpart of tools_expx2 for z = a + I*b: computes exp(z^2) if sign >= 0 and exp(-z^2) if sign < 0.
   Returns the real part and stores the imaginary part in *im. */
LUALIB_API double tools_cexpx2 (double a, double b, double sign, double *im) {
  double asq, bsq, mag, ab, dir;
  asq = a*a;
  bsq = b*b;
  /* modulus of the result: exp(+/-Re(z^2)) with Re(z^2) = a^2 - b^2 */
  mag = (sign < 0) ? sun_exp(-asq + bsq) : sun_exp(asq - bsq);
  ab = a*b;  /* Im(z^2) = 2*a*b is the angle of the result, negated for sign < 0 */
  dir = (sign < 0) ? -1 : 1;
  *im = dir*mag*sun_sin(2.0*ab);  /* imaginary part */
  return mag*sun_cos(2.0*ab);     /* real part */
}


/* @(#)s_sincos.c 5.1 13/07/15 */
/*
 * ====================================================
 * Copyright (C) 2013 Elliot Saba. All rights reserved.
 *
 * Developed at the University of Washington.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* sincos(x, s, c)
 * Several applications need sine and cosine of the same
 * angle x. This function computes both at the same time,
 * and stores the results in *sin and *cos.
 *
 * kernel function:
 *  __kernel_sin    ... sine function on [-pi/4,pi/4]
 *  __kernel_cos    ... cose function on [-pi/4,pi/4]
 *  __ieee754_rem_pio2  ... argument reduction routine
 *
 * Method.
 *      Borrow liberally from s_sin.c and s_cos.c, merging
 *  efforts where applicable and returning their values in
 * appropriate variables, thereby slightly reducing the
 * amount of work relative to just calling sin/cos(x)
 * separately
 *
 * Special cases:
 *      Let trig be any of sin, cos, or tan.
 *      sincos(+-INF, s, c)  is NaN, with signals;
 *      sincos(NaN, s, c)    is that NaN;
 */

/* STRICT_ASSIGN(type, lval, rval): assigns rval to lval. If `type` is smaller than a double, the value is first
   stored in a volatile temporary of that type and lval is assigned from the temporary; otherwise it is assigned
   directly. Presumably the detour forces the value to be rounded to `type` instead of being kept in a wider
   register - TODO confirm against the FreeBSD msun original. */
#define  STRICT_ASSIGN(type, lval, rval) do {  \
  volatile type __lval;      \
            \
  if (sizeof(type) >= sizeof(double))  \
    (lval) = (rval);    \
  else {          \
    __lval = (rval);    \
    (lval) = __lval;    \
  }          \
} while (0)

/* @(#)k_rem_pio2.c 1.3 95/01/18
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 */

/* indexed by the precision code `prec` (0 = single, 1 = double, 2 = extended, 3 = quad) in sun_rem_pio2_aux */
static const int init_jk[] = {3, 4, 4, 6}; /* initial value for jk */

/*
 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
 *
 *    integer array, contains the (24*i)-th to (24*i+23)-th
 *    bit of 2/pi after binary point. The corresponding
 *    floating value is
 *
 *      ipio2[i] * 2^(-24(i+1)).
 *
 * NB: This table must have at least (e0-3)/24 + jk terms.
 *     For quad precision (e0 <= 16360, jk = 6), this is 686.
 */
static const int32_t ipio2[] = {
/* the following 66 entries are always compiled */
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,

/* the remaining entries are compiled only if long double has a larger exponent range than double; the build is
   stopped if that range exceeds what the table covers */
#if LDBL_MAX_EXP > 1024
#if LDBL_MAX_EXP > 16384
#error "ipio2 table needs to be expanded"
#endif
0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2,
0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35,
0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C,
0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4,
0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7,
0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19,
0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16,
0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6,
0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48,
0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3,
0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55,
0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612,
0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC,
0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B,
0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4,
0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB,
0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C,
0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F,
0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437,
0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B,
0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD,
0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3,
0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717,
0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F,
0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB,
0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51,
0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C,
0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6,
0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED,
0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328,
0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0,
0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B,
0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3,
0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F,
0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B,
0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4,
0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31,
0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30,
0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E,
0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1,
0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4,
0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08,
0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9,
0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4,
0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C,
0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0,
0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0,
0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC,
0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893,
0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7,
0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F,
0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4,
0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B,
0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2,
0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E,
0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569,
0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9,
0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D,
0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855,
0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569,
0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE,
0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41,
0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F,
0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110,
0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365,
0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A,
0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5,
0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616,
0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0,
#endif
};

/* pi/2 cut into successive chunks, leading chunk first; the sum of all entries approximates pi/2. The hexadecimal
   values in the comments are the intended high and low words of each double. */
static const double PIo2[] = {
  1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
  7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
  5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
  3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
  1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
  1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
  2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
};

/* Basic constants used by the argument reduction: 0, 1, 2^24 and 2^-24.
   NOTE(review): unlike the neighbouring tables, these are not declared `static`, so the short names `zero` and
   `one` have external linkage - confirm whether other translation units rely on that. */
const double
zero    =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
one     =  1.00000000000000000000e+00, /* 3FF00000, 00000000 */
two24   =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
twon24  =  5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */

/* __kernel_rem_pio2(x,y,e0,nx,prec)
 * double x[],y[]; int e0,nx,prec;
 *
 * __kernel_rem_pio2 return the last three digits of N with
 *    y = x - N*pi/2
 * so that |y| < pi/2.
 *
 * The method is to compute the integer (mod 8) and fraction parts of
 * (2/pi)*x without doing the full multiplication. In general we
 * skip the part of the product that are known to be a huge integer (
 * more accurately, = 0 mod 8 ). Thus the number of operations are
 * independent of the exponent of the input.
 *
 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
 *
 * Input parameters:
 *   x[]  The input value (must be positive) is broken into nx
 *    pieces of 24-bit integers in double precision format.
 *    x[i] will be the i-th 24 bit of x. The scaled exponent
 *    of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
 *    match x's up to 24 bits.
 *
 *    Example of breaking a double positive z into x[0]+x[1]+x[2]:
 *      e0 = ilogb(z)-23
 *      z  = scalbn(z,-e0)
 *    for i = 0,1,2
 *      x[i] = floor(z)
 *      z    = (z-x[i])*2**24
 *
 *
 *  y[]  ouput result in an array of double precision numbers.
 *    The dimension of y[] is:
 *      24-bit  precision  1
 *      53-bit  precision  2
 *      64-bit  precision  2
 *      113-bit precision  3
 *    The actual value is the sum of them. Thus for 113-bit
 *    precison, one may have to do something like:
 *
 *    long double t,w,r_head, r_tail;
 *    t = (long double)y[2] + (long double)y[1];
 *    w = (long double)y[0];
 *    r_head = t+w;
 *    r_tail = w - (r_head - t);
 *
 *  e0  The exponent of x[0]. Must be <= 16360 or you need to
 *              expand the ipio2 table.
 *
 *  nx  dimension of x[]
 *
 *    prec  an integer indicating the precision:
 *      0  24  bits (single)
 *      1  53  bits (double)
 *      2  64  bits (extended)
 *      3  113 bits (quad)
 *
 * External function:
 *  double scalbn(), floor();
 *
 *
 * Here is the description of some local variables:
 *
 *   jk  jk+1 is the initial number of terms of ipio2[] needed
 *    in the computation. The minimum and recommended value
 *    for jk is 3,4,4,6 for single, double, extended, and quad.
 *    jk+1 must be 2 larger than you might expect so that our
 *    recomputation test works. (Up to 24 bits in the integer
 *    part (the 24 bits of it that we compute) and 23 bits in
 *    the fraction part may be lost to cancelation before we
 *    recompute.)
 *
 *   jz  local integer variable indicating the number of
 *    terms of ipio2[] used.
 *
 *  jx  nx - 1
 *
 *  jv  index for pointing to the suitable ipio2[] for the
 *    computation. In general, we want
 *      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
 *    is an integer. Thus
 *      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
 *    Hence jv = max(0,(e0-3)/24).
 *
 *  jp  jp+1 is the number of terms in PIo2[] needed, jp = jk.
 *
 *   q[]  double array with integral value, representing the
 *    24-bits chunk of the product of x and 2/pi.
 *
 *  q0  the corresponding exponent of q[0]. Note that the
 *    exponent for q[i] would be q0-24*i.
 *
 *  PIo2[]  double precision array, obtained by cutting pi/2
 *    into 24 bits chunks.
 *
 *  f[]  ipio2[] in floating point
 *
 *  iq[]  integer array by breaking up q[] in 24-bits chunk.
 *
 *  fq[]  final product of x*(2/pi) in fq[0],..,fq[jk]
 *
 *  ih  integer. If >0 it indicates q[] is >= 0.5, hence
 *    it also indicates the *sign* of the result.
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 *
 * Handles the case when reduction is not needed: |x| ~<= pi/4, i.e. returns x.
 */

/* Constants for reducing an argument modulo pi/2: invpio2 is 2/pi; pio2_1, pio2_2 and pio2_3 are successive
   pieces of pi/2 with zeroed trailing bits (see the hexadecimal low words), and pio2_1t, pio2_2t and pio2_3t
   are presumably the remainder of pi/2 after the respective piece - their use lies outside this part of the
   file, TODO confirm. */
static const double
invpio2 =  6.36619772367581382433e-01,  /* 0x3FE45F30, 0x6DC9C883 */
pio2_1  =  1.57079632673412561417e+00,  /* 0x3FF921FB, 0x54400000 */
pio2_1t =  6.07710050650619224932e-11,  /* 0x3DD0B461, 0x1A626331 */
pio2_2  =  6.07710050630396597660e-11,  /* 0x3DD0B461, 0x1A600000 */
pio2_2t =  2.02226624879595063154e-21,  /* 0x3BA3198A, 0x2E037073 */
pio2_3  =  2.02226624871116645580e-21,  /* 0x3BA3198A, 0x2E000000 */
pio2_3t =  8.47842766036889956997e-32;  /* 0x397B839A, 0x252049C1 */

/* Large-argument reduction kernel (double precision).
 *
 * Multiplies the 24-bit chunks x[0..nx-1] by the chunks of 2/pi taken from
 * ipio2[], keeps only the part of the product that matters mod 8, and turns
 * the remaining fraction back into a multiple of pi/2 using PIo2[].
 *
 *   x     input chunks; x[0] carries the scaled exponent e0
 *         (NOTE(review): the value is presumably expected to be positive,
 *         the only caller visible here passes |x| - confirm)
 *   y     output; 1 element is written for prec 0, 2 for prec 1 or 2,
 *         3 for prec 3
 *   e0    scaled exponent of x[0]
 *   nx    number of chunks in x[]
 *   prec  precision selector, used as index into init_jk[] and to choose
 *         how fq[] is compressed into y[]
 *
 * Returns n & 7, i.e. the quadrant count modulo 8.
 *
 * NOTE(review): iq[], f[], fq[] and q[] are fixed at 20 elements and are
 * indexed up to jz resp. jx + jz without a bounds check.
 */
static int sun_rem_pio2_aux (double *x, double *y, int e0, int nx, int prec) {
  int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
  double z, fw, f[20], fq[20], q[20];
  tools_bzero(fq, 20*sizeof(double));  /* 4.9.1 to prevent compiler warnings */
  /* initialize jk*/
  jk = init_jk[prec];
  jp = jk;
  /* determine jx,jv,q0, note that 3>q0 */
  jx =  nx - 1;
  jv = (e0 - 3)/24; if (jv < 0) jv = 0;
  q0 =  e0 - 24*(jv + 1);
  /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
  j = jv - jx; m = jx + jk;
  for (i=0; i <= m; i++, j++) f[i] = (j < 0) ? zero : (double)ipio2[j];
  /* compute q[0],q[1],...q[jk] */
  for (i=0; i <= jk; i++) {
    for (j=0, fw=0.0; j <= jx; j++) { fw += x[j]*f[jx + i - j]; }
    q[i] = fw;
  }
  jz = jk;
  /* re-entered with a larger jz whenever the terms computed so far cancel to zero */
recompute:
  /* distill q[] into iq[] reversingly */
  for (i=0, j=jz, z=q[jz]; j > 0; i++, j--) {
    fw    =  (double)((int32_t)(twon24*z));
    iq[i] =  (int32_t)(z - two24*fw);
    z     =  q[j - 1] + fw;
  }
  /* compute n */
  z  = sun_scalbn(z, q0);       /* actual value of z */
  z -= 8.0*sun_floor(z*0.125);  /* trim off integer >= 8 */
  n  = (int32_t)z;
  z -= (double)n;
  ih = 0;
  if (q0 > 0) {  /* need iq[jz-1] to determine n */
    i  = (iq[jz - 1] >> (24 - q0)); n += i;
  iq[jz - 1] -= i << (24 - q0);
    ih = iq[jz - 1] >> (23 - q0);
  }
  else if (q0 == 0) ih = iq[jz - 1] >> 23;
  else if (z >= 0.5) ih = 2;
  if (ih > 0) {  /* q > 0.5 */
    /* round n up and replace the fraction q by 1-q; ih != 0 later flips the sign of y[] */
    n += 1; carry = 0;
    for (i=0; i < jz; i++) {  /* compute 1-q */
      j = iq[i];
      if (carry == 0) {
        if (j != 0) {
          carry = 1; iq[i] = 0x1000000 - j;
        }
      } else  iq[i] = 0xffffff - j;
    }
    if (q0 > 0) {  /* rare case: chance is 1 in 12 */
      switch(q0) {
        case 1:
          iq[jz - 1] &= 0x7fffff; break;
        case 2:
          iq[jz - 1] &= 0x3fffff; break;
        }
    }
    if (ih == 2) {
      z = one - z;
      if (carry != 0) z -= sun_scalbn(one, q0);
    }
  }
  /* check if recomputation is needed */
  if (z == zero) {
    j = 0;
    for (i=jz - 1; i >= jk; i--) j |= iq[i];
    if (j == 0) {  /* need recomputation */
      /* k counts the zero chunks found walking down from iq[jk-1]; that many extra terms are added */
      for (k=1; iq[jk - k] == 0; k++);    /* k = no. of terms needed */
      for (i=jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
        f[jx + i] = (double)ipio2[jv + i];
        for (j=0, fw=0.0; j <= jx; j++) fw += x[j]*f[jx + i - j];
        q[i] = fw;
      }
      jz += k;
      goto recompute;
    }
  }
  /* chop off zero terms */
  if (z == 0.0) {
    jz -= 1; q0 -= 24;
    while (iq[jz] == 0) { jz--; q0 -= 24; }
  } else {  /* break z into 24-bit if necessary */
    z = sun_scalbn(z, -q0);
    if (z >= two24) {
      fw = (double)((int32_t)(twon24*z));
      iq[jz] = (int32_t)(z - two24*fw);
      jz += 1; q0 += 24;
      iq[jz] = (int32_t)fw;
    } else iq[jz] = (int32_t)z;
  }
  /* convert integer "bit" chunk to floating-point value */
  fw = sun_scalbn(one, q0);
  for (i=jz; i >= 0; i--) {
    q[i] = fw*(double)iq[i]; fw *= twon24;
  }
  /* compute PIo2[0,...,jp]*q[jz,...,0] */
  for (i=jz; i >= 0; i--) {
    for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k]*q[i + k];
    fq[jz - i] = fw;
  }
  /* compress fq[] into y[] */
  switch (prec) {
    case 0:
      /* single result: sum all terms, smallest index last */
      fw = 0.0;
      for (i=jz; i >= 0; i--) fw += fq[i];
      y[0] = (ih == 0)? fw : -fw;
      break;
    case 1:
    case 2:
      /* head in y[0], then the remainder of the sum as tail in y[1] */
      fw = 0.0;
      for (i=jz; i >=0; i--) fw += fq[i];
      STRICT_ASSIGN(double, fw, fw);
      y[0] = (ih == 0) ? fw : -fw;
      fw = fq[0] - fw;
      for (i=1; i <= jz; i++) fw += fq[i];
      y[1] = (ih == 0) ? fw : -fw;
      break;
    case 3:  /* painful */
      /* two compensated passes over fq[], then fq[0], fq[1] and the summed rest form y[0..2] */
      for (i=jz; i > 0; i--) {
        fw      = fq[i - 1] + fq[i];
        fq[i]  += fq[i - 1] - fw;
        fq[i - 1] = fw;
      }
      for (i=jz; i > 1; i--) {
        fw      = fq[i - 1] + fq[i];
        fq[i]  += fq[i - 1] - fw;
        fq[i - 1] = fw;
      }
      for (fw=0.0, i=jz; i >= 2; i--) fw += fq[i];
      if (ih == 0) {
        y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
      } else {
        y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
      }
  }
  return n & 7;
}

/* Argument reduction for double x: finds an integer n and a remainder
 * y[0] + y[1] (head and tail) such that x is approximately n*pi/2 + y[0] + y[1].
 *
 *   x  value to reduce
 *   y  receives the remainder in two pieces, y[0] being the leading one
 *
 * Return value by path:
 *   |x| ~<= pi/4           0, with y[0] = x and y[1] = 0
 *   |x| ~<= 9pi/4          -4 .. 4, from the hard-coded branches
 *   medium (< 2^20*pi/2)   (int)fn, the rounded quotient, not reduced further
 *   large                  n or -n, n being the 0..7 result of sun_rem_pio2_aux
 *   inf or NaN             0, with y[0] = y[1] = x - x
 */
LUALIB_API int sun_rem_pio2 (double x, double *y) {  /* argument reduction */
  double z, w, t, r, fn;
  double tx[3], ty[2];
  int32_t e0, i, j, nx, n, ix, hx;
  uint32_t low;
  GET_HIGH_WORD(hx, x);  /* high word of x */
  ix = hx & 0x7fffffff;  /* same word with the sign bit cleared */
  if (ix <= 0x3fe921fb) {  /* |x| ~<= pi/4, no need for reduction; activated 2.21.8 */
    y[0] = x; y[1] = 0;
    return 0;
  }
  if (ix <= 0x400f6a7a) {  /* |x| ~<= 5pi/4 */
    if ((ix & 0xfffff) == 0x921fb)  /* |x| ~= pi/2 or 2pi/2 */
      goto medium;  /* cancellation -- use medium case */
    if (ix <= 0x4002d97c) {  /* |x| ~<= 3pi/4 */
      if (hx > 0) {  /* x positive: subtract pi/2 */
        z = x - pio2_1;  /* one round good to 85 bits */
        y[0] = z - pio2_1t;
        y[1] = (z - y[0]) - pio2_1t;
        return 1;
      } else {  /* x negative: add pi/2 */
        z = x + pio2_1;
        y[0] = z + pio2_1t;
        y[1] = (z - y[0]) + pio2_1t;
        return -1;
      }
    } else {
      if (hx > 0) {
        z = x - 2*pio2_1;
        y[0] = z - 2*pio2_1t;
        y[1] = (z - y[0]) - 2*pio2_1t;
        return 2;
      } else {
        z = x + 2*pio2_1;
        y[0] = z + 2*pio2_1t;
        y[1] = (z - y[0]) + 2*pio2_1t;
        return -2;
      }
    }
  }
  if (ix <= 0x401c463b) {    /* |x| ~<= 9pi/4 */
    if (ix <= 0x4015fdbc) {  /* |x| ~<= 7pi/4 */
      if (ix == 0x4012d97c)  /* |x| ~= 3pi/2 */
        goto medium;
      if (hx > 0) {
        z = x - 3*pio2_1;
        y[0] = z - 3*pio2_1t;
        y[1] = (z - y[0]) - 3*pio2_1t;
        return 3;
      } else {
        z = x + 3*pio2_1;
        y[0] = z + 3*pio2_1t;
        y[1] = (z - y[0]) + 3*pio2_1t;
        return -3;
      }
    } else {
      if (ix == 0x401921fb)  /* |x| ~= 4pi/2 */
        goto medium;
      if (hx > 0) {
        z = x - 4*pio2_1;
        y[0] = z - 4*pio2_1t;
        y[1] = (z - y[0]) - 4*pio2_1t;
        return 4;
      } else {
        z = x + 4*pio2_1;
        y[0] = z + 4*pio2_1t;
        y[1] = (z - y[0]) + 4*pio2_1t;
        return -4;
      }
    }
  }
  if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */
  /* the label sits inside this block so that the near-multiple cases above can jump straight in */
medium:
    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
    STRICT_ASSIGN(double, fn, x*invpio2 + 0x1.8p52);
    fn = fn - 0x1.8p52;
    fn = rint(fn);  /* 3.2.1 speed-up over the functions using argument reduction (sin, cos, tan, ...) by 5 to 9 percent */
    n  = (int)fn;
    r  = x - fn*pio2_1;
    w  = fn*pio2_1t;  /* 1st round good to 85 bit */
    {
      uint32_t high;
      j  = ix >> 20;  /* biased exponent of x */
      y[0] = r - w;
      GET_HIGH_WORD(high, y[0]);
      /* i = exponent of x minus exponent of the result; a large gap means heavy cancellation */
      i = j - ((high >> 20) & 0x7ff);
      if (i > 16) {  /* 2nd iteration needed, good to 118 */
        t = r;
        w = fn*pio2_2;
        r = t - w;
        w = fn*pio2_2t - ((t - r) - w);
        y[0] = r - w;
        GET_HIGH_WORD(high, y[0]);
        i = j - ((high >> 20) & 0x7ff);
        if (i > 49) {  /* 3rd iteration need, 151 bits acc */
          t  = r;      /* will cover all possible cases */
          w  = fn*pio2_3;
          r  = t - w;
          w  = fn*pio2_3t - ((t - r) - w);
          y[0] = r - w;
        }
      }
    }
    y[1] = (r - y[0]) - w;
    return n;
  }
  /* all other (large) arguments */
  if (ix >= 0x7ff00000) {  /* x is inf or NaN */
    y[0] = y[1] = x - x; return 0;
  }
  /* set z = scalbn(|x|, 23 - ilogb(x)): the exponent field of |x| is replaced by 1046 = 1023 + 23 */
  GET_LOW_WORD(low, x);
  e0 = (ix >> 20) - 1046;  /* e0 = ilogb(x) - 23 */
  INSERT_WORDS(z, ix - ((int32_t)(e0 << 20)), low);
  /* split z into three chunks of 24 bits each, tx[0] being the most significant */
  for (i=0; i < 2; i++) {
    tx[i] = (double)((int32_t)(z));
    z = (z - tx[i])*two24;
  }
  tx[2] = z;
  nx = 3;
  while (tx[nx - 1] == zero) nx--;  /* skip zero term */
  n = sun_rem_pio2_aux(tx, ty, e0, nx, 1);
  /* the kernel worked on |x|; mirror the result for negative x */
  if (hx < 0) { y[0] = -ty[0]; y[1] = -ty[1]; return -n; }
  y[0] = ty[0]; y[1] = ty[1]; return n;
}


/* origin: FreeBSD /usr/src/lib/msun/ld80/e_rem_pio2.c */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * Optimized by Bruce D. Evans.
 */
#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
/* ld80 and ld128 version of __rem_pio2(x,y)
 *
 * return the remainder of x rem pi/2 in y[0]+y[1]
 * use __rem_pio2_large() for large x
 */

#ifndef __ARMCPU
/* Adding and then subtracting toint rounds a long double to an integral value
   (round-to-nearest mode assumed); sun_rem_pio2l uses it to get the quotient. */
static const long double toint = 1.5/LDBL_EPSILON;
#endif

/* Helpers and constants for sun_rem_pio2l, one set per long double format:
 *   SMALL(u)    true if the union ldshape value u is below the threshold up to
 *               which the iterative reduction is used
 *   QUOBITS(x)  low 31 bits of the integral value x
 *   ROUND1/2    exponent gaps that trigger the 2nd and 3rd refinement round
 *   NX, NY      sizes of the chunk input and result arrays of the large path
 * The macro arguments are parenthesized so that expressions can be passed safely. */
#if LDBL_MANT_DIG == 64
/* u ~< 0x1p25*pi/2 */
#define SMALL(u) (((((u).i.se & 0x7fffU) << 16) | ((u).i.m >> 48)) < (((0x3fff + 25) << 16) | (0x921f >> 1) | 0x8000))
#define QUOBITS(x) ((uint32_t)(int32_t)(x) & 0x7fffffff)
#define ROUND1 22
#define ROUND2 61
#define NX 3
#define NY 2
/* Rinvpio2: 64 bits of 2/pi; currently disabled, sun_rem_pio2l uses the
 *           double constant invpio2 instead
 * Rpio2_1:  first  39 bits of pi/2
 * Rpio2_1t: pi/2 - Rpio2_1
 * Rpio2_2:  second 39 bits of pi/2
 * Rpio2_2t: pi/2 - (Rpio2_1+Rpio2_2)
 * Rpio2_3:  third  39 bits of pi/2
 * Rpio2_3t: pi/2 - (Rpio2_1+Rpio2_2+Rpio2_3) */
#ifndef __ARMCPU
static const double
Rpio2_1 =  1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */
Rpio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */
Rpio2_3 =  6.36831716351370313614e-25; /*  0x18a2e037074000.0p-133 */
static const long double
/* Rinvpio2 =  6.36619772367581343076e-01L, */ /*  0xa2f9836e4e44152a.0p-64 */
Rpio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */
Rpio2_2t =  6.36831716351095013979e-25L, /*  0xc51701b839a25205.0p-144 */
Rpio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */
#endif  /* of __ARMCPU */
#elif LDBL_MANT_DIG == 113
/* u ~< 0x1p45*pi/2 */
#define SMALL(u) (((((u).i.se & 0x7fffU) << 16) | (u).i.top) < (((0x3fff + 45) << 16) | 0x921f))
#define QUOBITS(x) ((uint32_t)(int64_t)(x) & 0x7fffffff)
#define ROUND1 51
#define ROUND2 119
#define NX 5
#define NY 3
#ifndef __ARMCPU
static const long double
/* Rinvpio2 =  6.3661977236758134307553505349005747e-01L, */  /*  0x145f306dc9c882a53f84eafa3ea6a.0p-113 */
Rpio2_1  =  1.5707963267948966192292994253909555e+00L,  /*  0x1921fb54442d18469800000000000.0p-112 */
Rpio2_1t =  2.0222662487959507323996846200947577e-21L,  /*  0x13198a2e03707344a4093822299f3.0p-181 */
Rpio2_2  =  2.0222662487959507323994779168837751e-21L,  /*  0x13198a2e03707344a400000000000.0p-181 */
Rpio2_2t =  2.0670321098263988236496903051604844e-43L,  /*  0x127044533e63a0105df531d89cd91.0p-254 */
Rpio2_3  =  2.0670321098263988236499468110329591e-43L,  /*  0x127044533e63a0105e00000000000.0p-254 */
Rpio2_3t = -2.5650587247459238361625433492959285e-65L;  /* -0x159c4ec64ddaeb5f78671cbfb2210.0p-327 */
#endif   /* of __ARMCPU */
#endif   /* of LDBL_MANT_DIG == 64 */

/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
 * __rem_pio2_large(x,y,e0,nx,prec)
 * double x[],y[]; int e0,nx,prec;
 *
 * __rem_pio2_large returns the last three bits of N (i.e. N mod 8) with
 *              y = x - N*pi/2
 * so that |y| < pi/2.
 *
 * The method is to compute the integer (mod 8) and fraction parts of
 * (2/pi)*x without doing the full multiplication. In general we
 * skip the part of the product that are known to be a huge integer (
 * more accurately, = 0 mod 8 ). Thus the number of operations are
 * independent of the exponent of the input.
 *
 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
 *
 * Input parameters:
 *      x[]     The input value (must be positive) is broken into nx
 *              pieces of 24-bit integers in double precision format.
 *              x[i] will be the i-th 24-bit chunk of x. The scaled exponent
 *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
 *              matches x up to 24 bits).
 *
 *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
 *                      e0 = ilogb(z)-23
 *                      z  = scalbn(z,-e0)
 *              for i = 0,1,2
 *                      x[i] = floor(z)
 *                      z    = (z-x[i])*2**24
 *
 *      y[]     output result in an array of double precision numbers.
 *              The dimension of y[] is:
 *                      24-bit  precision       1
 *                      53-bit  precision       2
 *                      64-bit  precision       2
 *                      113-bit precision       3
 *              The actual value is the sum of them. Thus for 113-bit
 *              precision, one may have to do something like:
 *
 *              long double t,w,r_head, r_tail;
 *              t = (long double)y[2] + (long double)y[1];
 *              w = (long double)y[0];
 *              r_head = t+w;
 *              r_tail = w - (r_head - t);
 *
 *      e0      The exponent of x[0]. Must be <= 16360 or you need to
 *              expand the ipio2 table.
 *
 *      nx      dimension of x[]
 *
 *      prec    an integer indicating the precision:
 *                      0       24  bits (single)
 *                      1       53  bits (double)
 *                      2       64  bits (extended)
 *                      3       113 bits (quad)
 *
 * External function:
 *      double scalbn(), floor();
 *
 * Here is the description of some local variables:
 *
 *      jk      jk+1 is the initial number of terms of ipio2[] needed
 *              in the computation. The minimum and recommended value
 *              for jk is 3,4,4,6 for single, double, extended, and quad.
 *              jk+1 must be 2 larger than you might expect so that our
 *              recomputation test works. (Up to 24 bits in the integer
 *              part (the 24 bits of it that we compute) and 23 bits in
 *              the fraction part may be lost to cancelation before we
 *              recompute.)
 *
 *      jz      local integer variable indicating the number of
 *              terms of ipio2[] used.
 *
 *      jx      nx - 1
 *
 *      jv      index for pointing to the suitable ipio2[] for the
 *              computation. In general, we want
 *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
 *              is an integer. Thus
 *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
 *              Hence jv = max(0,(e0-3)/24).
 *
 *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
 *
 *      q[]     double array with integral value, representing the
 *              24-bits chunk of the product of x and 2/pi.
 *
 *      q0      the corresponding exponent of q[0]. Note that the
 *              exponent for q[i] would be q0-24*i.
 *
 *      PIo2[]  double precision array, obtained by cutting pi/2
 *              into 24 bits chunks.
 *
 *      f[]     ipio2[] in floating point
 *
 *      iq[]    integer array by breaking up q[] in 24-bits chunk.
 *
 *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
 *
 *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
 *              it also indicates the *sign* of the result.
 */
#ifndef __ARMCPU
/* Large-argument reduction kernel working on long double chunks.
 *
 * Multiplies the 24-bit chunks x[0..nx-1] by the chunks of 2/pi taken from
 * ipio2[], keeps only the part of the product that matters mod 8, and turns
 * the remaining fraction back into a multiple of pi/2 using PIo2[].
 *
 *   x     input chunks; x[0] carries the scaled exponent e0
 *   y     output; 1 element is written for prec 0, 2 for prec 1 or 2,
 *         3 for prec 3
 *   e0    scaled exponent of x[0]
 *   nx    number of chunks in x[]
 *   prec  precision selector, used as index into init_jk[] and to choose
 *         how fq[] is compressed into y[]
 *
 * Returns n & 7, i.e. the quadrant count modulo 8.
 *
 * NOTE(review): iq[], f[], fq[] and q[] are fixed at 20 elements and are
 * indexed up to jz resp. jx + jz without a bounds check.
 */
static int __rem_pio2_largel (long double *x, long double *y, int e0, int nx, int prec) {
  int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
  long double z, fw, f[20], fq[20], q[20];
  tools_bzero(fq, 20*sizeof(long double));  /* 4.9.1 to prevent compiler warnings */
  /* initialize jk */
  jk = init_jk[prec];
  jp = jk;
  /* determine jx, jv, q0, note that 3 > q0 */
  jx = nx - 1;
  jv = (e0 - 3)/24;
  if (jv < 0) jv = 0;
  q0 = e0 - 24*(jv + 1);
  /* set up f[0] to f[jx + jk] where f[jx + jk] = ipio2[jv + jk] */
  j = jv - jx; m = jx + jk;
  for (i=0; i <= m; i++, j++)
    f[i] = (j < 0) ? 0.0 : (long double)ipio2[j];
  /* compute q[0],q[1],...q[jk] */
  for (i=0; i <= jk; i++) {
    for (j=0, fw=0.0; j <= jx; j++)
      fw += x[j]*f[jx + i - j];
    q[i] = fw;
  }
  jz = jk;
  /* re-entered with a larger jz whenever the terms computed so far cancel to zero */
recompute:
  /* distill q[] into iq[] reversingly */
  for (i=0, j=jz, z=q[jz]; j > 0; i++, j--) {
    fw    = (long double)(int32_t)(0x1p-24*z);
    iq[i] = (int32_t)(z - 0x1p24*fw);
    z     = q[j - 1] + fw;
  }
  /* compute n */
  z  = scalbnl(z, q0);       /* actual value of z */
  z -= 8.0*floorl(z*0.125);  /* trim off integer >= 8 */
  n  = (int32_t)z;
  z -= (long double)n;
  ih = 0;
  if (q0 > 0) {  /* need iq[jz - 1] to determine n */
    i  = iq[jz - 1] >> (24 - q0); n += i;
    iq[jz - 1] -= i << (24 - q0);
    ih = iq[jz - 1] >> (23 - q0);
  }
  else if (q0 == 0)  ih = iq[jz - 1] >> 23;
  else if (z >= 0.5) ih = 2;
  if (ih > 0) {  /* q > 0.5 */
    /* round n up and replace the fraction q by 1 - q; ih != 0 later flips the sign of y[] */
    n += 1; carry = 0;
    for (i=0; i < jz; i++) {  /* compute 1 - q */
      j = iq[i];
      if (carry == 0) {
        if (j != 0) {
          carry = 1;
          iq[i] = 0x1000000 - j;
        }
      } else
        iq[i] = 0xffffff - j;
    }
    if (q0 > 0) {  /* rare case: chance is 1 in 12 */
      switch(q0) {
        case 1:
          iq[jz - 1] &= 0x7fffff; break;
        case 2:
          iq[jz - 1] &= 0x3fffff; break;
      }
    }
    if (ih == 2) {
      z = 1.0 - z;
      if (carry != 0)
        z -= scalbnl(1.0, q0);
    }
  }
  /* check if recomputation is needed */
  if (z == 0.0) {
    j = 0;
    for (i=jz - 1; i >= jk; i--) j |= iq[i];
    if (j == 0) {  /* need recomputation */
      /* k counts the zero chunks found walking down from iq[jk-1]; that many extra terms are added */
      for (k=1; iq[jk - k] == 0; k++);    /* k = no. of terms needed */
      for (i=jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
        f[jx + i] = (long double)ipio2[jv + i];
        for (j=0, fw=0.0; j <= jx; j++)
          fw += x[j]*f[jx + i - j];
        q[i] = fw;
      }
      jz += k;
      goto recompute;
    }
  }
  /* chop off zero terms */
  if (z == 0.0) {
    jz -= 1;
    q0 -= 24;
    while (iq[jz] == 0) {
      jz--;
      q0 -= 24;
    }
  } else {  /* break z into 24-bit if necessary */
    z = scalbnl(z, -q0);
    if (z >= 0x1p24) {
      fw = (long double)(int32_t)(0x1p-24*z);
      iq[jz] = (int32_t)(z - 0x1p24*fw);
      jz += 1;
      q0 += 24;
      iq[jz] = (int32_t)fw;
    } else
      iq[jz] = (int32_t)z;
  }
  /* convert integer "bit" chunk to floating-point value;
     scalbnl keeps the whole routine in long double (the value is a power of two either way) */
  fw = scalbnl(1.0, q0);
  for (i=jz; i >= 0; i--) {
    q[i] = fw*(long double)iq[i];
    fw *= 0x1p-24;
  }
  /* compute PIo2[0,...,jp]*q[jz,...,0] */
  for (i=jz; i >= 0; i--) {
    for (fw=0.0, k=0; k <= jp && k <= jz - i; k++)
      fw += PIo2[k]*q[i + k];
    fq[jz - i] = fw;
  }
  /* compress fq[] into y[] */
  switch (prec) {
    case 0:
      /* single result: sum all terms, smallest index last */
      fw = 0.0;
      for (i=jz; i >= 0; i--)
        fw += fq[i];
      y[0] = (ih == 0) ? fw : -fw;
      break;
    case 1:
    case 2:
      /* head in y[0], then the remainder of the sum as tail in y[1];
         fw is already a long double, so no precision has to be dropped here */
      fw = 0.0;
      for (i=jz; i >= 0; i--)
        fw += fq[i];
      y[0] = (ih == 0) ? fw : -fw;
      fw = fq[0] - fw;
      for (i=1; i <= jz; i++)
        fw += fq[i];
      y[1] = (ih == 0) ? fw : -fw;
      break;
    case 3:  /* painful */
      /* two compensated passes over fq[], then fq[0], fq[1] and the summed rest form y[0..2] */
      for (i=jz; i > 0; i--) {
        fw        = fq[i - 1] + fq[i];
        fq[i]    += fq[i - 1] - fw;
        fq[i - 1] = fw;
      }
      for (i=jz; i > 1; i--) {
        fw        = fq[i - 1] + fq[i];
        fq[i]    += fq[i - 1] - fw;
        fq[i - 1] = fw;
      }
      for (fw=0.0, i=jz; i >= 2; i--)
        fw += fq[i];
      if (ih == 0) {
        y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
      } else {
        y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
      }
  }
  return n & 7;
}

LUALIB_API int sun_rem_pio2l (long double x, long double *y) {
  union ldshape u, uz;
  long double z, w, t, r, fn;
  long double tx[NX], ty[NY];
  int ex, ey, n, i;
  u.f = x;
  ex = u.i.se & 0x7fff;
  if (SMALL(u)) {
    /* rint(x/(pi/2)), Assume round-to-nearest. */
    fn = x*invpio2 + toint - toint;
    n = QUOBITS(fn);
    r = x - fn*Rpio2_1;
    w = fn*Rpio2_1t;  /* 1st round good to 102/180 bits (ld80/ld128) */
    y[0] = r-w;
    u.f = y[0];
    ey = u.i.se & 0x7fff;
    if (ex - ey > ROUND1) {  /* 2nd iteration needed, good to 141/248 (ld80/ld128) */
      t = r;
      w = fn*Rpio2_2;
      r = t - w;
      w = fn*Rpio2_2t - ((t - r) - w);
      y[0] = r - w;
      u.f = y[0];
      ey = u.i.se & 0x7fff;
      if (ex - ey > ROUND2) {  /* 3rd iteration, good to 180/316 bits */
        t = r;  /* will cover all possible cases (not verified for ld128) */
        w = fn*Rpio2_3;
        r = t - w;
        w = fn*Rpio2_3t - ((t - r) - w);
        y[0] = r-w;
      }
    }
    y[1] = (r - y[0]) - w;
    return n;
  }
  /* all other (large) arguments */
  if (ex == 0x7fff) {  /* x is inf or NaN */
    y[0] = y[1] = x - x;
    return 0;
  }
  /* set z = scalbn(|x|, -ilogb(x) + 23) */
  uz.f = x;
  uz.i.se = 0x3fff + 23;
  z = uz.f;
  for (i=0; i < NX - 1; i++) {
    tx[i] = (long double)(int32_t)z;
    z     = (z - tx[i])*0x1p24;
  }
  tx[i] = z;
  while (tx[i] == 0) i--;
  n = __rem_pio2_largel(tx, ty, ex - 0x3fff - 23, i + 1, NY);
  w = ty[1];
  if (NY == 3) w += ty[2];
  r = ty[0] + w;
  /* TODO: for ld128 this does not follow the recommendation of the
    comments of __rem_pio2_large which seem wrong if |ty[0]| > |ty[1]+ty[2]| */
  w -= r - ty[0];
  if (u.i.se >> 15) {
    y[0] = -r;
    y[1] = -w;
    return -n;
  }
  y[0] = r;
  y[1] = w;
  return n;
}
#endif  /* of not __ARMCPU */

#endif  /* of (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 */


/* Constants used in polynomial approximation of sin/cos by sun_sincos_aux:
   S1..S6 are the coefficients of the sine polynomial, C1..C6 those of the
   cosine polynomial; the hex pairs give the high and low word of each double. */
static const double
half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
S1  = -1.66666666666666324348e-01,  /* 0xBFC55555, 0x55555549 */
S2  =  8.33333333332248946124e-03,  /* 0x3F811111, 0x1110F8A6 */
S3  = -1.98412698298579493134e-04,  /* 0xBF2A01A0, 0x19C161D5 */
S4  =  2.75573137070700676789e-06,  /* 0x3EC71DE3, 0x57B1FE7D */
S5  = -2.50507602534068634195e-08,  /* 0xBE5AE5E6, 0x8A2B9CEB */
S6  =  1.58969099521155010221e-10,  /* 0x3DE5D93A, 0x5ACFD57C */
C1  =  4.16666666666666019037e-02,  /* 0x3FA55555, 0x5555554C */
C2  = -1.38888888888741095749e-03,  /* 0xBF56C16C, 0x16C15177 */
C3  =  2.48015872894767294178e-05,  /* 0x3EFA01A0, 0x19CB1590 */
C4  = -2.75573143513906633035e-07,  /* 0xBE927E4F, 0x809C52AD */
C5  =  2.08757232129817482790e-09,  /* 0x3E21EE9E, 0xBDB4B1C4 */
C6  = -1.13596475577881948265e-11;  /* 0xBDA8FAE9, 0xBE8838D4 */

FORCE_INLINE void sun_sincos_aux (double x, double y, int iy, double *k_s, double *k_c ) {
  /* Inline calculation of sin/cos, as we can save
     some work, and we will always need to calculate
     both values, no matter the result of switch */
  double z, w, r, v, hz;
  z = x*x;
  w = z*z;
  /* cos-specific computation; equivalent to calling __kernel_cos(x,y) and storing in k_c*/
  r   = z*(C1 + z*(C2 + z*C3)) + w*w*(C4 + z*(C5 + z*C6));
  hz  = 0.5*z;
  v   = one - hz;
  *k_c = v + (((one - v) - hz) + (z*r - x*y));
  /* sin-specific computation; equivalent to calling __kernel_sin(x,y,1) and storing in k_s*/
  r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
  v = z*x;
  if (iy == 0)
    *k_s = x + v*(S1 + z*r);
  else
    *k_s = x - ((z*(half*y - v*r) - y) - v*S1);
}

/* Computes sin(x) and cos(x) together; 35 % faster than calling sin and cos individually.
 *
 *   x  argument
 *   s  receives sin(x)
 *   c  receives cos(x)
 *
 * Inf and NaN yield NaN (x - x) in both outputs.
 */
LUALIB_API void sun_sincos (double x, double *s, double *c) {
  double rem[2], sin_r, cos_r;
  int32_t hi, quadrant;
  /* high word of x with the sign bit cleared */
  GET_HIGH_WORD(hi, x);
  hi &= 0x7fffffff;
  if (hi >= 0x7ff00000) {
    /* sincos(Inf or NaN) is NaN */
    *s = x - x;
    *c = x - x;
    return;
  }
  if (hi <= 0x3fe921fb) {  /* |x| ~< pi/4: no reduction needed */
    /* Tiny x: sin(x) = x and cos(x) = 1 to working precision. The conversion
       (int)x always gives 0 here; NOTE(review): it is presumably kept for its
       floating-point side effect, as in the upstream code - confirm. */
    if (hi < 0x3e46a09e && (int)x == 0) {
      *s = x;
      *c = 1.0;
      return;
    }
    /* kernel with no tail */
    sun_sincos_aux(x, 0.0, 0, s, c);
    return;
  }
  /* reduce first, then evaluate the kernels on head and tail of the remainder */
  quadrant = sun_rem_pio2(x, rem);
  sun_sincos_aux(rem[0], rem[1], 1, &sin_r, &cos_r);
  /* map the kernel results to the true outputs according to the quadrant */
  quadrant &= 3;
  if (quadrant == 0) {
    *c = cos_r;
    *s = sin_r;
  } else if (quadrant == 1) {
    *c = -sin_r;
    *s = cos_r;
  } else if (quadrant == 2) {
    *c = -cos_r;
    *s = -sin_r;
  } else {
    *c =  sin_r;
    *s = -cos_r;
  }
}


/* Writes the sine result sin(pi*a) to the location pointed to by sp and the cosine result cos(pi*a) to the location
   pointed to by cp. The location pointed to by tp receives tan(pi*a) if gettangent is non-zero, and AGN_NAN otherwise.

   Zero results are always returned as +0, i.e. the signed zeros of IEEE-754 are deliberately not reproduced.

   In extensive testing, no errors > 0.97 ulp were found in either the sine or cosine results, suggesting the results returned
   are faithfully rounded.

   Written by:
   njuffa

   Taken from:
   https://stackoverflow.com/questions/42792939/implementation-of-sinpi-and-cospi-using-standard-c-math-library */
LUALIB_API void tools_sincospi (double a, double *sp, double *cp, double *tp, int gettangent) {
  double c, r, s, t, az;
  int64_t i;
  az = a * 0.0;  /* must be evaluated with IEEE-754 semantics; NaN for a = Inf or NaN */
  /* for |a| >= 2**53, cospi(a) = 1.0, but cospi(Inf) = NaN */
  a = (fabs(a) < 9.0071992547409920e+15) ? a : az;  /* 0x1.0p53 */
  /* reduce argument to primary approximation interval (-0.25, 0.25) */
  r = rint(a + a);  /* must use IEEE-754 "to nearest" rounding; use rint(), not nearbyint() as the latter is missing on some platforms */
  /* converting NaN to an integer is undefined in ISO C; the quadrant is irrelevant then as s and c are NaN anyway */
  i = (r == r) ? (int64_t)r : 0;
  t = fma(-0.5, r, a);
  /* compute core approximations */
  s = t * t;
  /* Approximate cos(pi*x) for x in [-0.25,0.25] */
  r =           -1.0369917389758117e-4;
  r = fma(r, s,  1.9294935641298806e-3);
  r = fma(r, s, -2.5806887942825395e-2);
  r = fma(r, s,  2.3533063028328211e-1);
  r = fma(r, s, -1.3352627688538006e+0);
  r = fma(r, s,  4.0587121264167623e+0);
  r = fma(r, s, -4.9348022005446790e+0);
  c = fma(r, s,  1.0000000000000000e+0);
  /* Approximate sin(pi*x) for x in [-0.25,0.25] */
  r =            4.6151442520157035e-4;
  r = fma(r, s, -7.3700183130883555e-3);
  r = fma(r, s,  8.2145868949323936e-2);
  r = fma(r, s, -5.9926452893214921e-1);
  r = fma(r, s,  2.5501640398732688e+0);
  r = fma(r, s, -5.1677127800499516e+0);
  s *= t;
  r *= s;
  s = fma(t, 3.1415926535897931e+0, r);
  /* map results according to quadrant */
  if (i & 2) {
    s = 0.0 - s;  /* must be evaluated with IEEE-754 semantics */
    c = 0.0 - c;  /* must be evaluated with IEEE-754 semantics */
  }
  if (i & 1) {
    t = 0.0 - s;  /* must be evaluated with IEEE-754 semantics */
    s = c;
    c = t;
  }
  /* integral a: the sine is an exact zero */
  if (a == sun_floor(a)) s = az;
  /* NO (!) IEEE-754 signed zeros: the comparison is true for +0 and -0 alike, so any zero becomes +0 */
  if (s == 0.0) s = 0.0;
  if (c == 0.0) c = 0.0;
  *sp = s;
  *cp = c;
  *tp = AGN_NAN;
  if (gettangent) {  /* 4.5.3 to prevent wrong results near the poles;
    the quotient s/c and also the series expansion around a = 0 is very inaccurate, so use sun_tan. */
    /* NOTE(review): tools_approx presumably reports whether c lies within AGN_EPSILON of 0 - confirm */
    if (tools_approx(c, 0, AGN_EPSILON)) {
      *tp = sun_tan(M_PI*a);
    } else {
      *tp = s/c;
    }
  }
}


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * taken from MUSL-1.2.3 src/math/__sinl.c
 */

#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
/*
 * ld80 version of __sin.c.  See __sin.c for most comments.
 */
/*
 * Domain [-0.7854, 0.7854], range ~[-1.89e-22, 1.915e-22]
 * |sin(x)/x - s(x)| < 2**-72.1
 *
 * See __cosl.c for more details about the polynomial.
 */
static const long double
LSINS1 = -0.166666666666666666671L;    /* -0xaaaaaaaaaaaaaaab.0p-66 */
static const long double  /* patched 2.35.1 */
LSINS2 =  0.0083333333333333332L,      /*  0x11111111111111.0p-59 */
LSINS3 = -0.00019841269841269427L,     /* -0x1a01a01a019f81.0p-65 */
LSINS4 =  0.0000027557319223597490L,   /*  0x171de3a55560f7.0p-71 */
LSINS5 = -0.000000025052108218074604L, /* -0x1ae64564f16cad.0p-78 */
LSINS6 =  1.6059006598854211e-10L,     /*  0x161242b90243b5.0p-85 */
LSINS7 = -7.6429779983024564e-13L,     /* -0x1ae42ebd1b2e00.0p-93 */
LSINS8 =  2.6174587166648325e-15L;     /*  0x179372ea0b3f64.0p-101 */
/* Horner evaluation of LSINS2 + LSINS3*z + ... + LSINS8*z^6; the argument is
   parenthesized so that an expression can be passed safely */
#define SINPOLY(z) (LSINS2+(z)*(LSINS3+(z)*(LSINS4+(z)*(LSINS5+(z)*(LSINS6+(z)*(LSINS7+(z)*LSINS8))))))
#elif LDBL_MANT_DIG == 113
/*
 * ld128 version of __sin.c.  See __sin.c for most comments.
 */
/*
 * Domain [-0.7854, 0.7854], range ~[-1.53e-37, 1.659e-37]
 * |sin(x)/x - s(x)| < 2**-122.1
 *
 * See __cosl.c for more details about the polynomial.
 */
static const long double
LSINS1 = -0.16666666666666666666666666666666666606732416116558L,
LSINS2 =  0.0083333333333333333333333333333331135404851288270047L,
LSINS3 = -0.00019841269841269841269841269839935785325638310428717L,
LSINS4 =  0.27557319223985890652557316053039946268333231205686e-5L,
LSINS5 = -0.25052108385441718775048214826384312253862930064745e-7L,
LSINS6 =  0.16059043836821614596571832194524392581082444805729e-9L,
LSINS7 = -0.76471637318198151807063387954939213287488216303768e-12L,
LSINS8 =  0.28114572543451292625024967174638477283187397621303e-14L;
static const long double  /* patched 2.35.1 */
LSINS9  = -0.82206352458348947812512122163446202498005154296863e-17L,
LSINS10 =  0.19572940011906109418080609928334380560135358385256e-19L,
LSINS11 = -0.38680813379701966970673724299207480965452616911420e-22L,
LSINS12 =  0.64038150078671872796678569586315881020659912139412e-25L;
/* Horner evaluation of LSINS2 + LSINS3*z + ... + LSINS12*z^10; the argument is
   parenthesized so that an expression can be passed safely */
#define SINPOLY(z) (LSINS2+(z)*(LSINS3+(z)*(LSINS4+(z)*(LSINS5+(z)*(LSINS6+(z)*(LSINS7+(z)*(LSINS8+ \
  (z)*(LSINS9+(z)*(LSINS10+(z)*(LSINS11+(z)*LSINS12))))))))))
#endif

/* Sine kernel for a reduced long double argument.
 *
 *   x   reduced argument (head)
 *   y   tail of the reduced argument
 *   iy  0 if y is to be ignored, non-zero otherwise
 *
 * Returns the polynomial approximation of sin(x + y).
 */
static long double aux_sinl (long double x, long double y, int iy) {
  const long double x2   = x*x;
  const long double x3   = x2*x;
  const long double poly = SINPOLY(x2);
  /* the short form is used when there is no tail to account for */
  return (iy == 0) ? x + x3*(LSINS1 + x2*poly)
                   : x - ((x2*(0.5*y - x3*poly) - y) - x3*LSINS1);
}
#endif

/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * taken from MUSL-1.2.3 src/math/__cosl.c
 */

#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
/*
 * ld80 version of __cos.c.  See __cos.c for most comments.
 */
/*
 * Domain [-0.7854, 0.7854], range ~[-2.43e-23, 2.425e-23]:
 * |cos(x) - c(x)| < 2**-75.1
 *
 * The coefficients of c(x) were generated by a pari-gp script using
 * a Remez algorithm that searches for the best higher coefficients
 * after rounding leading coefficients to a specified precision.
 *
 * Simpler methods like Chebyshev or basic Remez barely suffice for
 * cos() in 64-bit precision, because we want the coefficient of x^2
 * to be precisely -0.5 so that multiplying by it is exact, and plain
 * rounding of the coefficients of a good polynomial approximation only
 * gives this up to about 64-bit precision.  Plain rounding also gives
 * a mediocre approximation for the coefficient of x^4, but a rounding
 * error of 0.5 ulps for this coefficient would only contribute ~0.01
 * ulps to the final error, so this is unimportant.  Rounding errors in
 * higher coefficients are even less important.
 *
 * In fact, coefficients above the x^4 one only need to have 53-bit
 * precision, and this is more efficient.  We get this optimization
 * almost for free from the complications needed to search for the best
 * higher coefficients.
 */
/* Coefficients LCOSC1 .. LCOSC7 for the 64-bit-mantissa cosine kernel; they are
 * consumed exclusively through the COSPOLY macro defined right below. */
static const long double
LCOSC1 =  0.0416666666666666666136L;        /*  0xaaaaaaaaaaaaaa9b.0p-68 */
static const long double  /* patched 2.35.1 */
LCOSC2 = -0.0013888888888888874L,           /* -0x16c16c16c16c10.0p-62 */
LCOSC3 =  0.000024801587301571716L,         /*  0x1a01a01a018e22.0p-68 */
LCOSC4 = -0.00000027557319215507120L,       /* -0x127e4fb7602f22.0p-74 */
LCOSC5 =  0.0000000020876754400407278L,     /*  0x11eed8caaeccf1.0p-81 */
LCOSC6 = -1.1470297442401303e-11L,          /* -0x19393412bd1529.0p-89 */
LCOSC7 =  4.7383039476436467e-14L;          /*  0x1aac9d9af5c43e.0p-97 */
/* COSPOLY(z): Horner evaluation of z*(LCOSC1 + z*(LCOSC2 + ... + z*LCOSC7)).
 * The argument is substituted without parentheses, so pass a plain identifier. */
#define COSPOLY(z) (z*(LCOSC1+z*(LCOSC2+z*(LCOSC3+z*(LCOSC4+z*(LCOSC5+z*(LCOSC6+z*LCOSC7)))))))
#elif LDBL_MANT_DIG == 113
/*
 * ld128 version of __cos.c.  See __cos.c for most comments.
 */
/*
 * Domain [-0.7854, 0.7854], range ~[-1.80e-37, 1.79e-37]:
 * |cos(x) - c(x))| < 2**-122.0
 *
 * 113-bit precision requires more care than 64-bit precision, since
 * simple methods give a minimax polynomial with coefficient for x^2
 * that is 1 ulp below 0.5, but we want it to be precisely 0.5.  See
 * above for more details.
 */
/* Coefficients LCOSC1 .. LCOSC11 for the 113-bit-mantissa cosine kernel; they are
 * consumed exclusively through the COSPOLY macro defined right below. */
static const long double
LCOSC1 =  0.04166666666666666666666666666666658424671L,
LCOSC2 = -0.001388888888888888888888888888863490893732L,
LCOSC3 =  0.00002480158730158730158730158600795304914210L,
LCOSC4 = -0.2755731922398589065255474947078934284324e-6L,
LCOSC5 =  0.2087675698786809897659225313136400793948e-8L,
LCOSC6 = -0.1147074559772972315817149986812031204775e-10L,
LCOSC7 =  0.4779477332386808976875457937252120293400e-13L;
static const long double  /* patched 2.35.1 */
LCOSC8 = -0.1561920696721507929516718307820958119868e-15L,
LCOSC9 =  0.4110317413744594971475941557607804508039e-18L,
LCOSC10 = -0.8896592467191938803288521958313920156409e-21L,
LCOSC11 =  0.1601061435794535138244346256065192782581e-23L;
/* COSPOLY(z): Horner evaluation of z*(LCOSC1 + z*(LCOSC2 + ... + z*LCOSC11)).
 * The argument is substituted without parentheses, so pass a plain identifier. */
#define COSPOLY(z) (z*(LCOSC1+z*(LCOSC2+z*(LCOSC3+z*(LCOSC4+z*(LCOSC5+z*(LCOSC6+z*(LCOSC7+ \
	z*(LCOSC8+z*(LCOSC9+z*(LCOSC10+z*LCOSC11)))))))))))
#endif

/* Cosine kernel: combines 1 - x*x/2 with the COSPOLY correction for the argument x;
 * y enters only through the product x*y that is subtracted from the correction. */
static long double aux_cosl (long double x, long double y) {
  const long double xsq = x*x;
  const long double poly = COSPOLY(xsq);
  const long double halfsq = 0.5*xsq;
  const long double lead = 1.0 - halfsq;
  /* (1.0 - lead) - halfsq recovers what was rounded away when forming lead */
  return lead + (((1.0 - lead) - halfsq) + (xsq*poly - x*y));
}
#endif


/*
 * ====================================================
 * Copyright 2004 Sun Microsystems, Inc.  All Rights Reserved.
 * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * taken from MUSL-1.2.3 src/math/__tanl.c
 */

#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
/*
 * ld80 version of __tan.c.  See __tan.c for most comments.
 */
/*
 * Domain [-0.67434, 0.67434], range ~[-2.25e-22, 1.921e-22]
 * |tan(x)/x - t(x)| < 2**-71.9
 *
 * See __cosl.c for more details about the polynomial.
 */
/* Constants for the 64-bit-mantissa tangent kernel aux_tanl: the polynomial
 * coefficients LTANT3 .. LTANT33 and pio4/pio4lo, which aux_tanl uses to
 * transform arguments with |x| >= 0.67434. */
static const long double
LTANT3 =  0.333333333333333333180L,         /*  0xaaaaaaaaaaaaaaa5.0p-65 */
LTANT5 =  0.133333333333333372290L,         /*  0x88888888888893c3.0p-66 */
LTANT7 =  0.0539682539682504975744L,        /*  0xdd0dd0dd0dc13ba2.0p-68 */
pio4   =  0.785398163397448309628L,     /*  0xc90fdaa22168c235.0p-64 */
pio4lo = -1.25413940316708300586e-20L;  /* -0xece675d1fc8f8cbb.0p-130 */
/* the higher-order coefficients are only compiled when __ARMCPU is not defined */
#ifndef __ARMCPU
static const long double  /* patched 2.35.1 */
LTANT9  =  0.021869488536312216L,           /*  0x1664f4882cc1c2.0p-58 */
LTANT11 =  0.0088632355256619590L,          /*  0x1226e355c17612.0p-59 */
LTANT13 =  0.0035921281113786528L,          /*  0x1d6d3d185d7ff8.0p-61 */
LTANT15 =  0.0014558334756312418L,          /*  0x17da354aa3f96b.0p-62 */
LTANT17 =  0.00059003538700862256L,         /*  0x13559358685b83.0p-63 */
LTANT19 =  0.00023907843576635544L,         /*  0x1f56242026b5be.0p-65 */
LTANT21 =  0.000097154625656538905L,        /*  0x1977efc26806f4.0p-66 */
LTANT23 =  0.000038440165747303162L,        /*  0x14275a09b3ceac.0p-67 */
LTANT25 =  0.000018082171885432524L,        /*  0x12f5e563e5487e.0p-68 */
LTANT27 =  0.0000024196006108814377L,       /*  0x144c0d80cc6896.0p-71 */
LTANT29 =  0.0000078293456938132840L,       /*  0x106b59141a6cb3.0p-69 */
LTANT31 = -0.0000032609076735050182L,       /* -0x1b5abef3ba4b59.0p-71 */
LTANT33 =  0.0000023261313142559411L;       /*  0x13835436c0c87f.0p-71 */
#endif
/* RPOLY(w): Horner sum over LTANT5, LTANT9, ..., LTANT33 (every second coefficient).
 * The argument is substituted without parentheses, so pass a plain identifier. */
#define RPOLY(w) (LTANT5 + w*(LTANT9 + w*(LTANT13 + w*(LTANT17 + w*(LTANT21 + \
	w*(LTANT25 + w*(LTANT29 + w * LTANT33)))))))
/* VPOLY(w): Horner sum over the remaining coefficients LTANT7, LTANT11, ..., LTANT31. */
#define VPOLY(w) (LTANT7 + w*(LTANT11 + w*(LTANT15 + w*(LTANT19 + w*(LTANT23 + \
	w*(LTANT27 + w * LTANT31))))))
#elif LDBL_MANT_DIG == 113
/*
 * ld128 version of __tan.c.  See __tan.c for most comments.
 */
/*
 * Domain [-0.67434, 0.67434], range ~[-3.37e-36, 1.982e-37]
 * |tan(x)/x - t(x)| < 2**-117.8 (XXX should be ~1e-37)
 *
 * See __cosl.c for more details about the polynomial.
 */
#ifndef __ARMCPU
/* Constants for the 113-bit-mantissa tangent kernel aux_tanl: the polynomial
 * coefficients LTANT3 .. LTANT57 and pio4/pio4lo, which aux_tanl uses to
 * transform arguments with |x| >= 0.67434. */
static const long double
LTANT3 = 0x1.5555555555555555555555555553p-2L,
LTANT5 = 0x1.1111111111111111111111111eb5p-3L,
LTANT7 = 0x1.ba1ba1ba1ba1ba1ba1ba1b694cd6p-5L,
LTANT9 = 0x1.664f4882c10f9f32d6bbe09d8bcdp-6L,
LTANT11 = 0x1.226e355e6c23c8f5b4f5762322eep-7L,
LTANT13 = 0x1.d6d3d0e157ddfb5fed8e84e27b37p-9L,
LTANT15 = 0x1.7da36452b75e2b5fce9ee7c2c92ep-10L,
LTANT17 = 0x1.355824803674477dfcf726649efep-11L,
LTANT19 = 0x1.f57d7734d1656e0aceb716f614c2p-13L,
LTANT21 = 0x1.967e18afcb180ed942dfdc518d6cp-14L,
LTANT23 = 0x1.497d8eea21e95bc7e2aa79b9f2cdp-15L,
LTANT25 = 0x1.0b132d39f055c81be49eff7afd50p-16L,
LTANT27 = 0x1.b0f72d33eff7bfa2fbc1059d90b6p-18L,
LTANT29 = 0x1.5ef2daf21d1113df38d0fbc00267p-19L,
LTANT31 = 0x1.1c77d6eac0234988cdaa04c96626p-20L,
LTANT33 = 0x1.cd2a5a292b180e0bdd701057dfe3p-22L,
LTANT35 = 0x1.75c7357d0298c01a31d0a6f7d518p-23L,
LTANT37 = 0x1.2f3190f4718a9a520f98f50081fcp-24L,
pio4 = 0x1.921fb54442d18469898cc51701b8p-1L,
pio4lo = 0x1.cd129024e088a67cc74020bbea60p-116L;

/* the remaining coefficients are written with far fewer significant digits */
static const long double  /* patched 2.35.1 */
LTANT39 =  0.000000028443389121318352L,	/*  0x1e8a7592977938.0p-78 */
LTANT41 =  0.000000011981013102001973L,	/*  0x19baa1b1223219.0p-79 */
LTANT43 =  0.0000000038303578044958070L,	/*  0x107385dfb24529.0p-80 */
LTANT45 =  0.0000000034664378216909893L,	/*  0x1dc6c702a05262.0p-81 */
LTANT47 = -0.0000000015090641701997785L,	/* -0x19ecef3569ebb6.0p-82 */
LTANT49 =  0.0000000029449552300483952L,	/*  0x194c0668da786a.0p-81 */
LTANT51 = -0.0000000022006995706097711L,	/* -0x12e763b8845268.0p-81 */
LTANT53 =  0.0000000015468200913196612L,	/*  0x1a92fc98c29554.0p-82 */
LTANT55 = -0.00000000061311613386849674L,	/* -0x151106cbc779a9.0p-83 */
LTANT57 =  1.4912469681508012e-10L;		    /*  0x147edbdba6f43a.0p-85 */
/* RPOLY(w): Horner sum over LTANT5, LTANT9, ..., LTANT57 (every second coefficient).
 * The argument is substituted without parentheses, so pass a plain identifier. */
#define RPOLY(w) (LTANT5 + w*(LTANT9 + w*(LTANT13 + w*(LTANT17 + w*(LTANT21 + \
	w*(LTANT25 + w*(LTANT29 + w*(LTANT33 + w*(LTANT37 + w*(LTANT41 + \
	w*(LTANT45 + w*(LTANT49 + w*(LTANT53 + w * LTANT57)))))))))))))
/* VPOLY(w): Horner sum over the remaining coefficients LTANT7, LTANT11, ..., LTANT55. */
#define VPOLY(w) (LTANT7 + w*(LTANT11 + w*(LTANT15 + w*(LTANT19 + w*(LTANT23 + \
	w*(LTANT27 + w*(LTANT31 + w*(LTANT35 + w*(LTANT39 + w*(LTANT43 + \
	w*(LTANT47 + w*(LTANT51 + w * LTANT55))))))))))))
#endif
#endif

#ifndef __ARMCPU
/* Tangent kernel. Returns the approximation for the argument x with correction
 * term y when odd is 0, and -1 divided by that value when odd is 1.
 * Arguments with |x| >= 0.67434 are first mapped to pio4 - |x|; the sign is
 * restored at the end. */
static long double aux_tanl (long double x, long double y, int odd) {
  const int big = fabsl(x) >= 0.67434;
  int negate = 0;
  long double xsq, xquad, rsum, vsum, xcube, corr, total;
  if (big) {
    if (x < 0) {
      negate = 1;
      x = -x;
      y = -y;
    }
    x = (pio4 - x) + (pio4lo - y);
    y = 0.0;
  }
  xsq = x*x;
  xquad = xsq*xsq;
  rsum = RPOLY(xquad);
  vsum = xsq*VPOLY(xquad);
  xcube = xsq*x;
  corr = y + xsq*(xcube*(rsum + vsum) + y) + LTANT3*xcube;
  total = x + corr;
  if (big) {
    const long double unit = 1 - 2*odd;
    const long double res = unit - 2.0*(x + (corr - total*total / (total + unit)));
    return negate ? -res : res;
  }
  if (odd) {
    /* if an error of up to 2 ulp were allowed, -1.0 / (x + corr) could be returned
       directly; instead compute -1.0 / (x + corr) accurately */
    long double zhead, ztail, inv, ihead, resid;
    zhead = total;
    zhead = zhead + 0x1p32 - 0x1p32;
    ztail = corr - (zhead - x);        /* zhead + ztail = corr + x */
    ihead = inv = -1.0 / total;        /* inv = -1.0/total */
    ihead = ihead + 0x1p32 - 0x1p32;
    resid = 1.0 + ihead*zhead;
    return ihead + inv*(resid + ihead*ztail);
  }
  return total;
}
#endif
#endif


/* 2.37.1 patch */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Stores sine and cosine of x in *sin and *cos. This variant is compiled when long
 * double has a 53-bit mantissa; the work is done in double precision, either by
 * sun_sin/sun_cos (if __ARMCPU is defined) or by sincos(). */
LUALIB_API void sun_sincosl (long double x, long double *sin, long double *cos) {
  double sine_d, cosine_d;
#ifndef __ARMCPU
  sincos(x, &sine_d, &cosine_d);
#else
  sine_d = sun_sin(x);
  cosine_d = sun_cos(x);
#endif
  /* widen the double results for the caller */
  *sin = sine_d;
  *cos = cosine_d;
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
LUALIB_API  void sun_sincosl (long double x, long double *sin, long double *cos) {
  union ldshape u = {x};
  unsigned n;
#ifndef __ARMCPU
  long double y[2], s, c;
#else
  double y[2], s, c;
#endif
  u.i.se &= 0x7fff;
  if (u.i.se == 0x7fff) {
    *sin = *cos = x - x;
    return;
  }
  if (u.f < M_PIO4ld) {  /* 4.0.3 adaption to long double */
    if (u.i.se < 0x3fff - LDBL_MANT_DIG) {
      /* raise underflow if subnormal */
      if (u.i.se == 0) FORCE_EVAL(x*0x1p-120f);
      *sin = x;
      /* raise inexact if x!=0 */
      *cos = 1.0 + x;
      return;
    }
    *sin = aux_sinl(x, 0, 0);
    *cos = aux_cosl(x, 0);
    return;
  }
  n = sun_rem_pio2l(x, y);
  s = aux_sinl(y[0], y[1], 1);
  c = aux_cosl(y[0], y[1]);
  switch (n & 3) {
    case 0:
      *sin = s;
      *cos = c;
      break;
    case 1:
      *sin = c;
      *cos = -s;
      break;
    case 2:
      *sin = -s;
      *cos = -c;
      break;
    case 3:
    default:
      *sin = -c;
      *cos = s;
      break;
  }
}
#endif


#ifndef __ARMCPU
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Sine for long double arguments; this variant is compiled when long double has a
   53-bit mantissa and simply forwards to sun_sin. */
LUALIB_API long double sun_sinl (long double x) {
  return sun_sin(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
LUALIB_API long double sun_sinl (long double x) {
  unsigned n;
  long double y[2];
  union ldshape u = {x};
  u.i.se &= 0x7fff;
  if (u.i.se == 0x7fff) return x - x;
  if (u.f < M_PIO4ld) {  /* 4.0.3 adaption to long double */
    if (u.i.se < 0x3fff - LDBL_MANT_DIG) {
      /* raise underflow if subnormal */
      if (u.i.se == 0) FORCE_EVAL(x*0x1p-120f);
      return x;
    }
    return aux_sinl(x, 0, 0);
  }
  n = sun_rem_pio2l(x, y);
  switch (n & 3) {
    case 0:
      return aux_sinl(y[0], y[1], 1);
    case 1:
      return aux_cosl(y[0], y[1]);
    case 2:
      return -(aux_sinl(y[0], y[1], 1));
    case 3: default:
      return -(aux_cosl(y[0], y[1]));
  }
}
#endif
#endif


#ifndef __ARMCPU
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Cosine for long double arguments; this variant is compiled when long double has a
   53-bit mantissa and simply forwards to sun_cos. */
LUALIB_API long double sun_cosl (long double x) {
  return sun_cos(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
LUALIB_API long double sun_cosl (long double x) {
  unsigned n;
  long double y[2];
  union ldshape u = {x};
  u.i.se &= 0x7fff;
  if (u.i.se == 0x7fff) return x - x;
  if (u.f < M_PIO4ld) {  /* 4.0.3 adaption to long double */
    if (u.i.se < 0x3fff - LDBL_MANT_DIG) {
      /* raise underflow if subnormal */
      if (u.i.se == 0) FORCE_EVAL(x*0x1p-120f);
      /* raise inexact if x!=0 */
      return 1.0 + x;
    }
    return aux_cosl(x, 0);
  }
  n = sun_rem_pio2l(x, y);
  switch (n & 3) {
    case 0:
      return aux_cosl(y[0], y[1]);
    case 1:
      return -(aux_sinl(y[0], y[1], 1));
    case 2:
      return -(aux_cosl(y[0], y[1]));
    case 3: default:
      return aux_sinl(y[0], y[1], 1);
  }
}
#endif
#endif


#ifndef __ARMCPU
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Tangent for long double arguments; this variant is compiled when long double has a
   53-bit mantissa and simply forwards to sun_tan. */
LUALIB_API long double sun_tanl (long double x) {
  return sun_tan(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
LUALIB_API long double sun_tanl (long double x) {
  long double y[2];
  unsigned n;
  union ldshape u = {x};
  u.i.se &= 0x7fff;
  if (u.i.se == 0x7fff) return x - x;
  if (u.f < M_PIO4ld) {  /* 4.0.3 adaption to long double */
    if (u.i.se < 0x3fff - LDBL_MANT_DIG/2) {
      /* raise inexact if x!=0 and underflow if subnormal */
      FORCE_EVAL(u.i.se == 0 ? x*0x1p-120f : x + 0x1p120f);
      return x;
    }
    return aux_tanl(x, 0, 0);
  }
  n = sun_rem_pio2l(x, y);
  return aux_tanl(y[0], y[1], n & 1);
}
#endif
#endif


#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients of the rational function evaluated by __invtrigl_R (64-bit mantissa
 * variant): LINVpS0..LINVpS6 form the numerator, LINVqS1..LINVqS5 the denominator
 * (whose constant term is 1.0). */
static const long double
LINVpS0 =  1.66666666666666666631e-01L,
LINVpS1 = -4.16313987993683104320e-01L,
LINVpS2 =  3.69068046323246813704e-01L,
LINVpS3 = -1.36213932016738603108e-01L,
LINVpS4 =  1.78324189708471965733e-02L,
LINVpS5 = -2.19216428382605211588e-04L,
LINVpS6 = -7.10526623669075243183e-06L,
LINVqS1 = -2.94788392796209867269e+00L,
LINVqS2 =  3.27309890266528636716e+00L,
LINVqS3 = -1.68285799854822427013e+00L,
LINVqS4 =  3.90699412641738801874e-01L,
LINVqS5 = -3.14365703596053263322e-02L;

/* pi/2 for the inverse trigonometric functions: pio2_hil is the leading value,
 * pio2_lol presumably the low-order remainder of pi/2 (TODO confirm).
 * Both have external linkage. */
const long double pio2_hil = 1.57079632679489661926L;
const long double pio2_lol = -2.50827880633416601173e-20L;

/* used in asinl() and acosl() */
/* R(x^2) is a rational approximation of (asin(x)-x)/x^3 with Remez algorithm */
#ifndef __ARMCPU
/* Rational approximation shared by sun_asinl and sun_acosl: returns P(z)/Q(z), where
 * P has no constant term (coefficients LINVpS0..LINVpS6) and Q has constant term 1
 * (coefficients LINVqS1..LINVqS5). */
static long double __invtrigl_R (long double z) {
  const long double numer =
    z*(LINVpS0 + z*(LINVpS1 + z*(LINVpS2 + z*(LINVpS3 + z*(LINVpS4 + z*(LINVpS5 + z*LINVpS6))))));
  const long double denom =
    1.0 + z*(LINVqS1 + z*(LINVqS2 + z*(LINVqS3 + z*(LINVqS4 + z*LINVqS5))));
  return numer/denom;
}
#endif
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
#ifndef __ARMCPU
/* Coefficients of the rational function evaluated by __invtrigl_R (113-bit mantissa
 * variant): LINVpS0..LINVpS9 form the numerator, LINVqS1..LINVqS9 the denominator
 * (whose constant term is 1.0). */
static const long double
LINVpS0 =  1.66666666666666666666666666666700314e-01L,
LINVpS1 = -7.32816946414566252574527475428622708e-01L,
LINVpS2 =  1.34215708714992334609030036562143589e+00L,
LINVpS3 = -1.32483151677116409805070261790752040e+00L,
LINVpS4 =  7.61206183613632558824485341162121989e-01L,
LINVpS5 = -2.56165783329023486777386833928147375e-01L,
LINVpS6 =  4.80718586374448793411019434585413855e-02L,
LINVpS7 = -4.42523267167024279410230886239774718e-03L,
LINVpS8 =  1.44551535183911458253205638280410064e-04L,
LINVpS9 = -2.10558957916600254061591040482706179e-07L,
LINVqS1 = -4.84690167848739751544716485245697428e+00L,
LINVqS2 =  9.96619113536172610135016921140206980e+00L,
LINVqS3 = -1.13177895428973036660836798461641458e+01L,
LINVqS4 =  7.74004374389488266169304117714658761e+00L,
LINVqS5 = -3.25871986053534084709023539900339905e+00L,
LINVqS6 =  8.27830318881232209752469022352928864e-01L,
LINVqS7 = -1.18768052702942805423330715206348004e-01L,
LINVqS8 =  8.32600764660522313269101537926539470e-03L,
LINVqS9 = -1.99407384882605586705979504567947007e-04L;
#endif

/* pi/2 for the inverse trigonometric functions (113-bit mantissa variant):
 * pio2_hil is the leading value, pio2_lol presumably the low-order remainder of
 * pi/2 (TODO confirm). Both have external linkage. */
const long double pio2_hil = 1.57079632679489661923132169163975140L;
const long double pio2_lol = 4.33590506506189051239852201302167613e-35L;

#ifndef __ARMCPU
/* Rational approximation shared by sun_asinl and sun_acosl: returns P(z)/Q(z), where
 * P has no constant term (coefficients LINVpS0..LINVpS9) and Q has constant term 1
 * (coefficients LINVqS1..LINVqS9). */
static long double __invtrigl_R (long double z) {
  const long double numer =
    z*(LINVpS0 + z*(LINVpS1 + z*(LINVpS2 + z*(LINVpS3 + z*(LINVpS4 + z*(LINVpS5 + z*(LINVpS6 + z*(LINVpS7  + z*(LINVpS8 + z*LINVpS9)))))))));
  const long double denom =
    1.0 + z*(LINVqS1 + z*(LINVqS2 + z*(LINVqS3 + z*(LINVqS4 + z*(LINVqS5 + z*(LINVqS6 + z*(LINVqS7 + z*(LINVqS8 + z*LINVqS9))))))));
  return numer/denom;
}
#endif
#endif

/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
 * See comments in asin.c.
 * Converted to long double by David Schultz <das@FreeBSD.ORG>.
 */

#ifndef __ARMCPU
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Arc sine for long double arguments; this variant is compiled when long double has
   a 53-bit mantissa and evaluates asin() in double precision. */
LUALIB_API long double sun_asinl (long double x) {
  const double arg = (double)x;
  return (long double)asin(arg);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
/* Bit-level helpers for sun_asinl, operating on a union ldshape u:
 * CLOSETO1(u) compares the topmost mantissa bits of u against a fixed threshold,
 * CLEARBOTTOM(u) zeroes the low-order part of the mantissa of u. */
#if LDBL_MANT_DIG == 64
#define CLOSETO1(u) (u.i.m>>56 >= 0xf7)
#define CLEARBOTTOM(u) (u.i.m &= -1ULL << 32)
#elif LDBL_MANT_DIG == 113
#define CLOSETO1(u) (u.i.top >= 0xee00)
#define CLEARBOTTOM(u) (u.i.lo = 0)
#endif

LUALIB_API long double sun_asinl (long double x) {
  union ldshape u = {x};
  long double z, r, s;
  uint16_t e = u.i.se & 0x7fff;
  int sign = u.i.se >> 15;
  if (e >= 0x3fff) {  /* |x| >= 1 or nan */
    /* asin(+-1)=+-pi/2 with inexact */
    if (x == 1 || x == -1) return x*pio2_hil + 0x1p-120f;
    return 0/(x - x);
  }
  if (e < 0x3fff - 1) {  /* |x| < 0.5 */
    if (e < 0x3fff - (LDBL_MANT_DIG + 1)/2) {
      /* return x with inexact if x!=0 */
      FORCE_EVAL(x + 0x1p120f);
      return x;
    }
    return x + x*__invtrigl_R(x*x);
  }
  /* 1 > |x| >= 0.5 */
  z = (1.0 - fabsl(x))*0.5;
  s = sqrtl(z);
  r = __invtrigl_R(z);
  if (CLOSETO1(u)) {
    x = pio2_hil - (2*(s + s*r) - pio2_lol);
  } else {
    long double f, c;
    u.f = s;
    CLEARBOTTOM(u);
    f = u.f;
    c = (z - f*f)/(s + f);
    x = 0.5*pio2_hil - (2*s*r - (pio2_lol - 2*c) - (0.5*pio2_hil - 2*f));
  }
  return sign ? -x : x;
}
#endif
#endif


#ifndef __ARMCPU
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Arc cosine for long double arguments; this variant is compiled when long double
   has a 53-bit mantissa and evaluates acos() in double precision. */
LUALIB_API long double sun_acosl (long double x) {
  const double arg = (double)x;
  return (long double)acos(arg);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
/* CLEARBOTTOM(u) zeroes the low-order part of the mantissa of the union ldshape u;
 * sun_acosl uses it to split a square root into a head and a correction term. */
#if LDBL_MANT_DIG == 64
#define CLEARBOTTOM(u) (u.i.m &= -1ULL << 32)
#elif LDBL_MANT_DIG == 113
#define CLEARBOTTOM(u) (u.i.lo = 0)
#endif

LUALIB_API long double sun_acosl (long double x) {
  union ldshape u = {x};
  long double z, s, c, f;
  uint16_t e = u.i.se & 0x7fff;
  if (e >= 0x3fff) {  /* |x| >= 1 or nan */
    if (x == 1) return 0;
    if (x == -1) return 2*pio2_hil + 0x1p-120f;
    return 0/(x - x);
  }
  if (e < 0x3fff - 1) {  /* |x| < 0.5 */
    if (e < 0x3fff - LDBL_MANT_DIG - 1) return pio2_hil + 0x1p-120f;
    return pio2_hil - (__invtrigl_R(x*x)*x - pio2_lol + x);
  }
  if (u.i.se >> 15) {  /* x < -0.5 */
    z = (1 + x)*0.5;
    s = sqrtl(z);
    return 2*(pio2_hil - (__invtrigl_R(z)*s - pio2_lol + s));
  }
  /* x > 0.5 */
  z = (1 - x)*0.5;
  s = sqrtl(z);
  u.f = s;
  CLEARBOTTOM(u);
  f = u.f;
  c = (z - f*f)/(s + f);
  return 2*(__invtrigl_R(z)*s + c + f);
}
#endif
#endif


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
 * See comments in atan.c.
 * Converted to long double by David Schultz <das@FreeBSD.ORG>.
 */
#ifndef __ARMCPU
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Arc tangent for long double arguments; this variant is compiled when long double
   has a 53-bit mantissa and evaluates atan() in double precision. */
LUALIB_API long double sun_atanl (long double x) {
  const double arg = (double)x;
  return (long double)atan(arg);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384

#if LDBL_MANT_DIG == 64
/* EXPMAN(u): packs the 15-bit exponent field of u and eight of its leading mantissa
 * bits (u.i.m>>55 & 0xff) into one integer, so that sun_atanl can compare |x|
 * against its interval boundaries with integer comparisons. */
#define EXPMAN(u) ((u.i.se & 0x7fff)<<8 | (u.i.m>>55 & 0xff))

/* Leading parts of the constants that sun_atanl adds back after argument reduction,
 * indexed by the interval id 0..3 (presumably atan(0.5), atan(1), atan(1.5) and
 * pi/2 - TODO confirm). */
static const long double latanhi[] = {
   4.63647609000806116202e-01L,
   7.85398163397448309628e-01L,
   9.82793723247329067960e-01L,
   1.57079632679489661926e+00L,
};

/* Low-order companions of latanhi[], same indexing. */
static const long double latanlo[] = {
   1.18469937025062860669e-20L,
  -1.25413940316708300586e-20L,
   2.55232234165405176172e-20L,
  -2.50827880633416601173e-20L,
};

/* Polynomial coefficients for sun_atanl; the even-indexed entries are consumed by
 * T_even, the odd-indexed ones by T_odd. */
static const long double laT[] = {
   3.33333333333333333017e-01L,
  -1.99999999999999632011e-01L,
   1.42857142857046531280e-01L,
  -1.11111111100562372733e-01L,
   9.09090902935647302252e-02L,
  -7.69230552476207730353e-02L,
   6.66661718042406260546e-02L,
  -5.88158892835030888692e-02L,
   5.25499891539726639379e-02L,
  -4.70119845393155721494e-02L,
   4.03539201366454414072e-02L,
  -2.91303858419364158725e-02L,
   1.24822046299269234080e-02L,
};

/* Horner evaluation in x of the polynomial whose coefficients are the even-indexed
   entries laT[0], laT[2], ..., laT[12]. */
static long double T_even (long double x) {
  long double acc = laT[12];
  int k;
  for (k = 10; k >= 0; k -= 2)
    acc = laT[k] + x*acc;
  return acc;
}

/* Horner evaluation in x of the polynomial whose coefficients are the odd-indexed
   entries laT[1], laT[3], ..., laT[11]. */
static long double T_odd (long double x) {
  long double acc = laT[11];
  int k;
  for (k = 9; k >= 1; k -= 2)
    acc = laT[k] + x*acc;
  return acc;
}
#elif LDBL_MANT_DIG == 113
/* EXPMAN(u): packs the 15-bit exponent field of u and the upper eight bits of
 * u.i.top into one integer, so that sun_atanl can compare |x| against its interval
 * boundaries with integer comparisons. */
#define EXPMAN(u) ((u.i.se & 0x7fff)<<8 | u.i.top>>8)

/* Leading parts of the constants that sun_atanl adds back after argument reduction,
 * indexed by the interval id 0..3 (presumably atan(0.5), atan(1), atan(1.5) and
 * pi/2 - TODO confirm). */
static const long double latanhi[] = {
   4.63647609000806116214256231461214397e-01L,
   7.85398163397448309615660845819875699e-01L,
   9.82793723247329067985710611014666038e-01L,
   1.57079632679489661923132169163975140e+00L,
};

/* Low-order companions of latanhi[], same indexing. */
static const long double latanlo[] = {
   4.89509642257333492668618435220297706e-36L,
   2.16795253253094525619926100651083806e-35L,
  -2.31288434538183565909319952098066272e-35L,
   4.33590506506189051239852201302167613e-35L,
};

/* Polynomial coefficients for sun_atanl; the even-indexed entries are consumed by
 * T_even, the odd-indexed ones by T_odd. */
static const long double laT[] = {
   3.33333333333333333333333333333333125e-01L,
  -1.99999999999999999999999999999180430e-01L,
   1.42857142857142857142857142125269827e-01L,
  -1.11111111111111111111110834490810169e-01L,
   9.09090909090909090908522355708623681e-02L,
  -7.69230769230769230696553844935357021e-02L,
   6.66666666666666660390096773046256096e-02L,
  -5.88235294117646671706582985209643694e-02L,
   5.26315789473666478515847092020327506e-02L,
  -4.76190476189855517021024424991436144e-02L,
   4.34782608678695085948531993458097026e-02L,
  -3.99999999632663469330634215991142368e-02L,
   3.70370363987423702891250829918659723e-02L,
  -3.44827496515048090726669907612335954e-02L,
   3.22579620681420149871973710852268528e-02L,
  -3.03020767654269261041647570626778067e-02L,
   2.85641979882534783223403715930946138e-02L,
  -2.69824879726738568189929461383741323e-02L,
   2.54194698498808542954187110873675769e-02L,
  -2.35083879708189059926183138130183215e-02L,
   2.04832358998165364349957325067131428e-02L,
  -1.54489555488544397858507248612362957e-02L,
   8.64492360989278761493037861575248038e-03L,
  -2.58521121597609872727919154569765469e-03L,
};

/* Horner evaluation in x of the polynomial whose coefficients are the even-indexed
   entries laT[0], laT[2], ..., laT[22]. */
static long double T_even (long double x) {
  long double acc = laT[22];
  int k;
  for (k = 20; k >= 0; k -= 2)
    acc = laT[k] + x*acc;
  return acc;
}

/* Horner evaluation in x of the polynomial whose coefficients are the odd-indexed
   entries laT[1], laT[3], ..., laT[23]. */
static long double T_odd (long double x) {
  long double acc = laT[23];
  int k;
  for (k = 21; k >= 1; k -= 2)
    acc = laT[k] + x*acc;
  return acc;
}
#endif

LUALIB_API long double sun_atanl (long double x) {
  union ldshape u = {x};
  long double w, s1, s2, z;
  int id;
  unsigned e = u.i.se & 0x7fff;
  unsigned sign = u.i.se >> 15;
  unsigned expman;
  if (e >= 0x3fff + LDBL_MANT_DIG + 1) {  /* if |x| is large, atan(x)~=pi/2 */
    if (tools_fpisnanl(x)) return x;
    return sign ? -latanhi[3] : latanhi[3];
  }
  /* Extract the exponent and the first few bits of the mantissa. */
  expman = EXPMAN(u);
  if (expman < ((0x3fff - 2) << 8) + 0xc0) {   /* |x| < 0.4375 */
    if (e < 0x3fff - (LDBL_MANT_DIG + 1)/2) {  /* if |x| is small, atanl(x)~=x */
      /* raise underflow if subnormal */
      if (e == 0) FORCE_EVAL((float)x);
      return x;
    }
    id = -1;
  } else {
    x = fabsl(x);
    if (expman < (0x3fff << 8) + 0x30) {  /* |x| < 1.1875 */
      if (expman < ((0x3fff - 1) << 8) + 0x60) {  /*  7/16 <= |x| < 11/16 */
        id = 0;
        x = (2.0*x - 1.0)/(2.0 + x);
      } else {  /* 11/16 <= |x| < 19/16 */
        id = 1;
        x = (x - 1.0)/(x + 1.0);
      }
    } else {
      if (expman < ((0x3fff + 1) << 8) + 0x38) {  /* |x| < 2.4375 */
        id = 2;
        x = (x - 1.5)/(1.0 + 1.5*x);
      } else {  /* 2.4375 <= |x| */
        id = 3;
        x = -1.0/x;
      }
    }
  }
  /* end of argument reduction */
  z = x*x;
  w = z*z;
  /* break sum aT[i]z**(i+1) into odd and even poly */
  s1 = z*T_even(w);
  s2 = w*T_odd(w);
  if (id < 0) return x - x*(s1 + s2);
  z = latanhi[id] - ((x*(s1 + s2) - latanlo[id]) - x);
  return sign ? -z : z;
}
#endif
#endif


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 */
/*
 * See comments in atan2.c.
 * Converted to long double by David Schultz <das@FreeBSD.ORG>.
 */
#ifndef __ARMCPU
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Two-argument arc tangent for long double arguments; this variant is compiled when
   long double has a 53-bit mantissa and evaluates atan2() in double precision. */
LUALIB_API long double sun_atan2l (long double y, long double x) {
  const double ordinate = (double)y;
  const double abscissa = (double)x;
  return (long double)atan2(ordinate, abscissa);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
/* Two-argument arc tangent for long double arguments (64- or 113-bit mantissa
 * variant). NaN arguments return x+y and x == 1 is forwarded to sun_atanl(y);
 * zeros, infinities and extreme magnitude ratios are answered from the sign bits
 * alone, everything else goes through sun_atanl(|y/x|) followed by a quadrant fix. */
LUALIB_API long double sun_atan2l (long double y, long double x) {
  union ldshape ux, uy;
  long double angle;
  int xneg, yneg, ex, ey;
  if (tools_fpisnanl(x) || tools_fpisnanl(y)) return x+y;
  if (x == 1.0L) return sun_atanl(y);  /* 4.2.4 tweak */
  ux.f = x;
  uy.f = y;
  ex = ux.i.se & 0x7fff;
  ey = uy.i.se & 0x7fff;
  xneg = ux.i.se >> 15;  /* sign bit of x */
  yneg = uy.i.se >> 15;  /* sign bit of y */
  if (y == 0) {
    if (!xneg) return y;                        /* atan(+-0,+anything)=+-0 */
    return yneg ? -2*pio2_hil : 2*pio2_hil;     /* atan(+-0,-anything)=+-pi */
  }
  if (x == 0) return yneg ? -pio2_hil : pio2_hil;
  if (ex == 0x7fff) {  /* x is infinite */
    if (ey == 0x7fff) {  /* and so is y */
      if (xneg) return yneg ? -1.5*pio2_hil : 1.5*pio2_hil;  /* atan(+-INF,-INF) */
      return yneg ? -pio2_hil/2 : pio2_hil/2;                /* atan(+-INF,+INF) */
    }
    if (xneg) return yneg ? -2*pio2_hil : 2*pio2_hil;        /* atan(+-...,-INF) */
    return yneg ? -0.0 : 0.0;                                /* atan(+-...,+INF) */
  }
  if (ex + 120 < ey || ey == 0x7fff) return yneg ? -pio2_hil : pio2_hil;
  /* angle = atan(|y/x|) without spurious underflow */
  if (xneg && ey + 120 < ex) {  /* |y/x| < 0x1p-120, x<0 */
    angle = 0.0;
  } else {
    angle = sun_atanl(fabsl(y/x));  /* 3.1.3 tweak */
  }
  if (!xneg) return yneg ? -angle : angle;                   /* atan(+-,+) */
  if (!yneg) return 2*pio2_hil - (angle - 2*pio2_lol);       /* atan(+,-) */
  return (angle - 2*pio2_lol) - 2*pio2_hil;                  /* atan(-,-) */
}
#endif
#endif


/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 *      Relative error logarithm
 *      Natural logarithm of 1+x, long double precision
 *
 *
 * SYNOPSIS:
 *
 * long double x, y, log1pl();
 *
 * y = log1pl( x );
 *
 *
 * DESCRIPTION:
 *
 * Returns the base e (2.718...) logarithm of 1+x.
 *
 * The argument 1+x is separated into its exponent and fractional
 * parts.  If the exponent is between -1 and +1, the logarithm
 * of the fraction is approximated by
 *
 *     log(1+x) = x - 0.5 x^2 + x^3 P(x)/Q(x).
 *
 * Otherwise, setting  z = 2(x-1)/(x+1),
 *
 *     log(x) = z + z^3 P(z)/Q(z).
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE     -1.0, 9.0    100000      8.2e-20    2.5e-20
 *
 * 4 % slower than the one available in GCC
 */

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double tools_log1pl (long double x) {
  return log1p(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients for log(1+x) = x - x^2/2 + x^3 P(x)/Q(x)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 2.32e-20
 */

#ifndef __ARMCPU

/* Numerator coefficients for the x^3 P(x)/Q(x) term of tools_log1pl
 * (passed to __polevll with degree 6). */
static const long double log1plP[] = {
   4.5270000862445199635215E-5L,
   4.9854102823193375972212E-1L,
   6.5787325942061044846969E0L,
   2.9911919328553073277375E1L,
   6.0949667980987787057556E1L,
   5.7112963590585538103336E1L,
   2.0039553499201281259648E1L,
};

/* Matching denominator coefficients (passed to __p1evll with degree 6);
 * the leading coefficient 1.0 is not stored. */
static const long double log1plQ[] = {
  /* 1.0000000000000000000000E0,*/
   1.5062909083469192043167E1L,
   8.3047565967967209469434E1L,
   2.2176239823732856465394E2L,
   3.0909872225312059774938E2L,
   2.1642788614495947685003E2L,
   6.0118660497603843919306E1L,
};

/* Coefficients for log(x) = z + z^3 P(z^2)/Q(z^2),
 * where z = 2(x-1)/(x+1)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 6.16e-22
 */
static const long double log1plR[4] = {
   1.9757429581415468984296E-3L,
  -7.1990767473014147232598E-1L,
   1.0777257190312272158094E1L,
  -3.5717684488096787370998E1L,
};

/* Denominator for the log1plR numerator; the leading coefficient 1.0 is not stored,
 * so only three of the four array slots are initialised explicitly. */
static const long double log1plS[4] = {
  /* 1.00000000000000000000E0L,*/
  -2.6201045551331104417768E1L,
   1.9361891836232102174846E2L,
  -4.2861221385716144629696E2L,
};

/* The two constants that tools_log1pl multiplies with the binary exponent
 * (their sum is approximately log(2)). */
static const long double log1plC1 = 6.9314575195312500000000E-1L;
static const long double log1plC2 = 1.4286068203094172321215E-6L;

#else

/* Double-precision copies of the tools_log1pl coefficient tables, used when
 * __ARMCPU is defined (they are passed to polevl/p1evl instead of the long double
 * evaluators). */

/* numerator of the x^3 P(x)/Q(x) term, degree 6 */
static double log1plP[] = {
   4.5270000862445199635215E-5,
   4.9854102823193375972212E-1,
   6.5787325942061044846969E0,
   2.9911919328553073277375E1,
   6.0949667980987787057556E1,
   5.7112963590585538103336E1,
   2.0039553499201281259648E1,
};

/* matching denominator; the leading coefficient 1.0 is not stored */
static double log1plQ[] = {
  /* 1.0000000000000000000000E0,*/
   1.5062909083469192043167E1,
   8.3047565967967209469434E1,
   2.2176239823732856465394E2,
   3.0909872225312059774938E2,
   2.1642788614495947685003E2,
   6.0118660497603843919306E1,
};

/* numerator of the z^3 P(z^2)/Q(z^2) term, degree 3 */
static double log1plR[4] = {
   1.9757429581415468984296E-3,
  -7.1990767473014147232598E-1,
   1.0777257190312272158094E1,
  -3.5717684488096787370998E1,
};

/* matching denominator; the leading coefficient 1.0 is not stored, so only three
 * of the four array slots are initialised explicitly */
static double log1plS[4] = {
  /* 1.00000000000000000000E0,*/
  -2.6201045551331104417768E1,
   1.9361891836232102174846E2,
  -4.2861221385716144629696E2,
};

/* the two constants multiplied with the binary exponent (their sum is
 * approximately log(2)) */
static const double log1plC1 = 6.9314575195312500000000E-1;
static const double log1plC2 = 1.4286068203094172321215E-6;

#endif

/* log(1 + xm1) for long double arguments (64-bit mantissa variant).
 * NaN, +infinity and zero are returned unchanged; xm1 == -1 yields -infinity and
 * xm1 < -1 a NaN. Otherwise 1 + xm1 is split with frexpl() and one of two rational
 * approximations is chosen depending on the size of the binary exponent. */
LUALIB_API long double tools_log1pl (long double xm1) {
  long double t, q, acc;
  int expo;
  if (tools_fpisnanl(xm1) || xm1 == INFINITY || xm1 == 0.0) return xm1;
  t = xm1 + 1.0;
  /* Test for domain errors.  */
  if (t <= 0.0) {
    if (t == 0.0) return -1/(t*t); /* -inf with divbyzero */
    return 0/0.0f; /* nan with invalid */
  }
  /* Separate mantissa from exponent. Use frexp so that denormal numbers will be handled properly. */
  t = frexpl(t, &expo);
  if (expo >= -2 && expo <= 2) {
    /* logarithm using log(1+x) = x - .5x**2 + x**3 P(x)/Q(x) */
    if (t < SQRTH) {
      expo -= 1;
      t = (expo != 0) ? 2.0*t - 1.0 : xm1;
    } else {
      t = (expo != 0) ? t - 1.0 : xm1;
    }
    acc = t*t;
#ifdef __ARMCPU
    q = t*(acc*polevl(t, log1plP, 6)/p1evl(t, log1plQ, 6));
#else
    q = t*(acc*__polevll(t, log1plP, 6)/__p1evll(t, log1plQ, 6));
#endif
    q += expo*log1plC2;
    acc = q - 0.5*acc;
    acc += t;
    acc += expo*log1plC1;
    return acc;
  }
  /* logarithm using log(x) = z + z^3 P(z)/Q(z), where z = 2(x-1)/(x+1) */
  if (t < SQRTH) { /* 2(2x-1)/(2x+1) */
    expo -= 1;
    acc = t - 0.5;
    q = 0.5*acc + 0.5;
  } else { /*  2 (x-1)/(x+1)   */
    acc = t - 0.5;
    acc -= 0.5;
    q = 0.5*t  + 0.5;
  }
  t = acc/q;
  acc = t*t;
#ifdef __ARMCPU
  acc = t*(acc*polevl(acc, log1plR, 3)/p1evl(acc, log1plS, 3));
#else
  acc = t*(acc*__polevll(acc, log1plR, 3)/__p1evll(acc, log1plS, 3));
#endif
  acc += expo*log1plC2;
  acc += t;
  acc += expo*log1plC1;
  return acc;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile */
/* log(1 + x) for the 113-bit mantissa configuration: the argument is narrowed and
   log1p() is evaluated in double precision only. */
LUALIB_API long double tools_log1pl (long double x) {
  const double arg = (double)x;
  return (long double)log1p(arg);  /* the SunPro version is 15 % slower */
}
#endif


/* The following MUSL version of sinhl is slightly slower than the one in GCC */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Hyperbolic sine for long double arguments; this variant is compiled when long
   double has a 53-bit mantissa and evaluates sinh() in double precision. */
LUALIB_API long double tools_sinhl (long double x) {
  const double arg = (double)x;
  return (long double)sinh(arg);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
LUALIB_API long double tools_sinhl (long double x) {
  union ldshape u = {x};
  unsigned ex = u.i.se & 0x7fff;
  long double h, t, absx;
  h = 0.5;
  if (u.i.se & 0x8000) h = -h;
  /* |x| */
  u.i.se = ex;
  absx = u.f;
  /* |x| < log(LDBL_MAX) */
  if (ex < 0x3fff+13 || (ex == 0x3fff+13 && u.i.m>>32 < 0xb17217f7)) {
    t = expm1l(absx);
    if (ex < 0x3fff) {
      if (ex < 0x3fff-32) return x;
      return h*(2*t - t*t/(1 + t));
    }
    return h*(t + t/(t + 1));
  }
  /* |x| > log(LDBL_MAX) or nan */
  t = expl(0.5*absx);
  return h*t*t;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile */
LUALIB_API long double tools_sinhl (long double x) {
  /* stop-gap for 113-bit long doubles: only the double routine sinh is used */
  long double result = sinh(x);
  return result;
}
#endif


/* The following MUSL version of sinhl is slightly slower than the one in GCC */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double tools_coshl (long double x) {
  /* long double has the double format in this branch (53-bit mantissa): use the double routine */
  long double result = cosh(x);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
LUALIB_API long double tools_coshl (long double x) {
  union ldshape u = {x};
  unsigned ex = u.i.se & 0x7fff;
  uint32_t w;
  long double t;
  /* |x| */
  u.i.se = ex;
  x = u.f;
  w = u.i.m >> 32;
  /* |x| < log(2) */
  if (ex < 0x3fff-1 || (ex == 0x3fff-1 && w < 0xb17217f7)) {
    if (ex < 0x3fff-32) {
      FORCE_EVAL(x + 0x1p120f);
      return 1;
    }
    t = expm1l(x);
    return 1 + t*t/(2*(1 + t));
  }
  /* |x| < log(LDBL_MAX) */
  if (ex < 0x3fff+13 || (ex == 0x3fff+13 && w < 0xb17217f7)) {
    t = expl(x);
    return 0.5*(t + 1/t);
  }
  /* |x| > log(LDBL_MAX) or nan */
  t = expl(0.5*x);
  return 0.5*t*t;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile */
LUALIB_API long double tools_coshl (long double x) {
  /* stop-gap for 113-bit long doubles: only the double routine cosh is used */
  long double result = cosh(x);
  return result;
}
#endif


#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double tools_tanhl (long double x) {
  /* long double has the double format in this branch (53-bit mantissa): use the double routine */
  long double result = tanh(x);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
LUALIB_API long double tools_tanhl (long double x) {
  union ldshape u = {x};
  unsigned ex = u.i.se & 0x7fff;
  unsigned sign = u.i.se & 0x8000;
  uint32_t w;
  long double t;
  /* x = |x| */
  u.i.se = ex;
  x = u.f;
  w = u.i.m >> 32;
  if (ex > 0x3ffe || (ex == 0x3ffe && w > 0x8c9f53d5)) {
    /* |x| > log(3)/2 ~= 0.5493 or nan */
    if (ex >= 0x3fff+5) {
      /* |x| >= 32 */
      t = 1 + 0/(x + 0x1p-120f);
    } else {
      t = expm1l(2*x);
      t = 1 - 2/(t + 2);
    }
  } else if (ex > 0x3ffd || (ex == 0x3ffd && w > 0x82c577d4)) {
    /* |x| > log(5/3)/2 ~= 0.2554 */
    t = expm1l(2*x);
    t = t/(t + 2);
  } else {
    /* |x| is small */
    t = expm1l(-2*x);
    t = -t/(t + 2);
  }
  return sign ? -t : t;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile */
LUALIB_API long double tools_tanhl (long double x) {
  /* stop-gap for 113-bit long doubles: only the double routine tanh is used */
  long double result = tanh(x);
  return result;
}
#endif


/* Taken from MUSL-1.2.3, src/math/asinhl.c */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double sun_asinhl (long double x) {
  /* long double has the double format in this branch: delegate to the double routine sun_asinh */
  long double result = sun_asinh(x);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */
LUALIB_API long double sun_asinhl (long double x) {
  union ldshape u = {x};
  unsigned e = u.i.se & 0x7fff;
  unsigned s = u.i.se >> 15;
  /* |x| */
  u.i.se = e;
  x = u.f;
  if (e >= 0x3fff + 32) {  /* |x| >= 0x1p32 or inf or nan */
    x = tools_logl(x) + 0.693147180559945309417232121458176568L;
  } else if (e >= 0x3fff + 1) {  /* |x| >= 2 */
    x = tools_logl(2*x + 1/(sqrtl(x*x + 1) + x));
  } else if (e >= 0x3fff - 32) {  /* |x| >= 0x1p-32 */
    x = tools_log1pl(x + x*x/(sqrtl(x*x + 1) + 1));  /* XXX change to sun version */
  } else {  /* |x| < 0x1p-32, raise inexact if x!=0 */
    FORCE_EVAL(x + 0x1p120f);
  }
  return s ? -x : x;
}
/* #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 */
#else
/* TODO: broken implementation to make things compile */
LUALIB_API long double sun_asinhl (long double x) {
  /* fallback for all remaining long double formats: only the double routine sun_asinh is used */
  long double result = sun_asinh(x);
  return result;
}
#endif

/* Taken from MUSL-1.2.3, src/math/acoshl.c */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double sun_acoshl (long double x) {
  /* long double has the double format in this branch: delegate to the double routine sun_acosh */
  long double result = sun_acosh(x);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* acosh(x) = log(x + sqrt(x*x-1)) */
LUALIB_API long double sun_acoshl (long double x) {
  union ldshape u = {x};
  int e = u.i.se & 0x7fff;
  if (e < 0x3fff + 1)  /* |x| < 2, invalid if x < 1 or nan */
    return tools_log1pl(x - 1 + sqrtl((x - 1)*(x - 1) + 2*(x - 1)));
  if (e < 0x3fff + 32)  /* |x| < 0x1p32 */
    return tools_logl(2*x - 1/(x + sqrtl(x*x - 1)));
  return tools_logl(x) + 0.693147180559945309417232121458176568L;
}
/* #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 */
#else
/* TODO: broken implementation to make things compile */
LUALIB_API long double sun_acoshl (long double x) {
  /* fallback for all remaining long double formats: only the double routine sun_acosh is used */
  long double result = sun_acosh(x);
  return result;
}
#endif


/* Taken from MUSL-1.2.3, src/math/atanhl.c */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double sun_atanhl (long double x) {
  /* long double has the double format in this branch: delegate to the double routine sun_atanh; 2.40.0 tweak */
  long double result = sun_atanh(x);
  return result;
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */
/* Inverse hyperbolic tangent for long doubles with a 15-bit exponent field.
 * The computation is done on |x| and the sign of the argument is restored at the end:
 *   |x| < 2^-(LDBL_MANT_DIG/2) : |x| itself is the result (atanh(x) ~= x + x^3/3);
 *   |x| < 0.5                  : 0.5*log1p(2|x| + 2|x|^2/(1 - |x|));
 *   otherwise                  : 0.5*log1p(2*(|x|/(1 - |x|))).
 * The thresholds are compared against the biased exponent, so they must use the long double
 * bias 0x3fff. The former 0x3ff (the bias of a double) made the first two cases unreachable
 * for all but the very smallest arguments. */
LUALIB_API long double sun_atanhl (long double x) {
  union ldshape u = {x};
  unsigned e = u.i.se & 0x7fff;  /* biased exponent without sign */
  unsigned s = u.i.se >> 15;     /* sign bit */
  /* |x| */
  u.i.se = e;
  x = u.f;
  if (e < 0x3fff - 1) {  /* |x| < 0.5 */
    if (e < 0x3fff - LDBL_MANT_DIG/2) {  /* handle underflow */
      if (e == 0) FORCE_EVAL((float)x);  /* subnormal or zero: force the narrowing conversion to be evaluated */
    } else {  /* up to 1.7ulp error */
      x = 0.5*tools_log1pl(2*x + 2*x*x/(1 - x));
    }
  } else {  /* avoid overflow */
    x = 0.5*tools_log1pl(2*(x/(1 - x)));
  }
  return s ? -x : x;
}
#endif


/* Inverse of the complementary error function in double precision, written by Takuya OOURA, taken from
   http://www.kurims.kyoto-u.ac.jp/~ooura/gamerf.html, file gamerf.tar.gz, function dierfc.c; 2.21.5 */
/* Returns x such that erfc(x) = y.
 * y == 1 returns exactly 0. Arguments y > 1 are mapped to z = 2 - y and the result is negated,
 * i.e. the symmetry erfc(-x) = 2 - erfc(x) is used. A first estimate of x is built from
 * log(z) and fixed polynomial coefficients and is then corrected once at the end.
 * No range check is made on y. */
LUALIB_API double tools_inverfc (double y) {
  double s, t, u, w, x, z;
  if (y == 1) return 0;  /* prevent result close to zero */
  z = y;
  if (y > 1) z = 2 - y;  /* fold the upper half onto z < 1; the sign is fixed on return */
  w = 0.916461398268964 - sun_log(z);
  u = sqrt(w);
  s = (sun_log(u) + 0.488826640273108)/w;
  t = 1 / (u + 0.231729200323405);
  /* first estimate of the result */
  x = u*(1 - s*(s*0.124610454613712 + 0.5)) -
    ((((-0.0728846765585675*t + 0.269999308670029)*t +
    0.150689047360223)*t      + 0.116065025341614)*t +
    0.499999303439796)*t;
  t = 3.97886080735226/(x + 3.97886080735226);
  u = t - 0.5;
  /* polynomial in u, evaluated in two parts; the second statement continues with the value of the first */
  s = (((((((((                 0.00112648096188977922*u +
    1.05739299623423047e-4)*u - 0.00351287146129100025)*u -
    7.71708358954120939e-4)*u + 0.00685649426074558612)*u +
    0.00339721910367775861)*u - 0.011274916933250487)*u -
    0.0118598117047771104)*u  + 0.0142961988697898018)*u +
    0.0346494207789099922)*u  + 0.00220995927012179067;
  s = ((((((((((((s*u         - 0.0743424357241784861)*u -
    0.105872177941595488)*u   + 0.0147297938331485121)*u +
    0.316847638520135944)*u   + 0.713657635868730364)*u +
    1.05375024970847138)*u    + 1.21448730779995237)*u +
    1.16374581931560831)*u    + 0.956464974744799006)*u +
    0.686265948274097816)*u   + 0.434397492331430115)*u +
    0.244044510593190935)*t -
    /* 2.21.7: replaces z*exp(x*x - 0.120782237635245222) = z*exp(x^2)/exp(0.120782237635245222);
       the divisor 1.1283791670955... is approximately 2/sqrt(pi); tools_expx2(x, 1) presumably returns exp(x^2) - confirm */
    z*tools_expx2(x, 1)/1.128379167095512573520033;
  x += s*(x*s + 1);  /* apply the correction to the first estimate */
  return (y > 1) ? -x : x;
}


/* Inverse of error function in double precision, 2.21.5 */
/* Inverse error function: obtained from the inverse complementary error function as
   -tools_inverfc(x + 1); a zero result is always returned as positive zero. */
LUALIB_API double tools_inverf (double x) {
  double r = -tools_inverfc(x + 1);
  if (r == 0) return 0;  /* also true for negative zero, which is replaced by +0 */
  return r;
}


#ifndef __ARMCPU
/* Taken from https://github.com/lakshayg/erfinv, written by lakshayg, MIT licence, 3.3.5 */
/* Evaluates c[0] + c[1]*r + ... + c[7]*r^7 by Horner's rule, starting with the highest coefficient. */
static long double inverfl_horner7 (const long double *c, long double r) {
  long double acc = c[7];
  int i;
  for (i = 6; i >= 0; i--) acc = acc*r + c[i];
  return acc;
}

/* Inverse error function in long double precision.
 * Arguments outside [-1, 1] give AGN_NAN, +1 gives HUGE_VAL and -1 gives -HUGE_VAL.
 * Otherwise one of three rational approximations (numerator/denominator of degree 7) is used:
 *   |x| <= 0.85               : tables A/B in r = 0.180625 - 0.25*x^2, result x*num/den;
 *   r = sqrt(ln 2 - log(1 - |x|)) <= 5 : tables C/D in r - 1.6;
 *   otherwise                 : tables E/F in r - 5;
 * in the last two cases the sign of x is copied onto num/den. */
LUALIB_API long double tools_inverfl (long double x) {
  static const long double LN2L = 6.931471805599453094172321214581e-1L;
  static const long double A[8] = {
    1.1975323115670912564578e0L, 4.7072688112383978012285e1L,
    6.9706266534389598238465e2L, 4.8548868893843886794648e3L,
    1.6235862515167575384252e4L, 2.3782041382114385731252e4L,
    1.1819493347062294404278e4L, 8.8709406962545514830200e2L
  };
  static const long double B[8] = {
    1.0000000000000000000e0L, 4.2313330701600911252e1L,
    6.8718700749205790830e2L, 5.3941960214247511077e3L,
    2.1213794301586595867e4L, 3.9307895800092710610e4L,
    2.8729085735721942674e4L, 5.2264952788528545610e3L
  };
  static const long double C[8] = {
    1.42343711074968357734e0L,  4.63033784615654529590e0L,
    5.76949722146069140550e0L,  3.64784832476320460504e0L,
    1.27045825245236838258e0L,  2.41780725177450611770e-1L,
    2.27238449892691845833e-2L, 7.74545014278341407640e-4L
  };
  static const long double D[8] = {
    1.4142135623730950488016887e0L,  2.9036514445419946173133295e0L,
    2.3707661626024532365971225e0L,  9.7547832001787427186894837e-1L,
    2.0945065210512749128288442e-1L, 2.1494160384252876777097297e-2L,
    7.7441459065157709165577218e-4L, 1.4859850019840355905497876e-9L
  };
  static const long double E[8] = {
    6.65790464350110377720e0L,  5.46378491116411436990e0L,
    1.78482653991729133580e0L,  2.96560571828504891230e-1L,
    2.65321895265761230930e-2L, 1.24266094738807843860e-3L,
    2.71155556874348757815e-5L, 2.01033439929228813265e-7L
  };
  static const long double F[8] = {
    1.414213562373095048801689e0L,  8.482908416595164588112026e-1L,
    1.936480946950659106176712e-1L, 2.103693768272068968719679e-2L,
    1.112800997078859844711555e-3L, 2.611088405080593625138020e-5L,
    2.010321207683943062279931e-7L, 2.891024605872965461538222e-15L
  };
  long double r, num, den, abs_x;
  /* domain checks and the two poles */
  if (x < -1.0L || x > 1.0L) return AGN_NAN;
  if (x == 1.0L) return HUGE_VAL;
  if (x == -1.0L) return -HUGE_VAL;
  abs_x = fabsl(x);
  if (abs_x <= 0.85L) {  /* central region */
    r = 0.180625L - 0.25L*x*x;
    num = inverfl_horner7(A, r);
    den = inverfl_horner7(B, r);
    return x*num/den;
  }
  /* tails */
  r = sqrtl(LN2L - tools_logl(1.0L - abs_x));
  if (r <= 5.0L) {
    r -= 1.6L;
    num = inverfl_horner7(C, r);
    den = inverfl_horner7(D, r);
  } else {
    r -= 5.0L;
    num = inverfl_horner7(E, r);
    den = inverfl_horner7(F, r);
  }
  return sun_copysignl(num/den, x);
}
#endif


/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_erfl.c
 * taken from: https://chromium.googlesource.com/external/github.com/kripken/emscripten/+/refs/tags/1.37.5/system/lib/libc/musl/src/math */
/*
  * ====================================================
  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  *
  * Developed at SunPro, a Sun Microsystems, Inc. business.
  * Permission to use, copy, modify, and distribute this
  * software is freely granted, provided that this notice
  * is preserved.
  * ====================================================
  */
/*
  * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
/* double erf(double x)
  * double erfc(double x)
  *                           x
  *                    2      |\
  *     erf(x)  =  ---------  | exp(-t*t)dt
  *                 sqrt(pi) \|
  *                           0
  *
  *     erfc(x) =  1 - erf(x)
  *
  *  Note that
  *              erf(-x) = -erf(x)
  *              erfc(-x) = 2 - erfc(x)
  *
  * Method:
  * 1. For |x| in [0, 0.84375]
  * erf(x) = x + x*R(x^2)
  * erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
  * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
  * Remark. The formula is derived by noting
  * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
  * and that
  * 2/sqrt(pi) = 1.128379167095512573896158903121545171688
  * is close to one. The interval is chosen because the fix
  * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
  * near 0.6174), and by some experiment, 0.84375 is chosen to
  * guarantee the error is less than one ulp for erf.
  *
  * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
  * c = 0.84506291151 rounded to single (24 bits)
  * erf(x) = sign(x) * (c + P1(s)/Q1(s))
  * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
  * 1+(c+P1(s)/Q1(s)) if x < 0
  * Remark: here we use the taylor series expansion at x=1.
  * erf(1+s) = erf(1) + s*Poly(s)
  * = 0.845.. + P1(s)/Q1(s)
  * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
  *
  * 3. For x in [1.25,1/0.35(~2.857143)],
  * erfc(x) = (1/x)*exp(-x*x-0.5625+R1(z)/S1(z))
  * z=1/x^2
  * erf(x) = 1 - erfc(x)
  *
  * 4. For x in [1/0.35,107]
  * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
  * = 2.0 - (1/x)*exp(-x*x-0.5625+R2(z)/S2(z))
  * if -6.666<x<0
  * = 2.0 - tiny (if x <= -6.666)
  * z=1/x^2
  * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
  * erf(x) = sign(x)*(1.0 - tiny)
  * Note1:
  * To compute exp(-x*x-0.5625+R/S), let s be a single
  * precision number and s := x; then
  * -x*x = -s*s + (s-x)*(s+x)
  * exp(-x*x-0.5625+R/S) =
  * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
  * Note2:
  * Here 4 and 5 make use of the asymptotic series
  * exp(-x*x)
  * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
  * x*sqrt(pi)
  *
  * 5. For inf > x >= 107
  * erf(x) = sign(x) *(1 - tiny) (raise inexact)
  * erfc(x) = tiny*tiny (raise underflow) if x > 0
  * = 2 - tiny if x<0
  *
  * 7. Special case:
  * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
  * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
  * erfc/erf(NaN) is NaN
  */

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* forward declarations: the double-precision routines are defined further down in this file */
LUALIB_API double sun_erf (double x);
LUALIB_API double sun_erfc (double x);

/* long double has the double format in this branch: erf is delegated to the double routine */
long double sun_erfl (long double x) {
  long double result = sun_erf(x);
  return result;
}

/* same delegation for the complementary error function */
long double sun_erfcl (long double x) {
  long double result = sun_erfc(x);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficient tables for the long double erf/erfc routines below (erfc1, erfc2, sun_erfl, sun_erfcl).
   In the q*[] and s*[] denominator tables the leading coefficient 1 is not stored; the evaluation
   code adds the plain argument at that position instead. */
static const long double
/* constant c of the method description: 0.84506291151 rounded to single precision */
erx = 0.845062911510467529296875L,

/* Coefficients for approximation to erf on [0,0.84375] */
/* 8 * (2/sqrt(pi) - 1) */
efx8 = 1.0270333367641005911692712249723613735048E0L,
pp[6] = {
  1.122751350964552113068262337278335028553E6L,
  -2.808533301997696164408397079650699163276E6L,
  -3.314325479115357458197119660818768924100E5L,
  -6.848684465326256109712135497895525446398E4L,
  -2.657817695110739185591505062971929859314E3L,
  -1.655310302737837556654146291646499062882E2L,
},
qq[6] = {
  8.745588372054466262548908189000448124232E6L,
  3.746038264792471129367533128637019611485E6L,
  7.066358783162407559861156173539693900031E5L,
  7.448928604824620999413120955705448117056E4L,
  4.511583986730994111992253980546131408924E3L,
  1.368902937933296323345610240009071254014E2L,
  /* 1.000000000000000000000000000000000000000E0 */
},

/* Coefficients for approximation to erf in [0.84375,1.25] */
/* erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)
  -0.15625 <= x <= +.25
  Peak relative error 8.5e-22 */
pa[8] = {
  -1.076952146179812072156734957705102256059E0L,
  1.884814957770385593365179835059971587220E2L,
  -5.339153975012804282890066622962070115606E1L,
  4.435910679869176625928504532109635632618E1L,
  1.683219516032328828278557309642929135179E1L,
  -2.360236618396952560064259585299045804293E0L,
  1.852230047861891953244413872297940938041E0L,
  9.394994446747752308256773044667843200719E-2L,
},
qa[7] = {
  4.559263722294508998149925774781887811255E2L,
  3.289248982200800575749795055149780689738E2L,
  2.846070965875643009598627918383314457912E2L,
  1.398715859064535039433275722017479994465E2L,
  6.060190733759793706299079050985358190726E1L,
  2.078695677795422351040502569964299664233E1L,
  4.641271134150895940966798357442234498546E0L,
  /* 1.000000000000000000000000000000000000000E0 */
},

/* Coefficients for approximation to erfc in [1.25,1/0.35] */
/* erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))
  1/2.85711669921875 < 1/x < 1/1.25
  Peak relative error 3.1e-21 */
ra[] = {
  1.363566591833846324191000679620738857234E-1L,
  1.018203167219873573808450274314658434507E1L,
  1.862359362334248675526472871224778045594E2L,
  1.411622588180721285284945138667933330348E3L,
  5.088538459741511988784440103218342840478E3L,
  8.928251553922176506858267311750789273656E3L,
  7.264436000148052545243018622742770549982E3L,
  2.387492459664548651671894725748959751119E3L,
  2.220916652813908085449221282808458466556E2L,
},
sa[] = {
  -1.382234625202480685182526402169222331847E1L,
  -3.315638835627950255832519203687435946482E2L,
  -2.949124863912936259747237164260785326692E3L,
  -1.246622099070875940506391433635999693661E4L,
  -2.673079795851665428695842853070996219632E4L,
  -2.880269786660559337358397106518918220991E4L,
  -1.450600228493968044773354186390390823713E4L,
  -2.874539731125893533960680525192064277816E3L,
  -1.402241261419067750237395034116942296027E2L,
  /* 1.000000000000000000000000000000000000000E0 */
},

/* Coefficients for approximation to erfc in [1/.35,6.6666259765625] */
/* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))
  1/6.6666259765625 < 1/x < 1/2.85711669921875
  Peak relative error 4.2e-22 */
rb[] = {
  -4.869587348270494309550558460786501252369E-5L,
  -4.030199390527997378549161722412466959403E-3L,
  -9.434425866377037610206443566288917589122E-2L,
  -9.319032754357658601200655161585539404155E-1L,
  -4.273788174307459947350256581445442062291E0L,
  -8.842289940696150508373541814064198259278E0L,
  -7.069215249419887403187988144752613025255E0L,
  -1.401228723639514787920274427443330704764E0L,
},
sb[] = {
  4.936254964107175160157544545879293019085E-3L,
  1.583457624037795744377163924895349412015E-1L,
  1.850647991850328356622940552450636420484E0L,
  9.927611557279019463768050710008450625415E0L,
  2.531667257649436709617165336779212114570E1L,
  2.869752886406743386458304052862814690045E1L,
  1.182059497870819562441683560749192539345E1L,
  /* 1.000000000000000000000000000000000000000E0 */
},
/* Coefficients for approximation to erfc in [6.6666259765625,107] */
/* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))
  1/107 <= 1/x <= 1/6.6666259765625
  Peak relative error 1.1e-21 */
rc[] = {
  -8.299617545269701963973537248996670806850E-5L,
  -6.243845685115818513578933902532056244108E-3L,
  -1.141667210620380223113693474478394397230E-1L,
  -7.521343797212024245375240432734425789409E-1L,
  -1.765321928311155824664963633786967602934E0L,
  -1.029403473103215800456761180695263439188E0L,
},
sc[] = {
  8.413244363014929493035952542677768808601E-3L,
  2.065114333816877479753334599639158060979E-1L,
  1.639064941530797583766364412782135680148E0L,
  4.936788463787115555582319302981666347450E0L,
  5.005177727208955487404729933261347679090E0L,
  /* 1.000000000000000000000000000000000000000E0 */
};

/* erfc for 0.84375 <= |x| < 1.25: with s = |x| - 1 the result is 1 - erx - pa(s)/qa(s).
   qa has an implicit leading coefficient 1, hence the Horner start value qa[6] + s. */
static long double erfc1 (long double x) {
  const long double s = fabsl(x) - 1;
  long double num = pa[7];
  long double den = qa[6] + s;
  int i;
  for (i = 6; i >= 0; i--) num = pa[i] + s*num;
  for (i = 5; i >= 0; i--) den = qa[i] + s*den;
  return 1 - erx - num/den;
}

/* erfc for 0.84375 <= |x| < 107; ix holds the biased exponent and the top 16 mantissa bits of x.
 * Below 1.25 the work is passed on to erfc1. Above, with s = 1/x^2, the result is
 *   exp(-z*z - 0.5625) * exp((z - x)*(z + x) + R(s)/S(s)) / |x|
 * where z is |x| with the low 40 mantissa bits cleared and R/S is taken from the
 * tables ra/sa, rb/sb or rc/sc depending on the magnitude. */
static long double erfc2 (uint32_t ix, long double x) {
  union ldshape u;
  const long double *rtab, *stab;  /* numerator and denominator coefficients */
  int rlen, slen, i;
  long double s, z, R, S;
  if (ix < 0x3fffa000) /* 0.84375 <= |x| < 1.25 */
    return erfc1(x);
  x = fabsl(x);
  s = 1 / (x*x);
  if (ix < 0x4000b6db) { /* 1.25 <= |x| < 2.857 ~ 1/.35 */
    rtab = ra; rlen = 9;
    stab = sa; slen = 9;
  } else if (ix < 0x4001d555) { /* 2.857 <= |x| < 6.6666259765625 */
    rtab = rb; rlen = 8;
    stab = sb; slen = 7;
  } else { /* 6.666 <= |x| < 107 (erfc only) */
    rtab = rc; rlen = 6;
    stab = sc; slen = 5;
  }
  /* Horner evaluation, innermost term first; the denominator has an implicit leading coefficient 1 */
  R = rtab[rlen - 1];
  for (i = rlen - 2; i >= 0; i--) R = rtab[i] + s*R;
  S = stab[slen - 1] + s;
  for (i = slen - 2; i >= 0; i--) S = stab[i] + s*S;
  /* z = x with the low 40 bits of the mantissa cleared */
  u.f = x;
  u.i.m &= -1ULL << 40;
  z = u.f;
  return expl(-z*z - 0.5625)*expl((z - x)*(z + x) + R/S)/x;
}

/* Error function for 80-bit long doubles.
 * ix packs the biased exponent and the top 16 mantissa bits of x and is used for all range tests:
 *   nan / +-inf        : nan / +-1;
 *   |x| < 2**-33       : 0.125*(8*x + efx8*x);
 *   |x| < 0.84375      : x + x*pp(x^2)/qq(x^2);
 *   |x| < 6.6666...    : sign * (1 - erfc2(ix, x));
 *   larger             : sign * (1 - 0x1p-16382L). */
LUALIB_API long double sun_erfl (long double x) {
  union ldshape u = {x};
  const uint32_t ix = (u.i.se & 0x7fffU) << 16 | u.i.m >> 48;
  const int sign = u.i.se >> 15;
  long double num, den, z, y;
  int i;
  if (ix >= 0x7fff0000)
    /* erf(nan)=nan, erf(+-inf)=+-1 */
    return 1 - 2*sign + 1/x;
  if (ix >= 0x3ffed800) { /* |x| >= 0.84375 */
    if (ix < 0x4001d555) /* |x| < 6.6666259765625 */
      y = 1 - erfc2(ix, x);
    else
      y = 1 - 0x1p-16382L;
    return sign ? -y : y;
  }
  if (ix < 0x3fde8000) /* |x| < 2**-33 */
    return 0.125*(8*x + efx8*x); /* avoid underflow */
  /* |x| < 0.84375: rational approximation in z = x^2; qq has an implicit leading coefficient 1 */
  z = x*x;
  num = pp[5];
  for (i = 4; i >= 0; i--) num = pp[i] + z*num;
  den = qq[5] + z;
  for (i = 4; i >= 0; i--) den = qq[i] + z*den;
  y = num / den;
  return x + x*y;
}

/* Complementary error function for 80-bit long doubles.
 * ix packs the biased exponent and the top 16 mantissa bits of x and is used for all range tests:
 *   nan / +inf / -inf  : nan / 0 / 2;
 *   |x| < 2**-65       : 1 - x;
 *   |x| < 0.84375      : 1 - (x + x*y) or 0.5 - (x - 0.5 + x*y) with y = pp(x^2)/qq(x^2);
 *   |x| < 107          : erfc2(ix, x), or 2 - erfc2(ix, x) for negative x;
 *   larger             : 0x1p-16382L squared, or 2 - 0x1p-16382L for negative x. */
LUALIB_API long double sun_erfcl (long double x) {
  union ldshape u = {x};
  const uint32_t ix = (u.i.se & 0x7fffU) << 16 | u.i.m >> 48;
  const int sign = u.i.se >> 15;
  long double num, den, z, y;
  int i;
  if (ix >= 0x7fff0000)
    /* erfc(nan) = nan, erfc(+-inf) = 0,2 */
    return 2*sign + 1/x;
  if (ix >= 0x3ffed800) { /* |x| >= 0.84375 */
    if (ix < 0x4005d600) { /* |x| < 107 */
      if (sign) return 2 - erfc2(ix, x);
      return erfc2(ix, x);
    }
    y = 0x1p-16382L;
    return sign ? 2 - y : y*y;
  }
  if (ix < 0x3fbe0000) /* |x| < 2**-65 */
    return 1.0 - x;
  /* rational approximation in z = x^2; qq has an implicit leading coefficient 1 */
  z = x*x;
  num = pp[5];
  for (i = 4; i >= 0; i--) num = pp[i] + z*num;
  den = qq[5] + z;
  for (i = 4; i >= 0; i--) den = qq[i] + z*den;
  y = num/den;
  if (ix < 0x3ffd8000) /* x < 1/4 */
    return 1.0 - (x + x*y);
  return 0.5 - (x - 0.5 + x*y);
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile */
LUALIB_API long double sun_erfl (long double x) {
  /* stop-gap for 113-bit long doubles: only the double routine erf is used */
  long double result = erf(x);
  return result;
}

LUALIB_API long double sun_erfcl (long double x) {
  /* stop-gap for 113-bit long doubles: only the double routine erfc is used */
  long double result = erfc(x);
  return result;
}
#endif


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* double erf(double x)
 * double erfc(double x)
 *			     x
 *		      2      |\
 *     erf(x)  =  ---------  | exp(-t*t)dt
 *	 	   sqrt(pi) \|
 *			     0
 *
 *     erfc(x) =  1-erf(x)
 *  Note that
 *		erf(-x) = -erf(x)
 *		erfc(-x) = 2 - erfc(x)
 *
 * Method:
 *	1. For |x| in [0, 0.84375]
 *	    erf(x)  = x + x*R(x^2)
 *          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
 *                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
 *	   where R = P/Q where P is an odd poly of degree 8 and
 *	   Q is an odd poly of degree 10.
 *						 -57.90
 *			| R - (erf(x)-x)/x | <= 2
 *
 *
 *	   Remark. The formula is derived by noting
 *          erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
 *	   and that
 *          2/sqrt(pi) = 1.128379167095512573896158903121545171688
 *	   is close to one. The interval is chosen because the fix
 *	   point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
 *	   near 0.6174), and by some experiment, 0.84375 is chosen to
 * 	   guarantee the error is less than one ulp for erf.
 *
 *      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
 *         c = 0.84506291151 rounded to single (24 bits)
 *         	erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
 *         	erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
 *			  1+(c+P1(s)/Q1(s))    if x < 0
 *         	|P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
 *	   Remark: here we use the taylor series expansion at x=1.
 *		erf(1+s) = erf(1) + s*Poly(s)
 *			 = 0.845.. + P1(s)/Q1(s)
 *	   That is, we use rational approximation to approximate
 *			erf(1+s) - (c = (single)0.84506291151)
 *	   Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
 *	   where
 *		P1(s) = degree 6 poly in s
 *		Q1(s) = degree 6 poly in s
 *
 *      3. For x in [1.25,1/0.35(~2.857143)],
 *         	erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
 *         	erf(x)  = 1 - erfc(x)
 *	   where
 *		R1(z) = degree 7 poly in z, (z=1/x^2)
 *		S1(z) = degree 8 poly in z
 *
 *      4. For x in [1/0.35,28]
 *         	erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
 *			= 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
 *			= 2.0 - tiny		(if x <= -6)
 *         	erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6, else
 *         	erf(x)  = sign(x)*(1.0 - tiny)
 *	   where
 *		R2(z) = degree 6 poly in z, (z=1/x^2)
 *		S2(z) = degree 7 poly in z
 *
 *      Note1:
 *	   To compute exp(-x*x-0.5625+R/S), let s be a single
 *	   precision number and s := x; then
 *		-x*x = -s*s + (s-x)*(s+x)
 *	        exp(-x*x-0.5625+R/S) =
 *			exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
 *      Note2:
 *	   Here 4 and 5 make use of the asymptotic series
 *			  exp(-x*x)
 *		erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
 *			  x*sqrt(pi)
 *	   We use rational approximation to approximate
 *      	g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
 *	   Here is the error bound for R1/S1 and R2/S2
 *      	|R1/S1 - f(x)|  < 2**(-62.57)
 *      	|R2/S2 - f(x)|  < 2**(-61.52)
 *
 *      5. For inf > x >= 28
 *         	erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
 *         	erfc(x) = tiny*tiny (raise underflow) if x > 0
 *			= 2 - tiny if x<0
 *
 *      7. Special case:
 *         	erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
 *         	erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
 *	   	erfc/erf(NaN) is NaN
 *
 * This implementation has been found in MUSL library file musl-1.2.4/src/math/erf.c;
 * the erfc implementation is much faster (+13 % compared to the GCC implementation) than
 * the slower original SunPro version without subroutines. 3.1.3
 */
/* Coefficients for the double-precision erf/erfc routines below (sun_erfc1, sun_erfc2, sun_erf).
   The hexadecimal pairs in the trailing comments are the high and low 32-bit words of each constant.
   NOTE(review): erx and efx8 are defined here only when __ARMCPU is set; in all other builds the
   double routines presumably pick up the static long double erx/efx8 of the 80-bit long double
   branch further up - confirm that every non-ARM target takes that branch. */
const double
two =  2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
	/* c = (float)0.84506291151 */
#ifdef __ARMCPU
erx =  8.45062911510467529297e-01,  /* 0x3FEB0AC1, 0x60000000 */
/*
 * Coefficients for approximation to  erf on [0,0.84375]
 */
/* efx =  1.28379167095512586316e-01, */ /* 0x3FC06EBA, 0x8214DB69 */
efx8=  1.02703333676410069053e+00,  /* 0x3FF06EBA, 0x8214DB69 */
#endif
pp0  =  1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
pp1  = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
pp2  = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
pp3  = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
pp4  = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
qq1  =  3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
qq2  =  6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
qq3  =  5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
qq4  =  1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
qq5  = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
/*
 * Coefficients for approximation to  erf  in [0.84375,1.25]
 */
pa0  = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
pa1  =  4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
pa2  = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
pa3  =  3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
pa4  = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
pa5  =  3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
pa6  = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
qa1  =  1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
qa2  =  5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
qa3  =  7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
qa4  =  1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
qa5  =  1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
qa6  =  1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
/*
 * Coefficients for approximation to  erfc in [1.25,1/0.35]
 */
ra0  = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
ra1  = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
ra2  = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
ra3  = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
ra4  = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
ra5  = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
ra6  = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
ra7  = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
sa1  =  1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
sa2  =  1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
sa3  =  4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
sa4  =  6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
sa5  =  4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
sa6  =  1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
sa7  =  6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
sa8  = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
/*
 * Coefficients for approximation to  erfc in [1/.35,28]
 */
rb0  = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
rb1  = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
rb2  = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
rb3  = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
rb4  = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
rb5  = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
rb6  = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
sb1  =  3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
sb2  =  3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
sb3  =  1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
sb4  =  3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
sb5  =  2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
sb6  =  4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
sb7  = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */

/* erfc for 0.84375 <= |x| < 1.25: with d = |x| - 1 the result is 1 - erx - P1(d)/Q1(d),
   both polynomials being evaluated by Horner's rule. */
static FORCE_INLINE double sun_erfc1 (double x) {
  double d, num, den;
  d = fabs(x) - 1;
  num = pa0 + d*(pa1 + d*(pa2 + d*(pa3 + d*(pa4 + d*(pa5 + d*pa6)))));
  den = 1 + d*(qa1 + d*(qa2 + d*(qa3 + d*(qa4 + d*(qa5 + d*qa6)))));
  return 1 - erx - num/den;
}

/* Shared tail evaluation for sun_erf and sun_erfc.
   ix is the high word of x with the sign bit already cleared by the caller; x may still carry its sign.
   Below 1.25 the work is delegated to sun_erfc1; otherwise a rational function R/S in 1/x^2 is
   combined with two exponentials. */
static FORCE_INLINE double sun_erfc2 (uint32_t ix, double x) {
  double w, num, den, xhi;
  if (ix < 0x3ff40000) {  /* |x| < 1.25 */
    return sun_erfc1(x);
  }
  x = fabs(x);
  w = 1/(x*x);
  if (ix >= 0x4006db6d) {  /* |x| >= 1/.35 ~ 2.85714: rb/sb coefficient set */
    num = rb0+w*(rb1 + w*(rb2 + w*(rb3 + w*(rb4 + w*(
          rb5+w*rb6)))));
    den = 1.0+w*(sb1 + w*(sb2 + w*(sb3 + w*(sb4 + w*(
          sb5+w*(sb6 + w*sb7))))));
  } else {                 /* 1.25 <= |x| < 1/.35: ra/sa coefficient set */
    num = ra0 + w*(ra1 + w*(ra2 + w*(ra3 + w*(ra4 + w*(
          ra5 + w*(ra6 + w*ra7))))));
    den = 1.0 + w*(sa1 + w*(sa2 + w*(sa3 + w*(sa4 + w*(
          sa5+w*(sa6+w*(sa7 + w*sa8)))))));
  }
  /* xhi is |x| with its low 32 bits cleared; the second exponential covers the difference xhi^2 - x^2 */
  xhi = x;
  SET_LOW_WORD(xhi, 0);
  return sun_exp(-xhi*xhi - 0.5625)*sun_exp((xhi - x)*(xhi + x) + num/den)/x;
}

/* Error function erf(x).
   - NaN yields NaN, +-infinity yields +-1.
   - |x| < 2**-28: linear approximation using efx8.
   - |x| < 0.84375: x + x*R(x^2) with the pp/qq rational approximation.
   - 0.84375 <= |x| < 6: 1 - sun_erfc2, with the sign of x restored.
   - |x| >= 6: 1 - 0x1p-1022 with the sign of x. */
LUALIB_API double sun_erf (double x) {
  double num, den, sq, res;
  uint32_t hx;
  int neg;
  GET_HIGH_WORD(hx, x);
  neg = hx >> 31;       /* 1 if the sign bit of x is set */
  hx &= 0x7fffffff;     /* high word of |x| */
  if (hx >= 0x7ff00000) {  /* erf(nan) = nan, erf(+-inf) = +-1 */
    return 1 - 2*neg + 1/x;
  }
  if (hx >= 0x3feb0000) {  /* |x| >= 0.84375 */
    if (hx < 0x40180000) {  /* |x| < 6 */
      res = 1 - sun_erfc2(hx, x);
    } else {
      res = 1 - 0x1p-1022;
    }
    return neg ? -res : res;
  }
  if (hx < 0x3e300000) {  /* |x| < 2**-28 */
    /* avoid underflow */
    return 0.125*(8*x + efx8*x);
  }
  sq  = x*x;
  num = pp0 + sq*(pp1 + sq*(pp2 + sq*(pp3 + sq*pp4)));
  den = 1.0 + sq*(qq1 + sq*(qq2 + sq*(qq3 + sq*(qq4 + sq*qq5))));
  res = num/den;
  return x + x*res;
}


/* Complementary error function erfc(x) = 1 - erf(x).
   - NaN yields NaN, +infinity yields 0, -infinity yields 2.
   - |x| < 2**-56: 1 - x.
   - |x| < 0.84375: built from the same pp/qq rational approximation as sun_erf.
   - 0.84375 <= |x| < 28: sun_erfc2 (reflected as 2 - erfc2 for negative x).
   - |x| >= 28: 2 - 0x1p-1022 for negative x, 0x1p-1022*0x1p-1022 otherwise. */
LUALIB_API double sun_erfc (double x) {
  double num, den, sq, q;
  uint32_t hx;
  int neg;
  GET_HIGH_WORD(hx, x);
  neg = hx >> 31;
  hx &= 0x7fffffff;
  if (hx >= 0x7ff00000) {
    /* erfc(nan) = nan, erfc(+-inf) = 0, 2 */
    return 2*neg + 1/x;
  }
  if (hx >= 0x3feb0000) {  /* |x| >= 0.84375 */
    if (hx >= 0x403c0000) {  /* |x| >= 28 */
      return neg ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022;
    }
    return neg ? 2 - sun_erfc2(hx, x) : sun_erfc2(hx, x);
  }
  if (hx < 0x3c700000) {  /* |x| < 2**-56 */
    return 1.0 - x;
  }
  sq  = x*x;
  num = pp0 + sq*(pp1 + sq*(pp2 + sq*(pp3 + sq*pp4)));
  den = 1.0 + sq*(qq1 + sq*(qq2 + sq*(qq3 + sq*(qq4 + sq*qq5))));
  q   = num/den;
  if (!neg && hx >= 0x3fd00000) {  /* 1/4 <= x < 0.84375 */
    return 0.5 - (x - 0.5 + x*q);
  }
  return 1.0 - (x + x*q);  /* x < 1/4 */
}


/* Taken from: c-SSS package by Fletcher T. Penney, file GLibFacade.c, MIT licence.
 * The following section came from:
 *
 *  http://lists-archives.org/mingw-users/12649-asprintf-missing-vsnprintf-
 *    behaving-differently-and-_vsncprintf-undefined.html
 *
 * and
 *
 *  http://groups.google.com/group/jansson-users/browse_thread/thread/
 *    76a88d63d9519978/041a7d0570de2d48?lnk=raot
 */

/* Solaris and Windows do not provide vasprintf() or asprintf(). */
#if defined(_WIN32) || defined(__SOLARIS)
/* Formats into a freshly malloc'ed string, like the GNU/BSD vasprintf().
 *
 * sptr  receives the allocated, NUL-terminated result; it is set to NULL on any failure.
 *       The caller owns the buffer and must free() it.
 * fmt   printf-style format string.
 * argv  argument list; as with vprintf(), its value is indeterminate after the call.
 *
 * Returns the number of characters written (excluding the terminator), or a negative
 * value if formatting or allocation failed.
 */
LUALIB_API int tools_vasprintf (char **sptr, char *fmt, va_list argv) {
  va_list probe;
  int wanted, written;
  *sptr = NULL;
  /* A va_list may only be traversed once, so measure the output with a copy. */
  va_copy(probe, argv);
  wanted = vsnprintf(NULL, 0, fmt, probe);
  va_end(probe);
  if (wanted < 0) return wanted;  /* encoding/format error */
  *sptr = malloc((size_t)wanted + 1);  /* also covers the empty result (wanted == 0) */
  if (*sptr == NULL) return -1;
  written = vsnprintf(*sptr, (size_t)wanted + 1, fmt, argv);
  if (written < 0) {  /* do not hand back a half-written buffer */
    free(*sptr);
    *sptr = NULL;
  }
  return written;
}

/* Variadic front end for tools_vasprintf: collects the arguments following fmt and forwards them.
   Returns whatever tools_vasprintf returns; *sptr is set by tools_vasprintf. */
LUALIB_API int tools_asprintf (char **sptr, char *fmt, ... ) {
  va_list ap;
  int n;
  va_start(ap, fmt);
  n = tools_vasprintf(sptr, fmt, ap);
  va_end(ap);
  return n;
}
#endif


/* Convert a Julian Date to Gregorian Date, 2.10.0 */
/* Splits the Julian Date x into calendar fields and a time of day.
   iy, im, id and fd are filled in by iauJd2cal (fd = decimal fractional day).
   The fraction is rounded to the nearest whole second and then split:
     deg receives the number of whole 3600-second units (i.e. the hour of the day),
     min receives the remaining whole minutes, sec the remaining seconds.
   Returns -1 if iauJd2cal returns -1, -2 if floor(x - 0.5) equals 60 + 2415018, and 0 otherwise. */
LUALIB_API int tools_auxcdate (double x, int *iy, int *im, int *id, double *fd, int *deg, int *min, int *sec) {
  long long int total, rest;  /* do not use int or long int due to possible overflows */
  if (iauJd2cal(x, 0, iy, im, id, fd) == -1) {
    return -1;
  }
  /* iauJd2cal returns fd with round-off errors, so round it to whole seconds of the day */
  total = sun_floor(24*3600.0*(*fd) + 0.5);
  rest  = fabs(fmod(total, 3600.0));  /* seconds left after removing whole hours */
  *deg = (int)(total / 3600);
  *min = (int)(rest / 60);
  *sec = (int)(long long int)fmod(rest, 60);
  if (sun_floor(x - 0.5) == 60 + 2415018) {
    return -2;
  }
  return 0;
}


/* https://stackoverflow.com/questions/18662261/fastest-implementation-of-sine-cosine-and-square-root-in-c-doesnt-need-to-b,
   answer by nimig18 */
/* cos(k*pi/4) for k = 0 .. 7; the matching sine is read from the same table at index (k + 6) mod 8 */
static float cosOff4LUT[] = {
   0x1.000000p+00,
   0x1.6A09E6p-01,
   0x0.000000p+00,
  -0x1.6A09E6p-01,
  -0x1.000000p+00,
  -0x1.6A09E6p-01,
   0x0.000000p+00,
   0x1.6A09E6p-01
};

/* Computes approximations of sin(x) and cos(x) at once and stores them in *pS and *pC.
   x is reduced to a remainder around the nearest multiple of pi/4 (Cody & Waite style, with
   pi/4 split into two constants); the remainder is fed into short polynomials and the result
   is rotated by the table values of the multiple. */
LUALIB_API void tools_sincosfast (float x, float *pS, float *pC) {  /* 12 % faster than math.sincos */
  int octant;
  float k, r, r2, cosr, sinr, cosk, sink;
  /* range reduction to [-pi/4, pi/4]; 0x1.45F306p+00 is 4/pi */
  k  = floorf(x * 0x1.45F306p+00 + 0.5);  /* casting is not faster */
  r  = (x - k * 0x1.920000p-01) - k*0x1.FB5444p-13;
  octant = (int)k;
  r2 = r*r;
  /* masking with 7 also yields the right table slot for negative octants (two's complement) */
  cosk = cosOff4LUT[octant & 0x7];        /* cosine of the angle offset */
  sink = cosOff4LUT[(octant + 6) & 0x7];  /* sine of the angle offset, note the phase correction */
  /* alternative: cosr = 0xf.ff79fp-4 + r2 * (-0x7.e58e9p-4);  TOL = 1.2786e-4 */
  cosr = 0xf.ffffdp-4 + r2*(-0x7.ffebep-4 + r2*0xa.956a9p-8);  /* TOL = 1.7882e-7 */
  /* alternative: sinr = r * (0xf.ffbf7p-4 + r2 * (-0x2.a41d0cp-4));  TOL = 4.835251e-6 */
  sinr = r*(0xf.fffffp-4 + r2*(-0x2.aaa65cp-4 + r2*0x2.1ea25p-8));  /* TOL = 1.1841e-8 */
  /* angle addition formulas */
  *pC = cosr*cosk - sinr*sink;
  *pS = cosr*sink + sinr*cosk;
}


/* Returns an approximation of the sine of the given number x, in radians. Depending on the CPU used, it is 7 to 58 percent faster
  than sun_sin. 2.14.6; in the range -1000 .. 1000 with step size 0.1, mean error is 0.000444, median error is 0.000474. */

LUALIB_API double tools_sinfast (double x) {
  double_cast coef, mag;
  uint64_t signbit;
  double bias, t;
  coef.f = 0.22308510060189463;
  /* fold x into one period; INVPI2 and PI2 are presumably 1/(2*pi) and 2*pi - confirm against their definitions */
  if (x < 0) {
    bias = -0.5;
  } else {
    bias = 0.5;
  }
  x = (bias + (int)(INVPI2*x))*PI2 - x;
  /* split the reduced x into its sign bit and its magnitude */
  mag.f = x;
  signbit = mag.i & 0x8000000000000000ULL;
  mag.i &= 0x7FFFFFFFFFFFFFFFULL;  /* absolute value, only ULL hex will work with uint64_t */
  /* first approximation, quadratic in x */
  t = x*(INVPIO4 - mag.f*INVPISQO4);
  /* refinement step: the correction coefficient takes the sign of the reduced x */
  coef.i |= signbit;
  return t*(0.77633023248007499 + t*coef.f);
}


/* Returns an approximation of the cosine of the given number x, in radians. Depending on the CPU used, it is 7 to 58 percent faster
  than sun_cos. 2.14.6 */

LUALIB_API double tools_cosfast (double x) {
  double_cast coef, mag;
  uint64_t signbit;
  double bias, t;
  coef.f = 0.22308510060189463;
  /* shift the argument by PIO2, then proceed exactly as tools_sinfast does (inlined to avoid the extra call) */
  x += PIO2;
  if (x < 0) {
    bias = -0.5;
  } else {
    bias = 0.5;
  }
  x = (bias + (int)(INVPI2*x))*PI2 - x;
  /* split the reduced x into its sign bit and its magnitude */
  mag.f = x;
  signbit = mag.i & 0x8000000000000000ULL;
  mag.i &= 0x7FFFFFFFFFFFFFFFULL;  /* absolute value, only ULL hex will work with uint64_t */
  /* first approximation, quadratic in x */
  t = x*(INVPIO4 - mag.f*INVPISQO4);
  /* refinement step: the correction coefficient takes the sign of the reduced x */
  coef.i |= signbit;
  return t*(0.77633023248007499 + t*coef.f);
}


/* Approximate tangent, computed as the quotient of tools_sinfast and tools_cosfast. 2.14.6;
   depending on the CPU, reported to be 7 to 58 percent faster than the non-approximating route. */
LUALIB_API double tools_tanfast (double x) {
  const double s = tools_sinfast(x);
  const double c = tools_cosfast(x);
  return s/c;
}


/* https://stackoverflow.com/questions/18662261/fastest-implementation-of-sine-cosine-and-square-root-in-c-doesnt-need-to-b,
   answer by nimig18. In the range 0 .. 1000 with step size 0.1, median error is 0.00791 and mean error is 0.00829. */
LUALIB_API float tools_sqrtfast (float x) {  /* 10 percent _slower_ than GCC's sqrt builtin on Aspire V3-331. */
  ieee_float_shape_signedint X, Y;
  float ScOff;
  uint8_t e;
  X.f = x;
  e = (X.i >> 23);  /* f.SFPbits.e; */
  if (x < 0) return AGN_NAN;  /* 2.12.0 RC 3 change */
  else if (x == 0) return 0;
  ScOff = ((e & 1) != 0) ? 1.0f : 0x1.6a09e6p0;    /* NOTE: If exp=EVEN, b/c (exp-127) a (EVEN - ODD) := ODD; but a (ODD - ODD) := EVEN!! */
  e = (e + 127) >> 1;                              /* NOTE: If exp=ODD,  b/c (exp-127) then flr((exp-127)/2) */
  X.i = (X.i & ((1uL << 23) - 1)) | (0x7F << 23);  /* Mask mantissa, force exponent to zero. */
  Y.i = ((uint32_t) e) << 23;
  /* Error grows with square root of the exponent. Unfortunately no work around like inverse square root... :( */
  /* Y.f *= ScOff * (0x9.5f61ap-4 + X.f*(0x6.a09e68p-4));        // Error = +-1.78e-2 * 2^(flr(log2(x)/2)) */
  Y.f *= ScOff*(0x7.2181d8p-4 + X.f*(0xa.05406p-4 + X.f*(-0x1.23a14cp-4)));  /* Error = +-7.64e-5 * 2^(flr(log2(x)/2)) */
  /* Y.f *= ScOff * (0x5.f10e7p-4 + X.f*(0xc.8f2p-4 +X.f*(-0x2.e41a4cp-4 + X.f*(0x6.441e6p-8)))); */  /* Error =  8.21e-5 * 2^(flr(log2(x)/2)) */
  /* Y.f *= ScOff * (0x5.32eb88p-4 + X.f*(0xe.abbf5p-4 + X.f*(-0x5.18ee2p-4 + X.f*(0x1.655efp-4 + X.f*(-0x2.b11518p-8)))));  Error = +-9.92e-6 * 2^(flr(log2(x)/2)) */
  /* Y.f *= ScOff * (0x4.adde5p-4 + X.f*(0x1.08448cp0 + X.f*(-0x7.ae1248p-4 + X.f*(0x3.2cf7a8p-4 + X.f*(-0xc.5c1e2p-8 + X.f*(0x1.4b6dp-8))))));  Error = +-1.38e-6 * 2^(flr(log2(x)/2)) */
  // Y.f *= ScOff * (0x4.4a17fp-4 + X.f*(0x1.22d44p0 + X.f*(-0xa.972e8p-4 + X.f*(0x5.dd53fp-4 + X.f*(-0x2.273c08p-4 + X.f*(0x7.466cb8p-8 + X.f*(-0xa.ac00ep-12)))))));  Error = +-2.9e-7 * 2^(flr(log2(x)/2)) */
  /* Y.f *= ScOff * (0x3.fbb3e8p-4 + X.f*(0x1.3b2a3cp0 + X.f*(-0xd.cbb39p-4 + X.f*(0x9.9444ep-4 + X.f*(-0x4.b5ea38p-4 + X.f*(0x1.802f9ep-4 + X.f*(-0x4.6f0adp-8 + X.f*(0x5.c24a28p-12 ))))))));   Error = +-2.7e-6 * 2^(flr(log2(x)/2)) */
  return Y.f;
}


/* Very rough square root (about 4 % off), taken from http://bits.stephan-brumme.com:
   works directly on the bit pattern of x by adding the exponent bias and halving the whole word. */
LUALIB_API float tools_sqrtapx (float x) {
  ieee_float_shape_type bits = { x };
  bits.word = bits.word + (127 << 23);  /* adjust bias */
  bits.word = bits.word >> 1;           /* approximation of square root */
  return bits.value;
}


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
*/
/*
FUNCTION
<<round>>, <<roundf>>---round to integer, to nearest
INDEX
        round
INDEX
        roundf

ANSI_SYNOPSIS
        #include <math.h>
        double round(double <[x]>);
        float roundf(float <[x]>);

DESCRIPTION
        The <<round>> functions round their argument to the nearest integer
        value in floating-point format, rounding halfway cases away from zero,
        regardless of the current rounding direction.  (While the "inexact"
        floating-point exception behaviour is unspecified by the C standard, the
        <<round>> functions are written so that "inexact" is not raised if the
        result does not equal the argument, which behaviour is as recommended
        by IEEE 754 for its related functions.)

RETURNS
    <[x]> rounded to an integral value.

PORTABILITY
    ANSI C, POSIX

SEEALSO
    <<tools_nearbyint>>, <<rint>>

REMARK
    The openlibm version is slower and the MUSL 1.2.4 implementation is 3.6 % slower. */

/* Rounds x to the nearest integral value, halfway cases away from zero, by editing the
   two 32-bit words of the double directly.
   - unbiased exponent < 0  (|x| < 1): result is +-0, or +-1 when the exponent is -1 (|x| >= 0.5);
   - exponent 0 .. 19: the fraction bits lie entirely in the high word;
   - exponent 20 .. 51: the fraction bits lie in the low word;
   - exponent >= 52: x is already integral; NaN and infinities are returned as x + x. */
LUALIB_API double sun_round (double x) {
  /* Most significant word, least significant word. */
  int32_t msw, exponent_less_1023;
  uint32_t lsw;
  EXTRACT_WORDS(msw, lsw, x);
  /* Extract exponent field. */
  exponent_less_1023 = ((msw & 0x7ff00000) >> 20) - 1023;
  if (exponent_less_1023 < 20) {
    if (exponent_less_1023 < 0) {
      msw &= 0x80000000;  /* keep only the sign */
      if (exponent_less_1023 == -1)
        /* Result is +1.0 or -1.0. */
        msw |= (1023 << 20);
      lsw = 0;
    } else {
      uint32_t exponent_mask = 0x000fffff >> exponent_less_1023;  /* fraction bits in the high word */
      if ((msw & exponent_mask) == 0 && lsw == 0)
        /* x is an integral value. */
        return x;
      msw += 0x00080000 >> exponent_less_1023;  /* add one half; a carry moves into the integer bits */
      msw &= ~exponent_mask;
      lsw = 0;
    }
  } else if (exponent_less_1023 < 52) {  /* changed 2.29.3 */
    uint32_t tmp, exponent_mask;
    exponent_mask = 0xffffffff >> (exponent_less_1023 - 20);  /* fraction bits in the low word */
    if ((lsw & exponent_mask) == 0)  /* x is an integral value. */
      return x;
    /* Add one half. The shift count reaches 31 when the exponent is 20, so the shifted
       operand must be unsigned: shifting a signed 1 into the sign bit is undefined. */
    tmp = lsw + ((uint32_t)1 << (51 - exponent_less_1023));
    if (tmp < lsw)  /* carry out of the low word */
      msw += 1;
    lsw = tmp;
    lsw &= ~exponent_mask;
  } else {  /* exponent > 51: no fraction bits left */
    if (exponent_less_1023 == 1024)  /* x is NaN or infinite. */
      return x + x;
    else
      return x;
  }
  INSERT_WORDS(x, msw, lsw);
  return x;
}


/* @(#)e_fmod.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * __ieee754_fmod(x,y)
 * Return x mod y in exact arithmetic
 * Method: shift and subtract
 *
 * 3 percent slower than GCC's implementation.
 */

/* Signed zeros, indexed by the sign bit of x (0 -> +0.0, 1 -> -0.0). */
static const double Zero[] = { 0.0, -0.0 };

/* Computes x mod y by shift-and-subtract on the mantissas, which are handled as pairs of
   32-bit words (hx:lx for x, hy:ly for y). The result carries the sign of x.
   Returns (x*y)/(x*y) if y is zero, x is not finite, or y is NaN; returns x unchanged if
   |x| < |y|, and a zero with the sign of x if |x| == |y| or the division is exact. */
LUALIB_API double sun_fmod (double x, double y) {  /* SLOWER than built-in fmod */
  int32_t n, hx, hy, hz, ix, iy, sx, i;
  uint32_t lx, ly, lz;
  EXTRACT_WORDS(hx, lx, x);
  EXTRACT_WORDS(hy, ly, y);
  sx = hx & 0x80000000;  /* sign of x */
  hx ^= sx;          /* |x| */
  hy &= 0x7fffffff;  /* |y| */
  /* purge off exception values */
  if ((hy | ly) == 0 || (hx >= 0x7ff00000) ||   /* y=0, or x not finite */
     ((hy | ((ly | -ly) >> 31)) > 0x7ff00000))  /* or y is NaN */
     return (x*y)/(x*y);
  if (hx <= hy) {
    if ((hx < hy) || (lx < ly)) return x;  /* |x| < |y| return x */
    if (lx == ly)
      return Zero[(uint32_t)sx >> 31];     /* |x| = |y| return x*0 */
  }
  /* determine ix = ilogb(x) */
  if (unlikely(hx < 0x00100000)) {  /* subnormal x, 2.5.15 optimisation */
    if (hx == 0) {  /* all significant bits are in the low word */
      for (ix=-1043, i=lx; i > 0; i <<= 1) ix -=1;
    } else {
      for (ix=-1022, i=(hx << 11); i > 0; i <<= 1) ix -=1;
    }
  } else ix = (hx >> 20) - 1023;
  /* determine iy = ilogb(y) */
  if (unlikely(hy < 0x00100000)) {  /* subnormal y, 2.5.15 optimisation */
    if (hy == 0) {  /* all significant bits are in the low word */
      for (iy=-1043, i=ly; i > 0; i <<= 1) iy -=1;
    } else {
      for (iy=-1022, i=(hy << 11); i > 0; i <<= 1) iy -=1;
    }
  } else iy = (hy >> 20) - 1023;
  /* set up {hx,lx}, {hy,ly} and align y to x */
  if (likely(ix >= -1022))
    hx = 0x00100000 | (0x000fffff & hx);  /* make the implicit leading 1 explicit */
  else {  /* subnormal x, shift x to normal */
    n = -1022 - ix;
    if (n <= 31) {
      hx = (hx << n) | (lx >> (32 - n));
      lx <<= n;
    } else {
      hx = lx << (n - 32);
      lx = 0;
    }
  }
  if (likely(iy >= -1022))
    hy = 0x00100000 | (0x000fffff & hy);  /* make the implicit leading 1 explicit */
  else {  /* subnormal y, shift y to normal */
    n = -1022 - iy;
    if (n <= 31) {
      hy = (hy << n) | (ly >> (32 - n));
      ly <<= n;
    } else {
      hy = ly << (n - 32);
      ly = 0;
    }
  }
  /* fix point fmod */
  n = ix - iy;  /* one subtract-and-double step per binade between x and y */
  while (n--) {
    hz = hx - hy; lz = lx - ly; if (lx < ly) hz -= 1;  /* 64-bit subtraction with borrow */
    if (hz < 0) { hx = hx + hx + (lx >> 31); lx = lx + lx; }  /* x < y: just double x */
    else {
      if ((hz|lz) == 0)  /* return sign(x)*0 */
        return Zero[(uint32_t)sx >> 31];
      hx = hz + hz + (lz >> 31); lx = lz + lz;  /* keep the difference and double it */
    }
  }
  /* final subtraction without doubling */
  hz = hx - hy; lz = lx - ly; if (lx < ly) hz -= 1;
  if (hz >= 0) { hx = hz; lx = lz; }
  /* convert back to floating value and restore the sign */
  if ((hx | lx) == 0)  /* return sign(x)*0 */
    return Zero[(uint32_t)sx >> 31];
  while (hx < 0x00100000) {  /* normalise x */
    hx = hx + hx + (lx >> 31); lx = lx + lx;
    iy -= 1;
  }
  if (iy >= -1022) {  /* normalise output */
    hx = ((hx - 0x00100000) | ((iy + 1023) << 20));  /* drop the explicit 1, insert the biased exponent */
    INSERT_WORDS(x, hx | sx, lx);
  } else {  /* subnormal output */
    n = -1022 - iy;
    if (n <= 20) {
      lx = (lx >> n) | ((uint32_t)hx << (32 - n));
      hx >>= n;
    } else if (n <= 31) {
      lx = (hx << (32 - n)) | (lx >> n); hx = sx;
    } else {
      lx = hx >> (n - 32); hx = sx;
    }
    INSERT_WORDS(x, hx | sx, lx);
    x *= one;  /* create necessary signal */
  }
  return x;  /* exact output */
}


/* @(#)s_modf.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * modf(double x, double *iptr)
 * return fraction part of x, and return x's integral part in *iptr.
 * Method:
 *  Bit twiddling.
 *
 * Exception:
 *  No exception.
 */
/* Splits x into integral and fractional parts by masking the fraction bits of its two words.
   The integral part is stored in *iptr, the fractional part is returned; both carry the sign of x.
   For infinities and NaN, *iptr receives x and 0.0/x is returned. */
LUALIB_API double sun_modf (double x, double *iptr) {  /* the function sometimes is not faster than built-in modf */
  int32_t hw, lw, ex;
  uint32_t fmask, top;
  EXTRACT_WORDS(hw, lw, x);
  ex = ((hw >> 20) & 0x7ff) - 0x3ff;  /* exponent of x */
  if (ex < 0) {  /* |x| < 1 */
    INSERT_WORDS(*iptr, hw & 0x80000000, 0);  /* *iptr = +-0 */
    return x;
  }
  if (ex < 20) {  /* integer part in high x */
    fmask = (0x000fffff) >> ex;
    if (((hw & fmask) | lw) != 0) {  /* x has a fraction */
      INSERT_WORDS(*iptr, hw & (~fmask), 0);
      return x - *iptr;
    }
    *iptr = x;
  } else if (unlikely(ex > 51)) {  /* no fraction part */
    if (unlikely(ex == 0x400)) {   /* inf/NaN, 2.5.15 optimisation */
      *iptr = x;
      return 0.0/x;
    }
    *iptr = x*one;
  } else {  /* fraction part in low x */
    fmask = ((uint32_t)(0xffffffff)) >> (ex - 20);
    if ((lw & fmask) != 0) {  /* x has a fraction */
      INSERT_WORDS(*iptr, hw, lw & (~fmask));
      return x - *iptr;
    }
    *iptr = x;
  }
  /* x is integral: the fraction is a zero with the sign of x */
  GET_HIGH_WORD(top, x);
  INSERT_WORDS(x, top & 0x80000000, 0);  /* return +-0 */
  return x;
}


/*-
 * Copyright (c) 2007 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Derived from s_modf.c, which has the following Copyright:
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * $FreeBSD$
 */

/* MASK is an all-ones word used by GETFRAC: 64 bits wide for the 80-bit extended format
   (64-bit mantissa, maximum exponent 16384), 32 bits wide otherwise. */
#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
#define	MASK	((uint64_t)-1)
#else
#define	MASK	((uint32_t)-1)
#endif
/* Return the last n bits of a word, representing the fractional part.
   NOTE(review): n is used as a shift count on MASK, so it must stay below the width of MASK. */
#define	GETFRAC(bits, n)	((bits) & ~(MASK << (n)))
/* The number of fraction bits in manh, not counting the integer bit */
#define	HIBITS	(LDBL_MANT_DIG - LDBL_MANL_SIZE)

/* Signed long double zeros, indexed by the sign bit (0 -> +0.0L, 1 -> -0.0L). */
static const long double zerol[] = { 0.0L, -0.0L };

LUALIB_API long double sun_modfl (long double x, long double *iptr) {  /* 2.34.8 */
  union IEEEl2bits u;
  int e;
#ifdef __ARMCPU  /* to prevent compiler warnings, 3.10.2a */
  u.bits.exp = 0;
  u.bits.sign = 0;
#endif
  u.e = x;
  e = u.bits.exp - LDBL_MAX_EXP + 1;
  if (e < HIBITS) {  /* integer part is in manh */
    if (e < 0) {     /* |x|<1 */
      *iptr = zerol[u.bits.sign];
      return (x);
    } else {
      if ((GETFRAC(u.bits.manh, HIBITS - 1 - e) | u.bits.manl) == 0) {	/* x is integral */
        *iptr = x;
        return (zerol[u.bits.sign]);
      } else {  /* clear all but the top e + 1 bits */
        u.bits.manh >>= HIBITS - 1 - e;
        u.bits.manh <<= HIBITS - 1 - e;
        u.bits.manl = 0;
        *iptr = u.e;
        return x - u.e;
      }
    }
  } else if (e >= LDBL_MANT_DIG - 1) {  /* x has no fraction part */
    *iptr = x;
    if (x != x) return (x);  /* NaN */
    return (zerol[u.bits.sign]);
  } else {  /* fraction part is in manl */
    if (GETFRAC(u.bits.manl, LDBL_MANT_DIG - 1 - e) == 0) {  /* x is integral */
      *iptr = x;
      return (zerol[u.bits.sign]);
    } else {  /* clear all but the top e + 1 bits */
      u.bits.manl >>= LDBL_MANT_DIG - 1 - e;
      u.bits.manl <<= LDBL_MANT_DIG - 1 - e;
      *iptr = u.e;
      return x - u.e;
    }
  }
}
/* end of sun_modfl */


/* Taken from MUSL-1.2.3 src/math/scalbnl.c, 2.34.10 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double sun_scalbnl (long double x, int n) {
  return scalbn(x, n);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
LUALIB_API long double sun_scalbnl (long double x, int n) {
  union ldshape u;
  if (n > 16383) {
    x *= 0x1p16383L;
    n -= 16383;
    if (n > 16383) {
      x *= 0x1p16383L;
      n -= 16383;
      if (n > 16383) n = 16383;
    }
  } else if (n < -16382) {
    x *= 0x1p-16382L * 0x1p113L;
    n += 16382 - 113;
    if (n < -16382) {
      x *= 0x1p-16382L * 0x1p113L;
      n += 16382 - 113;
      if (n < -16382) n = -16382;
    }
  }
  u.f = 1.0;
  u.i.se = 0x3fff + n;
  return x * u.f;
}
#endif


/* Taken from musl-1.2.3 src/math/copysignl.c, 2.34.10 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#ifndef __ARMCPU
LUALIB_API long double sun_copysignl (long double x, long double y) {
  return copysign(x, y);
}
#endif
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#ifndef __ARMCPU
LUALIB_API long double sun_copysignl (long double x, long double y) {
  union ldshape ux = {x}, uy = {y};
  ux.i.se &= 0x7fff;
  ux.i.se |= uy.i.se & 0x8000;
  return ux.f;
}
#endif
#endif


/* Taken from musl-1.2.3 src/math/floorl.c, 2.34.10 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* long double has the format of double here */
LUALIB_API long double sun_floorl (long double x) {
  return sun_floor(x);  /* 2.41.0 tweak */
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384

/* 1/LDBL_EPSILON: adding and subtracting it pushes the fraction bits of a smaller value out of the mantissa */
static const long double toint2 = 1/LDBL_EPSILON;

/* Largest integral value not greater than x. */
LUALIB_API long double sun_floorl (long double x) {
  union ldshape u = {x};
  const int bexp = u.i.se & 0x7fff;  /* biased exponent */
  const int neg  = u.i.se >> 15;     /* sign bit */
  long double d;
  /* already integral (no fraction bits left), or zero */
  if (bexp >= 0x3fff + LDBL_MANT_DIG - 1 || x == 0) return x;
  /* d = int(x) - x, where int(x) is an integer neighbor of x */
  d = neg ? x - toint2 + toint2 - x
          : x + toint2 - toint2 - x;
  /* special case because of non-nearest rounding modes */
  if (bexp <= 0x3fff - 1) {  /* |x| < 1 */
    FORCE_EVAL(d);
    return neg ? -1 : 0;
  }
  /* the neighbor was above x: step one down */
  return (d > 0) ? x + d - 1 : x + d;
}
#endif


/* Taken from musl-1.2.3 src/math/ceill.c, 2.34.10 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* long double has the format of double here, so ceil does the job */
LUALIB_API long double sun_ceill (long double x) {
  return ceil(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384

/* Smallest integral value not less than x; relies on the file-level constant toint2. */
LUALIB_API long double sun_ceill (long double x) {
  union ldshape u = {x};
  const int bexp = u.i.se & 0x7fff;  /* biased exponent */
  const int neg  = u.i.se >> 15;     /* sign bit */
  long double d;
  /* already integral (no fraction bits left), or zero */
  if (bexp >= 0x3fff + LDBL_MANT_DIG - 1 || x == 0) return x;
  /* d = int(x) - x, where int(x) is an integer neighbor of x */
  d = neg ? x - toint2 + toint2 - x
          : x + toint2 - toint2 - x;
  /* special case because of non-nearest rounding modes */
  if (bexp <= 0x3fff - 1) {  /* |x| < 1 */
    FORCE_EVAL(d);
    return neg ? -0.0 : 1;
  }
  /* the neighbor was below x: step one up */
  return (d < 0) ? x + d + 1 : x + d;
}
#endif


LUALIB_API long double sun_intl (long double x) {  /* based on sun_modfl, 2.34.8 */
  union IEEEl2bits u;
  int e;
#ifdef __ARMCPU  /* to prevent compiler warnings, 3.10.2a */
  u.bits.exp = 0;
  u.bits.sign = 0;
#endif
  u.e = x;
  e = u.bits.exp - LDBL_MAX_EXP + 1;
  if (e < HIBITS) {  /* integer part is in manh */
    if (e < 0) {     /* |x|<1 */
      return zerol[u.bits.sign];
    } else {
      if ((GETFRAC(u.bits.manh, HIBITS - 1 - e) | u.bits.manl) == 0) {	/* x is integral */
        return x;
      } else {  /* clear all but the top e + 1 bits */
        u.bits.manh >>= HIBITS - 1 - e;
        u.bits.manh <<= HIBITS - 1 - e;
        u.bits.manl = 0;
        return u.e;
      }
    }
  } else if (e >= LDBL_MANT_DIG - 1) {  /* x has no fraction part */
    return x;
  } else {  /* fraction part is in manl */
    if (GETFRAC(u.bits.manl, LDBL_MANT_DIG - 1 - e) == 0) {  /* x is integral */
      return x;
    } else {  /* clear all but the top e + 1 bits */
      u.bits.manl >>= LDBL_MANT_DIG - 1 - e;
      u.bits.manl <<= LDBL_MANT_DIG - 1 - e;
      return u.e;
    }
  }
}


LUALIB_API long double sun_fracl (long double x) {  /* based on sun_modfl, 2.34.8 */
  union IEEEl2bits u;
  int e;
#ifdef __ARMCPU  /* to prevent compiler warnings, 3.10.2a */
  u.bits.exp = 0;
  u.bits.sign = 0;
#endif
  u.e = x;
  e = u.bits.exp - LDBL_MAX_EXP + 1;
  if (e < HIBITS) {  /* integer part is in manh */
    if (e < 0) return x;  /* |x|<1 */
    else {
      if ((GETFRAC(u.bits.manh, HIBITS - 1 - e) | u.bits.manl) == 0) {	/* x is integral */
        return (zerol[u.bits.sign]);
      } else {  /* clear all but the top e+1 bits */
        u.bits.manh >>= HIBITS - 1 - e;
        u.bits.manh <<= HIBITS - 1 - e;
        u.bits.manl = 0;
        return x - u.e;
      }
    }
  } else if (e >= LDBL_MANT_DIG - 1) {  /* x has no fraction part */
    if (x != x) return (x);  /* NaN */
    return (zerol[u.bits.sign]);
  } else {  /* fraction part is in manl */
    if (GETFRAC(u.bits.manl, LDBL_MANT_DIG - 1 - e) == 0) {  /* x is integral */
      return (zerol[u.bits.sign]);
    } else {  /* clear all but the top e+1 bits */
      u.bits.manl >>= LDBL_MANT_DIG - 1 - e;
      u.bits.manl <<= LDBL_MANT_DIG - 1 - e;
      return x - u.e;
    }
  }
}


/* Returns 1 if x is an integral value and 0 otherwise, by checking whether any fraction bit of
   the mantissa is set.
   - |x| < 1: only zero counts as integral.
   - Values too large to have fraction bits, including infinities, count as integral.
   - NaN yields 0. (Converting the NaN itself to int, as done previously, is undefined behaviour.) */
LUALIB_API int sun_isintl (long double x) {  /* based on sun_modfl, 2.34.8 */
  union IEEEl2bits u;
  int e;
#ifdef __ARMCPU  /* to prevent compiler warnings, 3.10.2a */
  u.bits.exp = 0;
  u.bits.sign = 0;
#endif
  u.e = x;
  e = u.bits.exp - LDBL_MAX_EXP + 1;  /* unbiased exponent */
  if (e < HIBITS) {  /* integer part is in manh */
    if (e < 0) {     /* |x|<1 */
      return x == 0.0L;
    } else {
      return ((GETFRAC(u.bits.manh, HIBITS - 1 - e) | u.bits.manl) == 0);  /* x is integral */
    }
  } else if (e >= LDBL_MANT_DIG - 1) {  /* x has no fraction part */
    if (x != x) return 0;  /* NaN is not an integer */
    return 1;
  } else {  /* fraction part is in manl */
    return (GETFRAC(u.bits.manl, LDBL_MANT_DIG - 1 - e) == 0);  /* x is integral */
  }
}


/* Returns 1 if x has a non-zero fractional part and 0 otherwise, by checking whether any
   fraction bit of the mantissa is set.
   - |x| < 1: everything except zero has a fractional part.
   - Values too large to have fraction bits, including infinities, yield 0.
   - NaN yields 0: it has no fractional part. (Converting the NaN itself to int, as done
     previously, is undefined behaviour.)
     NOTE(review): confirm that 0 is the answer callers expect for NaN. */
LUALIB_API int sun_isfloatl (long double x) {  /* based on sun_modfl, 2.34.8 */
  union IEEEl2bits u;
  int e;
#ifdef __ARMCPU  /* to prevent compiler warnings, 3.10.2a */
  u.bits.exp = 0;
  u.bits.sign = 0;
#endif
  u.e = x;
  e = u.bits.exp - LDBL_MAX_EXP + 1;  /* unbiased exponent */
  if (e < HIBITS) {  /* integer part is in manh */
    if (e < 0) {     /* |x|<1 */
      return x != 0.0L;
    } else {
      return ((GETFRAC(u.bits.manh, HIBITS - 1 - e) | u.bits.manl) != 0);  /* x is non-integral */
    }
  } else if (e >= LDBL_MANT_DIG - 1) {  /* x has no fraction part */
    if (x != x) return 0;  /* NaN */
    return 0;
  } else {  /* fraction part is in manl */
    return (GETFRAC(u.bits.manl, LDBL_MANT_DIG - 1 - e) != 0);  /* x is non-integral */
  }
}


/* @(#)s_modf.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * frac(double x)
 * return fraction part of x.
 * Method:
 *  Bit twiddling. No difference between -0 and +0, returns just +0. Based on sun_modf.
 *
 * Speed:
 *  Three percent faster than x - trunc(x). Skipping the tests for integral x does not gain speed. 2.29.5
 *
 * Exception:
 *  No exception.
 */
/* Returns the fractional part of x: the integral part is rebuilt from the words of x with the
   fraction bits masked out and subtracted from x. Integral x yields +0 regardless of sign;
   infinities and NaN yield 0.0/x. */
LUALIB_API double sun_frac (double x) {
  int32_t hw, lw, ex;
  uint32_t fmask;
  double whole;
  EXTRACT_WORDS(hw, lw, x);
  ex = ((hw >> 20) & 0x7ff) - 0x3ff;  /* exponent of x */
  if (ex < 0) {  /* |x| < 1 */
    return x;
  }
  if (ex < 20) {  /* integer part in high x */
    fmask = (0x000fffff) >> ex;
    if (((hw & fmask) | lw) == 0) return 0;  /* x is integral */
    INSERT_WORDS(whole, hw & (~fmask), 0);
    return x - whole;
  }
  if (unlikely(ex > 51)) {  /* no fraction part */
    if (unlikely(ex == 0x400)) {  /* inf/NaN, 2.5.15 optimisation */
      return 0.0/x;
    }
    return 0;
  }
  /* fraction part in low x */
  fmask = ((uint32_t)(0xffffffff)) >> (ex - 20);
  if ((lw & fmask) == 0) return 0;  /* x is integral */
  INSERT_WORDS(whole, hw, lw & (~fmask));
  return x - whole;
}


/* @(#)e_remainder.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
SYNOPSIS
  #include <math.h>
  double remainder(double <[x]>, double <[y]>);
DESCRIPTION
  remainder finds the remainder of this value that is in the range -<[y]>/2 .. +<[y]>/2.
  remainder is a System V release 4.

RETURNS
  remainder returns the integer result as a double. */

/* remainder(x, p)
 * Return:
 *   returns  x REM p  =  x - [x/p]*p as if in infinite
 *   precise arithmetic, where [x/p] is the (infinite bit)
 *   integer nearest x/p (in half way case choose the even one).
 * Method:
 *   Based on fmod(), but returns (x - sun_floor(x/p)*p) instead of x - [x/p]chopped * p exactly.
 * Warning:
 *   33 percent slower than luai_nummod(x) = (x - sun_floor(x/p)*p) !
 * UNEXPOSED.
 */
/* Remainder of x with respect to p, built on sun_fmod and then shifted by p whenever the
   intermediate result is negative (see the adaptation note at the end).
   Returns (x*p)/(x*p) if p is zero, x is infinite or NaN, or p is NaN. */
LUALIB_API double sun_remainder (double x, double p) {
  int32_t hx, hp;
  uint32_t sx, sp, lx, lp;
  double p_half;
  EXTRACT_WORDS(hx, lx, x);
  EXTRACT_WORDS(hp, lp, p);
  sx = hx & 0x80000000;  /* sign bit of x */
  sp = hp & 0x80000000;  /* sign bit of p */
  hp &= 0x7fffffff;      /* high word of |p| */
  hx &= 0x7fffffff;      /* high word of |x| */
    /* purge off exception values */
  if ((hp | lp) == 0) return (x*p)/(x*p);  /* p = 0 */
  if ( (hx>= 0x7ff00000) ||   /* x is inf or nan */
     ( (hp >= 0x7ff00000) &&  /* p is NaN */
     (((hp - 0x7ff00000) | lp) != 0)))
    return (x*p)/(x*p);
  if (hp <= 0x7fdfffff)  /* p + p does not overflow */
    x = sun_fmod(x, p + p);  /* now x < 2p; in this context, sun_fmod is 20 % faster */
  /* hx, lx still hold the words of the original x: |x| == |p| gives a zero with the sign of x */
  if (((hx - hp) | (lx - lp)) == 0) return zero*x;
  x  = fabs(x);  /* SET_HIGH_WORD is thrice as slow */
  p  = fabs(p);
  if (hp < 0x00200000) {  /* |p| < 2**-1021: compare x + x with p instead of x with 0.5*p (presumably because halving such a small p would lose precision) */
    if (x + x > p) {
      x -= p;
      if (x + x >= p) x -= p;
    }
  } else {
    p_half = 0.5*p;
    if (x > p_half) {
      x -= p;
      if (x >= p_half) x -= p;
    }
  }
  /* restore the sign of the original x; a zero result is normalised first */
  GET_HIGH_WORD(hx, x);
  if ((hx & 0x7fffffff) == 0) hx = 0;
  SET_HIGH_WORD(x, hx ^ sx);
  /* now adapt to Maple's `mod` operator */
  /* this is slower: x = x + (-1)*(x < 0 && sp != 0)*p + (x < 0 && sp == 0)*p; */
  if (x < 0) {
    SET_HIGH_WORD(p, hp ^ sp);  /* give p its original sign back */
    x += p;
  }
  return x;
}


/* @(#)e_hypot.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/* Computes sqrt(x*x + y*y). The operands are ordered so that a holds the one with the larger
   high word and b the other; very large or very small operands are rescaled by a power of two,
   which is tracked in k and undone on the result at the end. */
LUALIB_API double sun_hypot (double x, double y) {  /* the function only sometimes is faster than built-in hypot; 1 % slower than tools_hypot when inlined */
  double a, b, t1, t2, y1, y2, w;
  int32_t j, k, ha, hb;
  GET_HIGH_WORD(ha, x);
  ha &= 0x7fffffff;  /* high word of |x| */
  GET_HIGH_WORD(hb, y);
  hb &= 0x7fffffff;  /* high word of |y| */
  /* make a the operand with the larger high word; ha, hb are swapped along */
  if (hb > ha) { a = y; b = x; j = ha; ha = hb; hb = j; } else { a = x; b = y; }
  a = fabs(a);  /* fabs is faster than SET_HIGH_WORD(a, ha) */
  b = fabs(b);  /* dito */
  if ((ha - hb) > 0x3c00000) return a + b;  /* x/y > 2**60 */
  k = 0;  /* power of two to be re-applied to the result */
  if (unlikely(ha > 0x5f300000)) {  /* a > 2**500 = 3.2733906078961e+150 */
    if (ha >= 0x7ff00000) {  /* Inf or NaN, 2.5.15 optimisation */
      uint32_t low;
      /* Use original arg order iff result is NaN; quieten sNaNs. */
      w = fabs(x + 0.0) - fabs(y + 0.0);
      GET_LOW_WORD(low, a);
      if (((ha & 0xfffff) | low) == 0) w = a;  /* a is infinite */
      GET_LOW_WORD(low, b);
      if (((hb ^ 0x7ff00000) | low) == 0) w = b;  /* b is infinite */
      return w;
    }
    /* scale a and b by 2**-600 */
    ha -= 0x25800000; hb -= 0x25800000; k += 600;
    SET_HIGH_WORD(a, ha);
    SET_HIGH_WORD(b, hb);
  }
  if (unlikely(hb < 0x20b00000)) {  /* b < 2**-500, 2.5.15 optimisation */
    if (hb <= 0x000fffff) {  /* subnormal b or 0 */
      uint32_t low;
      GET_LOW_WORD(low, b);
      if ((hb | low) == 0) return a;  /* b is zero */
      t1 = 0;
      SET_HIGH_WORD(t1, 0x7fd00000);  /* t1=2^1022 */
      b *= t1;
      a *= t1;
      k -= 1022;
    } else {             /* scale a and b by 2^600 */
      ha += 0x25800000;  /* a *= 2^600 */
      hb += 0x25800000;  /* b *= 2^600 */
      k -= 600;
      SET_HIGH_WORD(a, ha);
      SET_HIGH_WORD(b, hb);
    }
  }
  /* medium size a and b */
  w = a - b;
  if (w > b) {
    /* split a into t1 (its high word only) and the rest t2 */
    t1 = 0;
    SET_HIGH_WORD(t1, ha);
    t2 = a - t1;
    w  = sqrt(t1*t1 - (b*(-b) - t2*(a + t1)));
  } else {
    a  = a + a;
    /* split b into y1 (its high word only) and y2, and the doubled a into t1 and t2 */
    y1 = 0;
    SET_HIGH_WORD(y1, hb);
    y2 = b - y1;
    t1 = 0;
    SET_HIGH_WORD(t1, ha + 0x00100000);
    t2 = a - t1;
    w  = sqrt(t1*y1 - (w*(-w) - (t1*y2 + t2*b)));
  }
  if (k != 0) {
    /* undo the scaling: build 2^k by adding k to the exponent field of 1.0 */
    uint32_t high;
    t1 = 1.0;
    GET_HIGH_WORD(high, t1);
    SET_HIGH_WORD(t1, high + (k << 20));
    return t1*w;
  } else
    return w;
}


/* taken from MUSL v1.2.3, file src/math/hypotl.c; as fast as builtin hypotl, 2.34.9; the openlibm-0.4.1
   version is not faster */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* hypot for platforms where this preprocessor branch is active (long double has the range and
   precision of double): simply forwards to the C library's double-precision hypot. */
LUALIB_API long double tools_hypotl (long double x, long double y) {
  const double r = hypot(x, y);  /* arguments are converted to double by the call */
  return r;
}


/* 4.6.2 fix; avoids overflow and gained accuracy, see: https://stackoverflow.com/questions/49191477/companion-to-hypot */
/* Returns sqrt(x^2 - y^2), evaluated as sqrt(|x| - |y|)*sqrt(|x| + |y|) so that neither square can
   overflow. The signs of the arguments are irrelevant for the mathematical result, so magnitudes
   are taken first; without this, a negative x with |x| > |y| produced NaN although the result is
   well defined. If |x| < |y| the first square root receives a negative argument and the result is NaN. */
LUALIB_API long double tools_mhypotl (long double x, long double y) {
  x = fabsl(x);
  y = fabsl(y);
  return sqrt(x - y)*sqrt(x + y);
}
#elif ((LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384)
/* sqmacro(hi, lo, x): squares x into a head/tail pair.
   hi receives the rounded product x*x; lo receives xh*xh - hi + 2*xh*xl + xl*xl, where xh + xl is a
   split of x obtained with the constant SPLIT (defined elsewhere; presumably the usual Veltkamp
   splitting constant - TODO confirm), so that hi + lo approximates x^2 more closely than hi alone.
   The macro does not declare its temporaries: the caller must have long double variables named
   xc, xh and xl in scope. The argument x is evaluated several times, so pass a plain variable. */
#define sqmacro(hi,lo,x) { \
  xc = x*SPLIT; \
  xh = x - xc + xc; \
  xl = x - xh; \
  hi = x*x; \
  lo = xh*xh - hi + 2*xh*xl + xl*xl; \
}

/* Extended-precision hypot, sqrt(x^2 + y^2). The arguments are reduced to their magnitudes, ordered
   so that x carries the larger exponent, rescaled when very large or very small, and squared with
   sqmacro so that the rounding errors of the squares enter the final sum. */
LUALIB_API long double tools_hypotl (long double x, long double y) {
  long double hx, lx, hy, ly, xh, xl, xc;  /* xh, xl, xc are the scratch variables sqmacro expects */
  long double scale = 1;
  union ldshape ux = {x}, uy = {y};
  int ex, ey;
  /* clear the sign bits and fetch the biased exponents */
  ux.i.se &= 0x7fff;
  uy.i.se &= 0x7fff;
  ex = ux.i.se;
  ey = uy.i.se;
  x = ux.f;
  y = uy.f;
  if (ex < ey) {  /* make x the operand with the larger exponent */
    const int te = ex;
    const long double tv = x;
    ex = ey; ey = te;
    x = y;   y = tv;
  }
  /* Inf/NaN in the larger operand, or a zero smaller operand; an infinite y wins over a NaN x */
  if (ex == 0x7fff || y == 0)
    return tools_fpisinfl(y) ? y : x;
  /* y is too small to influence the rounded result */
  if (ex - ey > LDBL_MANT_DIG) return x + y;
  if (ex > 0x3fff+8000) {         /* scale down to keep the squares finite */
    scale = 0x1p10000L;
    x *= 0x1p-10000L;
    y *= 0x1p-10000L;
  } else if (ey < 0x3fff-8000) {  /* scale up to keep the squares away from underflow */
    scale = 0x1p-10000L;
    x *= 0x1p10000L;
    y *= 0x1p10000L;
  }
  sqmacro(hx, lx, x);
  sqmacro(hy, ly, y);
  return scale*sqrtl(ly + lx + hy + hx);
}


/* hypot4, sqrt(a^2 - b^2), based on tools_hypotl, 2.35.0 */
/* Extended-precision sqrt(x^2 - y^2), modelled on tools_hypotl. Works on the magnitudes of the
   arguments; returns AGN_NAN when the exponent of x is smaller than the exponent of y. */
LUALIB_API long double tools_mhypotl (long double x, long double y) {
  long double hx, lx, hy, ly, xh, xl, xc;  /* xh, xl, xc are the scratch variables sqmacro expects */
  long double scale = 1;
  union ldshape ux = {x}, uy = {y};
  int ex, ey;
  /* clear the sign bits and fetch the biased exponents */
  ux.i.se &= 0x7fff;
  uy.i.se &= 0x7fff;
  ex = ux.i.se;
  ey = uy.i.se;
  if (ex < ey) return AGN_NAN;  /* exponent of x below exponent of y: difference of squares is negative */
  x = ux.f;
  y = uy.f;
  /* Inf/NaN in x, or y equal to zero */
  if (ex == 0x7fff || y == 0)
    return tools_fpisinfl(y) ? y : x;
  /* y is too small to influence the rounded result */
  if (ex - ey > LDBL_MANT_DIG) return x + y;
  if (ex > 0x3fff+8000) {         /* scale down to keep the squares finite */
    scale = 0x1p10000L;
    x *= 0x1p-10000L;
    y *= 0x1p-10000L;
  } else if (ey < 0x3fff-8000) {  /* scale up to keep the squares away from underflow */
    scale = 0x1p-10000L;
    x *= 0x1p10000L;
    y *= 0x1p10000L;
  }
  sqmacro(hx, lx, x);
  sqmacro(hy, ly, y);
  return scale*sqrtl(-ly + lx - hy + hx);
}
#else
/* Fallback hypot for long double formats not handled by the other preprocessor branches: the
   arguments are narrowed to double and passed to sun_hypot. */
LUALIB_API long double tools_hypotl (long double x, long double y) {
  const double dx = (double)x;
  const double dy = (double)y;
  return sun_hypot(dx, dy);
}


/* 4.6.2 fix; avoids overflow and gained accuracy, see: https://stackoverflow.com/questions/49191477/companion-to-hypot */
/* Returns sqrt(x^2 - y^2), evaluated as sqrt(|x| - |y|)*sqrt(|x| + |y|) so that neither square can
   overflow. The signs of the arguments are irrelevant for the mathematical result, so magnitudes
   are taken first; without this, a negative x with |x| > |y| produced NaN although the result is
   well defined. If |x| < |y| the first square root receives a negative argument and the result is NaN. */
LUALIB_API long double tools_mhypotl (long double x, long double y) {
  x = fabsl(x);
  y = fabsl(y);
  return sqrt(x - y)*sqrt(x + y);
}
#endif


/* Computes x^2 + y^2 with the error-compensated splitting used by sun_hypot, but without the final
   square root; based on sun_hypot, 2.21.7.
   Large operands are scaled down by 2^600 and small ones scaled up (2^600, or 2^1022 for subnormals)
   before the sum of squares is formed; k records the exponent that has to be restored. Because the
   result is a sum of SQUARES, the operand scale factor 2^k has to be applied twice on the way out;
   applying it only once (as the square-root variant does) returns a value that is off by 2^k.
   Inf/NaN: returns +Inf if either argument is infinite, otherwise NaN if either is NaN. */
LUALIB_API double sun_pytha (double x, double y) {
  double a, b, t1, t2, y1, y2, w;
  int32_t j, k, ha, hb;
  GET_HIGH_WORD(ha, x);
  ha &= 0x7fffffff;
  GET_HIGH_WORD(hb, y);
  hb &= 0x7fffffff;
  /* order the operands so that a has the larger high word */
  if (hb > ha) { a = y; b = x; j = ha; ha = hb; hb = j; } else { a = x; b = y; }
  a = fabs(a);  /* fabs is faster than SET_HIGH_WORD(a, ha) */
  b = fabs(b);  /* ditto */
  if ((ha - hb) > 0x3c00000) {  /* a/b > 2**60: b does not affect the rounded result */
    w = a + b;
    return w*w;
  }
  k = 0;
  if (unlikely(ha > 0x5f300000)) {  /* a > 2**500 = 3.2733906078961e+150 */
    if (ha >= 0x7ff00000) {  /* Inf or NaN, 2.5.15 optimisation */
      uint32_t low;
      /* Use original arg order iff result is NaN; quieten sNaNs. */
      w = fabs(x + 0.0) - fabs(y + 0.0);
      GET_LOW_WORD(low, a);
      if (((ha & 0xfffff) | low) == 0) w = a;  /* a is Inf */
      GET_LOW_WORD(low, b);
      if (((hb ^ 0x7ff00000) | low) == 0) w = b;  /* b is Inf */
      return w;
    }
    /* scale a and b by 2**-600 */
    ha -= 0x25800000; hb -= 0x25800000; k += 600;
    SET_HIGH_WORD(a, ha);
    SET_HIGH_WORD(b, hb);
  }
  if (unlikely(hb < 0x20b00000)) {  /* b < 2**-500, 2.5.15 optimisation */
    if (hb <= 0x000fffff) {  /* subnormal b or 0 */
      uint32_t low;
      GET_LOW_WORD(low, b);
      if ((hb | low) == 0) return a*a;
      t1 = 0;
      SET_HIGH_WORD(t1, 0x7fd00000);  /* t1=2^1022 */
      b *= t1;
      a *= t1;
      k -= 1022;
    } else {             /* scale a and b by 2^600 */
      ha += 0x25800000;  /* a *= 2^600 */
      hb += 0x25800000;  /* b *= 2^600 */
      k -= 600;
      SET_HIGH_WORD(a, ha);
      SET_HIGH_WORD(b, hb);
    }
  }
  /* medium size a and b */
  w = a - b;
  if (w > b) {
    /* a > 2b: split a = t1 + t2; the expression equals t1^2 + b^2 + t2*(a + t1) = a^2 + b^2 */
    t1 = 0;
    SET_HIGH_WORD(t1, ha);
    t2 = a - t1;
    w  = t1*t1 - (b*(-b) - t2*(a + t1));
  } else {
    /* a <= 2b: with a doubled, the expression equals (a - b)^2 + 2ab = a^2 + b^2 */
    a  = a + a;
    y1 = 0;
    SET_HIGH_WORD(y1, hb);
    y2 = b - y1;
    t1 = 0;
    SET_HIGH_WORD(t1, ha + 0x00100000);
    t2 = a - t1;
    w  = t1*y1 - (w*(-w) - (t1*y2 + t2*b));
  }
  if (k != 0) {
    /* build t1 = 2^k from its biased exponent; the unsigned shift avoids left-shifting a negative int */
    t1 = 0.0;
    SET_HIGH_WORD(t1, (uint32_t)(0x3ff + k) << 20);
    /* w holds (a^2 + b^2)*2^(-2k): undo the scaling in two steps so that neither step overflows
       or underflows unless the true result does */
    return (t1*w)*t1;
  }
  return w;
}


#ifndef __ARMCPU  /* 2.37.1 */
/* Taken and adapted from MUSL-1.2.3 src/math/hypotl.c, 2.34.9 */
/* Extended-precision x^2 + y^2, derived from the MUSL hypotl algorithm without the final square root.
   Very large or very small operands are multiplied by 2^-10000 or 2^10000 before squaring; since the
   result is a sum of squares, the inverse factor z must be applied twice afterwards (applying it only
   once leaves the result wrong by a factor of 2^10000).
   Returns +Inf if either argument is infinite, and x*x (NaN for NaN input) otherwise when the larger
   operand is not finite or the smaller one is zero. */
LUALIB_API long double tools_pythal (long double x, long double y) {
  int ex, ey;
  long double hx, lx, hy, ly, z, xh, xl, xc;  /* xh, xl, xc are the scratch variables sqmacro expects */
  union ldshape ux = { x }, uy = { y };
  ux.i.se &= 0x7fff;  /* |x| */
  uy.i.se &= 0x7fff;  /* |y| */
  if (ux.i.se < uy.i.se) {  /* exponent of x smaller than exponent of y -> swap arguments */
    ex = uy.i.se;
    ey = ux.i.se;
    x = uy.f;
    y = ux.f;
  } else {
    ex = ux.i.se;
    ey = uy.i.se;
    x = ux.f;
    y = uy.f;
  }
  z = 1;
  if (ex == 0x7fff && tools_fpisinfl(y)) return y;  /* patched 2.35.0 */
  if (ex == 0x7fff || y == 0.0L) return x*x;  /* patched 2.35.0 */
  if (ex - ey > LDBL_MANT_DIG) return x*x + y*y;  /* y is negligible; patched 4.5.5 */
  if (ex > 0x3fff+8000) {         /* scale down so that the splitting in sqmacro cannot overflow */
    z = 0x1p10000L;
    x *= 0x1p-10000L;
    y *= 0x1p-10000L;
  } else if (ey < 0x3fff-8000) {  /* scale up so that the low-order parts do not underflow */
    z = 0x1p-10000L;
    x *= 0x1p10000L;
    y *= 0x1p10000L;
  }
  sqmacro(hx, lx, x);  /* adapted by awalz */
  sqmacro(hy, ly, y);  /* ditto */
  /* the sum carries the square of the operand scaling, so rescale in two steps; z*z itself would
     overflow or underflow */
  return z*(z*(ly + lx + hy + hx));
}
#else
/* Variant used when __ARMCPU is defined: delegates to the double-precision sun_pytha. */
LUALIB_API long double tools_pythal (long double x, long double y) {
  const double sumsq = sun_pytha(x, y);  /* arguments are converted to double by the call */
  return sumsq;
}
#endif


/* Based on tools_pythal, 2.35.0 */
LUALIB_API void tools_squarel (long double *hi, long double *lo, long double x) {
	long double xh, xl, xc;
  union ldshape ux = { x };
  /* adjust subnormal numbers, see MUSL-1.2.3 src/math/cbrtl.c */
	if ((ux.i.se & 0x7fff) == 0) x *= 0x1p120;
	xc = x*SPLIT;
	xh = x - xc + xc;
	xl = x - xh;
	*hi = x*x;
	*lo = xh*xh - *hi + 2*xh*xl + xl*xl;
	/* the square of x is hi + lo */
}


#ifndef __ARMCPU
/* Extended-precision x^2 - y^2, derived from tools_pythal.
   The operands are reduced to magnitudes and ordered by exponent; `sign` remembers whether they were
   swapped, in which case the difference of squares is negated. Three defects of the previous version
   are corrected here:
   - when the smaller operand is negligible the function returned x + y instead of +/-(x^2 - y^2);
   - the rescaling factor overwrote the sign, so swapped operands with extreme exponents lost the
     minus sign;
   - the rescaling factor was applied once although the result is a difference of SQUARES and
     therefore carries the square of the operand scaling. */
LUALIB_API long double tools_mpythal (long double x, long double y) {
  int ex, ey;
  long double hx, lx, hy, ly, xh, xl, xc;  /* xh, xl, xc are the scratch variables sqmacro expects */
  long double sign, scale;
  union ldshape ux = { x }, uy = { y };
  ux.i.se &= 0x7fff;
  uy.i.se &= 0x7fff;
  if (ux.i.se < uy.i.se) {  /* exponent of x smaller than exponent of y -> swap and negate */
    ex = uy.i.se;
    ey = ux.i.se;
    x = uy.f;
    y = ux.f;
    sign = -1.0L;
  } else {
    ex = ux.i.se;
    ey = uy.i.se;
    x = ux.f;
    y = uy.f;
    sign = 1.0L;
  }
  if (ex == 0x7fff && tools_fpisinfl(y)) return y;  /* patched 2.35.0 */
  if (ex == 0x7fff || y == 0) {
    return sign*x*x;  /* patched 2.35.0 */
  }
  /* y is negligible against x: the plain squares are as accurate as the compensated form */
  if (ex - ey > LDBL_MANT_DIG) return sign*(x*x - y*y);
  scale = 1.0L;
  if (ex > 0x3fff+8000) {         /* scale down so that the splitting in sqmacro cannot overflow */
    scale = 0x1p10000L;
    x *= 0x1p-10000L;
    y *= 0x1p-10000L;
  } else if (ey < 0x3fff-8000) {  /* scale up so that the low-order parts do not underflow */
    scale = 0x1p-10000L;
    x *= 0x1p10000L;
    y *= 0x1p10000L;
  }
  sqmacro(hx, lx, x);
  sqmacro(hy, ly, y);
  /* undo the squared scaling in two steps; scale*scale itself would overflow or underflow */
  return sign*(scale*(scale*(-ly + lx - hy + hx)));
}
#else
/* Variant used when __ARMCPU is defined: x^2 - y^2 via a fused multiply-add, x*x + (-(y^2)). */
LUALIB_API long double tools_mpythal (long double x, long double y) {
  const long double neg_ysq = -y*y;
  return fma(x, x, neg_ysq);
}
#endif


/* Returns sqrt(1 + y^2). This is the two-argument hypot algorithm with one operand fixed at 1.0:
   a holds the operand with the larger high word, b the smaller one; ha and hb are their high words
   with the sign bit cleared. */
LUALIB_API double sun_hypot2 (double y) {  /* 2.14.13, sqrt(1 + y^2), 44 % faster than MinGW's _hypot */
  double a, b, t1, t2, y1, y2, w;
  int32_t j, k, ha, hb;
  ha = 0x3ff00000;  /* high word of 1.0 */
  GET_HIGH_WORD(hb, y);
  hb &= 0x7fffffff;  /* high word of |y| */
  if (hb > ha) { a = fabs(y); b = 1; j = ha; ha = hb; hb = j; } else { a = 1; b = fabs(y); }
  /* the operands differ by more than 2**60, so the smaller one cannot change the rounded result */
  if ((ha - hb) > 0x3c00000) return a + b;  /* y = 0 || 1/y > 2**60 */
  /* NOTE(review): since one operand is always 1.0, the early return above appears to catch every
     input that would satisfy the two range tests below (a > 2**500, b < 2**-500), including Inf and
     NaN; the scaling code looks unreachable here - confirm before removing it. */
  k = 0;  /* exponent to restore at the end if the operands were rescaled */
  if (unlikely(ha > 0x5f300000)) {  /* a > 2**500 = 3.2733906078961e+150, 2.5.15 optimisation */
    if (ha >= 0x7ff00000) {  /* Inf or NaN */
      uint32_t low;
      /* Use original arg order iff result is NaN; quieten sNaNs. */
      w = 1 - fabs(y + 0.0);
      GET_LOW_WORD(low, a);
      if (((ha & 0xfffff) | low) == 0) w = a;  /* a is Inf */
      GET_LOW_WORD(low, b);
      if (((hb ^ 0x7ff00000) | low) == 0) w = b;  /* b is Inf */
      return w;
    }
    /* scale a and b by 2**-600 */
    ha -= 0x25800000; hb -= 0x25800000; k += 600;
    SET_HIGH_WORD(a, ha);
    SET_HIGH_WORD(b, hb);
  }
  if (unlikely(hb < 0x20b00000)) {     /* b < 2**-500, 2.5.15 optimisation */
    if (hb <= 0x000fffff) {  /* subnormal b or 0 */
      uint32_t low;
      GET_LOW_WORD(low, b);
      if ((hb | low) == 0) return a;  /* b is exactly zero */
      t1 = 0;
      SET_HIGH_WORD(t1, 0x7fd00000);  /* t1=2^1022 */
      b *= t1;
      a *= t1;
      k -= 1022;
    } else {             /* scale a and b by 2^600 */
      ha += 0x25800000;  /* a *= 2^600 */
      hb += 0x25800000;  /* b *= 2^600 */
      k -= 600;
      SET_HIGH_WORD(a, ha);
      SET_HIGH_WORD(b, hb);
    }
  }
  /* medium size a and b */
  w = a - b;
  if (w > b) {  /* a > 2b, i.e. |y| < 0.5 or |y| > 2 */
    /* split a = t1 + t2 with t1 holding only the high word of a;
       the radicand equals t1^2 + b^2 + t2*(a + t1) = a^2 + b^2 */
    t1 = 0;
    SET_HIGH_WORD(t1, ha);
    t2 = a - t1;
    w  = sqrt(t1*t1 - (b*(-b) - t2*(a + t1)));
  } else {
    /* a <= 2b: with a doubled, the radicand equals (a - b)^2 + 2ab = a^2 + b^2;
       b is split as y1 + y2 and the doubled a as t1 + t2 */
    a  = a + a;
    y1 = 0;
    SET_HIGH_WORD(y1, hb);
    y2 = b - y1;
    t1 = 0;
    SET_HIGH_WORD(t1, ha + 0x00100000);
    t2 = a - t1;
    w  = sqrt(t1*y1 - (w*(-w) - (t1*y2 + t2*b)));
  }
  if (k != 0) {
    /* multiply by 2^k: add k to the exponent field of 1.0 */
    uint32_t high;
    t1 = 1.0;
    GET_HIGH_WORD(high, t1);
    SET_HIGH_WORD(t1, high + (k << 20));
    return t1*w;
  } else
    return w;
}


/* Returns sqrt(1 - y^2), 2.14.13. Returns AGN_NAN if |y| > 1 or y is NaN.
   After the domain check |y| <= 1, so 1.0 is always the larger operand; the operand swap, the
   overflow/underflow scaling and the Inf/NaN handling inherited from the hypot template could never
   execute and have been dropped. The radicand is formed as (1 - |y|)*(1 + |y|) rather than
   1 - y*y: the subtraction 1 - |y| is exact for |y| >= 0.5, which avoids the cancellation the
   squared form suffers when |y| is close to 1. */
LUALIB_API double sun_hypot3 (double y) {
  double b;
  int32_t hb;
  uint32_t lb;
  EXTRACT_WORDS(hb, lb, y);
  hb &= 0x7fffffff;  /* high word of |y| */
  /* or-ing in "low word is non-zero" makes 1.0 the largest accepted magnitude */
  if ((hb | ((lb | (-lb)) >> 31)) > 0x3ff00000) return AGN_NAN;  /* |y| > 1 or NaN */
  b = fabs(y);
  /* |y| is more than 2**60 times smaller than 1 (or zero): the result rounds to 1 */
  if ((0x3ff00000 - hb) > 0x3c00000) return 1.0 + b;
  return sqrt((1.0 - b)*(1.0 + b));
}


/* @(#)e_atan2.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 */

/* __ieee754_atan2(y,x)
 * Method :
 *  1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
 *  2. Reduce x to positive by (if x and y are unexceptional):
 *    ARG (x+iy) = arctan(y/x)        ... if x > 0,
 *    ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
 *
 * Special cases:
 *
 *  ATAN2((anything), NaN ) is NaN;
 *  ATAN2(NAN , (anything) ) is NaN;
 *  ATAN2(+-0, +(anything but NaN)) is +-0  ;
 *  ATAN2(+-0, -(anything but NaN)) is +-pi ;
 *  ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
 *  ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
 *  ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
 *  ATAN2(+-INF,+INF ) is +-pi/4 ;
 *  ATAN2(+-INF,-INF ) is +-3pi/4;
 *  ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 */

/* Constants shared by the arc tangent routines; the hexadecimal words are the intended bit patterns. */
const double pi_o_4 = 7.8539816339744827900E-01;  /* pi/4, 0x3FE921FB, 0x54442D18 */
const double pi_o_2 = 1.5707963267948965580E+00;  /* pi/2, 0x3FF921FB, 0x54442D18 */
const double pi     = 3.1415926535897931160E+00;  /* pi,   0x400921FB, 0x54442D18 */
const double huge   = 1.0e300;                    /* large value added to an argument to raise inexact */

/* @(#)s_atan.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* atan(x)
 * Method
 *   1. Reduce x to positive by atan(x) = -atan(-x).
 *   2. According to the integer k=4t+0.25 chopped, t=x, the argument
 *      is further reduced to one of the following intervals and the
 *      arctangent of t is evaluated by the corresponding formula:
 *
 *      [0,7/16]      atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
 *      [7/16,11/16]  atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
 *      [11/16,19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
 *      [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
 *      [39/16,INF]   atan(x) = atan(INF) + atan( -1/t )
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 *
 * The function is half as fast as GCC's atanh.
 *
 */

/* Leading parts of the reference arc tangents, indexed by the reduction interval id used in sun_atan. */
static const double atanhi[4] = {
  [0] = 4.63647609000806093515e-01,  /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
  [1] = 7.85398163397448278999e-01,  /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
  [2] = 9.82793723247329054082e-01,  /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
  [3] = 1.57079632679489655800e+00   /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
};

/* Trailing parts belonging to the entries of atanhi, same indexing. */
static const double atanlo[4] = {
  [0] = 2.26987774529616870924e-17,  /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
  [1] = 3.06161699786838301793e-17,  /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
  [2] = 1.39033110312309984516e-17,  /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
  [3] = 6.12323399573676603587e-17   /* atan(inf)lo 0x3C91A626, 0x33145C07 */
};

/* Coefficients of the polynomial evaluated in sun_atan after argument reduction. */
static const double aT[11] = {
  [0]  =  3.33333333333329318027e-01,  /* 0x3FD55555, 0x5555550D */
  [1]  = -1.99999999998764832476e-01,  /* 0xBFC99999, 0x9998EBC4 */
  [2]  =  1.42857142725034663711e-01,  /* 0x3FC24924, 0x920083FF */
  [3]  = -1.11111104054623557880e-01,  /* 0xBFBC71C6, 0xFE231671 */
  [4]  =  9.09088713343650656196e-02,  /* 0x3FB745CD, 0xC54C206E */
  [5]  = -7.69187620504482999495e-02,  /* 0xBFB3B0F2, 0xAF749A6D */
  [6]  =  6.66107313738753120669e-02,  /* 0x3FB10D66, 0xA0D03D51 */
  [7]  = -5.83357013379057348645e-02,  /* 0xBFADDE2D, 0x52DEFD9A */
  [8]  =  4.97687799461593236017e-02,  /* 0x3FA97B4B, 0x24760DEB */
  [9]  = -3.65315727442169155270e-02,  /* 0xBFA2B444, 0x2C6A6C2F */
  [10] =  1.62858201153657823623e-02   /* 0x3F90AD3A, 0xE322DA11 */
};

/* Arc tangent of x. The argument is classified by the high word of |x|, reduced to a small value
   via one of four reference points (0.5, 1, 1.5, infinity; index id into atanhi/atanlo), and the
   remainder is evaluated with the odd/even split polynomial built from aT[]. id = -1 means no
   reduction was necessary. NaN input yields NaN. */
LUALIB_API double sun_atan (double x) {
  double w, s1, s2, z;
  int32_t ix, hx, id;
  GET_HIGH_WORD(hx, x);
  ix = hx & 0x7fffffff;  /* high word of |x| */
  if (ix >= 0x44100000) {  /* if |x| >= 2^66 */
    uint32_t low;
    GET_LOW_WORD(low, x);
    if (ix > 0x7ff00000 ||
       (ix == 0x7ff00000 && (low != 0)))
       return x + x;  /* NaN */
    /* result is +-(atanhi[3] + atanlo[3]); the low part is read through a volatile-qualified
       pointer, presumably so that the addition is carried out at run time - TODO confirm */
    if (hx > 0) return atanhi[3] + *(volatile double *)&atanlo[3];
    else return -atanhi[3] - *(volatile double *)&atanlo[3];
  }
  if (ix < 0x3fdc0000) {  /* |x| < 0.4375 */
    if (ix < 0x3e400000) {  /* |x| < 2^-27 */
      if (huge + x > one) return x;  /* raise inexact */
    }
    id = -1;  /* no argument reduction: the polynomial is applied to x itself */
  } else {
    x = fabs(x);  /* the sign is restored from hx at the end */
    if (ix < 0x3ff30000) {    /* |x| < 1.1875 */
      if (ix < 0x3fe60000) {  /* 7/16 <=|x|<11/16 */
        id = 0; x = (2.0*x - one)/(2.0 + x);
      } else {  /* 11/16<=|x|< 19/16 */
        id = 1; x = (x - one)/(x + one);
      }
    } else {
      if (ix < 0x40038000) {  /* |x| < 2.4375 */
        id = 2; x = (x - 1.5)/(one + 1.5*x);
      } else {  /* 2.4375 <= |x| < 2^66 */
        id = 3; x = -1.0/x;
      }
    }
  }
  /* end of argument reduction */
  z = x*x;
  w = z*z;
  /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
  s1 = z*(aT[0] + w*(aT[2] + w*(aT[4] + w*(aT[6] + w*(aT[8] + w*aT[10])))));
  s2 = w*(aT[1] + w*(aT[3] + w*(aT[5] + w*(aT[7] + w*aT[9]))));
  if (id < 0) return x - x*(s1 + s2);
  else {
    /* add the reference value in two parts (atanhi, atanlo) and reapply the sign of the input */
    z = atanhi[id] - ((x*(s1 + s2) - atanlo[id]) - x);
    return (hx < 0) ? -z : z;
  }
}


/* Two-argument arc tangent of y/x with quadrant selection. Special inputs (NaN, x = 1, zero or
   infinite operands) are dispatched first; otherwise atan(|y/x|) is computed with sun_atan and
   mapped into the quadrant encoded in m. The names tiny, zero and pi_lo are defined elsewhere in
   the file. */
LUALIB_API double sun_atan2 (double y, double x) {
  double z;
  int32_t k, m, hx, hy, ix, iy;
  uint32_t lx, ly;
  EXTRACT_WORDS(hx, lx, x);
  ix = hx & 0x7fffffff;  /* high word of |x| */
  EXTRACT_WORDS(hy, ly, y);
  iy = hy & 0x7fffffff;  /* high word of |y| */
  /* (l | -l) >> 31 is 1 exactly when the low word is non-zero, so the comparison detects NaN */
  if (unlikely(((ix | ((lx | -lx) >> 31)) > 0x7ff00000) ||  /* 2.5.15 optimisation */
     ((iy | ((ly | -ly) >> 31)) > 0x7ff00000)))  /* x or y is NaN */
    return x + y;
  if (((hx - 0x3ff00000) | lx) == 0) return sun_atan(y);   /* x=1.0 */
  /* m: bit 0 = sign bit of y, bit 1 = sign bit of x */
  m = ((hy >> 31) & 1) | ((hx >> 30) & 2);  /* 2*sign(x)+sign(y) */
  /* when y = 0 */
  if ((iy | ly) == 0) {
    switch(m) {
      case 0:
      case 1: return y;         /* atan(+-0,+anything)=+-0 */
      case 2: return  pi + tiny;  /* atan(+0,-anything) = pi */
      case 3: return -pi - tiny;  /* atan(-0,-anything) =-pi */
    }
  }
  /* when x = 0 */
  if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
  /* when x is INF */
  if (unlikely(ix == 0x7ff00000)) {  /* 2.5.15 optimisation */
    if (iy == 0x7ff00000) {  /* y is INF as well */
      switch(m) {
        case 0: return  pi_o_4 + tiny;      /* atan(+INF,+INF) */
        case 1: return -pi_o_4 - tiny;      /* atan(-INF,+INF) */
        case 2: return  3.0*pi_o_4 + tiny;  /*atan(+INF,-INF)*/
        case 3: return -3.0*pi_o_4 - tiny;  /*atan(-INF,-INF)*/
      }
    } else {  /* y is finite */
      switch(m) {
        case 0: return  zero;       /* atan(+...,+INF) */
        case 1: return -zero;       /* atan(-...,+INF) */
        case 2: return  pi + tiny;  /* atan(+...,-INF) */
        case 3: return -pi - tiny;  /* atan(-...,-INF) */
      }
    }
  }
  /* when y is INF */
  if (unlikely(iy == 0x7ff00000)) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;  /* 2.5.15 optimisation */
  /* compute y/x */
  k = (iy - ix) >> 20;  /* difference of the binary exponents of y and x */
  if (k > 60) {       /* |y/x| >  2**60 */
    z = pi_o_2 + 0.5*pi_lo;
    m &= 1;  /* only the sign of y matters in this case */
  }
  else if (hx < 0 && k <- 60) z = 0.0;   /* 0 > |y|/x > -2**-60; the test reads k < -60 */
  else z = sun_atan(fabs(y/x));    /* safe to do y/x */
  switch (m) {
    case 0: return z;  /* atan(+,+) */
    case 1: return -z;  /* atan(-,+) */
    case 2: return pi - (z - pi_lo);/* atan(+,-) */
    default: /* case 3 */
      return (z - pi_lo) - pi;/* atan(-,-) */
  }
}


/* @(#)e_atanh.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * __ieee754_atanh(x)
 *
 * 15 percent faster than GCC's atanh.
 *
 * Method :
 *    1.Reduced x to positive by atanh(-x) = -atanh(x)
 *    2.For x>=0.5
 *                  1              2x                          x
 *	atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
 *                  2             1 - x                      1 - x
 *
 * 	For x<0.5
 *	atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
 *
 * Special cases:
 *	atanh(x) is AGN_NAN if |x| > 1;
 *	atanh(NaN) is AGN_NAN;
 *	atanh(+-1) is AGN_NAN in this adaptation (the original fdlibm code returned +-INF with signal).
 *
 */

/* Inverse hyperbolic tangent. Returns AGN_NAN for |x| >= 1 and for NaN; arguments smaller than
   2**-28 in magnitude are returned unchanged. Otherwise the value is computed for |x| with log1p
   and the sign of the input is reapplied. */
LUALIB_API double sun_atanh (double x) {
  int32_t hi, mag;
  uint32_t lo;
  double r;
  EXTRACT_WORDS(hi, lo, x);
  mag = hi & 0x7fffffff;  /* high word of |x| */
  /* (lo | -lo) >> 31 is 1 exactly when the low word is non-zero */
  if ((mag | ((lo | (-lo)) >> 31)) > 0x3ff00000) return AGN_NAN;  /* |x| > 1 or NaN */
  if (mag == 0x3ff00000) return AGN_NAN;  /* |x| = 1; the original returned x/zero = +/-inf */
  if (mag < 0x3e300000 && (huge + x) > zero) return x;  /* |x| < 2**-28 */
  SET_HIGH_WORD(x, mag);  /* x := |x| */
  if (mag >= 0x3fe00000) {  /* |x| >= 0.5 */
    r = 0.5*log1p((x + x)/(one - x));
  } else {                  /* |x| < 0.5 */
    r = x + x;
    r = 0.5*log1p(r + r*x/(one - x));
  }
  return (hi < 0) ? -r : r;
}


/* @(#)k_sin.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __kernel_sin( x, y, iy)
 * kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
 * Input y is the tail of x.
 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
 *
 * Algorithm
 *  1. Since sin(-x) = -sin(x), we need only to consider positive x.
 *  2. Callers must return sin(-0) = -0 without calling here since our
 *     odd polynomial is not evaluated in a way that preserves -0.
 *     Callers may do the optimisation sin(x) ~ x for tiny x.
 *  3. sin(x) is approximated by a polynomial of degree 13 on
 *     [0,pi/4]
 *                 3            13
 *       sin(x) ~ x + S1*x + ... + S6*x
 *     where
 *
 *   |sin(x)         2     4     6     8     10     12  |     -58
 *   |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
 *   |  x                      |
 *
 *  4. sin(x+y) = sin(x) + sin'(x')*y
 *        ~ sin(x) + (1-x*x/2)*y
 *     For better accuracy, let
 *         3      2      2      2      2
 *    r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
 *     then                   3    2
 *    sin(x) = x + (S1*x + (x *(r-y/2)+y))
 */

static FORCE_INLINE double kernel_sin (double x, double y, int iy) {  /* inlined 2.16.13 */
  double z, r, v, w;
  z =  x*x;
  w =  z*z;
  r =  S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
  v =  z*x;
  if (iy == 0) return x + v*(S1 + z*r);
  else return x - ((z*(half*y - v*r) - y) - v*S1);
}

/* @(#)k_cos.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * __kernel_cos( x,  y )
 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
 * Input y is the tail of x.
 *
 * Algorithm
 *  1. Since cos(-x) = cos(x), we need only to consider positive x.
 *  2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
 *  3. cos(x) is approximated by a polynomial of degree 14 on
 *     [0,pi/4]
 *                         4            14
 *       cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
 *     where the remez error is
 *
 *   |              2     4     6     8     10    12     14 |     -58
 *   |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
 *   |                             |
 *
 *                  4     6     8     10    12     14
 *  4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
 *         cos(x) ~ 1 - x*x/2 + r
 *     since cos(x+y) ~ cos(x) - sin(x)*y
 *        ~ cos(x) - x*y,
 *     a correction term is necessary in cos(x) and hence
 *    cos(x+y) = 1 - (x*x/2 - (r - x*y))
 *     For better accuracy, rearrange to
 *    cos(x+y) ~ w + (tmp + (r-x*y))
 *     where w = 1 - x*x/2 and tmp is a tiny correction term
 *     (1 - x*x/2 == w + tmp exactly in infinite precision).
 *     The exactness of w + tmp in infinite precision depends on w
 *     and tmp having the same precision as x.  If they have extra
 *     precision due to compiler bugs, then the extra precision is
 *     only good provided it is retained in all terms of the final
 *     expression for cos().  Retention happens in all cases tested
 *     under FreeBSD, so don't pessimize things by forcibly clipping
 *     any extra precision in w.
 */

static FORCE_INLINE double kernel_cos (double x, double y) {  /* inlined 2.16.13 */
  double hz, z, r, w;
  z  = x*x;
  w  = z*z;
  r  = z*(C1 + z*(C2 + z*C3)) + w*w*(C4 + z*(C5 + z*C6));
  hz = 0.5*z;
  w  = one - hz;
  return w + (((one - w) - hz) + (z*r - x*y));
}


/* @(#)s_sin.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* sin(x)
 * Return sine function of x.
 *
 * kernel function:
 *  __kernel_sin    ... sine function on [-pi/4,pi/4]
 *  __kernel_cos    ... cose function on [-pi/4,pi/4]
 *  __ieee754_rem_pio2  ... argument reduction routine
 *
 * Method.
 *      Let S,C and T denote the sin, cos and tan respectively on
 *  [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
 *  in [-pi/4 , +pi/4], and let n = k mod 4.
 *  We have
 *
 *          n        sin(x)      cos(x)        tan(x)
 *     ----------------------------------------------------------
 *      0         S     C     T
 *      1         C    -S    -1/T
 *      2        -S    -C     T
 *      3        -C     S    -1/T
 *     ----------------------------------------------------------
 *
 * Special cases:
 *      Let trig be any of sin, cos, or tan.
 *      trig(+-INF)  is NaN, with signals;
 *      trig(NaN)    is that NaN;
 *
 * Accuracy:
 *  TRIG(x) returns trig(x) nearly rounded
 */

/* Sine of x. Inf and NaN give NaN; arguments with |x| up to about pi/4 go straight to kernel_sin;
   everything else is reduced with sun_rem_pio2 and dispatched on the quadrant. */
LUALIB_API double sun_sin (double x) {
  double red[2];  /* reduced argument: head and tail */
  int32_t hw;
  GET_HIGH_WORD(hw, x);
  hw &= 0x7fffffff;  /* high word of |x| */
  /* sin(Inf or NaN) is NaN */
  if (unlikely(hw >= 0x7ff00000)) return x - x;  /* 2.5.15 optimisation */
  if (hw > 0x3fe921fb) {  /* |x| above ~pi/4: argument reduction needed */
    switch (sun_rem_pio2(x, red) & 3) {
      case 0:  return  kernel_sin(red[0], red[1], 1);
      case 1:  return  kernel_cos(red[0], red[1]);
      case 2:  return -kernel_sin(red[0], red[1], 1);
      default: return -kernel_cos(red[0], red[1]);
    }
  }
  /* |x| ~< pi/4; for |x| < 2**-26 return x itself, the conversion generates inexact */
  if (hw < 0x3e500000 && (int) x == 0) return x;
  return kernel_sin(x, 0.0, 0);
}

/* @(#)s_cos.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* cos(x)
 * Return cosine function of x.
 *
 * kernel function:
 *  __kernel_sin    ... sine function on [-pi/4,pi/4]
 *  __kernel_cos    ... cosine function on [-pi/4,pi/4]
 *  __ieee754_rem_pio2  ... argument reduction routine
 *
 * Method.
 *      Let S,C and T denote the sin, cos and tan respectively on
 *  [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
 *  in [-pi/4 , +pi/4], and let n = k mod 4.
 *  We have
 *
 *          n        sin(x)      cos(x)        tan(x)
 *     ----------------------------------------------------------
 *      0         S     C     T
 *      1         C    -S    -1/T
 *      2        -S    -C     T
 *      3        -C     S    -1/T
 *     ----------------------------------------------------------
 *
 * Special cases:
 *      Let trig be any of sin, cos, or tan.
 *      trig(+-INF)  is NaN, with signals;
 *      trig(NaN)    is that NaN;
 *
 * Accuracy:
 *  TRIG(x) returns trig(x) nearly rounded
 */

/* Cosine of x. Inf and NaN give NaN; arguments with |x| up to about pi/4 go straight to kernel_cos;
   everything else is reduced with sun_rem_pio2 and dispatched on the quadrant. */
LUALIB_API double sun_cos (double x) {
  double red[2];  /* reduced argument: head and tail */
  int32_t hw;
  GET_HIGH_WORD(hw, x);
  hw &= 0x7fffffff;  /* high word of |x| */
  /* cos(Inf or NaN) is NaN */
  if (unlikely(hw >= 0x7ff00000)) return x - x;  /* 2.5.15 optimisation */
  if (hw > 0x3fe921fb) {  /* |x| above ~pi/4: argument reduction needed */
    switch (sun_rem_pio2(x, red) & 3) {
      case 0:  return  kernel_cos(red[0], red[1]);
      case 1:  return -kernel_sin(red[0], red[1], 1);
      case 2:  return -kernel_cos(red[0], red[1]);
      default: return  kernel_sin(red[0], red[1], 1);
    }
  }
  /* |x| ~< pi/4; for |x| < 2**-27 * sqrt(2) return 1, the conversion generates inexact */
  if (hw < 0x3e46a09e && ((int)x) == 0) return 1.0;
  return kernel_cos(x, 0.0);
}


/* @(#)k_tan.c 1.5 04/04/22 SMI */

/*
 * ====================================================
 * Copyright 2004 Sun Microsystems, Inc.  All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __kernel_tan( x, y, k )
 * kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
 * Input y is the tail of x.
 * Input k indicates whether tan (if k = 1) or -1/tan (if k = -1) is returned.
 *
 * Algorithm
 *  1. Since tan(-x) = -tan(x), we need only to consider positive x.
 *  2. Callers must return tan(-0) = -0 without calling here since our
 *     odd polynomial is not evaluated in a way that preserves -0.
 *     Callers may do the optimisation tan(x) ~ x for tiny x.
 *  3. tan(x) is approximated by a odd polynomial of degree 27 on
 *     [0,0.67434]
 *                 3             27
 *       tan(x) ~ x + T1*x + ... + T13*x
 *     where
 *
 *           |tan(x)         2     4            26   |     -59.2
 *           |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
 *           |  x           |
 *
 *     Note: tan(x+y) = tan(x) + tan'(x)*y
 *              ~ tan(x) + (1+x*x)*y
 *     Therefore, for better accuracy in computing tan(x+y), let
 *         3      2      2       2       2
 *    r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
 *     then
 *             3    2
 *    tan(x+y) = x + (T1*x + (x *(r+y)+y))
 *
 *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
 *    tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
 *           = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
 */

/* Constant pool of the tangent kernel: entries 0..12 are the polynomial coefficients (accessed
   through the alias T), followed by one, pi/4 and the low-order part of pi/4. */
static const double xxx[16] = {
  [0]  =  3.33333333333334091986e-01,  /* 3FD55555, 55555563 */
  [1]  =  1.33333333333201242699e-01,  /* 3FC11111, 1110FE7A */
  [2]  =  5.39682539762260521377e-02,  /* 3FABA1BA, 1BB341FE */
  [3]  =  2.18694882948595424599e-02,  /* 3F9664F4, 8406D637 */
  [4]  =  8.86323982359930005737e-03,  /* 3F8226E3, E96E8493 */
  [5]  =  3.59207910759131235356e-03,  /* 3F6D6D22, C9560328 */
  [6]  =  1.45620945432529025516e-03,  /* 3F57DBC8, FEE08315 */
  [7]  =  5.88041240820264096874e-04,  /* 3F4344D8, F2F26501 */
  [8]  =  2.46463134818469906812e-04,  /* 3F3026F7, 1A8D1068 */
  [9]  =  7.81794442939557092300e-05,  /* 3F147E88, A03792A6 */
  [10] =  7.14072491382608190305e-05,  /* 3F12B80F, 32F0A7E9 */
  [11] = -1.85586374855275456654e-05,  /* BEF375CB, DB605373 */
  [12] =  2.59073051863633712884e-05,  /* 3EFB2A70, 74BF7AD4 */
  [13] =  1.00000000000000000000e+00,  /* 3FF00000, 00000000, one */
  [14] =  7.85398163397448278999e-01,  /* 3FE921FB, 54442D18, pio4 */
  [15] =  3.06161699786838301793e-17   /* 3C81A626, 33145C07, pio4lo */
};
#define  pio4    xxx[14]
#define  pio4lo  xxx[15]
#define  T       xxx

/* @(#)s_tan.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */


/* tan(x)
 * Return tangent function of x.
 *
 * kernel function:
 *  __kernel_tan    ... tangent function on [-pi/4,pi/4]
 *  __ieee754_rem_pio2  ... argument reduction routine
 *
 * Method.
 *      Let S,C and T denote the sin, cos and tan respectively on
 *  [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
 *  in [-pi/4 , +pi/4], and let n = k mod 4.
 *  We have
 *
 *      n        sin(x)   cos(x)    tan(x)
 *     ----------------------------------------------------------
 *      0         S     C     T
 *      1         C    -S    -1/T
 *      2        -S    -C     T
 *      3        -C     S    -1/T
 *     ----------------------------------------------------------
 *
 * Special cases:
 *      Let trig be any of sin, cos, or tan.
 *      trig(+-INF)  is NaN, with signals;
 *      trig(NaN)    is that NaN;
 *
 * Accuracy:
 *  TRIG(x) returns trig(x) nearly rounded
 */

/* Tangent kernel for an argument already reduced to about [-pi/4, pi/4].
 *   x   leading part of the reduced argument
 *   y   low-order tail of the argument (the function works on x + y)
 *   iy  selector: 1 returns tan(x + y), any other value (callers pass -1) returns -1/tan(x + y)
 * The statement order is significant for the rounding behaviour and must not be rearranged. */
static FORCE_INLINE double kernel_tan (double x, double y, int iy) {  /* inlined 2.16.13 */
  double z, r, v, w, s;
  int32_t ix, hx;
  GET_HIGH_WORD(hx, x);
  ix = hx & 0x7fffffff;   /* high word of |x| */
  if (ix < 0x3e300000) {  /* x < 2**-28, added 3.2.1 */
    if ((int) x == 0) {	  /* generate inexact */
      uint32_t lx;
      GET_LOW_WORD(lx, x);
      /* all bits of |x| are zero and iy == -1: the reciprocal of a zero argument is requested */
      if (((ix | lx) | (iy + 1)) == 0) {
        return one/fabs(x);
      } else {
        if (iy == 1)
          return x;  /* tan(x) ~ x for tiny x */
        else {	/* compute -1 / (x+y) carefully */
          double a, t;
          z = w = x + y;
          SET_LOW_WORD(z, 0);  /* = __LO(z) = 0; z keeps only the leading bits of x + y */
          v = y - (z - x);     /* v is the remainder, so that z + v = x + y */
          t = a = -one / w;
          SET_LOW_WORD(t, 0);  /* __LO(t) = 0; */
          s = one + t*z;
          return t + a*(s + t*v);  /* correction of the truncated reciprocal t */
        }
      }
    }
  }
  if (ix >= 0x3FE59428) {  /* |x| >= 0.6744 */
    /* work on |x| and replace the argument by pi/4 - |x|; the sign is restored on return */
    if (hx < 0) {
      x = -x;
      y = -y;
    }
    z = pio4 - x;
    w = pio4lo - y;
    x = z + w;
    y = 0.0;
  }
  z = x*x;
  w = z*z;
  /* Break x^5*(T[1]+x^2*T[2]+...) into  x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + x^5(x^2*(T[2]+x^4*T[4]+...+x^22*T[12])) */
  r = T[1] + w*(T[3] + w*(T[5] + w*(T[7] + w*(T[9] + w*T[11]))));
  v = z*(T[2] + w*(T[4] + w*(T[6] + w*(T[8] + w*(T[10] + w*T[12])))));
  s = z * x;  /* x^3 */
  r = y + z*(s*(r + v) + y);
  r += T[0] * s;
  w = x + r;  /* polynomial value for the (possibly transformed) argument */
  if (ix >= 0x3FE59428) {  /* |x| >= 0.6744 */
    v = (double)iy;
    /* (1 - ((hx >> 30) & 2)) is +1 for a non-negative original x and -1 for a negative one */
    return (double)(1 - ((hx >> 30) & 2))*(v - 2.0*(x - (w*w/(w + v) - r)));
  }
  if (iy == 1)
    return w;
  else {
    /* if allow error up to 2 ulp, simply return -1.0 / (x+r) here, compute -1.0 / (x+r) accurately */
    double a, t;
    z = w;
    SET_LOW_WORD(z, 0);
    v = r - (z - x);  /* z+v = r+x */
    t = a = -1.0/w;   /* a = -1.0/w */
    SET_LOW_WORD(t, 0);
    s = 1.0 + t*z;
    return t + a*(s + t*v);
  }
}

/* Tangent of x. Arguments with |x| up to about pi/4 go straight to kernel_tan(); Inf and NaN yield
   NaN; everything else is first reduced by sun_rem_pio2() and the parity of the returned multiple
   of pi/2 selects tan or -1/tan in the kernel. */
LUALIB_API double sun_tan (double x) {
  double rem[2];
  int32_t hiabs, quadrant;
  GET_HIGH_WORD(hiabs, x);
  hiabs &= 0x7fffffff;  /* drop the sign bit: high word of |x| */
  if (hiabs >= 0x7ff00000) {
    return x - x;  /* tan(Inf or NaN) is NaN */
  }
  if (hiabs <= 0x3fe921fb) {
    /* |x| ~< pi/4: no reduction needed, the tail is zero; subnormals are handled by kernel_tan, 3.2.2 */
    return kernel_tan(x, 0.0, 1);
  }
  quadrant = sun_rem_pio2(x, rem);  /* rem[0] + rem[1] is the reduced argument */
  /* even multiple of pi/2 -> 1 (tan), odd multiple -> -1 (-1/tan) */
  return kernel_tan(rem[0], rem[1], (quadrant & 1) ? -1 : 1);
}


/* @(#)s_asinh.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * asinh(x)
 * Method :
 *  Based on
 *    asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
 *  we have
 *  asinh(x) := x  if  1+x*x=1,
 *     := sign(x)*(log(x)+ln2)) for large |x|, else
 *     := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
 *     := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
 */

/* Inverse hyperbolic sine of x, 2.14.13 (15 % faster than GCC's asinh).
   Inf and NaN are passed through as x + x, arguments below 2**-28 in magnitude are returned
   unchanged; otherwise one of three formulas is selected by the magnitude of x and the sign of x
   is applied to the result. */
LUALIB_API double sun_asinh (double x) {
  int32_t hi, abshi;
  double ax, res;
  GET_HIGH_WORD(hi, x);
  abshi = hi & 0x7fffffff;  /* high word of |x| */
  if (unlikely(abshi >= 0x7ff00000)) return x + x;  /* x is inf or NaN, 2.5.15 optimisation */
  /* |x| < 2**-28: return x itself, the comparison only serves to raise inexact for x != 0 */
  if (abshi < 0x3e300000 && huge + x > one) return x;
  ax = fabs(x);
  if (abshi > 0x41b00000) {         /* |x| > 2**28 */
    res = sun_log(ax) + LN2;
  } else if (abshi > 0x40000000) {  /* 2**28 > |x| > 2.0 */
    res = sun_log(2.0*ax + one/(sqrt(x*x + one) + ax));
  } else {                          /* 2**-28 < |x| < 2.0 */
    const double sq = x*x;
    /* DO NOT use kernel_log1p, it returns wrong results in this context ! */
    res = log1p(ax + sq/(one + sqrt(one + sq)));
  }
  return (hi > 0) ? res : -res;
}


/* @(#)e_acosh.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * __ieee754_acosh(x)
 * Method :
 *	Based on
 *		acosh(x) = log [ x + sqrt(x*x-1) ]
 *	we have
 *		acosh(x) := log(x)+ln2,	if x is large; else
 *		acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
 *		acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
 *
 * Special cases:
 *	acosh(x) is NaN with signal if x<1.
 *	acosh(NaN) is NaN without signal.
 *
 * 8 percent _slower_ than GCC's acosh.
 */

/* Inverse hyperbolic cosine of x. Returns AGN_NAN for x < 1, x + x for Inf/NaN, 0 for x == 1 and
   otherwise picks one of three formulas depending on the size of x. */
LUALIB_API double sun_acosh (double x) {
  int32_t hi, lo;
  EXTRACT_WORDS(hi, lo, x);
  if (hi < 0x3ff00000) return AGN_NAN;  /* x < 1 (this includes every negative x) */
  if (unlikely(hi >= 0x41b00000)) {     /* x > 2**28 */
    if (hi >= 0x7ff00000) return x + x; /* x is inf or NaN */
    return sun_log(x) + LN2;            /* acosh(huge) = log(2x) */
  }
  /* x == 1 is not faster, even when skipping lo */
  if (((hi - 0x3ff00000) | lo) == 0) return 0.0;  /* acosh(1) = 0 */
  if (hi > 0x40000000) {                /* 2**28 > x > 2 */
    const double sq = x*x;
    return sun_log(2.0*x - one/(x + sqrt(sq - one)));
  }
  {                                     /* 1 < x < 2 */
    const double t = x - one;
    return log1p(t + sqrt(2.0*t + t*t));
  }
}


/* @(#)e_sqrt.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __ieee754_sqrt(x)
 * Return correctly rounded sqrt.
 *           ------------------------------------------
 *	     |  Use the hardware sqrt if you have one |
 *           ------------------------------------------
 * Method:
 *   Bit by bit method using integer arithmetic. (Slow, but portable)
 *   1. Normalization
 *	Scale x to y in [1,4) with even powers of 2:
 *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
 *		sqrt(x) = 2^k * sqrt(y)
 *   2. Bit by bit computation
 *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
 *	     i							 0
 *                                     i+1         2
 *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
 *	     i      i            i                 i
 *
 *	To compute q    from q , one checks whether
 *		    i+1       i
 *
 *			      -(i+1) 2
 *			(q + 2      ) <= y.			(2)
 *     			  i
 *							      -(i+1)
 *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
 *		 	       i+1   i             i+1   i
 *
 *	With some algebraic manipulation, it is not difficult to see
 *	that (2) is equivalent to
 *                             -(i+1)
 *			s  +  2       <= y			(3)
 *			 i                i
 *
 *	The advantage of (3) is that s  and y  can be computed by
 *				      i      i
 *	the following recurrence formula:
 *	    if (3) is false
 *
 *	    s     =  s  ,	y    = y   ;			(4)
 *	     i+1      i		 i+1    i
 *
 *	    otherwise,
 *                         -i                     -(i+1)
 *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
 *           i+1      i          i+1    i     i
 *
 *	One may easily use induction to prove (4) and (5).
 *	Note. Since the left hand side of (3) contains only i+2 bits,
 *	      it is not necessary to do a full (53-bit) comparison
 *	      in (3).
 *   3. Final rounding
 *	After generating the 53 bits result, we compute one more bit.
 *	Together with the remainder, we can decide whether the
 *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
 *	(it will never equal to 1/2ulp).
 *	The rounding mode can be detected by checking whether
 *	huge + tiny is equal to huge, and whether huge - tiny is
 *	equal to huge for some floating point number "huge" and "tiny".
 *
 * Special cases:
 *	sqrt(+-0) = +-0 	... exact
 *	sqrt(inf) = inf
 *	sqrt(-ve) = NaN		... with invalid signal
 *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
 *
 * Other methods : see the appended file at the end of the program below.
 *
 * NOTE: This function is very, very slow. Use builtin sqrt instead.
 */

/* Portable bit-by-bit square root working on the two 32-bit words of x (ix0 = high, ix1 = low).
   Returns x*x + x for Inf/NaN, x itself for +-0 and AGN_NAN for negative arguments.
   Compared to the classic routine two formally undefined operations are avoided:
   - the subnormal normalisation no longer evaluates `ix1 >> 32` when no extra shift is needed,
   - the exponent is re-inserted by multiplication instead of left-shifting a negative value. */
LUALIB_API double sun_sqrt (double x) {
  double z;
  int32_t sign = (int32_t)0x80000000;
  uint32_t r, t1, s1, ix1, q1;
  int32_t ix0, s0, q, m, t, i;
  EXTRACT_WORDS(ix0, ix1, x);
  /* take care of Inf and NaN */
  if (unlikely((ix0 & 0x7ff00000) == 0x7ff00000)) {
    return x*x + x;  /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
                        sqrt(-inf)=sNaN */
  }
  /* take care of zero */
  if (ix0 <= 0) {
    if (((ix0 & (~sign)) | ix1) == 0) return x;  /* sqrt(+-0) = +-0 */
    else if (ix0 < 0)
      return AGN_NAN;  /* (x-x)/(x-x); */ /* sqrt(-ve) = sNaN */
  }
  /* normalize x */
  m = (ix0 >> 20);
  if (m == 0) {  /* subnormal x */
    while (ix0 == 0) {  /* high word empty: pull 21 bits up from the low word */
      m -= 21;
      ix0 |= (ix1 >> 11); ix1 <<= 21;
    }
    for (i=0; (ix0 & 0x00100000) == 0; i++) ix0 <<= 1;
    m -= i - 1;
    if (i != 0) {  /* i == 0 would shift ix1 by 32 bits, which is undefined; nothing to move then */
      ix0 |= (ix1 >> (32 - i));
      ix1 <<= i;
    }
  }
  m -= 1023;  /* unbias exponent */
  ix0 = (ix0 & 0x000fffff) | 0x00100000;
  if (m & 1) {  /* odd m, double x to make it even */
    ix0 += ix0 + ((ix1 & sign) >> 31);
    ix1 += ix1;
  }
  m >>= 1;  /* m = [m/2] */
  /* generate sqrt(x) bit by bit */
  ix0 += ix0 + ((ix1 & sign) >> 31);
  ix1 += ix1;
  q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
  r = 0x00200000;  /* r = moving bit from right to left */
  while (r != 0) {  /* bits of the high result word q */
    t = s0 + r;
    if(t <= ix0) {
      s0   = t + r;
      ix0 -= t;
      q   += r;
    }
    ix0 += ix0 + ((ix1 & sign) >> 31);
    ix1 += ix1;
    r >>= 1;
  }
  r = sign;
  while (r != 0) {  /* bits of the low result word q1, with carry/borrow between the words */
    t1 = s1 + r;
    t  = s0;
    if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
      s1  = t1 + r;
      if (((t1 & sign) == sign) && (s1 & sign) == 0) s0 += 1;
      ix0 -= t;
      if (ix1 < t1) ix0 -= 1;
      ix1 -= t1;
      q1  += r;
    }
    ix0 += ix0 + ((ix1 & sign) >> 31);
    ix1 += ix1;
    r >>= 1;
  }
  /* use floating add to find out rounding direction */
  if ((ix0 | ix1) != 0) {  /* non-zero remainder: the result is inexact */
    z = one - tiny;  /* trigger inexact flag */
    if (z >= one) {
      z = one + tiny;
      if (q1 == (unsigned)0xffffffff) { q1 = 0; q += 1; }
      else if (z > one) {
        if (q1 == (unsigned)0xfffffffe) q += 1;
        q1 += 2;
      } else
        q1 += (q1 & 1);
    }
  }
  ix0 = (q >> 1) + 0x3fe00000;
  ix1 =  q1 >> 1;
  if ((q & 1) == 1) ix1 |= sign;
  ix0 += m * 0x00100000;  /* same value as m << 20, but defined for negative m as well */
  INSERT_WORDS(z, ix0, ix1);
  return z;
}


/* Integer square root, that is the square root of x rounded down to the next integer towards zero. 4.11.0
   Returns 0 for x == 0 and AGN_NAN for x < 0. The computation is done entirely in doubles, so that
   - small positive x (where the former integer estimate could become 0 and was then used as a
     divisor) now yields 0,
   - x beyond the range of int, +inf and NaN no longer pass through an undefined double-to-int
     conversion; +inf and NaN are returned as sqrt() delivers them.
   floor(sqrt(x)) can be one too large when sqrt() rounds up onto an integer; fma() evaluates
   s*s - x with a single rounding, so its sign reliably detects that case. */
LUALIB_API double tools_isqrt (double x) {
  double s;
  if (x == 0) return 0;
  else if (x < 0) return AGN_NAN;
  s = floor(sqrt(x));
  /* the bound check skips the correction for NaN and +inf */
  if (s <= DBL_MAX && fma(s, s, -x) > 0.0) s -= 1.0;
  return s;
}


/* @(#)e_log.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * __ieee754_log(x)
 *
 * Return the logarithm of x
 *
 * Method :
 *   1. Argument Reduction: find k and f such that
 *      x = 2^k * (1+f),
 *     where  sqrt(2)/2 < 1+f < sqrt(2) .
 *
 *   2. Approximation of log(1+f).
 *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
 *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
 *          = 2s + s*R
 *      We use a special Remez algorithm on [0,0.1716] to generate
 *   a polynomial of degree 14 to approximate R. The maximum error
 *  of this polynomial approximation is bounded by 2**-58.45. In
 *  other words,
 *            2      4      6      8      10      12      14
 *      R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
 *    (the values of Lg1 to Lg7 are listed in the program)
 *  and
 *      |      2          14          |     -58.45
 *      | Lg1*s +...+Lg7*s    -  R(z) | <= 2
 *      |                             |
 *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
 *  In order to guarantee error in log below 1ulp, we compute log
 *  by
 *    log(1+f) = f - s*(f - R)  (if f is not too large)
 *    log(1+f) = f - (hfsq - s*(hfsq+R)).  (better accuracy)
 *
 *  3. Finally,  log(x) = k*ln2 + log(1+f).
 *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
 *     Here ln2 is split into two floating point number:
 *      ln2_hi + ln2_lo,
 *     where n*ln2_hi is always exact for |n| < 2000.
 *
 * Special cases:
 *  log(x) is NaN with signal if x < 0 (including -INF) ;
 *  log(+INF) is +INF; log(0) is -INF with signal;
 *  log(NaN) is that NaN with no signal.
 *
 * Accuracy:
 *  according to an error analysis, the error is always less than
 *  1 ulp (unit in the last place).
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 */

/* Constants for sun_log(): ln2_hi + ln2_lo is ln(2) split into a leading and a trailing part,
   two54 = 2**54 scales subnormal arguments into the normal range, and Lg1..Lg7 are the
   coefficients of the polynomial R described above. The hexadecimal words in the comments are
   the intended bit patterns. These objects have external linkage. */
const double
ln2_hi  =  6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */
ln2_lo  =  1.90821492927058770002e-10,  /* 3dea39ef 35793c76 */
two54   =  1.80143985094819840000e+16,  /* 43500000 00000000 */
Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */

/* Natural logarithm of x.
   Unlike the classic routine this version returns AGN_NAN for +-0 and for negative arguments
   (no -inf for zero); Inf and NaN are returned as x + x. For all other x the argument is split
   into x = 2^k * (1+f) and log(1+f) is approximated as described in the comment above. */
LUALIB_API double sun_log (double x) {
  double hfsq, f, s, z, R, w, t1, t2, dk;
  int32_t k, hx, i, j;
  uint32_t lx;
  EXTRACT_WORDS(hx, lx, x);
  k = 0;
  if (hx < 0x00100000) {  /* x < 2**-1022  */
    if (unlikely( (((hx & 0x7fffffff) | lx) == 0) || (hx < 0)))  /* 2.14.13 change, do not return -inf for 0 args any longer, 2.5.15 optimisation */
      return AGN_NAN; /* AGN_NAN is much faster than: (x - x)/zero; */ /* log(+-0) && log(-#) = NaN; use sun_log0 for x = 0 -> -inf */
    k -= 54; x *= two54; /* subnormal number, scale up x */
    GET_HIGH_WORD(hx, x);
  }
  if (hx >= 0x7ff00000) return x + x;  /* x is inf or NaN */
  k += (hx >> 20) - 1023;  /* unbiased binary exponent */
  hx &= 0x000fffff;        /* keep the mantissa bits of the high word */
  i = (hx + 0x95f64) & 0x100000;  /* i is 0x100000 if the mantissa is large enough that x/2 is used, else 0 */
  SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000));  /* normalise x or x/2 */
  k += (i >> 20);
  f = x - 1.0;
  if ((0x000fffff & (2 + hx)) < 3) {  /* -2**-20 <= f < 2**-20 */
    if (f == zero) {  /* x is an exact power of two: log(x) = k*ln2 */
      if (k == 0) {
        return zero;
      } else {
        dk = (double)k;
        return dk*ln2_hi + dk*ln2_lo;
      }
    }
    R = f*f*(0.5 - 0.33333333333333333*f);  /* short series f^2/2 - f^3/3 for tiny f */
    if (k == 0) return f - R;
    else {
      dk = (double)k;
      return dk*ln2_hi - ((R - dk*ln2_lo) - f);
    }
  }
  s = f/(2.0 + f);
  dk = (double)k;
  z = s*s;
  i = hx - 0x6147a;
  w = z*z;
  j = 0x6b851 - hx;
  t1 = w*(Lg2 + w*(Lg4 + w*Lg6));            /* even-indexed coefficients */
  t2 = z*(Lg1 + w*(Lg3 + w*(Lg5 + w*Lg7)));  /* odd-indexed coefficients */
  i |= j;  /* i > 0 only if hx lies between 0x6147a and 0x6b851 */
  R = t2 + t1;
  if (i > 0) {  /* use the more accurate form built on hfsq = f*f/2 */
    hfsq = 0.5*f*f;
    if (k == 0) return f - (hfsq - s*(hfsq + R)); else
    return dk*ln2_hi - ((hfsq - (s*(hfsq + R) + dk*ln2_lo)) - f);
  } else {
    if (k == 0) return f - s*(f - R); else
    return dk*ln2_hi - ((s*(f - R) - dk*ln2_lo) - f);
  }
}


#ifndef __ARMCPU  /* 2.37.1 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* long double has the same format as double in this configuration, so the square root is simply
   delegated to sqrt().
   NOTE(review): this branch defines `sqrtl`, whereas the extended-precision branch defines
   `tools_sqrtl` - confirm that the differing names are intended. */
long double LUALIB_API sqrtl (long double x) {
  const double arg = (double)x;
  return (long double)sqrt(arg);
}
#elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384

/* if x in [1,2): i = (int)(64*x);
   if x in [2,4): i = (int)(32*x-64);
   __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
   |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
/* 128 16-bit seeds for the reciprocal square root; tools_sqrtl() selects one entry with
   (ix.hi >> 42) % 128 and shifts it left by 16 bits to form the first 32-bit estimate. */
const uint16_t __rsqrt_tab[128] = {
0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,
0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430,
0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59,
0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925,
0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479,
0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040,
0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234,
0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2,
0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1,
0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192,
0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f,
0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4,
0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59,
0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560,
};

/* 0 disables the final rounding-mode / inexact-exception fix-up in tools_sqrtl() */
#define FENV_SUPPORT 0

/* Unsigned 128-bit integer emulated by two 64-bit halves; the helpers below propagate carries
   and borrows from `lo` into `hi`. */
typedef struct {
  uint64_t hi;  /* upper 64 bits */
  uint64_t lo;  /* lower 64 bits */
} u128;

/* Builds a long double from its parts: `top` is the 16-bit sign+exponent field, x carries the
   significand (for the 64-bit mantissa format only x.lo is used). */
static FORCE_INLINE long double mkldbl (uint64_t top, u128 x) {
  union ldshape out;
#if LDBL_MANT_DIG == 113
  out.i2.lo = x.lo;
  /* the low 48 bits of the upper word hold significand bits, the 16 bits above them take `top` */
  out.i2.hi = (x.hi & 0x0000ffffffffffffULL) | (top << 48);
#elif LDBL_MANT_DIG == 64
  uint64_t mant = x.lo;
  /* force the top bit on non-zero (and non-subnormal) results.  */
  if (top & 0x7fff) mant |= 0x8000000000000000ULL;
  out.i.m = mant;
  out.i.se = top;
#endif
  return out.f;
}

/* Reinterprets a long double as a u128 whose top 16 bits are sign+exponent and whose remaining
   bits are the significand. */
static FORCE_INLINE u128 asu128 (long double x) {
  union ldshape bits = {.f=x};
  u128 out;
#if LDBL_MANT_DIG == 113
  out.lo = bits.i2.lo;
  out.hi = bits.i2.hi;
#elif LDBL_MANT_DIG == 64
  out.lo = bits.i.m << 49;
  /* ignore the top bit: pseudo numbers are not handled. */
  out.hi = ((bits.i.m >> 15) & 0x0000ffffffffffffULL) | ((uint64_t)bits.i.se << 48);
#endif
  return out;
}

/* Upper 32 bits of the 64-bit product a*b, i.e. a*b*2^-32 - e with error 0 <= e < 1. */
static FORCE_INLINE uint32_t mul32 (uint32_t a, uint32_t b) {
  const uint64_t product = (uint64_t)a*b;
  return (uint32_t)(product >> 32);
}

/* Approximate upper 64 bits of a*b: a*b*2^-64 - e with error 0 <= e < 3. The product of the two
   low halves is not formed and the carries of the cross terms are dropped. */
static FORCE_INLINE uint64_t mul64 (uint64_t a, uint64_t b) {
  const uint64_t a_top = a >> 32, a_bot = a & 0xffffffff;
  const uint64_t b_top = b >> 32, b_bot = b & 0xffffffff;
  uint64_t acc = a_top*b_top;
  acc += (a_top*b_bot) >> 32;
  acc += (a_bot*b_top) >> 32;
  return acc;
}

/* a + b for a 128-bit a and a 64-bit b; a wrap-around of the low half carries into the high half. */
static FORCE_INLINE u128 add64 (u128 a, uint64_t b) {
  u128 sum = a;
  sum.lo += b;
  sum.hi += (sum.lo < a.lo);  /* carry out of the low half */
  return sum;
}

/* a + b modulo 2^128. */
static FORCE_INLINE u128 add128 (u128 a, u128 b) {
  u128 sum;
  sum.lo = a.lo + b.lo;
  sum.hi = a.hi + b.hi + (sum.lo < a.lo);  /* last term is the carry out of the low half */
  return sum;
}

/* a - b for a 128-bit a and a 64-bit b; a borrow of the low half is taken from the high half. */
static FORCE_INLINE u128 sub64 (u128 a, uint64_t b) {
  u128 diff;
  diff.lo = a.lo - b;
  diff.hi = a.hi - (a.lo < b);  /* borrow */
  return diff;
}

/* a - b modulo 2^128. */
static FORCE_INLINE u128 sub128 (u128 a, u128 b) {
  u128 diff;
  diff.lo = a.lo - b.lo;
  diff.hi = a.hi - b.hi - (a.lo < b.lo);  /* last term is the borrow of the low half */
  return diff;
}

/* a << n for 0 <= n <= 127; n == 0 is treated separately so that no 64-bit shift by 64 occurs. */
static FORCE_INLINE u128 lsh (u128 a, int n) {
  u128 out;
  if (n == 0) return a;
  if (n >= 64) {  /* the whole low half moves into the high half */
    out.hi = a.lo << (n - 64);
    out.lo = 0;
    return out;
  }
  out.hi = (a.hi << n) | (a.lo >> (64 - n));
  out.lo = a.lo << n;
  return out;
}

/* a >> n for 0 <= n <= 127; n == 0 is treated separately so that no 64-bit shift by 64 occurs. */
static FORCE_INLINE u128 rsh (u128 a, int n) {
  u128 out;
  if (n == 0) return a;
  if (n >= 64) {  /* the whole high half moves into the low half */
    out.lo = a.hi >> (n - 64);
    out.hi = 0;
    return out;
  }
  out.lo = (a.lo >> n) | (a.hi << (64 - n));
  out.hi = a.hi >> n;
  return out;
}

/* Exact 128-bit product of two 64-bit values, assembled from the four 32x32-bit partial products. */
static FORCE_INLINE u128 mul64_128 (uint64_t a, uint64_t b) {
  const uint64_t mask32 = 0xffffffff;
  const uint64_t a1 = a >> 32, a0 = a & mask32;
  const uint64_t b1 = b >> 32, b0 = b & mask32;
  const uint64_t p00 = a0*b0;
  const uint64_t p01 = a0*b1;
  const uint64_t p10 = a1*b0;
  const uint64_t p11 = a1*b1;
  /* middle column: low halves of the cross terms plus the carry out of the lowest product */
  const uint64_t mid = (p10 & mask32) + (p01 & mask32) + (p00 >> 32);
  u128 r;
  r.hi = p11 + (p10 >> 32) + (p01 >> 32) + (mid >> 32);
  r.lo = (mid << 32) + (p00 & mask32);
  return r;
}

/* Approximate upper 128 bits of a*b: a*b*2^-128 - e with error 0 <= e < 7. The exact product of
   the high halves is combined with the approximate cross terms; a.lo*b.lo is ignored. */
static FORCE_INLINE u128 mul128 (u128 a, u128 b) {
  u128 acc = mul64_128(a.hi, b.hi);
  acc = add64(acc, mul64(a.hi, b.lo));
  acc = add64(acc, mul64(a.lo, b.hi));
  return acc;
}

/* Lower 128 bits of a*b, i.e. a*b % 2^128. */
static FORCE_INLINE u128 mul128_tail (u128 a, u128 b) {
  u128 prod = mul64_128(a.lo, b.lo);
  /* the cross terms only affect the upper half; anything above bit 127 wraps away */
  const uint64_t cross = a.hi*b.lo + a.lo*b.hi;
  prod.hi += cross;
  return prod;
}

#if LDBL_MANT_DIG != DBL_MANT_DIG
/* Produces a NaN by evaluating (x - x)/(x - x), which is 0/0 for finite x and NaN/NaN otherwise. */
static long double __math_invalidl (long double x) {
  const long double numerator = x - x;
  const long double denominator = x - x;
  return numerator/denominator;
}
#endif

/* Square root of an extended or quadruple precision long double.
   +-0 and +inf are returned unchanged, negative arguments and NaN go through __math_invalidl(),
   subnormals are scaled by 2^112 first. The significand is then processed in integer arithmetic:
   a table seed for 1/sqrt(m) is refined by Newton-style steps in 32, 64 and 128 bits, and the
   final comparison of s*s against m decides whether the last bit is incremented.
   The 128-bit constant `threel` is initialised with an integer constant expression, since an
   initialiser that refers to the const object `three` is not a constant expression in ISO C. */
LUALIB_API long double tools_sqrtl (long double x) {
  u128 ix, ml;
  uint64_t top;
  ix = asu128(x);
  top = ix.hi >> 48;  /* sign and exponent field */
  if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
    /* x < 0x1p-16382 or inf or nan.  */
    if (2*ix.hi == 0 && ix.lo == 0) return x;                    /* +-0 */
    if (ix.hi == 0x7fff000000000000ULL && ix.lo == 0) return x;  /* +inf */
    if (top >= 0x7fff) return __math_invalidl(x);                /* negative, -inf or NaN */
    /* x is subnormal, normalize it.  */
    ix = asu128(x*0x1p112);
    top = ix.hi >> 48;
    top -= 112;
  }
  /* x = 4^e m; with int e and m in [1, 4) */
  int even = top & 1;
  ml = lsh(ix, 15);
  ml.hi |= 0x8000000000000000ULL;
  if (even) ml = rsh(ml, 1);
  top = (top + 0x3fff) >> 1;
  /* r ~ 1/sqrt(m) */
  static const uint64_t three = 0xc0000000;
  uint64_t r, s, d, u, i;
  i = (ix.hi >> 42) % 128;  /* table index built from the lowest exponent bit and the leading significand bits */
  r = (uint32_t)__rsqrt_tab[i] << 16;
  /* |r sqrt(m) - 1| < 0x1p-8 */
  s = mul32(ml.hi >> 32, r);
  d = mul32(s, r);
  u = three - d;
  r = mul32(u, r) << 1;
  /* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
  r = r << 32;
  s = mul64(ml.hi, r);
  d = mul64(s, r);
  u = (three << 32) - d;
  r = mul64(u, r) << 1;
  /* |r sqrt(m) - 1| < 0x1.a5p-31 */
  s = mul64(u, s) << 1;
  d = mul64(s, r);
  u = (three << 32) - d;
  r = mul64(u, r) << 1;
  /* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
  static const u128 threel = {.hi=0xc0000000ULL << 32, .lo=0};  /* same value as three << 32 */
  u128 rl, sl, dl, ul;
  rl.hi = r;
  rl.lo = 0;
  sl = mul128(ml, rl);
  dl = mul128(sl, rl);
  ul = sub128(threel, dl);
  sl = mul128(ul, sl); /* repr: 3.125 */
  /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
  sl = rsh(sub64(sl, 4), 125 - (LDBL_MANT_DIG - 1));
  /* s < sqrt(m) < s + 1 ULP + tiny */
  long double y;
  u128 d2, d1, d0;
  d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG - 1) - 126), mul128_tail(sl, sl));
  d1 = sub128(sl, d0);
  d2 = add128(add64(sl, 1), d1);
  sl = add64(sl, d1.hi >> 63);  /* increment the last bit if d1 is negative */
  y = mkldbl(top, sl);
  if (FENV_SUPPORT) {
    /* handle rounding modes and inexact exception.  */
    top = predict_false((d2.hi | d2.lo) == 0) ? 0 : 1;
    top |= ((d1.hi ^ d2.hi) & 0x8000000000000000ULL) >> 48;
    y += mkldbl(top, (u128){0});
  }
  return y;
}
#else
#error unsupported long double format
#endif

/* Taken from MUSL-1.2.3 src/math/cbrtl.c, 2.34.10 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* long double has the same format as double in this configuration: delegate to cbrt(). */
LUALIB_API long double tools_cbrtl (long double x) {
  const double arg = (double)x;
  return (long double)cbrt(arg);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
/* bias added to the integer-divided float bit pattern to obtain the first cube root estimate */
static const unsigned cbrtlB1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */

/* Cube root of an extended or quadruple precision long double.
   Inf and NaN return x + x, +-0 returns x. Otherwise the exponent is split off and reduced to a
   multiple of 3, a rough estimate of the cube root of the remaining significand is obtained from
   the float bit pattern, refined by two rational iteration steps and one final Newton step, and
   finally multiplied by the power of two (with the sign of x) prepared in v. */
LUALIB_API long double tools_cbrtl (long double x) {
  union ldshape u = {x}, v;
  union {float f; uint32_t i;} uft;
  long double r, s, t, w;
  long double dr, dt, dx;  /* formerly `double_t` which is `at least` double */
  double ft;  /* formerly `float_t` which is `at least` double */
  int e = u.i.se & 0x7fff;     /* biased exponent */
  int sign = u.i.se & 0x8000;  /* sign bit */
  /* If x = +-Inf, then cbrt(x) = +-Inf.
   * If x = NaN, then cbrt(x) = NaN. */
  if (e == 0x7fff) return x + x;
  if (e == 0) {  /* Adjust subnormal numbers. */
    u.f *= 0x1p120;
    e = u.i.se & 0x7fff;
    /* If x = +-0, then cbrt(x) = +-0. */
    if (e == 0) return x;
    e -= 120;
  }
  e -= 0x3fff;      /* unbias */
  u.i.se = 0x3fff;  /* x becomes its own significand with sign cleared and exponent 0 */
  x = u.f;
  switch (e % 3) {  /* move the remainder of e modulo 3 into x so that e is divisible by 3 */
    case 1:
    case -2:
      x *= 2;
      e--;
      break;
    case 2:
    case -1:
      x *= 4;
      e -= 2;
      break;
  }
  v.f = 1.0;
  v.i.se = sign | (0x3fff + e/3);  /* v = +-2^(e/3), the scale factor of the result */
  /* The following is the guts of s_cbrtf, with the handling of
   * special values removed and extra care for accuracy not taken,
   * but with most of the extra accuracy not discarded. */
  /* ~5-bit estimate: */
  uft.f = x;
  uft.i = (uft.i & 0x7fffffff)/3 + cbrtlB1;
  ft = uft.f;
  /* ~16-bit estimate: */
  dx = x;
  dt = ft;
  dr = dt*dt*dt;
  dt = dt*(dx + dx + dr)/(dx + dr + dr);
  /* ~47-bit estimate: */
  dr = dt*dt*dt;
  dt = dt*(dx + dx + dr)/(dx + dr + dr);
#if LDBL_MANT_DIG == 64
  /* dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8).
   * Round it away from zero to 32 bits (32 so that t*t is exact, and
   * away from zero for technical reasons). */
  t = dt + (0x1.0p32L + 0x1.0p-31L) - 0x1.0p32;
#elif LDBL_MANT_DIG == 113
  /* Round dt away from zero to 47 bits.  Since we don't trust the 47,
   * add 2 47-bit ulps instead of 1 to round up.  Rounding is slow and
   * might be avoidable in this case, since on most machines dt will
   * have been evaluated in 53-bit precision and the technical reasons
   * for rounding up might not apply to either case in cbrtl() since
   * dt is much more accurate than needed. */
  t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60;
#endif
  /* Final step Newton iteration to 64 or 113 bits with error < 0.667 ulps */
  s = t*t;         /* t*t is exact */
  r = x/s;         /* error <= 0.5 ulps; |r| < |t| */
  w = t + t;       /* t+t is exact */
  r = (r - t)/(w + r); /* r-t is exact; w+r ~= 3*t */
  t = t + t*r;       /* error <= 0.5 + 0.5/3 + epsilon */
  t *= v.f;  /* apply sign and exponent */
  return t;
}
#endif
#endif  /* of __ARMCPU */


/* origin: OpenBSD /usr/src/lib/libm/src/polevll.c, taken from the MUSL-1.2.3 library */
/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 *      Evaluate polynomial
 *
 *
 * SYNOPSIS:
 *
 * int N;
 * long double x, y, coef[N+1], polevl[];
 *
 * y = polevll( x, coef, N );
 *
 *
 * DESCRIPTION:
 *
 * Evaluates polynomial of degree N:
 *
 *                     2          N
 * y  =  C  + C x + C x  +...+ C x
 *        0    1     2          N
 *
 * Coefficients are stored in reverse order:
 *
 * coef[0] = C  , ..., coef[N] = C  .
 *            N                   0
 *
 *  The function p1evll() assumes that the leading coefficient C_N
 * is 1.0 and omits it from the array.  Its calling arguments are
 * otherwise the same as polevll().
 *
 *
 * SPEED:
 *
 * In the interest of speed, there are no checks for out
 * of bounds arithmetic.  This routine is used by most of
 * the functions in the library.  Depending on available
 * equipment features, the user may wish to rewrite the
 * program in microcode or assembly language.
 *
 */

/* #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#else */

/* Polynomial evaluator: P[0] x^n + P[1] x^(n-1) + ...  + P[n] */
#ifndef __ARMCPU  /* 2.37.1 */
static long double __polevll (long double x, const long double *p, int n) {
  long double r;
  switch (n) {  /* 3.1.2 speed upgrade from foggy +3.5 % (degree 0) to real +133 % (degree 30)
       We will split apart a polynomial into one of even degree and into one of odd degree, then simply summing the results up,
       the performance boost is 30 percentage points over the `unsplit hard-coded-non-loop method`.
       See https://en.wikipedia.org/wiki/Horner%27s_method, chapter `Parallel Evaluation`.
       Do not use `volatile` as it slows down the code significantly (at least -12 %) on MinGW/GCC 9.2.0 on Windows 8.1;
       On `What is the largest degree of polynomials most commonly used ?`, see:
       https://math.stackexchange.com/questions/2197171/when-where-and-how-often-do-you-find-polynomials-of-higher-degrees-than-two
       One answer, from mathreadler, is: `15`.
       The function is currently called with n = 2, 3, 6, 7, 8. Shortened 3.1.3. */
    case 10: {  /* actually n = 11 */
      /* r = p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 9: {  /* actually n = 10 */
      /* r = p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*x;
      r = p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 8: {  /* actually n = 9 */
      /* r = p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq;
      r = (p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 7: { /* actually n = 8 */
      /* r = p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*x;
      r = p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 6: {  /* actually n = 7 */
      /* r = p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq;
      r = (p[5]+(p[3]+p[1]*xsq)*xsq)*x;
      r += s;
      break;
    }
    /* from here on any tuning does not make sense any longer */
    case 5: { /* actually n = 6 */
      /* r = p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[4]+(p[2]+p[0]*xsq)*xsq)*x;
      r = p[5]+(p[3]+p[1]*xsq)*xsq;
      r += s;
      break;
    }
    case 4: {  /* actually n = 5, anything below this polynomial degree is futile snd esoteric */
      /* r = p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[4]+(p[2]+p[0]*xsq)*xsq;
      r = (p[3]+p[1]*xsq)*x;
      r += s;
      break;
    }
    case 3:  /* actually n = 4 */
      r = p[3]+(p[2]+(p[1]+p[0]*x)*x)*x;
      break;
    case 2:  /* actually n = 3 */
      r = p[2]+(p[1]+p[0]*x)*x;
      break;
    case 1:  /* actually n = 2 */
      r = p[1]+p[0]*x;
      break;
    default: {  /* now we get very slowly */
      r = *p++;
      do {
        /* y = y * x + *P++; */
        r = fmal(r, x, *p++);  /* 2.35.1 tuning */
      } while (--n);
    }
  }
  return r;
}

/* Polynomial evaluator: x^n + P[0] x^(n-1) + P[1] x^(n-2) +  ...  + P[n], tuned 3.1.2
   Maple V Release 4:
   deg := 11:
   r := x^deg:
   for i from deg to 1 by -1 do
      r := r + x^(i-1)*p[deg-i]
   od:
   convert(r, horner, [x], optimized);

   The function is currently called with n = 3, 6, 7. */
static long double __p1evll (long double x, const long double *p, int n) {
  long double y;
  switch (n) {  /* shortened 3.1.3 */
    case 10:
      y = p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+(p[0]+x)*x)*x)*x)*x)*x)*x)*x)*x)*x;
      break;
    case 9:
      y = p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+(p[0]+x)*x)*x)*x)*x)*x)*x)*x)*x;
      break;
    case 8:
      y = p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+(p[0]+x)*x)*x)*x)*x)*x)*x)*x;
      break;
    case 7:
      y = p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+(p[0]+x)*x)*x)*x)*x)*x)*x;
      break;
    case 6:
      y = p[5]+(p[4]+(p[3]+(p[2]+(p[1]+(p[0]+x)*x)*x)*x)*x)*x;
      break;
    case 5:
      y = p[4]+(p[3]+(p[2]+(p[1]+(p[0]+x)*x)*x)*x)*x;
      break;
    case 4:
      y = p[3]+(p[2]+(p[1]+(p[0]+x)*x)*x)*x;
      break;
    case 3:
      y = p[2]+(p[1]+(p[0]+x)*x)*x;
      break;
    case 2:
      y = p[1]+(p[0]+x)*x;
      break;
    case 1:
      y = p[0]+x;
      break;
    default: {
      n -= 1;
      y = x + *p++;
      do {
        /* y = y * x + *P++; */
        y = fmal(y, x, *p++);  /* 2.35.1 tuning */
      } while (--n);
    }
  }
  return y;
}
#endif
/* #endif */


/* Taken from MUSL-1.2.3, src/math/powl.c, 2.34.10
 *
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*                                                      powl.c
 *
 *      Power function, long double precision
 *
 *
 * SYNOPSIS:
 *
 * long double x, y, z, powl();
 *
 * z = powl( x, y );
 *
 *
 * DESCRIPTION:
 *
 * Computes x raised to the yth power.  Analytically,
 *
 *      x**y  =  exp( y log(x) ).
 *
 * Following Cody and Waite, this program uses a lookup table
 * of 2**-i/32 and pseudo extended precision arithmetic to
 * obtain several extra bits of accuracy in both the logarithm
 * and the exponential.
 *
 *
 * ACCURACY:
 *
 * The relative error of pow(x,y) can be estimated
 * by   y dl ln(2),   where dl is the absolute error of
 * the internally computed base 2 logarithm.  At the ends
 * of the approximation interval the logarithm equals 1/32
 * and its relative error is about 1 lsb = 1.1e-19.  Hence
 * the predicted relative error in the result is 2.3e-21 y .
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *
 *    IEEE     +-1000       40000      2.8e-18      3.7e-19
 * .001 < x < 1000, with log(x) uniformly distributed.
 * -1000 < y < 1000, y uniformly distributed.
 *
 *    IEEE     0,8700       60000      6.5e-18      1.0e-18
 * 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
 *
 *
 * ERROR MESSAGES:
 *
 *   message         condition      value returned
 * pow overflow     x**y > MAXNUM      INFINITY
 * pow underflow   x**y < 1/MAXNUM       0.0
 * pow domain      x<0 and y noninteger  0.0
 *
 */

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double tools_powl (long double x, long double y) {
  /* long double has the IEEE double format in this configuration, so the C library's pow() does the work */
  const long double result = pow(x, y);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384

/* Table size: the antilog table powlA[] holds the powers 2^(-i/NXT), and tools_powl/reducl work in
   multiples of 1/NXT. */
#define NXT 32

/* log(1+x) =  x - .5x^2 + x^3 *  P(z)/Q(z)
 * on the domain  2^(-1/32) - 1  <=  x  <=  2^(1/32) - 1
 *
 * powlP holds the numerator coefficients, highest power first; tools_powl evaluates it with the
 * degree argument 3.
 */
static const long double powlP[] = {
   8.3319510773868690346226E-4L,
   4.9000050881978028599627E-1L,
   1.7500123722550302671919E0L,
   1.4000100839971580279335E0L,
};

/* Denominator coefficients, highest power first. The leading coefficient 1.0 is left out of the table
   (see the commented-out entry) because the monic evaluator __p1evll supplies it implicitly. */
static const long double powlQ[] = {
/* 1.0000000000000000000000E0L,*/
   5.2500282295834889175431E0L,
   8.4000598057587009834666E0L,
   4.2000302519914740834728E0L,
};

/* A[i] = 2^(-i/32), rounded to IEEE long double precision.
 * If i is even, A[i] + B[i/2] gives additional accuracy.
 *
 * 33 entries, i = 0 .. 32. tools_powl uses the table twice: to locate the significand of x (there the
 * final index is always even, so the powlB correction applies) and to look up the fractional power of
 * two of the result.
 */
static const long double powlA[33] = {
   1.0000000000000000000000E0L,
   9.7857206208770013448287E-1L,
   9.5760328069857364691013E-1L,
   9.3708381705514995065011E-1L,
   9.1700404320467123175367E-1L,
   8.9735453750155359320742E-1L,
   8.7812608018664974155474E-1L,
   8.5930964906123895780165E-1L,
   8.4089641525371454301892E-1L,
   8.2287773907698242225554E-1L,
   8.0524516597462715409607E-1L,
   7.8799042255394324325455E-1L,
   7.7110541270397041179298E-1L,
   7.5458221379671136985669E-1L,
   7.3841307296974965571198E-1L,
   7.2259040348852331001267E-1L,
   7.0710678118654752438189E-1L,
   6.9195494098191597746178E-1L,
   6.7712777346844636413344E-1L,
   6.6261832157987064729696E-1L,
   6.4841977732550483296079E-1L,
   6.3452547859586661129850E-1L,
   6.2092890603674202431705E-1L,
   6.0762367999023443907803E-1L,
   5.9460355750136053334378E-1L,
   5.8186242938878875689693E-1L,
   5.6939431737834582684856E-1L,
   5.5719337129794626814472E-1L,
   5.4525386633262882960438E-1L,
   5.3357020033841180906486E-1L,
   5.2213689121370692017331E-1L,
   5.1094857432705833910408E-1L,
   5.0000000000000000000000E-1L,
};

/* Low-order corrections for the even entries of powlA: powlB[k] belongs to powlA[2*k], k = 0 .. 16.
   The first and last corrections are zero because 1.0 and 0.5 are exactly representable. */
static const long double powlB[17] = {
   0.0000000000000000000000E0L,
   2.6176170809902549338711E-20L,
  -1.0126791927256478897086E-20L,
   1.3438228172316276937655E-21L,
   1.2207982955417546912101E-20L,
  -6.3084814358060867200133E-21L,
   1.3164426894366316434230E-20L,
  -1.8527916071632873716786E-20L,
   1.8950325588932570796551E-20L,
   1.5564775779538780478155E-20L,
   6.0859793637556860974380E-21L,
  -2.0208749253662532228949E-20L,
   1.4966292219224761844552E-20L,
   3.3540909728056476875639E-21L,
  -8.6987564101742849540743E-22L,
  -1.2327176863327626135542E-20L,
   0.0000000000000000000000E0L,
};

/* 2^x = 1 + x P(x),
 * on the interval -1/32 <= x <= 0
 *
 * Seven coefficients of P, highest power first; tools_powl evaluates them with the degree argument 6.
 */
static const long double powlR[] = {
   1.5089970579127659901157E-5L,
   1.5402715328927013076125E-4L,
   1.3333556028915671091390E-3L,
   9.6181291046036762031786E-3L,
   5.5504108664798463044015E-2L,
   2.4022650695910062854352E-1L,
   6.9314718055994530931447E-1L,
};

/* Limits for NXT times the base 2 logarithm of the result in tools_powl: above MEXP the function
   returns an overflowed value, below MNEXP an underflowed one. */
#define MEXP (NXT*16384.0L)
/* The following if denormal numbers are supported, else -MEXP: */
#define MNEXP (-NXT*(16384.0L+64.0L))
/* log2(e) - 1 */
#define LOG2EA 0.44269504088896340735992L

/* Storage aliases used inside tools_powl: the temporaries F, G and H of the algorithm and their
   high/low parts share the locals W, Wa, Wb and u.
   NOTE(review): no matching #undef is visible, so these very short macro names presumably stay active
   for the remainder of the file - confirm that no later identifier collides with them. */
#define F W
#define Fa Wa
#define Fb Wb
#define G W
#define Ga Wa
#define Gb u
#define H W
#define Ha Wb
#define Hb Wb

/* MAXLOGL and MINLOGL bound the estimated natural logarithm of the result in tools_powil;
   MAXLOGL is 16384*ln(2). */
static const long double MAXLOGL = 1.1356523406294143949492E4L;
static const long double MINLOGL = -1.13994985314888605586758E4L;
/* ln(2) */
static const long double LOGE2L = 6.9314718055994530941723E-1L;
/* hugeval*hugeval and twom10000*twom10000 are returned to signal overflow and underflow, respectively */
static const long double hugeval = 0x1p10000L;
/* XXX Prevent gcc from erroneously constant folding this. */
static const volatile long double twom10000 = 0x1p-10000L;

/* defined after tools_powl */
static long double reducl (long double);

/* Long double power function x**y for the 80-bit extended format.
 *
 * Special cases are filtered first, in this order:
 *   - NaN operands give NaN, except that NaN**0 and 1**NaN give 1;
 *   - x == 1 or y == 0 give 1, (-1)**(+-inf) gives 1, y == 1 gives x;
 *   - |y| >= LDBL_MAX or |x| >= LDBL_MAX give 0 or +-infinity, depending on magnitudes and signs;
 *   - x == 0 gives a signed zero or a signed infinity;
 *   - x < 0 with a non-integral y gives NaN; for integral y the sign is re-applied at the end (nflg).
 * An integral |x| with an integral exponent |y| < 32768 is handed to tools_powil.
 * Everything else is computed as 2**(y*log2(x)): log2(x) comes from the powlA/powlB tables plus a rational
 * approximation, and the product with y is accumulated in pieces that are multiples of 1/NXT.
 *
 * Note that F, Fa, Fb, G, Ga, Gb, H, Ha and Hb are preprocessor aliases for the locals W, Wa, Wb and u,
 * so several assignments below deliberately reuse each other's storage; the statement order matters. */
LUALIB_API long double tools_powl (long double x, long double y) {
  int i, nflg, iyflg, yoddint;
  long e;
  volatile long double z = 0;
  long double w = 0, W = 0, Wa = 0, Wb = 0, ya = 0, yb = 0, u = 0;
  /* make sure no invalid exception is raised by nan comparison */
  if (tools_fpisnanl(x)) {
    if (!tools_fpisnanl(y) && y == 0.0) return 1.0;
    return x;
  }
  if (tools_fpisnanl(y)) {
    if (x == 1.0) return 1.0;
    return y;
  }
  if (x == 1.0) return 1.0; /* 1**y = 1, even if y is nan */
  if (x == -1.0 && !tools_fpisfinitel(y)) return 1.0; /* -1**inf = 1 */
  if (y == 0.0) return 1.0; /* x**0 = 1, even if x is nan */
  if (y == 1.0) return x;
  /* huge positive exponent: the result only depends on whether |x| is above or below 1;
     x == 0 falls through to the zero handling further down */
  if (y >= LDBL_MAX) {
    if (x > 1.0 || x < -1.0) return INFINITY;
    if (x != 0.0) return 0.0;
  }
  /* huge negative exponent: mirror image of the case above */
  if (y <= -LDBL_MAX) {
    if (x > 1.0 || x < -1.0) return 0.0;
    if (x != 0.0 || y == -INFINITY) return INFINITY;
  }
  /* huge positive base */
  if (x >= LDBL_MAX) {
    if (y > 0.0) return INFINITY;
    return 0.0;
  }
  w = floorl(y);
  /* Set iyflg to 1 if y is an integer. */
  iyflg = 0;
  if (w == y) iyflg = 1;
  /* Test for odd integer y. */
  yoddint = 0;
  if (iyflg) {
    /* y is odd exactly when |y|/2 is not an integer, i.e. when floor(|y|/2) differs from |y|/2 */
    ya = fabsl(y);
    ya = floorl(0.5*ya);
    yb = 0.5*fabsl(w);
    if (ya != yb) yoddint = 1;
  }
  /* huge negative base: the sign of the result follows the parity of y */
  if (x <= -LDBL_MAX) {
    if (y > 0.0) {
      if (yoddint) return -INFINITY;
      return INFINITY;
    }
    if (y < 0.0) {
      if (yoddint) return -0.0;
      return 0.0;
    }
  }
  nflg = 0; /* (x<0)**(odd int) */
  if (x <= 0.0) {
    if (x == 0.0) {
      if (y < 0.0) {
        if (signbit(x) && yoddint) return -1.0/0.0;  /* (-0.0)**(-odd int) = -inf, divbyzero */
        /* (+-0.0)**(negative) = inf, divbyzero */
        return 1.0/0.0;
	    }
      /* zero base with a positive exponent: keep the sign of zero only for odd integral y */
      if (signbit(x) && yoddint) return -0.0;
      return 0.0;
    }
    if (iyflg == 0) return (x - x)/(x - x); /* (x<0)**(non-int) is NaN */
    /* (x<0)**(integer) */
    if (yoddint) nflg = 1; /* negate result */
    x = -x;
  }
  /* (+integer)**(integer)  */
  if (iyflg && floorl(x) == x && fabsl(y) < 32768.0) {
    w = tools_powil(x, (int)y);
    return nflg ? -w : w;
  }
  /* separate significand from exponent */
  x = frexpl(x, &i);
  e = i;
  /* find significand in antilog table A[]: coarse-to-fine search over odd indices; the final
     increment leaves an even index in 0 .. 32, so that powlB[i/2] below is the matching correction */
  i = 1;
  if (x <= powlA[17])  i = 17;
  if (x <= powlA[i+8]) i += 8;
  if (x <= powlA[i+4]) i += 4;
  if (x <= powlA[i+2]) i += 2;
  if (x >= powlA[1])   i = -1;
  i += 1;
  /* Find (x - A[i])/A[i]
   * in order to compute log(x/A[i]):
   *
   * log(x) = log( a x/a ) = log(a) + log(x/a)
   *
   * log(x/a) = log(1+v),  v = x/a - 1 = (x-a)/a
   */
  x -= powlA[i];
  x -= powlB[i/2];
  x /= powlA[i];
  /* rational approximation for log(1+v): log(1+v)  =  v  -  v**2/2  +  v**3 P(v) / Q(v) */
  z = x*x;
#ifndef __ARMCPU
  w = x*(z*__polevll(x, powlP, 3)/__p1evll(x, powlQ, 3));
#else
  w = x*(z*polevl(x, powlP, 3)/p1evl(x, powlQ, 3));
#endif
  w = w - 0.5*z;
  /* Convert to base 2 logarithm: multiply by log2(e) = 1 + LOG2EA */
  z = LOG2EA*w;
  z += w;
  z += LOG2EA*x;
  z += x;
  /* Compute exponent term of the base 2 logarithm. */
  w = -i;
  w /= NXT;
  w += e;
  /* Now base 2 log of x is w + z. Multiply base 2 log by y, in extended precision. */
  /* separate y into large part ya and small part yb less than 1/NXT */
  ya = reducl(y);
  yb = y - ya;
  /* (w+z)(ya+yb)
   * = w*ya + w*yb + z*y
   */
  /* F, G and H below are aliases of W; each is split into a multiple of 1/NXT (..a) and a remainder (..b) */
  F = z*y + w*yb;
  Fa = reducl(F);
  Fb = F - Fa;
  G = Fa + w*ya;
  Ga = reducl(G);
  Gb = G - Ga;
  H = Fb + Gb;
  Ha = reducl(H);
  w = (Ga + Ha)*NXT;
  /* Test the power of 2 for overflow */
  if (w > MEXP) return hugeval*hugeval;  /* overflow */
  if (w < MNEXP) return twom10000*twom10000;  /* underflow */
  e = w;  /* truncating conversion to long */
  Hb = H - Ha;
  if (Hb > 0.0) {
    e += 1;
    Hb -= 1.0/NXT;  /*0.0625L;*/
  }
  /* Now the product y * log2(x)  =  Hb + e/NXT.
   *
   * Compute base 2 exponential of Hb,
   * where -0.0625 <= Hb <= 0. */
#ifndef __ARMCPU
  z = Hb*__polevll(Hb, powlR, 6);  /*  z = 2**Hb - 1  */
#else
  z = Hb*polevl(Hb, powlR, 6);  /*  z = 2**Hb - 1  */
#endif
  /* Express e/NXT as an integer plus a negative number of (1/NXT)ths.
   * Find lookup table entry for the fractional power of 2. */
  if (e < 0) i = 0;
  else i = 1;
  i = e/NXT + i;
  e = NXT*i - e;
  w = powlA[e];
  z = w*z;  /* 2**-e * ( 1 + (2**Hb-1) )  */
  z = z + w;
  z = scalbnl(z, i);  /* multiply by integer power of 2 */
  if (nflg) z = -z;
  return z;
}

/* Rounds x down to a multiple of 1/NXT: scale by NXT, take the floor, scale back.
   The result lies within 1/NXT of x. */
static long double reducl (long double x) {
  return floorl(x*NXT)/NXT;
}

/*
 *      Positive real raised to integer power, long double precision
 *
 *
 * SYNOPSIS:
 *
 * long double x, y, powil();
 * int n;
 *
 * y = powil( x, n );
 *
 *
 * DESCRIPTION:
 *
 * Returns argument x>0 raised to the nth power.
 * The routine efficiently decomposes n as a sum of powers of
 * two. The desired power is a product of two-to-the-kth
 * powers of x.  Thus to compute the 32767 power of x requires
 * 28 multiplications instead of 32767 multiplications.
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   x domain   n domain  # trials      peak         rms
 *    IEEE     .001,1000  -1022,1023  50000       4.3e-17     7.8e-18
 *    IEEE        1,2     -1022,1023  20000       3.9e-17     7.6e-18
 *    IEEE     .99,1.01     0,8700    10000       3.6e-16     7.2e-17
 *
 * Returns infinity (hugeval*hugeval) on overflow, zero on underflow.
 */

LUALIB_API long double tools_powil (long double x, int nn) {
  /* Raises x to the integer power nn by repeated squaring.
     - nn == 0 gives 1 for every x.
     - A zero base gives a zero for positive nn and an infinity for negative nn, with the sign rules
       that tools_powl applies to a zero base (the sign of zero is only kept for odd nn).
     - A negative base is handled by working on |x| and restoring the sign for odd nn.
     - Overflow gives +-infinity, underflow +-0. */
  long double ww, y;
  long double s;
  long long e;     /* (lx - 1)*n; 64 bits wide so that the product cannot overflow */
  unsigned int n;  /* |nn|; unsigned so that nn == INT_MIN can be negated without overflow */
  int sign, signx, lx;
  if (nn == 0) return 1.0;
  if (x == 0) {  /* added awalz */
    if (nn < 0)  /* division by zero: 0**(-n) is a pole, not 0 */
      return (signbit(x) && (nn % 2)) ? -INFINITY : INFINITY;
    return (nn % 2) ? x : 0.0L;  /* odd powers keep the sign of zero */
  }
  if (x < 0) {  /* work on |x|; the result is negative only if nn is odd */
    x = -x;
    signx = (nn % 2) ? -1 : 1;
  } else
    signx = 1;
  if (nn < 0) {
    sign = -1;
    n = 0u - (unsigned int)nn;
  } else {
    sign = 1;
    n = (unsigned int)nn;
  }
  /* Overflow detection; calculate approximate logarithm of answer */
  s = x;
  s = frexpl(s, &lx);
  e = (long long)(lx - 1)*(long long)n;
  if ((e == 0) || (e > 64) || (e < -64)) {
    s = (s - 7.0710678118654752e-1L)/(s + 7.0710678118654752e-1L);
    s = (2.9142135623730950L*s - 0.5 + lx)*nn*LOGE2L;
  } else {
    s = LOGE2L*e;
  }
  if (s > MAXLOGL) return signx*hugeval*hugeval;  /* overflow */
  if (s < MINLOGL) return signx*twom10000*twom10000;  /* underflow */
  /* Handle tiny denormal answer, but with less accuracy since roundoff error in 1.0/x will be amplified.
   * The precise demarcation should be the gradual underflow threshold. */
  if (s < -MAXLOGL + 2.0) {
    x = 1.0/x;
    sign = -sign;
  }
  /* First bit of the power */
  if (n & 1) y = x;
  else y = 1.0;
  ww = x;
  n >>= 1;
  while (n) {
    ww *= ww;   /* arg to the 2-to-the-kth power */
    if (n & 1) y *= ww;  /* if that bit is set, then include in product */
    n >>= 1;
  }
  if (sign < 0) y = 1.0/y;
  return signx*y;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384   /* ARM 64-bit */
/* TODO: broken implementation to make things compile */
long double tools_powl (long double x, long double y) {
  return sun_pow(x, y, 1);
}

long double tools_powil (long double x, int nn) {
  return cephes_powi(x, nn);  /* tuned 3.16.3 */
}
#endif


/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_logl.c, taken from the MUSL-1.2.3 library  */
/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 *      Natural logarithm, long double precision
 *
 *
 * SYNOPSIS:
 *
 * long double x, y, logl();
 *
 * y = logl( x );
 *
 *
 * DESCRIPTION:
 *
 * Returns the base e (2.718...) logarithm of x.
 *
 * The argument is separated into its exponent and fractional
 * parts.  If the exponent is between -1 and +1, the logarithm
 * of the fraction is approximated by
 *
 *     log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).
 *
 * Otherwise, setting  z = 2(x-1)/(x+1),
 *
 *     log(x) = log(1+z/2) - log(1-z/2) = z + z**3 P(z)/Q(z).
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE      0.5, 2.0    150000      8.71e-20    2.75e-20
 *    IEEE     exp(+-10000) 100000      5.39e-20    2.34e-20
 *
 * In the tests over the interval exp(+-10000), the logarithms
 * of the random arguments were uniformly distributed over
 * [-10000, +10000].
 */

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double tools_logl (long double x) {
  /* long double has the IEEE double format in this configuration, so the C library's log() does the work */
  const long double result = log(x);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 2.32e-20
 *
 * LOGLP is the numerator and LOGLQ the denominator, both highest power first; tools_logl evaluates
 * them with the degree argument 6. The leading 1.0 of LOGLQ is implicit (supplied by __p1evll).
 */
static const long double LOGLP[] = {
   4.5270000862445199635215E-5L,
   4.9854102823193375972212E-1L,
   6.5787325942061044846969E0L,
   2.9911919328553073277375E1L,
   6.0949667980987787057556E1L,
   5.7112963590585538103336E1L,
   2.0039553499201281259648E1L,
};
static const long double LOGLQ[] = {
/* 1.0000000000000000000000E0,*/
   1.5062909083469192043167E1L,
   8.3047565967967209469434E1L,
   2.2176239823732856465394E2L,
   3.0909872225312059774938E2L,
   2.1642788614495947685003E2L,
   6.0118660497603843919306E1L,
};

/* Coefficients for log(x) = z + z^3 P(z^2)/Q(z^2),
 * where z = 2(x-1)/(x+1)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 6.16e-22
 *
 * LOGLR is the numerator, LOGLS the monic denominator; tools_logl evaluates both with the degree
 * argument 3.
 */
static const long double LOGLR[4] = {
   1.9757429581415468984296E-3L,
  -7.1990767473014147232598E-1L,
   1.0777257190312272158094E1L,
  -3.5717684488096787370998E1L,
};
/* Declared with four elements but only three are initialised; the implicit fourth element is zero
   and is not read by the degree-3 evaluation in tools_logl. */
static const long double LOGLS[4] = {
/* 1.00000000000000000000E0L,*/
  -2.6201045551331104417768E1L,
   1.9361891836232102174846E2L,
  -4.2861221385716144629696E2L,
};
/* ln(2) split into a high part (LOGLC1) and a small correction (LOGLC2); tools_logl adds
   exponent*LOGLC2 and exponent*LOGLC1 in separate steps */
static const long double LOGLC1 = 6.9314575195312500000000E-1L;
static const long double LOGLC2 = 1.4286068203094172321215E-6L;

LUALIB_API long double tools_logl (long double x) {  /* 2.34.8, as fast as GCC's logl */
  long double num, den, sq, res;
  int ex;
  /* NaN and +infinity are passed through unchanged; zero and negative arguments yield AGN_NAN */
  if (tools_fpisnanl(x) || x == INFINITY) return x;
  if (x <= 0.0L) return AGN_NAN;
  /* split x into significand and binary exponent; frexpl is used so that denormal numbers are
     handled properly */
  x = frexpl(x, &ex);
  if (ex > 2 || ex < -2) {
    /* larger exponents: log(x) = z + z**3 R(z**2)/S(z**2), where z = 2(x-1)/(x+1) */
    if (x < SQRTH) {  /* 2(2x-1)/(2x+1) */
      ex -= 1;
      num = x - 0.5L;
      den = 0.5L*num + 0.5L;
    } else {  /* 2(x-1)/(x+1) */
      num = x - 0.5L;
      num -= 0.5L;
      den = 0.5L*x + 0.5L;
    }
    x = num/den;
    sq = x*x;
#ifndef __ARMCPU
    res = x*(sq*__polevll(sq, LOGLR, 3)/__p1evll(sq, LOGLS, 3));
#else
    res = x*(sq*polevl(sq, LOGLR, 3)/p1evl(sq, LOGLS, 3));
#endif
    /* add the exponent's share of ln(2): small part first, large part last */
    res += ex*LOGLC2;
    res += x;
    res += ex*LOGLC1;
    return res;
  }
  /* exponents from -2 to 2: log(1+x) = x - .5x**2 + x**3 P(x)/Q(x) */
  if (x < SQRTH) {
    ex -= 1;
    x = 2.0L*x - 1.0L;
  } else {
    x = x - 1.0L;
  }
  sq = x*x;
#ifndef __ARMCPU
  res = x*(sq*__polevll(x, LOGLP, 6)/__p1evll(x, LOGLQ, 6));
#else
  res = x*(sq*polevl(x, LOGLP, 6)/p1evl(x, LOGLQ, 6));
#endif
  res = res + ex*LOGLC2;
  res = res - 0.5L*sq;
  /* Note, the sum of above terms does not exceed x/4, so it contributes at most about 1/4 lsb to the error. */
  res += x;
  res += ex*LOGLC1;  /* This sum has an error of 1/2 lsb. */
  return res;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile: with IEEE quad long doubles the call is simply
   forwarded to log() instead of using a genuine long double algorithm. */
LUALIB_API long double tools_logl (long double x) {
  const long double result = log(x);
  return result;
}
#endif


LUALIB_API double tools_logbase (double x, double b) {  /* 2.14.2 */
  /* Logarithm of x to the base b, with the degenerate cases checked one after the other. */
  /* the logarithm of 1 is 0, which is also returned for a negative base */
  if (x == 1 && b < 0) return 0;
  /* argument equal to the base, both negative or both above 1 */
  if (x == b && (x < 0 || x > 1)) return 1;
  /* any remaining non-positive argument or base, and the base 1, are undefined */
  if (x <= 0 || b <= 0 || b == 1) return AGN_NAN;
  /* regular case: change of base */
  return sun_log(x)/sun_log(b);
}


LUALIB_API long double tools_logbasel (long double x, long double b) {  /* 2.34.8 */
  /* Long double logarithm of x to the base b, with the degenerate cases checked one after the other. */
  /* the logarithm of 1 is 0, which is also returned for a negative base */
  if (x == 1.0L && b < 0.0L) return 0.0L;
  /* argument equal to the base, both negative or both above 1 */
  if (x == b && (x < 0.0L || x > 1.0L)) return 1.0L;
  /* any remaining non-positive argument or base, and the base 1, are undefined */
  if (x <= 0.0L || b <= 0.0L || b == 1.0L) return AGN_NAN;
  /* regular case: change of base */
  return tools_logl(x)/tools_logl(b);
}


/* Fast but crude natural logarithm for floats, 2.14.6.
   18 percent faster than sun_log on Aspire V3-331 (Intel(R) Pentium(R) 3556U @ 1.70GHz); the function is quite
   `crude` at integral x and its vicinity. Mean and median error is 1.35e-2 in the range 0.1 .. 1000, step 0.1.
   The value is derived from the integer view of x (presumably its raw IEEE bit pattern - see the definition of
   ieee_float_shape_type), which is scaled and shifted by two fixed constants. */
LUALIB_API float tools_lnfast (float x) {
  ieee_float_shape_type bits;
  float scaled;
  bits.value = x;
  scaled = 8.2629582881927490e-8f*(float)bits.word;
  return scaled - 87.989971088f;
}


/* @(#)e_exp.c 1.6 04/04/22 */
/*
 * ====================================================
 * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __ieee754_exp(x)
 * Returns the exponential of x.
 *
 * Method
 *   1. Argument reduction:
 *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
 *  Given x, find r and integer k such that
 *
 *               x = k*ln2 + r,  |r| <= 0.5*ln2.
 *
 *      Here r will be represented as r = hi-lo for better
 *  accuracy.
 *
 *   2. Approximation of exp(r) by a special rational function on
 *  the interval [0,0.34658]:
 *  Write
 *      R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
 *      We use a special Remes algorithm on [0,0.34658] to generate
 *   a polynomial of degree 5 to approximate R. The maximum error
 *  of this polynomial approximation is bounded by 2**-59. In
 *  other words,
 *      R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
 *    (where z=r*r, and the values of P1 to P5 are listed below)
 *  and
 *      |                  5          |     -59
 *      | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
 *      |                             |
 *  The computation of exp(r) thus becomes
 *                             2*r
 *    exp(r) = 1 + -------
 *                  R - r
 *                                 r*R1(r)
 *           = 1 + r + ----------- (for better accuracy)
 *                      2 - R1(r)
 *  where
 *               2       4             10
 *    R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
 *
 *   3. Scale back to obtain exp(x):
 *  From step 1, we have
 *     exp(x) = 2^k * exp(r)
 *
 * Special cases:
 *  exp(INF) is INF, exp(NaN) is NaN;
 *  exp(-INF) is 0, and
 *  for finite argument, only exp(0)=1 is exact.
 *
 * Accuracy:
 *  according to an error analysis, the error is always less than
 *  1 ulp (unit in the last place).
 *
 * Misc. info.
 *  For IEEE double
 *      if x >  7.09782712893383973096e+02 then exp(x) overflow
 *      if x < -7.45133219101941108420e+02 then exp(x) underflow
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 */

/* Constants for sun_exp. The two-element arrays are indexed with the sign bit of the argument
   (index 0: sign bit clear, index 1: sign bit set).
   NOTE(review): none of these objects is declared static, so short names such as P1 or invln2 have
   external linkage - confirm whether other translation units rely on that before narrowing it. */
const double
halF[2]      = { 0.5, -0.5, },                 /* added before truncation to round k to the nearest integer */
o_threshold =   7.09782712893383973096e+02,    /* 0x40862E42, 0xFEFA39EF */
u_threshold =  -7.45133219101941108420e+02,    /* 0xc0874910, 0xD52D3051 */
ln2HI[2]    = { 6.93147180369123816490e-01,    /* 0x3fe62e42, 0xfee00000 */
               -6.93147180369123816490e-01,},  /* 0xbfe62e42, 0xfee00000 */
ln2LO[2]    = { 1.90821492927058770002e-10,    /* 0x3dea39ef, 0x35793c76 */
               -1.90821492927058770002e-10,},  /* 0xbdea39ef, 0x35793c76 */
invln2      =   1.44269504088896338700e+00,    /* 0x3ff71547, 0x652b82fe */
P1          =   1.66666666666666019037e-01,    /* 0x3FC55555, 0x5555553E */
P2          =  -2.77777777770155933842e-03,    /* 0xBF66C16C, 0x16BEBD93 */
P3          =   6.61375632143793436117e-05,    /* 0x3F11566A, 0xAF25DE2C */
P4          =  -1.65339022054652515390e-06,    /* 0xBEBBBD41, 0xC5D26BF1 */
P5          =   4.13813679705723846039e-08;    /* 0x3E663769, 0x72BEA4D0 */

/* Exponential function e**x for IEEE doubles, following the method described in the comment above:
 * argument reduction x = k*ln2 + r, approximation of exp(r), scaling by 2**k.
 * Returns x + x for NaN, x for +infinity, 0 for -infinity, an overflowed value above o_threshold and
 * an underflowed one below u_threshold. */
LUALIB_API double sun_exp (double x) {  /* default IEEE double exp */
  double y, hi = 0.0, lo = 0.0, c, t, twopk;
  int32_t k = 0, xsb;
  uint32_t hx;
  GET_HIGH_WORD(hx, x);
  xsb = (hx >> 31) & 1;  /* sign bit of x */
  hx &= 0x7fffffff;      /* high word of |x| */
  /* filter out non-finite argument */
  if (hx >= 0x40862E42) {  /* if |x|>=709.78... */
    if (hx >= 0x7ff00000) {  /* x is inf or NaN */
      uint32_t lx;
      GET_LOW_WORD(lx, x);
      if (((hx & 0xfffff) | lx) != 0)
        return x + x;  /* NaN */
      else
        return (xsb == 0) ? x : 0.0;  /* exp(+-inf)={inf,0} */
    }
    if (x > o_threshold) return huge*huge;  /* overflow */
    if (x < u_threshold) return twom1000*twom1000;  /* underflow */
  }
  /* this implementation gives 2.7182818284590455 for exp(1.0),
     which is well within the allowable error. However,
     2.718281828459045 is closer to the true value so we prefer that
     answer, given that 1.0 is such an important argument value. */
  if (x == 1.0)
    return 2.718281828459045235360;
  /* argument reduction */
  if (hx > 0x3fd62e42) {    /* if  |x| > 0.5 ln2 */
    if (hx < 0x3FF0A2B2) {  /* and |x| < 1.5 ln2 */
      hi = x - ln2HI[xsb]; lo = ln2LO[xsb]; k = 1 - xsb - xsb;
    } else {
      k  = (int)(invln2*x + halF[xsb]);
      t  = k;
      hi = x - t*ln2HI[0];  /* t*ln2HI is exact here */
      lo = t*ln2LO[0];
    }
    STRICT_ASSIGN(double, x, hi - lo);
  }
  else if (unlikely(hx < 0x3e300000))  {  /* when |x|<2**-28, 2.5.15 optimisation */
    if (huge + x > one) return one + x;   /* trigger inexact */
  }
  else k = 0;
  /* x is now in primary range */
  t  = x*x;
  /* Build 2**k (or 2**(k+1000) for very small results) directly from its biased exponent. The exponent is
     converted to unsigned before shifting, since k may be negative and left-shifting a negative signed
     value is undefined behaviour. */
  if (k >= -1021)
    INSERT_WORDS(twopk, ((uint32_t)(0x3ff + k)) << 20, 0);
  else
    INSERT_WORDS(twopk, ((uint32_t)(0x3ff + (k + 1000))) << 20, 0);
  c  = x - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5))));
  if (k == 0) return one - ((x*c)/(c - 2.0) - x);
  else y = one - ((lo - (x*c)/(2.0 - c)) - hi);
  if (k >= -1021) {
    if (k == 1024) return y*2.0*0x1p1023;  /* 2**1024 is not representable, so scale in two steps */
    return y*twopk;
  } else {
    return y*twopk*twom1000;  /* undo the +1000 bias applied to twopk above */
  }
}


/* origin: FreeBSD /usr/src/lib/msun/ld80/s_exp2l.c and /usr/src/lib/msun/ld128/s_exp2l.c
 * taken from: https://chromium.googlesource.com/external/github.com/kripken/emscripten/+/refs/tags/1.37.5/system/lib/libc/musl/src/math/exp2l.c
 *-
 * Copyright (c) 2005-2008 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double tools_exp2l (long double x) {
  /* long double has the IEEE double format in this configuration, so the work is handed to cephes_exp2 */
  const long double result = cephes_exp2(x);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Lookup table geometry: TBLSIZE = 2^TBLBITS = 128; tbl[] below is declared with TBLSIZE * 2 doubles. */
#define TBLBITS 7
#define TBLSIZE (1 << TBLBITS)

/* redux is 1.5 * 2^63 / TBLSIZE; E2P1 equals ln(2) to double precision.
   NOTE(review): E2P1 .. E2P6 are presumably the polynomial coefficients of the 2**r approximation -
   confirm against the routine that uses them. */
static const double
redux = 0x1.8p63 / TBLSIZE,
E2P1    = 0x1.62e42fefa39efp-1,
E2P2    = 0x1.ebfbdff82c58fp-3,
E2P3    = 0x1.c6b08d7049fap-5,
E2P4    = 0x1.3b2ab6fba4da5p-7,
E2P5    = 0x1.5d8804780a736p-10,
E2P6    = 0x1.430918835e33dp-13;

/*
 * Lookup table for exp2(i/TBLSIZE), i = -TBLSIZE/2 .. TBLSIZE/2 - 1, read by tools_exp2l with
 * the shifted index i0 = i + TBLSIZE/2. Each entry occupies two consecutive doubles:
 * tbl[2*i0] is the leading part and tbl[2*i0 + 1] a small correction that is added to it.
 * The first pair is 2**-0.5, the pair at i0 = TBLSIZE/2 is exactly 1.
 */
static const double tbl[TBLSIZE * 2] = {
	0x1.6a09e667f3bcdp-1,   -0x1.bdd3413b2648p-55,
	0x1.6c012750bdabfp-1,   -0x1.2895667ff0cp-57,
	0x1.6dfb23c651a2fp-1,   -0x1.bbe3a683c88p-58,
	0x1.6ff7df9519484p-1,   -0x1.83c0f25860fp-56,
	0x1.71f75e8ec5f74p-1,   -0x1.16e4786887bp-56,
	0x1.73f9a48a58174p-1,   -0x1.0a8d96c65d5p-55,
	0x1.75feb564267c9p-1,   -0x1.0245957316ep-55,
	0x1.780694fde5d3fp-1,    0x1.866b80a0216p-55,
	0x1.7a11473eb0187p-1,   -0x1.41577ee0499p-56,
	0x1.7c1ed0130c132p-1,    0x1.f124cd1164ep-55,
	0x1.7e2f336cf4e62p-1,    0x1.05d02ba157ap-57,
	0x1.80427543e1a12p-1,   -0x1.27c86626d97p-55,
	0x1.82589994cce13p-1,   -0x1.d4c1dd41533p-55,
	0x1.8471a4623c7adp-1,   -0x1.8d684a341cep-56,
	0x1.868d99b4492edp-1,   -0x1.fc6f89bd4f68p-55,
	0x1.88ac7d98a6699p-1,    0x1.994c2f37cb5p-55,
	0x1.8ace5422aa0dbp-1,    0x1.6e9f156864bp-55,
	0x1.8cf3216b5448cp-1,   -0x1.0d55e32e9e4p-57,
	0x1.8f1ae99157736p-1,    0x1.5cc13a2e397p-56,
	0x1.9145b0b91ffc6p-1,   -0x1.dd6792e5825p-55,
	0x1.93737b0cdc5e5p-1,   -0x1.75fc781b58p-58,
	0x1.95a44cbc8520fp-1,   -0x1.64b7c96a5fp-57,
	0x1.97d829fde4e5p-1,    -0x1.d185b7c1b86p-55,
	0x1.9a0f170ca07bap-1,   -0x1.173bd91cee6p-55,
	0x1.9c49182a3f09p-1,     0x1.c7c46b071f2p-57,
	0x1.9e86319e32323p-1,    0x1.824ca78e64cp-57,
	0x1.a0c667b5de565p-1,   -0x1.359495d1cd5p-55,
	0x1.a309bec4a2d33p-1,    0x1.6305c7ddc368p-55,
	0x1.a5503b23e255dp-1,   -0x1.d2f6edb8d42p-55,
	0x1.a799e1330b358p-1,    0x1.bcb7ecac564p-55,
	0x1.a9e6b5579fdbfp-1,    0x1.0fac90ef7fdp-55,
	0x1.ac36bbfd3f37ap-1,   -0x1.f9234cae76dp-56,
	0x1.ae89f995ad3adp-1,    0x1.7a1cd345dcc8p-55,
	0x1.b0e07298db666p-1,   -0x1.bdef54c80e4p-55,
	0x1.b33a2b84f15fbp-1,   -0x1.2805e3084d8p-58,
	0x1.b59728de5593ap-1,   -0x1.c71dfbbba6ep-55,
	0x1.b7f76f2fb5e47p-1,   -0x1.5584f7e54acp-57,
	0x1.ba5b030a1064ap-1,   -0x1.efcd30e5429p-55,
	0x1.bcc1e904bc1d2p-1,    0x1.23dd07a2d9fp-56,
	0x1.bf2c25bd71e09p-1,   -0x1.efdca3f6b9c8p-55,
	0x1.c199bdd85529cp-1,    0x1.11065895049p-56,
	0x1.c40ab5fffd07ap-1,    0x1.b4537e083c6p-55,
	0x1.c67f12e57d14bp-1,    0x1.2884dff483c8p-55,
	0x1.c8f6d9406e7b5p-1,    0x1.1acbc48805cp-57,
	0x1.cb720dcef9069p-1,    0x1.503cbd1e94ap-57,
	0x1.cdf0b555dc3fap-1,   -0x1.dd83b53829dp-56,
	0x1.d072d4a07897cp-1,   -0x1.cbc3743797a8p-55,
	0x1.d2f87080d89f2p-1,   -0x1.d487b719d858p-55,
	0x1.d5818dcfba487p-1,    0x1.2ed02d75b37p-56,
	0x1.d80e316c98398p-1,   -0x1.11ec18bedep-55,
	0x1.da9e603db3285p-1,    0x1.c2300696db5p-55,
	0x1.dd321f301b46p-1,     0x1.2da5778f019p-55,
	0x1.dfc97337b9b5fp-1,   -0x1.1a5cd4f184b8p-55,
	0x1.e264614f5a129p-1,   -0x1.7b627817a148p-55,
	0x1.e502ee78b3ff6p-1,    0x1.39e8980a9cdp-56,
	0x1.e7a51fbc74c83p-1,    0x1.2d522ca0c8ep-55,
	0x1.ea4afa2a490dap-1,   -0x1.e9c23179c288p-55,
	0x1.ecf482d8e67f1p-1,   -0x1.c93f3b411ad8p-55,
	0x1.efa1bee615a27p-1,    0x1.dc7f486a4b68p-55,
	0x1.f252b376bba97p-1,    0x1.3a1a5bf0d8e8p-55,
	0x1.f50765b6e454p-1,     0x1.9d3e12dd8a18p-55,
	0x1.f7bfdad9cbe14p-1,   -0x1.dbb12d00635p-55,
	0x1.fa7c1819e90d8p-1,    0x1.74853f3a593p-56,
	0x1.fd3c22b8f71f1p-1,    0x1.2eb74966578p-58,
	0x1p+0,                  0x0p+0,
	0x1.0163da9fb3335p+0,    0x1.b61299ab8cd8p-54,
	0x1.02c9a3e778061p+0,   -0x1.19083535b08p-56,
	0x1.04315e86e7f85p+0,   -0x1.0a31c1977c98p-54,
	0x1.059b0d3158574p+0,    0x1.d73e2a475b4p-55,
	0x1.0706b29ddf6dep+0,   -0x1.c91dfe2b13cp-55,
	0x1.0874518759bc8p+0,    0x1.186be4bb284p-57,
	0x1.09e3ecac6f383p+0,    0x1.14878183161p-54,
	0x1.0b5586cf9890fp+0,    0x1.8a62e4adc61p-54,
	0x1.0cc922b7247f7p+0,    0x1.01edc16e24f8p-54,
	0x1.0e3ec32d3d1a2p+0,    0x1.03a1727c58p-59,
	0x1.0fb66affed31bp+0,   -0x1.b9bedc44ebcp-57,
	0x1.11301d0125b51p+0,   -0x1.6c51039449bp-54,
	0x1.12abdc06c31ccp+0,   -0x1.1b514b36ca8p-58,
	0x1.1429aaea92dep+0,    -0x1.32fbf9af1368p-54,
	0x1.15a98c8a58e51p+0,    0x1.2406ab9eeabp-55,
	0x1.172b83c7d517bp+0,   -0x1.19041b9d78ap-55,
	0x1.18af9388c8deap+0,   -0x1.11023d1970f8p-54,
	0x1.1a35beb6fcb75p+0,    0x1.e5b4c7b4969p-55,
	0x1.1bbe084045cd4p+0,   -0x1.95386352ef6p-54,
	0x1.1d4873168b9aap+0,    0x1.e016e00a264p-54,
	0x1.1ed5022fcd91dp+0,   -0x1.1df98027bb78p-54,
	0x1.2063b88628cd6p+0,    0x1.dc775814a85p-55,
	0x1.21f49917ddc96p+0,    0x1.2a97e9494a6p-55,
	0x1.2387a6e756238p+0,    0x1.9b07eb6c7058p-54,
	0x1.251ce4fb2a63fp+0,    0x1.ac155bef4f5p-55,
	0x1.26b4565e27cddp+0,    0x1.2bd339940eap-55,
	0x1.284dfe1f56381p+0,   -0x1.a4c3a8c3f0d8p-54,
	0x1.29e9df51fdee1p+0,    0x1.612e8afad12p-55,
	0x1.2b87fd0dad99p+0,    -0x1.10adcd6382p-59,
	0x1.2d285a6e4030bp+0,    0x1.0024754db42p-54,
	0x1.2ecafa93e2f56p+0,    0x1.1ca0f45d524p-56,
	0x1.306fe0a31b715p+0,    0x1.6f46ad23183p-55,
	0x1.32170fc4cd831p+0,    0x1.a9ce78e1804p-55,
	0x1.33c08b26416ffp+0,    0x1.327218436598p-54,
	0x1.356c55f929ff1p+0,   -0x1.b5cee5c4e46p-55,
	0x1.371a7373aa9cbp+0,   -0x1.63aeabf42ebp-54,
	0x1.38cae6d05d866p+0,   -0x1.e958d3c99048p-54,
	0x1.3a7db34e59ff7p+0,   -0x1.5e436d661f6p-56,
	0x1.3c32dc313a8e5p+0,   -0x1.efff8375d2ap-54,
	0x1.3dea64c123422p+0,    0x1.ada0911f09fp-55,
	0x1.3fa4504ac801cp+0,   -0x1.7d023f956fap-54,
	0x1.4160a21f72e2ap+0,   -0x1.ef3691c309p-58,
	0x1.431f5d950a897p+0,   -0x1.1c7dde35f7ap-55,
	0x1.44e086061892dp+0,    0x1.89b7a04ef8p-59,
	0x1.46a41ed1d0057p+0,    0x1.c944bd1648a8p-54,
	0x1.486a2b5c13cdp+0,     0x1.3c1a3b69062p-56,
	0x1.4a32af0d7d3dep+0,    0x1.9cb62f3d1be8p-54,
	0x1.4bfdad5362a27p+0,    0x1.d4397afec42p-56,
	0x1.4dcb299fddd0dp+0,    0x1.8ecdbbc6a78p-54,
	0x1.4f9b2769d2ca7p+0,   -0x1.4b309d25958p-54,
	0x1.516daa2cf6642p+0,   -0x1.f768569bd94p-55,
	0x1.5342b569d4f82p+0,   -0x1.07abe1db13dp-55,
	0x1.551a4ca5d920fp+0,   -0x1.d689cefede6p-55,
	0x1.56f4736b527dap+0,    0x1.9bb2c011d938p-54,
	0x1.58d12d497c7fdp+0,    0x1.295e15b9a1ep-55,
	0x1.5ab07dd485429p+0,    0x1.6324c0546478p-54,
	0x1.5c9268a5946b7p+0,    0x1.c4b1b81698p-60,
	0x1.5e76f15ad2148p+0,    0x1.ba6f93080e68p-54,
	0x1.605e1b976dc09p+0,   -0x1.3e2429b56de8p-54,
	0x1.6247eb03a5585p+0,   -0x1.383c17e40b48p-54,
	0x1.6434634ccc32p+0,    -0x1.c483c759d89p-55,
	0x1.6623882552225p+0,   -0x1.bb60987591cp-54,
	0x1.68155d44ca973p+0,    0x1.038ae44f74p-57,
};
/*
 * exp2l(x): compute the base 2 exponential of x
 *
 * Accuracy: Peak error < 0.511 ulp.
 *
 * Method: (equally-spaced tables)
 *
 *   Reduce x:
 *     x = k + y, for integer k and |y| <= 1/2.
 *     Thus we have exp2l(x) = 2**k * exp2(y).
 *
 *   Reduce y:
 *     y = i/TBLSIZE + z for integer i near y * TBLSIZE.
 *     Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z),
 *     with |z| <= 2**-(TBLBITS+1).
 *
 *   We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a
 *   degree-6 minimax polynomial with maximum error under 2**-69.
 *   The table entries each have 104 bits of accuracy, encoded as
 *   a pair of double precision values.
 */
LUALIB_API long double tools_exp2l (long double x) {
  union ldshape u = {x};
  union {uint32_t u; int32_t i;} k;
  long double hi, lo, frac, res;
  uint32_t idx;
  int bexp = u.i.se & 0x7fff;  /* biased exponent of x with the sign bit masked off */
  /* Tiny arguments, |x| < 2**-64: the result is 1 + x. */
  if (bexp < 0x3fff - 64) {
    return 1 + x;
  }
  /* Huge or non-finite arguments: |x| >= 8192, or x is inf/NaN. */
  if (bexp >= 0x3fff + 13) {
    if (u.i.se >= 0x3fff + 14 && u.i.se >> 15 == 0) {
      return x * 0x1p16383L;  /* sign bit clear and x >= 16384: overflow */
    }
    if (bexp == 0x7fff) {
      return -1/x;  /* -inf or -nan */
    }
    if (x <= -16446) {
      /* underflow: result is reported as 0 */
      FORCE_EVAL((float)(-0x1p-149/x));
      return 0;
    }
    if (x < -16382 && x - 0x1p63 + 0x1p63 != x) {
      /* underflow on a non-integral x; the value itself is still computed below */
      FORCE_EVAL((float)(-0x1p-149/x));
    }
  }
  /*
   * Argument reduction. After adding redux, the low bits of the mantissa hold
   * the integer part of x (this becomes k) followed by TBLBITS fractional bits
   * (this becomes the table index). Subtracting redux again leaves x rounded to a
   * multiple of 1/TBLSIZE, so frac is the small remainder fed to the polynomial.
   *
   * Example with TBLBITS = 8 and x = 0xabc.123456p0: the low-order word of
   * x + redux is 0x000abc12, giving k = 0xabc, index 0x12 (before the offset
   * into the table) and frac = 0x0.003456p0.
   */
  u.f = x + redux;
  idx = u.i.m + TBLSIZE / 2;
  k.u = idx / TBLSIZE * TBLSIZE;
  k.i /= TBLSIZE;
  idx %= TBLSIZE;
  u.f -= redux;
  frac = x - u.f;
  /* Table value (hi + lo) times the polynomial in frac. */
  hi = tbl[2*idx];
  lo = tbl[2*idx + 1];
  /* XXX This gives > 1 ulp errors outside of FE_TONEAREST mode */
  res = lo + (hi + lo) * frac*(E2P1 + frac*(E2P2 + frac*(E2P3 + frac*(E2P4 + frac*(E2P5 + frac * E2P6))))) + hi;
  /* Finally scale by 2**k. */
  return scalbnl(res, k.i);
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* Geometry of the exp2 lookup tables: 2**TBLBITS entries. */
#define TBLBITS 7
#define TBLSIZE (1 << TBLBITS)

/* Leading coefficients of the degree-10 polynomial for exp2 on the reduced argument,
   kept in long double precision. */
static const long double E2P1 = 0x1.62e42fefa39ef35793c7673007e6p-1L;
static const long double E2P2 = 0x1.ebfbdff82c58ea86f16b06ec9736p-3L;
static const long double E2P3 = 0x1.c6b08d704a0bf8b33a762bad3459p-5L;
static const long double E2P4 = 0x1.3b2ab6fba4e7729ccbbe0b4f3fc2p-7L;
static const long double E2P5 = 0x1.5d87fe78a67311071dee13fd11d9p-10L;
static const long double E2P6 = 0x1.430912f86c7876f4b663b23c5fe5p-13L;

/* Trailing coefficients of the same polynomial; stored as doubles. */
static const double E2P7  = 0x1.ffcbfc588b041p-17;
static const double E2P8  = 0x1.62c0223a5c7c7p-20;
static const double E2P9  = 0x1.b52541ff59713p-24;
static const double E2P10 = 0x1.e4cf56a391e22p-28;

/* Constant added to (and later subtracted from) the argument so that the low bits of the sum
   hold the integer part k followed by TBLBITS fractional bits. */
static const double redux = 0x1.8p112 / TBLSIZE;

/*
 * Lookup table read by tools_exp2l as tbl[i0], with i0 = i + TBLSIZE/2 for i in
 * -TBLSIZE/2 .. TBLSIZE/2 - 1. Entry i0 approximates exp2(i/TBLSIZE); the first entry is
 * 2**-0.5 and the entry at i0 = TBLSIZE/2 is exactly 1. The matching eps[] value is
 * subtracted from the reduced argument before the polynomial is evaluated.
 */
static const long double tbl[TBLSIZE] = {
	0x1.6a09e667f3bcc908b2fb1366dfeap-1L,
	0x1.6c012750bdabeed76a99800f4edep-1L,
	0x1.6dfb23c651a2ef220e2cbe1bc0d4p-1L,
	0x1.6ff7df9519483cf87e1b4f3e1e98p-1L,
	0x1.71f75e8ec5f73dd2370f2ef0b148p-1L,
	0x1.73f9a48a58173bd5c9a4e68ab074p-1L,
	0x1.75feb564267c8bf6e9aa33a489a8p-1L,
	0x1.780694fde5d3f619ae02808592a4p-1L,
	0x1.7a11473eb0186d7d51023f6ccb1ap-1L,
	0x1.7c1ed0130c1327c49334459378dep-1L,
	0x1.7e2f336cf4e62105d02ba1579756p-1L,
	0x1.80427543e1a11b60de67649a3842p-1L,
	0x1.82589994cce128acf88afab34928p-1L,
	0x1.8471a4623c7acce52f6b97c6444cp-1L,
	0x1.868d99b4492ec80e41d90ac2556ap-1L,
	0x1.88ac7d98a669966530bcdf2d4cc0p-1L,
	0x1.8ace5422aa0db5ba7c55a192c648p-1L,
	0x1.8cf3216b5448bef2aa1cd161c57ap-1L,
	0x1.8f1ae991577362b982745c72eddap-1L,
	0x1.9145b0b91ffc588a61b469f6b6a0p-1L,
	0x1.93737b0cdc5e4f4501c3f2540ae8p-1L,
	0x1.95a44cbc8520ee9b483695a0e7fep-1L,
	0x1.97d829fde4e4f8b9e920f91e8eb6p-1L,
	0x1.9a0f170ca07b9ba3109b8c467844p-1L,
	0x1.9c49182a3f0901c7c46b071f28dep-1L,
	0x1.9e86319e323231824ca78e64c462p-1L,
	0x1.a0c667b5de564b29ada8b8cabbacp-1L,
	0x1.a309bec4a2d3358c171f770db1f4p-1L,
	0x1.a5503b23e255c8b424491caf88ccp-1L,
	0x1.a799e1330b3586f2dfb2b158f31ep-1L,
	0x1.a9e6b5579fdbf43eb243bdff53a2p-1L,
	0x1.ac36bbfd3f379c0db966a3126988p-1L,
	0x1.ae89f995ad3ad5e8734d17731c80p-1L,
	0x1.b0e07298db66590842acdfc6fb4ep-1L,
	0x1.b33a2b84f15faf6bfd0e7bd941b0p-1L,
	0x1.b59728de559398e3881111648738p-1L,
	0x1.b7f76f2fb5e46eaa7b081ab53ff6p-1L,
	0x1.ba5b030a10649840cb3c6af5b74cp-1L,
	0x1.bcc1e904bc1d2247ba0f45b3d06cp-1L,
	0x1.bf2c25bd71e088408d7025190cd0p-1L,
	0x1.c199bdd85529c2220cb12a0916bap-1L,
	0x1.c40ab5fffd07a6d14df820f17deap-1L,
	0x1.c67f12e57d14b4a2137fd20f2a26p-1L,
	0x1.c8f6d9406e7b511acbc48805c3f6p-1L,
	0x1.cb720dcef90691503cbd1e949d0ap-1L,
	0x1.cdf0b555dc3f9c44f8958fac4f12p-1L,
	0x1.d072d4a07897b8d0f22f21a13792p-1L,
	0x1.d2f87080d89f18ade123989ea50ep-1L,
	0x1.d5818dcfba48725da05aeb66dff8p-1L,
	0x1.d80e316c98397bb84f9d048807a0p-1L,
	0x1.da9e603db3285708c01a5b6d480cp-1L,
	0x1.dd321f301b4604b695de3c0630c0p-1L,
	0x1.dfc97337b9b5eb968cac39ed284cp-1L,
	0x1.e264614f5a128a12761fa17adc74p-1L,
	0x1.e502ee78b3ff6273d130153992d0p-1L,
	0x1.e7a51fbc74c834b548b2832378a4p-1L,
	0x1.ea4afa2a490d9858f73a18f5dab4p-1L,
	0x1.ecf482d8e67f08db0312fb949d50p-1L,
	0x1.efa1bee615a27771fd21a92dabb6p-1L,
	0x1.f252b376bba974e8696fc3638f24p-1L,
	0x1.f50765b6e4540674f84b762861a6p-1L,
	0x1.f7bfdad9cbe138913b4bfe72bd78p-1L,
	0x1.fa7c1819e90d82e90a7e74b26360p-1L,
	0x1.fd3c22b8f71f10975ba4b32bd006p-1L,
	0x1.0000000000000000000000000000p+0L,
	0x1.0163da9fb33356d84a66ae336e98p+0L,
	0x1.02c9a3e778060ee6f7caca4f7a18p+0L,
	0x1.04315e86e7f84bd738f9a20da442p+0L,
	0x1.059b0d31585743ae7c548eb68c6ap+0L,
	0x1.0706b29ddf6ddc6dc403a9d87b1ep+0L,
	0x1.0874518759bc808c35f25d942856p+0L,
	0x1.09e3ecac6f3834521e060c584d5cp+0L,
	0x1.0b5586cf9890f6298b92b7184200p+0L,
	0x1.0cc922b7247f7407b705b893dbdep+0L,
	0x1.0e3ec32d3d1a2020742e4f8af794p+0L,
	0x1.0fb66affed31af232091dd8a169ep+0L,
	0x1.11301d0125b50a4ebbf1aed9321cp+0L,
	0x1.12abdc06c31cbfb92bad324d6f84p+0L,
	0x1.1429aaea92ddfb34101943b2588ep+0L,
	0x1.15a98c8a58e512480d573dd562aep+0L,
	0x1.172b83c7d517adcdf7c8c50eb162p+0L,
	0x1.18af9388c8de9bbbf70b9a3c269cp+0L,
	0x1.1a35beb6fcb753cb698f692d2038p+0L,
	0x1.1bbe084045cd39ab1e72b442810ep+0L,
	0x1.1d4873168b9aa7805b8028990be8p+0L,
	0x1.1ed5022fcd91cb8819ff61121fbep+0L,
	0x1.2063b88628cd63b8eeb0295093f6p+0L,
	0x1.21f49917ddc962552fd29294bc20p+0L,
	0x1.2387a6e75623866c1fadb1c159c0p+0L,
	0x1.251ce4fb2a63f3582ab7de9e9562p+0L,
	0x1.26b4565e27cdd257a673281d3068p+0L,
	0x1.284dfe1f5638096cf15cf03c9fa0p+0L,
	0x1.29e9df51fdee12c25d15f5a25022p+0L,
	0x1.2b87fd0dad98ffddea46538fca24p+0L,
	0x1.2d285a6e4030b40091d536d0733ep+0L,
	0x1.2ecafa93e2f5611ca0f45d5239a4p+0L,
	0x1.306fe0a31b7152de8d5a463063bep+0L,
	0x1.32170fc4cd8313539cf1c3009330p+0L,
	0x1.33c08b26416ff4c9c8610d96680ep+0L,
	0x1.356c55f929ff0c94623476373be4p+0L,
	0x1.371a7373aa9caa7145502f45452ap+0L,
	0x1.38cae6d05d86585a9cb0d9bed530p+0L,
	0x1.3a7db34e59ff6ea1bc9299e0a1fep+0L,
	0x1.3c32dc313a8e484001f228b58cf0p+0L,
	0x1.3dea64c12342235b41223e13d7eep+0L,
	0x1.3fa4504ac801ba0bf701aa417b9cp+0L,
	0x1.4160a21f72e29f84325b8f3dbacap+0L,
	0x1.431f5d950a896dc704439410b628p+0L,
	0x1.44e086061892d03136f409df0724p+0L,
	0x1.46a41ed1d005772512f459229f0ap+0L,
	0x1.486a2b5c13cd013c1a3b69062f26p+0L,
	0x1.4a32af0d7d3de672d8bcf46f99b4p+0L,
	0x1.4bfdad5362a271d4397afec42e36p+0L,
	0x1.4dcb299fddd0d63b36ef1a9e19dep+0L,
	0x1.4f9b2769d2ca6ad33d8b69aa0b8cp+0L,
	0x1.516daa2cf6641c112f52c84d6066p+0L,
	0x1.5342b569d4f81df0a83c49d86bf4p+0L,
	0x1.551a4ca5d920ec52ec620243540cp+0L,
	0x1.56f4736b527da66ecb004764e61ep+0L,
	0x1.58d12d497c7fd252bc2b7343d554p+0L,
	0x1.5ab07dd48542958c93015191e9a8p+0L,
	0x1.5c9268a5946b701c4b1b81697ed4p+0L,
	0x1.5e76f15ad21486e9be4c20399d12p+0L,
	0x1.605e1b976dc08b076f592a487066p+0L,
	0x1.6247eb03a5584b1f0fa06fd2d9eap+0L,
	0x1.6434634ccc31fc76f8714c4ee122p+0L,
	0x1.66238825522249127d9e29b92ea2p+0L,
	0x1.68155d44ca973081c57227b9f69ep+0L,
};

/*
 * Per-entry offsets that accompany tbl[]: tools_exp2l subtracts eps[i0] from the reduced
 * argument z before evaluating the polynomial, using the same index i0 as for tbl[].
 * All values are tiny (magnitude at most about 2**-98); the entry paired with tbl[] == 1 is zero.
 */
static const float eps[TBLSIZE] = {
	-0x1.5c50p-101,
	-0x1.5d00p-106,
	 0x1.8e90p-102,
	-0x1.5340p-103,
	 0x1.1bd0p-102,
	-0x1.4600p-105,
	-0x1.7a40p-104,
	 0x1.d590p-102,
	-0x1.d590p-101,
	 0x1.b100p-103,
	-0x1.0d80p-105,
	 0x1.6b00p-103,
	-0x1.9f00p-105,
	 0x1.c400p-103,
	 0x1.e120p-103,
	-0x1.c100p-104,
	-0x1.9d20p-103,
	 0x1.a800p-108,
	 0x1.4c00p-106,
	-0x1.9500p-106,
	 0x1.6900p-105,
	-0x1.29d0p-100,
	 0x1.4c60p-103,
	 0x1.13a0p-102,
	-0x1.5b60p-103,
	-0x1.1c40p-103,
	 0x1.db80p-102,
	 0x1.91a0p-102,
	 0x1.dc00p-105,
	 0x1.44c0p-104,
	 0x1.9710p-102,
	 0x1.8760p-103,
	-0x1.a720p-103,
	 0x1.ed20p-103,
	-0x1.49c0p-102,
	-0x1.e000p-111,
	 0x1.86a0p-103,
	 0x1.2b40p-103,
	-0x1.b400p-108,
	 0x1.1280p-99,
	-0x1.02d8p-102,
	-0x1.e3d0p-103,
	-0x1.b080p-105,
	-0x1.f100p-107,
	-0x1.16c0p-105,
	-0x1.1190p-103,
	-0x1.a7d2p-100,
	 0x1.3450p-103,
	-0x1.67c0p-105,
	 0x1.4b80p-104,
	-0x1.c4e0p-103,
	 0x1.6000p-108,
	-0x1.3f60p-105,
	 0x1.93f0p-104,
	 0x1.5fe0p-105,
	 0x1.6f80p-107,
	-0x1.7600p-106,
	 0x1.21e0p-106,
	-0x1.3a40p-106,
	-0x1.40c0p-104,
	-0x1.9860p-105,
	-0x1.5d40p-108,
	-0x1.1d70p-106,
	 0x1.2760p-105,
	 0x0.0000p+0,
	 0x1.21e2p-104,
	-0x1.9520p-108,
	-0x1.5720p-106,
	-0x1.4810p-106,
	-0x1.be00p-109,
	 0x1.0080p-105,
	-0x1.5780p-108,
	-0x1.d460p-105,
	-0x1.6140p-105,
	 0x1.4630p-104,
	 0x1.ad50p-103,
	 0x1.82e0p-105,
	 0x1.1d3cp-101,
	 0x1.6100p-107,
	 0x1.ec30p-104,
	 0x1.f200p-108,
	 0x1.0b40p-103,
	 0x1.3660p-102,
	 0x1.d9d0p-103,
	-0x1.02d0p-102,
	 0x1.b070p-103,
	 0x1.b9c0p-104,
	-0x1.01c0p-103,
	-0x1.dfe0p-103,
	 0x1.1b60p-104,
	-0x1.ae94p-101,
	-0x1.3340p-104,
	 0x1.b3d8p-102,
	-0x1.6e40p-105,
	-0x1.3670p-103,
	 0x1.c140p-104,
	 0x1.1840p-101,
	 0x1.1ab0p-102,
	-0x1.a400p-104,
	 0x1.1f00p-104,
	-0x1.7180p-103,
	 0x1.4ce0p-102,
	 0x1.9200p-107,
	-0x1.54c0p-103,
	 0x1.1b80p-105,
	-0x1.1828p-101,
	 0x1.5720p-102,
	-0x1.a060p-100,
	 0x1.9160p-102,
	 0x1.a280p-104,
	 0x1.3400p-107,
	 0x1.2b20p-102,
	 0x1.7800p-108,
	 0x1.cfd0p-101,
	 0x1.2ef0p-102,
	-0x1.2760p-99,
	 0x1.b380p-104,
	 0x1.0048p-101,
	-0x1.60b0p-102,
	 0x1.a1ccp-100,
	-0x1.a640p-104,
	-0x1.08a0p-101,
	 0x1.7e60p-102,
	 0x1.22c0p-103,
	-0x1.7200p-106,
	 0x1.f0f0p-102,
	 0x1.eb4ep-99,
	 0x1.c6e0p-103,
};

/*
 * exp2l(x): compute the base 2 exponential of x
 *
 * Accuracy: Peak error < 0.502 ulp.
 *
 * Method: (accurate tables)
 *
 *   Reduce x:
 *     x = k + y, for integer k and |y| <= 1/2.
 *     Thus we have exp2(x) = 2**k * exp2(y).
 *
 *   Reduce y:
 *     y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE.
 *     Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]),
 *     with |z - eps[i]| <= 2**-8 + 2**-98 for the table used.
 *
 *   We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via
 *   a degree-10 minimax polynomial with maximum error under 2**-120.
 *   The values in exp2t[] and eps[] are chosen such that
 *   exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such
 *   that exp2t[i] is accurate to 2**-122.
 *
 *   Note that the range of i is +-TBLSIZE/2, so we actually index the tables
 *   by i0 = i + TBLSIZE/2.
 *
 *   This method is due to Gal, with many details due to Gal and Bachelis:
 *
 *	Gal, S. and Bachelis, B.  An Accurate Elementary Mathematical Library
 *	for the IEEE Floating Point Standard.  TOMS 17(1), 26-46 (1991).
 */
/* Base-2 exponential for 113-bit (IEEE binary128) long doubles.
   Returns 2**x. Special cases handled before the table-driven computation:
   - positive arguments with x >= 32768, +inf and positive NaN: x * 2**16383 (overflow, or NaN
     propagated); arguments in [16384, 32768) overflow later in scalbnl;
   - -inf and negative NaN: -1/x (i.e. 0 for -inf);
   - x <= -16495: 0, after signalling underflow. The smallest binary128 subnormal is 2**-16494, so
     every x above -16495 must still go through the normal path and produce a subnormal result;
   - |x| < 2**-114: 1 + x. */
LUALIB_API long double tools_exp2l (long double x) {
  union ldshape u = {x};
  int e = u.i.se & 0x7fff;  /* biased exponent, sign bit masked off */
  long double r, z, t;
  uint32_t i0;
  union {uint32_t u; int32_t i;} k;
  /* Filter out exceptional cases. */
  if (e >= 0x3fff + 14) {  /* |x| >= 16384 or x is NaN */
    if (u.i.se >= 0x3fff + 15 && u.i.se >> 15 == 0) return x * 0x1p16383L;  /* overflow */
    if (e == 0x7fff) return -1/x;  /* -inf or -nan */
    if (x < -16382) {
      /* signal underflow when the result vanishes or is an inexact subnormal (x not an integer) */
      if (x <= -16495 || x - 0x1p112 + 0x1p112 != x) FORCE_EVAL((float)(-0x1p-149/x));  /* underflow */
      /* Same threshold as the underflow test above; the ld80 value -16446 would flush
         representable subnormal results to zero. */
      if (x <= -16495) return 0;
    }
  } else if (e < 0x3fff - 114) return 1 + x;
  /*
   * Reduce x, computing z, i0, and k. The low bits of x + redux
   * contain the 16-bit integer part of the exponent (k) followed by
   * TBLBITS fractional bits (i0). We use bit tricks to extract these
   * as integers, then set z to the remainder.
   *
   * Example: Suppose x is 0xabc.123456p0 and TBLBITS is 8.
   * Then the low-order word of x + redux is 0x000abc12,
   * We split this into k = 0xabc and i0 = 0x12 (adjusted to
   * index into the table), then we compute z = 0x0.003456p0.
  */
  u.f = x + redux;
  i0 = u.i2.lo + TBLSIZE / 2;
  k.u = i0 / TBLSIZE * TBLSIZE;  /* clear the table-index bits ... */
  k.i /= TBLSIZE;                /* ... and divide as a signed value to obtain k */
  i0 %= TBLSIZE;
  u.f -= redux;                  /* x rounded to a multiple of 1/TBLSIZE */
  z = x - u.f;
  /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
  t = tbl[i0];
  z -= eps[i0];
  r = t + t * z*(E2P1 + z*(E2P2 + z*(E2P3 + z*(E2P4 + z*(E2P5 + z*(E2P6 + z*(E2P7 + z*(E2P8 + z*(E2P9 + z * E2P10)))))))));
  return scalbnl(r, k.i);  /* scale by 2**k */
}
#endif


/* Taken from: https://chromium.googlesource.com/external/github.com/kripken/emscripten/+/refs/tags/1.37.5/system/lib/libc/musl/src/math/exp10l.c, 2.34.7 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
LUALIB_API long double tools_exp10l (long double x) {
  return exp10(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
LUALIB_API long double tools_exp10l (long double x) {
  static const long double p10[] = { 1e-15L, 1e-14L, 1e-13L, 1e-12L, 1e-11L, 1e-10L,
    1e-9L, 1e-8L, 1e-7L, 1e-6L, 1e-5L, 1e-4L, 1e-3L, 1e-2L, 1e-1L, 1, 1e1, 1e2, 1e3,
    1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15
  };
  long double n, y = sun_modfl(x, &n);  /* 3.16.4 tweak */
  union ldshape u = {n};
  /* fabsl(n) < 16 without raising invalid on nan */
  if ((u.i.se & 0x7fff) < 0x3fff + 4) {
    if (!y) return p10[(int)n + 15];
    y = tools_exp2l(3.32192809488736234787031942948939L*y);
    return y * p10[(int)n + 15];
  }
#ifndef __DJGPP__
  return powl(10.0, x);
#else
  return tools_powl(10.0, x);
#endif
}
#endif


/* Approximates 2^x with enough precision (mean error = 3.2037e-7) for -infinity < x < 10.
   For x > 10, accuracy is quite bad.
   Arguments below -126 are clipped to -126. NaN is returned unchanged and arguments of 128 or more
   yield HUGE_VALF: for those inputs the integer conversions below would be out of range, which is
   undefined behaviour in C. */
LUALIB_API float tools_pow2fast (float x) {  /* 2.14.6 */
  float clipx, y;
  ieee_float_shape_type u;
  if (x != x) return x;                  /* NaN: (int)NaN below would be undefined */
  if (x >= 128.0f) return HUGE_VALF;     /* 2^x exceeds the float range; the scaled value would not fit uint32_t */
  clipx = (x < -126) ? -126.0f : x;
  /* fractional part of clipx, shifted into (0, 1] for negative arguments */
  y = clipx - (int)clipx + (float)(x < 0);
  /* build the bit pattern of the result directly: exponent and mantissa come from a rational
     approximation scaled by 2^23 (the width of the float mantissa field) */
  u.word = (uint32_t)((1 << 23)*(clipx + 121.2740575f + 27.7280233f/(4.84252568f - y) - 1.49012907f * y));
  return u.value;
}


/* @(#)s_fabs.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * fabs(x) returns the absolute value of x.
 */

LUALIB_API double sun_fabs (double x) {  /* as fast as GCC's fabs */
  uint32_t hw;
  /* fetch the upper 32 bits of x, clear the topmost (sign) bit and write them back */
  GET_HIGH_WORD(hw, x);
  hw &= 0x7fffffff;
  SET_HIGH_WORD(x, hw);
  return x;
}


/* @(#)e_sinh.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __ieee754_sinh(x)
 * Method :
 * mathematically sinh(x) is defined to be (exp(x)-exp(-x))/2
 *  1. Replace x by |x| (sinh(-x) = -sinh(x)).
 *  2.
 *                                            E + E/(E+1)
 *      0        <= x <= 22     :  sinh(x) := -----------, E=expm1(x)
 *                                                 2
 *
 *      22       <= x <= lnovft :  sinh(x) := exp(x)/2
 *      lnovft   <= x <= ln2ovft:  sinh(x) := exp(x/2)/2 * exp(x/2)
 *      ln2ovft  <  x           :  sinh(x) := x*shuge (overflow)
 *
 * Special cases:
 *  sinh(x) is x if x is +INF, -INF, or NaN.
 *  only sinh(0)=0 is exact for finite x.
 */

/* Large constant: used to raise inexact for tiny arguments and to force overflow for huge ones. */
static const double shuge = 1.0e307;

/* Hyperbolic sine. The argument range is determined from the high word of x alone;
   the result carries the sign of x. */
LUALIB_API double sun_sinh (double x) {
  double em1, ex, halfsgn;
  int32_t hw, ahw;
  GET_HIGH_WORD(hw, x);
  ahw = hw & 0x7fffffff;  /* high word of |x| */
  /* inf or NaN */
  if (unlikely(ahw >= 0x7ff00000)) return x + x;  /* 2.5.15 optimisation */
  halfsgn = (hw < 0) ? -0.5 : 0.5;  /* sign(x)*0.5 */
  if (ahw >= 0x40360000) {  /* |x| >= 22 */
    /* below log(maxdouble): sign(x)*0.5*exp(|x|) */
    if (ahw < 0x40862E42) return halfsgn*sun_exp(fabs(x));
    /* beyond the overflow threshold: overflow with the sign of x */
    if (ahw > 0x408633CE) return x*shuge;
    /* in between: exp(|x|) alone might overflow, so square exp(|x|/2) instead,
       taken from Sun source file e_sinh.c 1.3 95/01/18 */
    ex = sun_exp(0.5*fabs(x));
    em1 = halfsgn*ex;
    return em1*ex;
  }
  /* |x| < 2**-28: sinh(tiny) = tiny with inexact */
  if (ahw < 0x3e300000 && shuge + x > one) return x;
  /* |x| < 22: sign(x)*0.5*(E + E/(E + 1)) with E = expm1(|x|) */
  em1 = sun_expm1(fabs(x));
  if (ahw < 0x3ff00000) return halfsgn*(2.0*em1 - em1*em1/(em1 + one));  /* |x| < 1 */
  return halfsgn*(em1 + em1/(em1 + one));
}


/* @(#)e_cosh.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __ieee754_cosh(x)
 * Method :
 * mathematically cosh(x) is defined to be (exp(x)+exp(-x))/2
 *  1. Replace x by |x| (cosh(x) = cosh(-x)).
 *  2.
 *                                                [ exp(x) - 1 ]^2
 *      0        <= x <= ln2/2  :  cosh(x) := 1 + ----------------
 *                                                    2*exp(x)
 *
 *                                            exp(x) + 1/exp(x)
 *      ln2/2    <= x <= 22     :  cosh(x) := -----------------
 *                                                    2
 *      22       <= x <= lnovft :  cosh(x) := exp(x)/2
 *      lnovft   <= x <= ln2ovft:  cosh(x) := exp(x/2)/2 * exp(x/2)
 *      ln2ovft  <  x           :  cosh(x) := huge*huge (overflow)
 *
 * Special cases:
 *  cosh(x) is |x| if x is +INF, -INF, or NaN.
 *  only cosh(0)=1 is exact for finite x.
 */

LUALIB_API double sun_cosh (double x) {
  double val, aux;
  int32_t ahw;
  /* high word of |x|; all range decisions are made on it */
  GET_HIGH_WORD(ahw, x);
  ahw &= 0x7fffffff;
  /* inf or NaN */
  if (ahw >= 0x7ff00000) return x*x;
  if (ahw >= 0x40360000) {  /* |x| >= 22 */
    /* below log(maxdouble): half*exp(|x|) */
    if (ahw < 0x40862E42) return half*sun_exp(fabs(x));
    /* beyond the overflow threshold: cosh(x) overflows */
    if (ahw > 0x408633CE) return huge*huge;
    /* in between: square exp(|x|/2) instead of computing exp(|x|),
       taken from Sun source file e_cosh.c 1.3 95/01/18 */
    aux = sun_exp(half*fabs(x));
    val = half*aux;
    return val*aux;
  }
  if (ahw >= 0x3fd62e43) {  /* 0.5*ln2 <= |x| < 22: (exp(|x|) + 1/exp(|x|))/2 */
    val = sun_exp(fabs(x));
    return half*val + half/val;
  }
  /* |x| < 0.5*ln2: 1 + expm1(|x|)^2/(2*exp(|x|)) */
  val = sun_expm1(fabs(x));
  aux = one + val;
  if (ahw < 0x3c800000) return aux;  /* cosh(tiny) = 1 */
  return one + (val*val)/(aux + aux);
}


LUALIB_API void sun_sinhcosh (double x, double *sih, double *coh) {  /* 12 % slower than tools_sinhcosh */
  /* Only sinh is computed directly. cosh follows from cosh(x) = sqrt(1 + sinh(x)**2), see page 85,
     Abramowitz and Stegun, Handbook of Mathematical Functions; this is much faster than calling cosh. */
  *sih = sun_sinh(x);
  *coh = sun_hypot2(*sih);
}


/* Stores sinh(x) in *sih and cosh(x) in *coh, both derived from a single call to sun_exp.
   This function is susceptible to OVERFLOW !
   A variant built on sun_exp(2*x) and sun_exp(-x), i.e. 0.5*(a - 1)*b and 0.5*(a + 1)*b,
   turned out to be slower and not more precise. */
LUALIB_API void tools_sinhcosh (double x, double *sih, double *coh) {  /* 12 % faster than sun_sinhcosh, 2.32.6 */
  lua_Number ex, rex;
  ex = sun_exp(x);  /* e^x */
  rex = 1/ex;       /* e^-x */
  *sih = 0.5*(ex - rex);
  *coh = 0.5*(ex + rex);
}


/* @(#)s_tanh.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* Tanh(x)
 * Return the Hyperbolic Tangent of x
 *
 * Method :
 *                                  x    -x
 *                                 e  - e
 *  0. tanh(x) is defined to be   -----------
 *                                  x    -x
 *                                 e  + e
 *  1. reduce x to non-negative by tanh(-x) = -tanh(x).
 *  2.  0      <= x <= 2**-55 : tanh(x) := x*(one+x)
 *                                          -t
 *      2**-55 <  x <=  1     : tanh(x) := -----; t = expm1(-2x)
 *                                         t + 2
 *                                               2
 *      1      <= x <=  22.0  : tanh(x) := 1 - -----; t = expm1(2x)
 *                                             t + 2
 *      22.0   <  x <= INF    : tanh(x) := 1.
 *
 * Special cases:
 *  tanh(NaN) is NaN;
 *  only tanh(0)=0 is exact for finite argument.
 */

/* static const double two = 2.0; */

LUALIB_API double sun_tanh (double x) {
  double em1, mag;
  int32_t hw, ahw;
  GET_HIGH_WORD(hw, x);
  ahw = hw & 0x7fffffff;  /* high word of |x| */
  /* inf or NaN: tanh(+-inf) = +-1, tanh(NaN) = NaN */
  if (unlikely(ahw >= 0x7ff00000)) {   /* 2.5.15 optimisation */
    return (hw >= 0) ? one/x + one : one/x - one;
  }
  /* |x| < 2**-55: tanh(small) = small */
  if (ahw < 0x3c800000) return x*(one + x);
  /* mag receives tanh(|x|); the sign is restored at the end */
  if (ahw >= 0x40360000) {         /* |x| >= 22: result is +-1 */
    mag = one - tiny;              /* raised inexact flag */
  } else if (ahw >= 0x3ff00000) {  /* 1 <= |x| < 22 */
    em1 = sun_expm1(two*fabs(x));
    mag = one - two/(em1 + two);
  } else {                         /* 2**-55 <= |x| < 1 */
    em1 = sun_expm1(-two*fabs(x));
    mag = -em1/(em1 + two);
  }
  if (hw >= 0) return mag;
  return -mag;
}


/* @(#)s_expm1.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* expm1(x)
 * Returns exp(x)-1, the exponential of x minus 1.
 *
 * Method
 *   1. Argument reduction:
 *  Given x, find r and integer k such that
 *
 *               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
 *
 *      Here a correction term c will be computed to compensate
 *  the error in r when rounded to a floating-point number.
 *
 *   2. Approximating expm1(r) by a special rational function on
 *  the interval [0,0.34658]:
 *  Since
 *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
 *  we define R1(r*r) by
 *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
 *  That is,
 *      R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
 *         = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
 *         = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
 *      We use a special Remez algorithm on [0,0.347] to generate
 *   a polynomial of degree 5 in r*r to approximate R1. The
 *  maximum error of this polynomial approximation is bounded
 *  by 2**-61. In other words,
 *      R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
 *  where   Q1  =  -1.6666666666666567384E-2,
 *     Q2  =   3.9682539681370365873E-4,
 *     Q3  =  -9.9206344733435987357E-6,
 *     Q4  =   2.5051361420808517002E-7,
 *     Q5  =  -6.2843505682382617102E-9;
 *    z   =  r*r,
 *  with error bounded by
 *      |                  5           |     -61
 *      | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
 *      |                              |
 *
 *  expm1(r) = exp(r)-1 is then computed by the following
 *   specific way which minimize the accumulation rounding error:
 *             2     3
 *            r     r    [ 3 - (R1 + R1*r/2)  ]
 *        expm1(r) = r + --- + --- * [--------------------]
 *                  2     2    [ 6 - r*(3 - R1*r/2) ]
 *
 *  To compensate the error in the argument reduction, we use
 *    expm1(r+c) = expm1(r) + c + expm1(r)*c
 *         ~ expm1(r) + c + r*c
 *  Thus c+r*c will be added in as the correction terms for
 *  expm1(r+c). Now rearrange the term to avoid optimisation
 *   screw up:
 *            (      2                                    2 )
 *            ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
 *   expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
 *                  ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
 *                      (                                             )
 *
 *       = r - E
 *   3. Scale back to obtain expm1(x):
 *  From step 1, we have
 *     expm1(x) = either 2^k*[expm1(r)+1] - 1
 *        = or     2^k*[expm1(r) + (1-2^-k)]
 *   4. Implementation notes:
 *  (A). To save one multiplication, we scale the coefficient Qi
 *       to Qi*2^i, and replace z by (x^2)/2.
 *  (B). To achieve maximum accuracy, we compute expm1(x) by
 *    (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
 *    (ii)  if k=0, return r-E
 *    (iii) if k=-1, return 0.5*(r-E)-0.5
 *        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
 *                  else       return  1.0+2.0*(r-E);
 *    (v)   if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
 *    (vi)  if k <= 20, return 2^k((1-2^-k)-(E-r)), else
 *    (vii) return 2^k(1-((E+2^-k)-r))
 *
 * Special cases:
 *  expm1(INF) is INF, expm1(NaN) is NaN;
 *  expm1(-INF) is -1, and
 *  for finite argument, only expm1(0)=0 is exact.
 *
 * Accuracy:
 *  according to an error analysis, the error is always less than
 *  1 ulp (unit in the last place).
 *
 * Misc. info.
 *  For IEEE double
 *      if x >  7.09782712893383973096e+02 then expm1(x) overflow
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 *
 * The expm1 x86 assembler function in x86math.h surprisingly is half as fast.
 */

/* Polynomial coefficients read by sun_expm1 when it evaluates
   r1 = one + hxs*(Q1 + hxs*(Q2 + ...)) with hxs = x*x/2. */
static const double
/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */
Q1  =  -3.33333333333331316428e-02, /* BFA11111 111110F4 */
Q2  =   1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
Q3  =  -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
Q4  =   4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
Q5  =  -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */

/* Returns exp(x) - 1, using the argument reduction and rational approximation
 * described in the algorithm comment preceding the Q coefficients.
 *
 * Cases handled explicitly by the code:
 *   - NaN input returns x + x; +inf returns x; -inf returns -1.0;
 *   - x > o_threshold returns huge*huge (overflow);
 *   - x < -56*ln2 returns tiny - one (-1 with inexact);
 *   - |x| < 2**-54 returns x (with inexact for x != 0).
 *
 * The order of the floating-point statements is significant for accuracy and
 * for the raised exception flags; do not reorder them.
 */
LUALIB_API double sun_expm1 (double x) {  /* more than twice as fast as GCC's implementation */
  double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
  int32_t k, xsb;
  uint32_t hx;
  c = 0;
  GET_HIGH_WORD(hx, x);
  xsb = hx & 0x80000000;  /* sign bit of x */
  hx &= 0x7fffffff;       /* high word of |x| */
  /* filter out huge and non-finite argument */
  if (hx >= 0x4043687A) {    /* if |x| >= 56*ln2 */
    if (hx >= 0x40862E42) {  /* if |x| >= 709.78... */
      if (unlikely(hx >= 0x7ff00000)) {  /* 2.5.15 optimisation, x is inf or NaN */
        uint32_t low;
        GET_LOW_WORD(low, x);
        if (((hx & 0xfffff) | low) != 0)
          return x + x;    /* NaN */
        else return (xsb == 0) ? x : -1.0;    /* exp(+-inf)={inf,-1} */
      }
      if (x > o_threshold) return huge*huge;  /* overflow */
    }
    if (xsb != 0) {  /* x < -56*ln2, return -1.0 with inexact */
      if (x + tiny < 0.0)   /* raise inexact */
        return tiny - one;  /* return -1 */
    }
  }
  /* argument reduction */
  if (hx > 0x3fd62e42) {    /* if  |x| > 0.5 ln2 */
    if (hx < 0x3FF0A2B2) {  /* and |x| < 1.5 ln2 */
      if (xsb == 0) {
        hi = x - ln2_hi; lo =  ln2_lo; k = 1;
      } else {
        hi = x + ln2_hi; lo = -ln2_lo; k = -1;
      }
    } else {
      k  = invln2*x + ((xsb == 0) ? 0.5 : -0.5);
      t  = k;
      hi = x - t*ln2_hi;  /* t*ln2_hi is exact here */
      lo = t*ln2_lo;
    }
    STRICT_ASSIGN(double, x, hi - lo);
    c = (hi - x) - lo;
  }
  else if (hx < 0x3c900000) {  /* when |x|<2**-54, return x */
    t = huge + x;  /* return x with inexact flags when x!=0 */
    return x - (t - (huge + x));
  }
  else k = 0;
  /* x is now in primary range */
  hfx = 0.5*x;
  hxs = x*hfx;
  r1 = one + hxs*(Q1 + hxs*(Q2 + hxs*(Q3 + hxs*(Q4 + hxs*Q5))));
  t  = 3.0 - r1*hfx;
  e  = hxs*((r1 - t)/(6.0 - x*t));
  if (k == 0) return x - (x*e - hxs);  /* c is 0 */
  else {
    /* 2^k: k may be negative here, and left-shifting a negative signed value is
       undefined behaviour, so build the biased exponent first and shift it as
       an unsigned quantity; the resulting bit pattern is unchanged. */
    INSERT_WORDS(twopk, ((uint32_t)(0x3ff + k)) << 20, 0);
    e  = (x*(e - c) - c);
    e -= hxs;
    if (k == -1) return 0.5*(x - e)-0.5;
    if (k == 1) {
      if (x < -0.25) return -2.0*(e - (x + 0.5));
      else return  one + 2.0*(x - e);
    }
    if (k <= -2 || k > 56) {   /* suffice to return exp(x)-1 */
      y = one - (e - x);
      if (k == 1024) y = y*2.0*0x1p1023;  /* twopk would be inf for k == 1024 */
      else y = y*twopk;
      return y - one;
    }
    t = one;
    if (k < 20) {
      SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k));  /* t=1-2^-k */
      y = t - (e - x);
      y = y*twopk;
    } else {
      SET_HIGH_WORD(t, ((0x3ff - k) << 20));  /* 2^-k */
      y = x - (e + t);
      y += one;
      y = y*twopk;
    }
  }
  return y;
}

/* @(#)s_log1p.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * double log1p(double x)
 *
 * Method :
 *   1. Argument Reduction: find k and f such that
 *      1+x = 2^k * (1+f),
 *     where  sqrt(2)/2 < 1+f < sqrt(2) .
 *
 *      Note. If k=0, then f=x is exact. However, if k!=0, then f
 *  may not be representable exactly. In that case, a correction
 *  term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
 *  log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
 *  and add back the correction term c/u.
 *  (Note: when x > 2**53, one can simply return log(x))
 *
 *   2. Approximation of log1p(f).
 *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
 *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
 *          = 2s + s*R
 *      We use a special Remes algorithm on [0,0.1716] to generate
 *   a polynomial of degree 14 to approximate R. The maximum error
 *  of this polynomial approximation is bounded by 2**-58.45. In
 *  other words,
 *            2      4      6      8      10      12      14
 *      R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
 *    (the values of Lp1 to Lp7 are listed in the program)
 *  and
 *      |      2          14          |     -58.45
 *      | Lp1*s +...+Lp7*s    -  R(z) | <= 2
 *      |                             |
 *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
 *  In order to guarantee error in log below 1ulp, we compute log
 *  by
 *    log1p(f) = f - (hfsq - s*(hfsq+R)).
 *
 *  3. Finally, log1p(x) = k*ln2 + log1p(f).
 *            = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
 *     Here ln2 is split into two floating point number:
 *      ln2_hi + ln2_lo,
 *     where n*ln2_hi is always exact for |n| < 2000.
 *
 * Special cases:
 *  log1p(x) is NaN with signal if x < -1 (including -INF) ;
 *  log1p(+INF) is +INF; log1p(-1) is -INF with signal;
 *  log1p(NaN) is that NaN with no signal.
 *
 * Accuracy:
 *  according to an error analysis, the error is always less than
 *  1 ulp (unit in the last place).
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 *
 * Note: Assuming log() return accurate answer, the following
 *    algorithm can be used to compute log1p(x) to within a few ULP:
 *
 *    u = 1+x;
 *    if(u==1.0) return x ; else
 *         return log(u)*(x/(u-1.0));
 *
 *   See HP-15C Advanced Functions Handbook, p.193.
 *
 * k_log1p(f):
 *
 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
 *
 * The following describes the overall strategy for computing
 * logarithms in base e.  The argument reduction and adding the final
 * term of the polynomial are done by the caller for increased accuracy
 * when different bases are used.
 *
 * Method :
 *   1. Argument Reduction: find k and f such that
 *      x = 2^k * (1+f),
 *     where  sqrt(2)/2 < 1+f < sqrt(2) .
 *
 *   2. Approximation of log(1+f).
 *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
 *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
 *          = 2s + s*R
 *      We use a special Remes algorithm on [0,0.1716] to generate
 *   a polynomial of degree 14 to approximate R. The maximum error
 *  of this polynomial approximation is bounded by 2**-58.45. In
 *  other words,
 *            2      4      6      8      10      12      14
 *      R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
 *    (the values of Lg1 to Lg7 are listed in the program)
 *  and
 *      |      2          14          |     -58.45
 *      | Lg1*s +...+Lg7*s    -  R(z) | <= 2
 *      |                             |
 *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
 *  In order to guarantee error in log below 1ulp, we compute log
 *  by
 *    log(1+f) = f - s*(f - R)  (if f is not too large)
 *    log(1+f) = f - (hfsq - s*(hfsq+R)).  (better accuracy)
 *
 *  3. Finally,  log(x) = k*ln2 + log(1+f).
 *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
 *     Here ln2 is split into two floating point number:
 *      ln2_hi + ln2_lo,
 *     where n*ln2_hi is always exact for |n| < 2000.
 *
 * Special cases:
 *  log(x) is NaN with signal if x < 0 (including -INF) ;
 *  log(+INF) is +INF; log(0) is -INF with signal;
 *  log(NaN) is that NaN with no signal.
 *
 * Accuracy:
 *  according to an error analysis, the error is always less than
 *  1 ulp (unit in the last place).
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 *
 * We always inline k_log1p(), since doing so produces a
 * substantial performance improvement (~40% on amd64).
 *
 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
 *
 * Note: The implementation is eleven percent slower than GCC's log1p.
 * The function also returns different results.
 */
/* Kernel shared by the base-2 and base-10 logarithms: for the reduced argument f
   it returns s*(f*f/2 + R) with s = f/(2 + f), where R is the polynomial in s*s
   built from Lg1..Lg7. The caller adds the remaining terms. */
double kernel_log1p (double f) {  /* inlined 2.16.13 */
  const double s  = f/(2.0 + f);
  const double s2 = s*s;    /* s^2 */
  const double s4 = s2*s2;  /* s^4 */
  /* the polynomial is split into two interleaved halves that are summed at the end */
  const double part_a = s4*(Lg2 + s4*(Lg4 + s4*Lg6));
  const double part_b = s2*(Lg1 + s4*(Lg3 + s4*(Lg5 + s4*Lg7)));
  const double poly = part_b + part_a;
  const double half_sq = 0.5*f*f;
  return s*(half_sq + poly);
}

/* @(#)s_log1p.c 1.4 96/03/07 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* double log1p(double x)
 *
 * Method :
 *   1. Argument Reduction: find k and f such that
 *			1+x = 2^k * (1+f),
 *	   where  sqrt(2)/2 < 1+f < sqrt(2) .
 *
 *      Note. If k=0, then f=x is exact. However, if k!=0, then f
 *	may not be representable exactly. In that case, a correction
 *	term is need. Let u=1+x rounded. Let c = (1+x)-u, then
 *	log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
 *	and add back the correction term c/u.
 *	(Note: when x > 2**53, one can simply return log(x))
 *
 *   2. Approximation of log1p(f).
 *	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
 *		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
 *	     	 = 2s + s*R
 *      We use a special Remes algorithm on [0,0.1716] to generate
 * 	a polynomial of degree 14 to approximate R The maximum error
 *	of this polynomial approximation is bounded by 2**-58.45. In
 *	other words,
 *		        2      4      6      8      10      12      14
 *	    R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
 *  	(the values of Lp1 to Lp7 are listed in the program)
 *	and
 *	    |      2          14          |     -58.45
 *	    | Lp1*s +...+Lp7*s    -  R(z) | <= 2
 *	    |                             |
 *	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
 *	In order to guarantee error in log below 1ulp, we compute log
 *	by
 *		log1p(f) = f - (hfsq - s*(hfsq+R)).
 *
 *	3. Finally, log1p(x) = k*ln2 + log1p(f).
 *		 	     = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
 *	   Here ln2 is split into two floating point number:
 *			ln2_hi + ln2_lo,
 *	   where n*ln2_hi is always exact for |n| < 2000.
 *
 * Special cases:
 *	log1p(x) is NaN with signal if x < -1 (including -INF) ;
 *	log1p(+INF) is +INF; log1p(-1) is -INF with signal;
 *	log1p(NaN) is that NaN with no signal.
 *
 * Accuracy:
 *	according to an error analysis, the error is always less than
 *	1 ulp (unit in the last place).
 *
 * Constants:
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 *
 * Note: Assuming log() return accurate answer, the following
 * 	 algorithm can be used to compute log1p(x) to within a few ULP:
 *
 *		u = 1+x;
 *		if(u==1.0) return x ; else
 *			   return log(u)*(x/(u-1.0));
 *
 *	 See HP-15C Advanced Functions Handbook, p.193.
 *
 * On AMD Ryzen 5 5600H, the function is 18 % slower than GCC's log1p.
 */

/* Returns log(1 + x), following the Sun algorithm described in the comment above.
 *
 * Cases handled explicitly by the code:
 *   - x == -1 returns -two54/zero (negative infinity);
 *   - x < -1 (and negative NaN patterns) returns (x - x)/(x - x), i.e. NaN;
 *   - |x| < 2**-54 returns x; |x| < 2**-29 returns x - x*x/2;
 *   - +inf or NaN (high word >= 0x7ff00000) returns x + x.
 *
 * k is the binary exponent of 1+x after reduction, f the reduced argument and
 * c the correction term for the rounding error of u = 1 + x.
 */
LUALIB_API double sun_log1p (double x) {
  double hfsq, f, c, s, z, R, u;
  int k, hx, hu, ax;
  GET_HIGH_WORD(hx, x);  /* high word of x */
  ax = hx & 0x7fffffff;
  k = 1; c = 0;
  if (hx < 0x3FDA827A) {     /* x < 0.41422  */
    if (ax >= 0x3ff00000) {  /* x <= -1.0 */
      if (x == -1.0) return -two54/zero; /* log1p(-1)=-inf */
      else return (x - x)/(x - x);  /* log1p(x<-1)=NaN */
    }
    if (ax < 0x3e200000) {   /* |x| < 2**-29 */
      if(two54 + x > zero    /* raise inexact */
        && ax < 0x3c900000)  /* |x| < 2**-54 */
        return x;
      else
        return x - x*x*0.5;
    }
    if (hx > 0 || hx <= ((int)0xbfd2bec3)) {
      k = 0; f = x; hu = 1;
    }  /* -0.2929<x<0.41422 */
  }
  if (hx >= 0x7ff00000) return x + x;  /* x is inf or NaN */
  if (k != 0) {
    if (hx < 0x43400000) {
      u  = 1.0 + x;
      GET_HIGH_WORD(hu, u);  /* high word of u */
      k  = (hu >> 20) - 1023;
      c  = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0);  /* correction term */
      c /= u;
    } else {
      u  = x;
      GET_HIGH_WORD(hu, u);  /* high word of u; same accessor as in the branch above */
      k  = (hu>>20) - 1023;
      c  = 0;
    }
    hu &= 0x000fffff;
    if (hu < 0x6a09e) {
      SET_HIGH_WORD(u, hu | 0x3ff00000);  /* normalize u */
    } else {
      k += 1;
      SET_HIGH_WORD(u, hu | 0x3fe00000);  /* normalize u/2 */
      hu = (0x00100000 - hu) >> 2;
    }
    f = u - 1.0;
  }
  hfsq = 0.5*f*f;
  if (hu == 0) {  /* |f| < 2**-20 */
    if (f == zero) {
      if (k == 0)
        return zero;
      else {
        c += k*ln2_lo; return k*ln2_hi + c;
      }
    }
    R = hfsq*(1.0 - 0.66666666666666666*f);
    if (k == 0)
      return f - R;
    else
      return k*ln2_hi - ((R - (k*ln2_lo + c)) - f);
  }
  s = f/(2.0 + f);
  z = s*s;
  R = z*(Lg1 + z*(Lg2 + z*(Lg3 + z*(Lg4 + z*(Lg5 + z*(Lg6 + z*Lg7))))));
  if (k == 0) return f - (hfsq - s*(hfsq + R)); else
  return k*ln2_hi - ((hfsq - (s*(hfsq + R) + (k*ln2_lo + c))) - f);
}


/* @(#)e_log2.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * Return the base 2 logarithm of x.  See e_log.c and k_log.h for most
 * comments.
 *
 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
 * then does the combining and scaling steps
 *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
 * in not-quite-routine extra precision.
 *
 * There is an alternative implementation in uClibc-0.9.33/libm/e_log2.c, it is slower, however.
 *
 */

/* High and low parts of the factor applied by sun_log2 to the split
   natural-logarithm value (val_hi = hi*ivln2hi, val_lo uses both parts). */
const double
ivln2hi    =  1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 ~~ log2(e) */
ivln2lo    =  1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */

/* Returns the base-2 logarithm of x.
 *
 * Cases handled explicitly by the code:
 *   - x == +-0 or x negative returns AGN_NAN (deliberate deviation: no -inf for 0);
 *   - subnormal x is scaled by two54 first and k is adjusted by -54;
 *   - inf or NaN (high word >= 0x7ff00000) returns x + x;
 *   - x == 1 returns zero.
 *
 * Otherwise x is reduced to 2^k * (1+f), kernel_log1p(f) supplies the polynomial
 * part, and the result is assembled as a hi/lo pair multiplied by ivln2hi/ivln2lo
 * and added to k. The statement order below is significant for accuracy.
 */
LUALIB_API double sun_log2 (double x) {
  double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
  int32_t i, k, hx;
  uint32_t lx;
  EXTRACT_WORDS(hx, lx, x);
  k = 0;
  if (hx < 0x00100000) {  /* x < 2**-1022  */
    if ((((hx & 0x7fffffff) | lx) == 0) || hx < 0)
      return AGN_NAN;  /* =(x - x)/zero; */ /* log(0-#) = NaN, change, we do not return -inf for x=0 */
    k -= 54; x *= two54;    /* subnormal number, scale up x */
    GET_HIGH_WORD(hx, x);
  }
  if (unlikely(hx >= 0x7ff00000)) return x + x;  /* inf, nan, 2.5.15 optimisation */
  if (hx == 0x3ff00000 && lx == 0)
    return zero;  /* log(1) = +0 */
  k += (hx >> 20) - 1023;  /* unbiased binary exponent */
  hx &= 0x000fffff;        /* keep the mantissa bits of the high word */
  i = (hx + 0x95f64) & 0x100000;  /* i is 0x100000 when x/2 is to be used, else 0 */
  SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000));  /* normalise x or x/2 */
  k += (i >> 20);
  y = (double)k;
  f = x - 1.0;
  hfsq = 0.5*f*f;
  r = kernel_log1p(f);
  /*
   * f-hfsq must (for args near 1) be evaluated in extra precision
   * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
   * This is fairly efficient since f-hfsq only depends on f, so can
   * be evaluated in parallel with R.  Not combining hfsq with R also
   * keeps R small (though not as small as a true `lo' term would be),
   * so that extra precision is not needed for terms involving R.
   *
   * Compiler bugs involving extra precision used to break Dekker's
   * theorem for splitting f-hfsq as hi+lo, unless double_t was used
   * or the multi-precision calculations were avoided when double_t
   * has extra precision.  These problems are now automatically
   * avoided as a side effect of the optimization of combining the
   * Dekker splitting step with the clear-low-bits step.
   *
   * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
   * precision to avoid a very large cancellation when x is very near
   * these values.  Unlike the above cancellations, this problem is
   * specific to base 2.  It is strange that adding +-1 is so much
   * harder than adding +-ln2 or +-log10_2.
   *
   * This uses Dekker's theorem to normalise y+val_hi, so the
   * compiler bugs are back in some configurations, sigh.  And I
   * don't want to use double_t to avoid them, since that gives a
   * pessimization and the support for avoiding the pessimization
   * is not yet available.
   *
   * The multi-precision calculations for the multiplications are
   * routine.
  */
  hi = f - hfsq;
  SET_LOW_WORD(hi, 0);  /* clear the low 32 bits so that hi*ivln2hi needs fewer bits */
  lo = (f - hi) - hfsq + r;
  val_hi = hi*ivln2hi;
  val_lo = (lo + hi)*ivln2lo + lo*ivln2hi;
  /* spadd(val_hi, val_lo, y), except for not using double_t: */
  w = y + val_hi;
  val_lo += (y - w) + val_hi;
  val_hi = w;
  return val_lo + val_hi;
}


/* @(#)e_log10.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * Return the base 10 logarithm of x.  See e_log.c and k_log.h for most
 * comments.
 *
 *    log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2)
 * in not-quite-routine extra precision.
 */

/* Split constants used by sun_log10 and sun_ilog10: ivln10hi/ivln10lo scale the
   split natural-logarithm value, log10_2hi/log10_2lo scale the binary exponent. */
const double
ivln10hi   =  4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */
ivln10lo   =  2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */
log10_2hi  =  3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
log10_2lo  =  3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

/* Returns the base-10 logarithm of x.
 *
 * Cases handled explicitly by the code:
 *   - x negative or x == +-0 returns AGN_NAN;
 *   - subnormal x is scaled by two54 first and k is adjusted by -54;
 *   - inf or NaN (high word >= 0x7ff00000) returns x + x;
 *   - x == 1 returns zero.
 *
 * Otherwise x is reduced to 2^k * (1+f); the result is
 * k*log10(2) + log(1+f)/ln(10), assembled from hi/lo parts.
 */
LUALIB_API double sun_log10 (double x) {  /* four percent faster than GCC's log10 implementation */
  double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2;
  int32_t i, k, hx;
  uint32_t lx;
  EXTRACT_WORDS(hx, lx, x);
  k = 0;
  if (hx < 0x00100000) {  /* x < 2**-1022  */
    /* changed 2.11.1 : log(+-0) = NaN && log(-#) = NaN */
    if (unlikely(hx < 0 || ((hx & 0x7fffffff) | lx) == 0)) return AGN_NAN;  /* = (x - x)/zero; 2.5.15 optimisation */
    k -= 54; x *= two54; /* subnormal number, scale up x */
    GET_HIGH_WORD(hx, x);
  }
  if (unlikely(hx >= 0x7ff00000)) return x + x;  /* nan, inf, 2.5.15 optimisation */
  if (hx == 0x3ff00000 && lx == 0)
    return zero;  /* log(1) = +0 */
  k += (hx >> 20) - 1023;  /* unbiased binary exponent */
  hx &= 0x000fffff;        /* keep the mantissa bits of the high word */
  i = (hx + 0x95f64) & 0x100000;  /* i is 0x100000 when x/2 is to be used, else 0 */
  SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000));  /* normalise x or x/2 */
  k += (i >> 20);
  y = (double)k;
  f = x - 1.0;
  hfsq = 0.5*f*f;
  r = kernel_log1p(f);
  /* See e_log2.c for most details. */
  hi = f - hfsq;
  SET_LOW_WORD(hi, 0);  /* clear the low 32 bits of hi before the split multiplication */
  lo = (f - hi) - hfsq + r;
  val_hi = hi*ivln10hi;
  y2 = y*log10_2hi;
  val_lo = y*log10_2lo + (lo + hi)*ivln10lo + lo*ivln10hi;
  /*
   * Extra precision in for adding y*log10_2hi is not strictly needed
   * since there is no very large cancellation near x = sqrt(2) or
   * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
   * with some parallelism and it reduces the error for many args.
   */
  w = y2 + val_hi;
  val_lo += (y2 - w) + val_hi;
  val_hi = w;
  return val_lo + val_hi;
}


/* Returns the base-10 logarithm of x truncated towards zero, as a double.
 *
 * The logarithm is computed exactly as in sun_log10; its fractional bits are
 * then cleared by manipulating the IEEE words of the result.
 *
 * Cases handled explicitly by the code:
 *   - x negative or x == +-0 returns AGN_NAN;
 *   - inf or NaN (high word >= 0x7ff00000) returns x + x;
 *   - x == 1 returns zero;
 *   - a logarithm of magnitude below 1 yields a zero carrying the logarithm's sign.
 *
 * Note: x is overwritten with the normalised mantissa during argument
 * reduction, so the truncation step must work on (and return) the computed
 * logarithm `res`, never x.
 */
LUALIB_API double sun_ilog10 (double x) {
  double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2, res;
  int32_t i, k, hx, j0;
  uint32_t lx, lowmask;
  EXTRACT_WORDS(hx, lx, x);
  k = 0;
  if (hx < 0x00100000) {  /* x < 2**-1022  */
    /* changed 2.11.1 : log(+-0) = NaN && log(-#) = NaN */
    if (unlikely(hx < 0 || ((hx & 0x7fffffff) | lx) == 0)) return AGN_NAN;  /* = (x - x)/zero; 2.5.15 optimisation */
    k -= 54; x *= two54; /* subnormal number, scale up x */
    GET_HIGH_WORD(hx, x);
  }
  if (unlikely(hx >= 0x7ff00000)) return x + x;  /* nan, inf, 2.5.15 optimisation */
  if (hx == 0x3ff00000 && lx == 0)
    return zero;  /* log(1) = +0 */
  k += (hx >> 20) - 1023;
  hx &= 0x000fffff;
  i = (hx + 0x95f64) & 0x100000;
  SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000));  /* normalise x or x/2 */
  k += (i >> 20);
  y = (double)k;
  f = x - 1.0;
  hfsq = 0.5*f*f;
  r = kernel_log1p(f);
  /* See e_log2.c for most details. */
  hi = f - hfsq;
  SET_LOW_WORD(hi, 0);
  lo = (f - hi) - hfsq + r;
  val_hi = hi*ivln10hi;
  y2 = y*log10_2hi;
  val_lo = y*log10_2lo + (lo + hi)*ivln10lo + lo*ivln10hi;
  /*
   * Extra precision in for adding y*log10_2hi is not strictly needed
   * since there is no very large cancellation near x = sqrt(2) or
   * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
   * with some parallelism and it reduces the error for many args.
   */
  w = y2 + val_hi;
  val_lo += (y2 - w) + val_hi;
  val_hi = w;
  /* truncate towards 0, adapted from sun_trunc; everything below operates on the logarithm */
  res = val_lo + val_hi;
  EXTRACT_WORDS(hx, lx, res);
  j0 = ((hx >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent of the logarithm */
  if (j0 < 0) {  /* |res| < 1 */
    if (huge + res > 0.0 && hx != 0) {  /* raise inexact if res != 0 */
      hx = (hx < 0) ? 0x80000000 : 0;   /* return 0*sign(res) if |res| < 1 */
      lx = 0;
    }
  } else if (j0 < 20) {  /* integer part lies entirely in the high word */
    i = (0x000fffff) >> j0;  /* mask of the fraction bits in the high word */
    if (((hx & i) | lx) == 0) return res;  /* res is already integral */
    if (huge + res > 0.0) {                /* raise inexact flag */
      hx &= (~i); lx = 0;
    }
  } else if (j0 < 52) {  /* fraction bits only in the low word */
    lowmask = ((uint32_t)(0xffffffff)) >> (j0 - 20);
    if ((lx & lowmask) == 0) return res;  /* res is already integral */
    if (huge + res > 0.0) {               /* raise inexact flag */
      lx &= (~lowmask);
    }
  } else {  /* should not happen: j0 > 51, no fraction part */
    if (j0 == 0x400) return res + res;  /* inf or NaN */
    else return res;                    /* res is integral */
  }
  INSERT_WORDS(res, hx, lx);
  return res;
}


/* see: https://bitbucket.org/zlwu/cegcc-mingw/src/0d9e81454bfa3231299396c92d4b019bf283cd2b/mingwex/complex/ctan.c?at=master&fileviewer=file-view-default
  ctan(a + I*b) = (sin(2*a) + I*sinh(2*b)) / (cos(2*a) + cosh(2*b)) */
#ifndef PROPCMPLX
/* Complex tangent via
     ctan(a + I*b) = (sin(2a) + I*sinh(2b)) / (cos(2a) + cosh(2b)).
   A zero denominator yields HUGE_VAL in both parts; an infinite denominator
   yields 0 + I*(+-1), the sign following the doubled imaginary part. */
LUALIB_API agn_Complex tools_ctan (agn_Complex z) {
  agn_Complex result;
  double sin2a, cos2a, sinh2b, cosh2b, denom;
  double twice_re = 2*(__real__ z);
  double twice_im = 2*(__imag__ z);
  int cls;
  sun_sincos(twice_re, &sin2a, &cos2a);
  sun_sinhcosh(twice_im, &sinh2b, &cosh2b);
  denom = cos2a + cosh2b;
  cls = tools_fpclassify(denom);
  if (cls == FP_ZERO) {
    __real__ result = HUGE_VAL;
    __imag__ result = HUGE_VAL;
  } else if (cls == FP_INFINITE) {
    __real__ result = 0;
    if (twice_im > 0)
      __imag__ result = 1;
    else
      __imag__ result = -1;
  } else {
    __real__ result = sin2a/denom;
    __imag__ result = sinh2b/denom;
  }
  return result;
}
#endif

/* openlibm-0.4.1/src/s_scalbn.c, do not use fdlibm's implementation as the latter is inaccurate
 *
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
 * scalbn/ldexp (double x, int n)
 * scalbn/ldexp(x,n) returns x* 2**n  computed by  exponent
 * manipulation rather than by actually performing an
 * exponentiation or a multiplication.
 */

/* 2**-54; sun_ldexp multiplies by it to scale a result back into the subnormal range. */
static const double
twom54  =  5.55111512312578270212e-17; /* 0x3C900000, 0x00000000 */

/* Returns x * 2**n, computed by manipulating the exponent field of x.
 *
 * - Non-finite x and x == 0 are returned unchanged.
 * - Subnormal x is first scaled by two54 and the exponent adjusted by -54.
 * - A result too large returns huge*copysign(huge, x) (overflow); a result too
 *   small returns tiny*copysign(tiny, x) (underflow).
 * - A subnormal result is produced by building x * 2**(n+54) and multiplying
 *   by twom54.
 *
 * n is range-checked BEFORE it is added to the exponent: with |n| > 50000 the
 * outcome is already decided (the exponent k lies in [-53, 0x7fe]), and checking
 * first keeps k + n from overflowing a signed int for n near INT_MIN/INT_MAX.
 */
LUALIB_API double sun_ldexp (double x, int n) {
  int32_t k, hx, lx;
  if (!isfinite(x) || x == 0.0) return x;  /* work like GCC's scalbn/ldexp */
  EXTRACT_WORDS(hx, lx, x);
  k = (hx & 0x7ff00000) >> 20;  /* extract exponent */
  if (k == 0) { /* 0 or subnormal x */
    if ((lx | (hx & 0x7fffffff)) == 0)
      return x; /* +-0 */
    x *= two54;
    GET_HIGH_WORD(hx, x);
    k = ((hx & 0x7ff00000) >> 20) - 54;
  }
  if (unlikely(k == 0x7ff))  /* 2.5.15 optimisation */
    return x + x;  /* NaN or Inf */
  if (n > 50000)   /* certain overflow; also guards k + n against integer overflow */
    return huge * copysign(huge, x);
  if (n < -50000)  /* certain underflow; also guards k + n against integer overflow */
    return tiny * copysign(tiny, x);
  k = k + n;
  if (k > 0x7fe)
    return huge * copysign(huge, x);  /* overflow */
  if (k > 0) { /* normal result */
    SET_HIGH_WORD(x, (hx & 0x800fffff) | (k << 20));
    return x;
  }
  if (k <= -54)
    return tiny * copysign(tiny, x);  /* underflow */
  k += 54;  /* subnormal result */
  SET_HIGH_WORD(x, (hx & 0x800fffff) | (k << 20));
  return x * twom54;
}


/* @(#)s_frexp.c 1.4 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
 * for non-zero x
 *  x = frexp(arg,&exp);
 * return a double fp quantity x such that 0.5 <= |x| <1.0
 * and the corresponding binary exponent "exp". That is
 *  arg = x*2^exp.
 * If arg is inf, 0.0, or NaN, then frexp(arg,&exp) returns arg
 * with *exp=0.
 */
/* Splits x into a fraction and a binary exponent: returns a value whose
   exponent field is forced to 0x3fe (magnitude in [0.5, 1)) and stores the
   matching power of two in *eptr. Zero, infinity and NaN are returned
   unchanged with *eptr set to 0. */
LUALIB_API double sun_frexp (double x, int *eptr) {
  int32_t high, low, mag;
  EXTRACT_WORDS(high, low, x);
  mag = 0x7fffffff & high;  /* high word without the sign bit */
  *eptr = 0;
  if (mag >= 0x7ff00000) return x;  /* inf, nan */
  if ((mag | low) == 0) return x;   /* zero */
  if (mag < 0x00100000) {
    /* subnormal: scale into the normal range and remember the 2**54 factor */
    x *= two54;
    GET_HIGH_WORD(high, x);
    mag = high & 0x7fffffff;
    *eptr = -54;
  }
  *eptr += (mag >> 20) - 1022;
  /* keep sign and mantissa bits, replace the exponent field by 0x3fe */
  high = (high & 0x800fffff) | 0x3fe00000;
  SET_HIGH_WORD(x, high);
  return x;
}


/* Source: openlibm-0.4.1/src/k_exp.c

 * Copyright (c) 2011 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * __ldexp_exp(x, expt) and __ldexp_cexp(x, expt) compute exp(x) * 2**expt.
 * They are intended for large arguments (real part >= ln(DBL_MAX))
 * where care is needed to avoid overflow.
 *
 * The present implementation is narrowly tailored for our hyperbolic and
 * exponential functions.  We assume expt is small (0 or -1), and the caller
 * has filtered out very large x, for which overflow would be inevitable.
 */
/* Reduction constants read by ds_frexp_exp: kln2 is subtracted from the argument
   before sun_exp is called, and k is added to the exponent reported in *expt. */
static const uint32_t k = 1799;    /* constant for reduction */
static const double kln2 =  1246.97177782734161156;  /* k * ln2 */

/* Source: openlibm-0.4.1/src/k_exp.c
 * Compute exp(x), scaled to avoid spurious overflow. An exponent is returned separately in 'expt'.
 *
 * Input:  ln(DBL_MAX) <= x < ln(2 * DBL_MAX / DBL_MIN_DENORM) ~= 1454.91
 * Output: 2**1023 <= y < 2**1024
 */
/* Computes exp(x) as a mantissa/exponent pair: the returned double carries the
   mantissa bits of exp(x - kln2) with its exponent field forced to
   0x3ff + 1023, and *expt receives the power of two that compensates for it. */
LUALIB_API double ds_frexp_exp (double x, int *expt) {
  uint32_t top;   /* high word of the reduced exponential */
  double scaled;
  /*
   * exp(x) = exp(x - kln2) * 2**k, with k chosen to minimise
   * |exp(kln2) - 2**k|. The exponent of the result is then moved to
   * MAX_EXP so that a later multiplication by a tiny factor does not
   * lose accuracy through denormalisation.
   */
  scaled = sun_exp(x - kln2);
  GET_HIGH_WORD(top, scaled);
  *expt = (top >> 20) - (0x3ff + 1023) + k;
  SET_HIGH_WORD(scaled, (top & 0xfffff) | ((0x3ff + 1023) << 20));
  return scaled;
}


/* Source: openlibm-0.4.1/src/k_exp.c
 * __ldexp_exp(x, expt) and __ldexp_cexp(x, expt) compute exp(x) * 2**expt.
 * They are intended for large arguments (real part >= ln(DBL_MAX))
 * where care is needed to avoid overflow.
 *
 * The present implementation is narrowly tailored for our hyperbolic and
 * exponential functions.  We assume expt is small (0 or -1), and the caller
 * has filtered out very large x, for which overflow would be inevitable.
 */
/* Returns exp(x) * 2**expt: takes the scaled exponential and its exponent from
   ds_frexp_exp, folds expt into that exponent and multiplies by the matching
   power of two. */
LUALIB_API double ds_ldexp_exp (double x, int expt) {
  int own_expt;
  double factor;
  double mant = ds_frexp_exp(x, &own_expt);
  expt = expt + own_expt;
  INSERT_WORDS(factor, (0x3ff + expt) << 20, 0);  /* factor = 2**expt */
  return (mant * factor);
}


/* (Complementary) Error Function for complex arguments
   written by Takuya Ooura in FORTRAN and available at http://www.kurims.kyoto-u.ac.jp/~ooura/gamerf.html.
   See gamerf2a.f file.

   Copyright(C) 1996 Takuya OOURA.
   You may use, copy, modify this code for any purpose and without fee.
   You may distribute this ORIGINAL package. */

/* Coefficient tables for tools_cerfc, which sums terms of the form
   num[i]/(y + off[i]) with y = x*x:
     e_p (numerators) with e_q (offsets), and
     e_r (numerators) with e_s (offsets); e_s[0] is 0 and tools_cerfc
     writes that term as e_r[0]/y. */

/* numerators paired with e_q */
static double e_p[13] = {
  2.94608570191793668e-01,
  1.81694307871527086e-01,
  6.91087778921425355e-02,
  1.62114197106901582e-02,
  2.34533471539159422e-03,
  2.09259199579049675e-04,
  1.15149016557480535e-05,
  3.90779571296927748e-07,
  8.17898509247247602e-09,
  1.05575446466983499e-10,
  8.40470321453263734e-13,
  4.12646136715431977e-15,
  1.24947948599560084e-17
};

/* offsets added to y in the denominators of the e_p terms */
static double e_q[13] = {
  6.04152433382652546e-02,
  5.43737190044387291e-01,
  1.51038108345663136e+00,
  2.96034692357499747e+00,
  4.89363471039948562e+00,
  7.31024444393009580e+00,
  1.02101761241668280e+01,
  1.35934297511096823e+01,
  1.74600053247586586e+01,
  2.18099028451137569e+01,
  2.66431223121749773e+01,
  3.19596637259423197e+01,
  3.77595270864157841e+01
};

/* numerators paired with e_s */
static double e_r[13] = {
  1.56478036351085356e-01,
  2.45771407110492625e-01,
  1.19035163906534275e-01,
  3.55561834455977740e-02,
  6.55014550718381002e-03,
  7.44188068433574137e-04,
  5.21447276257559040e-05,
  2.25337799750608244e-06,
  6.00556181041662576e-08,
  9.87118243564461826e-10,
  1.00064645539515792e-11,
  6.25587539334288736e-14,
  2.41207864479170276e-16
};

/* offsets added to y in the denominators of the e_r terms */
static double e_s[13] = {
  0,
  2.41660973353061018e-01,
  9.66643893412244073e-01,
  2.17494876017754917e+00,
  3.86657557364897629e+00,
  6.04152433382652546e+00,
  8.69979504071019666e+00,
  1.18413876942999899e+01,
  1.54663022945959052e+01,
  1.95745388415979425e+01,
  2.41660973353061018e+01,
  2.92409777757203832e+01,
  3.47991801628407866e+01
};

#ifndef PROPCMPLX
/* Complementary Error Function erfc(z) = 1 - erf(z), for complex arguments */
LUALIB_API agn_Complex tools_cerfc (agn_Complex x) {
  agn_Complex y, z;
  const double pv = 1.27813464856668857e+01;
  const double ph = 6.64067324283344283e+00;
  y = x*x;
  if ((fabs(creal(x)) + fabs(cimag(x))) < ph) {
    z = tools_cexp(pv*x);
    if (creal(z) >= 0) {
      y = tools_cexp(-y)*x*(e_p[12]/(y + e_q[12]) + e_p[11]/(y + e_q[11])
          + e_p[10]/(y + e_q[10]) + e_p[9]/(y + e_q[9]) + e_p[8]/(y + e_q[8]) + e_p[7]/(y + e_q[7])
          + e_p[6]/(y + e_q[6]) + e_p[5]/(y + e_q[5]) + e_p[4]/(y + e_q[4]) + e_p[3]/(y + e_q[3])
          + e_p[2]/(y + e_q[2]) + e_p[1]/(y + e_q[1]) + e_p[0]/(y + e_q[0])) + 2/(1 + z);
    } else {
      y = tools_cexp(-y)*x*(e_r[12]/(y + e_s[12]) + e_r[11]/(y + e_s[11])
          + e_r[10]/(y + e_s[10]) + e_r[9]/(y + e_s[9]) + e_r[8]/(y + e_s[8]) + e_r[7]/(y + e_s[7])
          + e_r[6]/(y + e_s[6]) + e_r[5]/(y + e_s[5]) + e_r[4]/(y + e_s[4]) + e_r[3]/(y + e_s[3])
          + e_r[2]/(y + e_s[2]) + e_r[1]/(y + e_s[1]) + e_r[0]/y) + 2/(1 - z);
    }
  } else {
    y = tools_cexp(-y)*x*(e_p[12]/(y + e_q[12]) + e_p[11]/(y + e_q[11])
        + e_p[10]/(y + e_q[10]) + e_p[9]/(y + e_q[9]) + e_p[8]/(y + e_q[8]) + e_p[7]/(y + e_q[7])
        + e_p[6]/(y + e_q[6]) + e_p[5]/(y + e_q[5]) + e_p[4]/(y + e_q[4]) + e_p[3]/(y + e_q[3])
        + e_p[2]/(y + e_q[2]) + e_p[1]/(y + e_q[1]) + e_p[0]/(y + e_q[0]));
    if (creal(x) < 0) y = y + 2;
  }
  return y;
}

/* Error Function for complex arguments, erf(z)
   written by Takuya Ooura in FORTRAN and available at http://www.kurims.kyoto-u.ac.jp/~ooura/gamerf.html.
   See gamerf2a.f file, function cderf(x)

   Copyright(C) 1996 Takuya OOURA.
   You may use, copy, modify this code for any purpose and without fee.
   You may distribute this ORIGINAL package. */
LUALIB_API agn_Complex tools_cerf (agn_Complex x) {
  /* Coefficients of the polynomial in x*x that is used close to the origin, highest power first. */
  static const double coef[7] = {
     1.20553329817896643e-04,
    -8.54832702345085283e-04,
     5.22397762544218784e-03,
    -2.68661706451312518e-02,
     1.12837916709551257e-01,
    -3.76126389031837525e-01,
     1.12837916709551257
  };
  agn_Complex xsq, acc;
  int k;
  if (fabs(creal(x)) + fabs(cimag(x)) > 0.125) {
    /* away from the origin: derive erf from erfc, always calling erfc with a non-negative real part */
    if (creal(x) >= 0)
      return 1 - tools_cerfc(x);  /* = erf(x) */
    return tools_cerfc(-x) - 1;
  }
  /* close to the origin: Horner evaluation in x*x, then one final multiplication by x */
  xsq = x*x;
  acc = coef[0]*xsq + coef[1];
  for (k = 2; k < 7; k++)
    acc = acc*xsq + coef[k];
  return acc*x;
}
#else
/* Complementary Error Function for complex arguments; based on gamerf2a.f file (see above), converted with Maple V Release 4, 2.21.6 */
LUALIB_API void tools_cerfc (double z_re, double z_im, double *rre, double *rim) {
  double t1, t3, t6, t8, t9, t11, t12, t14, t16, t18, t20, t22, t24, t26, t28, t30,
         t32, t34, t36, t38, t40, t42, t44, t46, t48, t50, t52, t54, t56, t58, t60,
         t62, t64, t66, t68, t70, t72, t74, t76, t78, t80, t82, t84, t86, t88, t92,
         t118, t120, t121, t122, t123, t124, t125, y0, y1, z0, z1, si, co;
  const double pv = 1.27813464856668857e+01;
  const double ph = 6.64067324283344283e+00;
  t1 = z_re*z_re;
  t3 = z_im*z_im;
  y0 = t1 - t3;
  y1 = 2.0*z_re*z_im;
  if ((fabs(z_re) + fabs(z_im)) < ph) {
    t1 = sun_exp(pv*z_re);
    t3 = pv*z_im;
    sun_sincos(t3, &si, &co);
    z0 = t1*co;
    z1 = t1*si;
    if (z0 >= 0) {
      t1 = sun_exp(-y0);
      sun_sincos(y1, &si, &co);
      t3 = t1*co;
      t6 = t1*si;
      t8 = t3*z_re + t6*z_im;
      t9 = y0 + e_q[12];
      t11 = t9*t9;
      t12 = y1*y1;
      t14 = 1/(t11 + t12);
      t16 = y0 + e_q[11];
      t18 = t16*t16;
      t20 = 1/(t18 + t12);
      t22 = y0 + e_q[10];
      t24 = t22*t22;
      t26 = 1/(t24 + t12);
      t28 = y0 + e_q[9];
      t30 = t28*t28;
      t32 = 1/(t30 + t12);
      t34 = y0 + e_q[8];
      t36 = t34*t34;
      t38 = 1/(t36 + t12);
      t40 = y0 + e_q[7];
      t42 = t40*t40;
      t44 = 1/(t42 + t12);
      t46 = y0 + e_q[6];
      t48 = t46*t46;
      t50 = 1/(t48 + t12);
      t52 = y0 + e_q[5];
      t54 = t52*t52;
      t56 = 1/(t54 + t12);
      t58 = y0 + e_q[4];
      t60 = t58*t58;
      t62 = 1/(t60 + t12);
      t64 = y0 + e_q[3];
      t66 = t64*t64;
      t68 = 1/(t66 + t12);
      t70 = y0 + e_q[2];
      t72 = t70*t70;
      t74 = 1/(t72 + t12);
      t76 = y0 + e_q[1];
      t78 = t76*t76;
      t80 = 1/(t78 + t12);
      t82 = y0 + e_q[0];
      t84 = t82*t82;
      t86 = 1/(t84 + t12);
      t88 = e_p[12]*t9*t14 + e_p[11]*t16*t20   + e_p[10]*t22*t26 + e_p[9]*t28*t32 + e_p[8]*t34*t38
             + e_p[7]*t40*t44 + e_p[6]*t46*t50 + e_p[5]*t52*t56  + e_p[4]*t58*t62 + e_p[3]*t64*t68
             + e_p[2]*t70*t74 + e_p[1]*t76*t80 + e_p[0]*t82*t86;
      t92 = -t6*z_re + t3*z_im;
      t120 = -e_p[12]*y1*t14 - e_p[11]*y1*t20 - e_p[10]*y1*t26 - e_p[9]*y1*t32
              - e_p[8]*y1*t38 - e_p[7]*y1*t44 - e_p[6]*y1*t50  - e_p[5]*y1*t56
              - e_p[4]*y1*t62 - e_p[3]*y1*t68 - e_p[2]*y1*t74  - e_p[1]*y1*t80 - e_p[0]*y1*t86;
      t121 = 1.0 + z0;
      t122 = t121*t121;
      t123 = z1*z1;  /* 2.41.0 optimisation */
      t125 = 1/(t122 + t123);
      y0 = t8*t88 - t92*t120 + 2.0*t121*t125;
      y1 = t92*t88 + t8*t120 - 2.0*z1*t125;
    } else {
      t1 = sun_exp(-y0);
      sun_sincos(y1, &si, &co);  /* 2.21.6 tweak */
      t3 = t1*co;
      t6 = t1*si;
      t8 = t3*z_re + t6*z_im;
      t9 = y0 + e_s[12];
      t11 = t9*t9;
      t12 = y1*y1;
      t14 = 1/(t11 + t12);
      t16 = y0 + e_s[11];
      t18 = t16*t16;
      t20 = 1/(t18 + t12);
      t22 = y0 + e_s[10];
      t24 = t22*t22;
      t26 = 1/(t24 + t12);
      t28 = y0 + e_s[9];
      t30 = t28*t28;
      t32 = 1/(t30 + t12);
      t34 = y0 + e_s[8];
      t36 = t34*t34;
      t38 = 1/(t36 + t12);
      t40 = y0 + e_s[7];
      t42 = t40*t40;
      t44 = 1/(t42 + t12);
      t46 = y0 + e_s[6];
      t48 = t46*t46;
      t50 = 1/(t48 + t12);
      t52 = y0 + e_s[5];
      t54 = t52*t52;
      t56 = 1/(t54 + t12);
      t58 = y0 + e_s[4];
      t60 = t58*t58;
      t62 = 1/(t60 + t12);
      t64 = y0 + e_s[3];
      t66 = t64*t64;
      t68 = 1/(t66 + t12);
      t70 = y0 + e_s[2];
      t72 = t70*t70;
      t74 = 1/(t72 + t12);
      t76 = y0 + e_s[1];
      t78 = t76*t76;
      t80 = 1/(t78 + t12);
      t82 = y0*y0;  /* 2.41.0 optimisation */
      t84 = 1/(t82 + t12);
      t86 = e_r[12]*t9*t14 + e_r[11]*t16*t20  + e_r[10]*t22*t26 + e_r[9]*t28*t32 + e_r[8]*t34*t38
            + e_r[7]*t40*t44 + e_r[6]*t46*t50 + e_r[5]*t52*t56 +  e_r[4]*t58*t62 + e_r[3]*t64*t68
            + e_r[2]*t70*t74 + e_r[1]*t76*t80 + e_r[0]*y0*t84;
      t92 = -t6*z_re + t3*z_im;
      t118 = -e_r[12]*y1*t14 - e_r[11]*y1*t20 - e_r[10]*y1*t26 - e_r[9]*y1*t32
             -e_r[8]*y1*t38  - e_r[7]*y1*t44  - e_r[6]*y1*t50  - e_r[5]*y1*t56 - e_r[4]*y1*t62
             -e_r[3]*y1*t68  - e_r[2]*y1*t74  - e_r[1]*y1*t80  - e_r[0]*y1*t84;
      t120 = 1.0 - z0;
      t121 = t120*t120;
      t122 = z1*z1;  /* 2.41.0 optimisation */
      t124 = 1/(t121 + t122);
      y0 = t8*t86 - t92*t118 + 2.0*t120*t124;
      y1 = t92*t86 + t8*t118 + 2.0*z1*t124;
    }
  } else {
    t1 = sun_exp(-y0);
    sun_sincos(y1, &si, &co);  /* 2.21.6 tweak */
    t3 = t1*co;
    t6 = t1*si;
    t8 = t3*z_re + t6*z_im;
    t9 = y0 + e_q[12];
    t11 = t9*t9;
    t12 = y1*y1;
    t14 = 1/(t11 + t12);
    t16 = y0 + e_q[11];
    t18 = t16*t16;
    t20 = 1/(t18 + t12);
    t22 = y0 + e_q[10];
    t24 = t22*t22;
    t26 = 1/(t24 + t12);
    t28 = y0 + e_q[9];
    t30 = t28*t28;
    t32 = 1/(t30 + t12);
    t34 = y0 + e_q[8];
    t36 = t34*t34;
    t38 = 1/(t36 + t12);
    t40 = y0 + e_q[7];
    t42 = t40*t40;
    t44 = 1/(t42 + t12);
    t46 = y0 + e_q[6];
    t48 = t46*t46;
    t50 = 1/(t48 + t12);
    t52 = y0 + e_q[5];
    t54 = t52*t52;
    t56 = 1/(t54 + t12);
    t58 = y0 + e_q[4];
    t60 = t58*t58;
    t62 = 1/(t60 + t12);
    t64 = y0 + e_q[3];
    t66 = t64*t64;
    t68 = 1/(t66 + t12);
    t70 = y0 + e_q[2];
    t72 = t70*t70;
    t74 = 1/(t72 + t12);
    t76 = y0 + e_q[1];
    t78 = t76*t76;
    t80 = 1/(t78 + t12);
    t82 = y0 + e_q[0];
    t84 = t82*t82;
    t86 = 1/(t84 + t12);
    t88 =    e_p[12]*t9*t14 + e_p[11]*t16*t20 + e_p[10]*t22*t26 + e_p[9]*t28*t32 + e_p[8]*t34*t38
           + e_p[7]*t40*t44 + e_p[6]*t46*t50  + e_p[5]*t52*t56  + e_p[4]*t58*t62 + e_p[3]*t64*t68
           + e_p[2]*t70*t74 + e_p[1]*t76*t80  + e_p[0]*t82*t86;
    t92 = -t6*z_re+t3*z_im;
    t120 = -e_p[12]*y1*t14 - e_p[11]*y1*t20 - e_p[10]*y1*t26 - e_p[9]*y1*t32
           - e_p[8]*y1*t38 - e_p[7]*y1*t44  - e_p[6]*y1*t50  - e_p[5]*y1*t56 - e_p[4]*y1*t62
           - e_p[3]*y1*t68 - e_p[2]*y1*t74  - e_p[1]*y1*t80  - e_p[0]*y1*t86;
    y0 = t8*t88 - t92*t120;
    y1 = t92*t88 + t8*t120;
    if (z_re < 0) y0 += 2;
  }
  *rre = y0;
  *rim = y1;
}


/* Error Function for complex arguments, using real arithmetic only; based on gamerf2a.f file (see above),
   converted with Maple V Release 4, 2.21.6.

   z_re, z_im: real and imaginary part of the argument z.
   rre, rim:   receive real and imaginary part of erf(z).

   For |z_re| + |z_im| > 0.125 the result is derived from tools_cerfc, which is always called with a non-negative
   real part. Otherwise the polynomial (p6*y^6 + p5*y^5 + ... + p1*y + p0)*z with y = z*z is evaluated, fully
   expanded into its real and imaginary part. */
LUALIB_API void tools_cerf (double z_re, double z_im, double *rre, double *rim) {
  /* NOTE(review): re and im are declared as lua_Number but handed to tools_cerfc as double *; this presumes
     that lua_Number is double - confirm against the project's number configuration. */
  lua_Number y0, y1, re, im;
  const double p0 =  1.12837916709551257;
  const double p1 = -3.76126389031837525e-01;
  const double p2 =  1.12837916709551257e-01;
  const double p3 = -2.68661706451312518e-02;
  const double p4 =  5.22397762544218784e-03;
  const double p5 = -8.54832702345085283e-04;
  const double p6 =  1.20553329817896643e-04;
  if (fabs(z_re) + fabs(z_im) > 0.125) {
    if (z_re >= 0) {  /* erf(z) = 1 - erfc(z) */
      tools_cerfc(z_re, z_im, &re, &im);
      *rre = 1 - re;
      *rim = -im;
    } else {  /* erf(z) = erfc(-z) - 1 */
      tools_cerfc(-z_re, -z_im, &re, &im);
      *rre = re - 1;
      *rim = im;
    }
  } else {  /* polynomial interpolation */
    double t1, t3, t2, t4, t6, t7, t9, t11, t12, t14, t15, t17, t19, t22, t24, t26, t27,
       t29, t31, t33, t35, t36, t38, t40, t41, t43, t44, t46, t47, t49, t51, t52, t54,
       t57, t59, t61, t64, t65, t67, t68, t82, t84, t86, t90, t98, t99, t101, t106;
    /* y = y0 + i*y1 = z*z; t1 and t3 are only scratch values here and get reassigned below */
    t1 = z_re*z_re;
    t3 = z_im*z_im;
    y0 = t1 - t3;
    y1 = 2.0*z_re*z_im;
    /* powers of y0 and y1 and their products with the coefficients */
    t1 = y0*y0;  /* y0^2 */
    t2 = t1*t1;  /* y0^4 */
    t3 = t2*y0;  /* y0^5 */
    t4 = p5*t3;
    t6 = y1*y1;  /* y1^2 */
    t7 = t6*t6;  /* y1^4 */
    t9 = p6*t7*t6;  /* p6*y1^6 */
    t11 = t7*y1;  /* y1^5 */
    t12 = p5*t11;
    t14 = t6*y1;  /* y1^3 */
    t15 = p3*t14;
    t17 = p1*y1;
    t19 = p4*t7;
    t22 = p6*t2*t1;  /* p6*y0^6 */
    t24 = p2*t1;
    t26 = t1*y0;  /* y0^3 */
    t27 = p3*t26;
    t29 = p1*y0;
    t31 = p2*t6;
    t33 = p4*t2;
    t35 = p4*t1;
    t36 = t6*z_re;
    t38 = p6*t2;
    /* t40: first group of terms of the real part */
    t40 = t4*z_re - t9*z_re - t12*z_im + t15*z_im - t17*z_im + t19*z_re + t22*z_re + t24*z_re + t27*z_re + t29*z_re
          - t31*z_re + t33*z_re - 6.0*t35*t36 - 15.0*t38*t36;
    t41 = p5*t26;
    t43 = p2*y1;
    t44 = y0*z_im;
    t46 = p4*t26;
    t47 = y1*z_im;
    t49 = p5*t2;
    t51 = p5*t1;
    t52 = t14*z_im;
    t54 = p6*y0;
    t57 = p6*t26;
    t59 = p6*t3;
    t61 = p3*t1;
    t64 = p3*t6;
    t65 = y0*z_re;
    t67 = p5*y0;
    t68 = t7*z_re;
    t82 = y1*z_re;
    /* t84 and t106: the two groups of terms that make up the imaginary part */
    t84 = t29*z_im + t33*z_im + t17*z_re - t15*z_re - t9*z_im - t31*z_im + t24*z_im + t12*z_re + t19*z_im + t22*z_im
          + t27*z_im + t4*z_im + p0*z_im + 5.0*t49*t82;
    t86 = t14*z_re;
    t90 = t6*z_im;
    t98 = p6*t1;
    t99 = t7*z_im;
    t101 = p4*y0;
    t106 = -20.0*t57*t86 - 10.0*t51*t86 + 2.0*t43*t65 - 6.0*t35*t90 + 6.0*t54*t11*z_re + 3.0*t61*t82 + 4.0*t46*t82 + 6.0*t59*t82
           - 15.0*t38*t90 + 15.0*t98*t99 - 4.0*t101*t86 - 10.0*t41*t90 + 5.0*t67*t99 - 3.0*t64*t44;
    /* real part: t40 plus the remaining mixed terms */
    *rre = t40 - 10.0*t41*t36 - 2.0*t43*t44 - 4.0*t46*t47 - 5.0*t49*t47 + 10.0*t51*t52
           - 6.0*t54*t11*z_im + 20.0*t57*t52 - 6.0*t59*t47 - 3.0*t61*t47 + p0*z_re
           - 3.0*t64*t65 + 5.0*t67*t68 + 15.0*t98*t68 + 4.0*t101*t52;
    *rim = t84 + t106;
  }
}
#endif


/* erfcx(x) = exp(x^2) erfc(x) function, also known as `scaled complementary error function`, for real x.
   Can be used instead of `erfc` to avoid arithmetic underflow.

   Written by Steven G. Johnson, October 2012, modified by A. Walz. MIT licence.

   Taken from: http://ab-initio.mit.edu/Faddeeva.cc

   This function combines a few different ideas.

   First, for x > 50, it uses a continued-fraction expansion (same as
   for the Faddeeva function, but with algebraic simplifications for z=i*x).

   Second, for 0 <= x <= 50, it uses Chebyshev polynomial approximations,
   but with two twists:

      a) It maps x to y = 4 / (4+x) in [0,1].  This simple transformation,
         inspired by a similar transformation in the octave-forge/specfun
         erfcx by Soren Hauberg, results in much faster Chebyshev convergence
         than other simple transformations I have examined.

      b) Instead of using a single Chebyshev polynomial for the entire
         [0,1] y interval, we break the interval up into 100 equal
         subintervals, with a switch/lookup table, and use much lower
         degree Chebyshev polynomials in each subinterval. This greatly
         improves performance in my tests.

   For x < 0, we use the relationship erfcx(-x) = 2 exp(x^2) - erfcx(x),
   with the usual checks for overflow etcetera.

   Performance-wise, it seems to be substantially faster than either
   the SLATEC DERFC function [or an erfcx function derived therefrom]
   or Cody's CALERF function (from netlib.org/specfun), while
   retaining near machine precision in accuracy.

   Given y100=100*y, where y = 4/(4+x) for x >= 0, compute erfcx(x).

   Uses a look-up table of 100 different Chebyshev polynomials
   for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated
   with the help of Maple and a little shell script.   This allows
   the Chebyshev polynomials to be of significantly lower degree (about 1/4)
   compared to fitting the whole [0,1] interval with a single polynomial. */

static double erfcx_y100 (double y100) {
  int x = luai_numint(y100);
  double t = 2*y100 - (2*x + 1);
  switch (x) {
    case 0:
      return 0.70878032454106438663e-3 + (0.71234091047026302958e-3 + (0.35779077297597742384e-5 + (0.17403143962587937815e-7 + (0.81710660047307788845e-10 + (0.36885022360434957634e-12 + 0.15917038551111111111e-14 * t) * t) * t) * t) * t) * t;
    case 1:
      return 0.21479143208285144230e-2 + (0.72686402367379996033e-3 + (0.36843175430938995552e-5 + (0.18071841272149201685e-7 + (0.85496449296040325555e-10 + (0.38852037518534291510e-12 + 0.16868473576888888889e-14 * t) * t) * t) * t) * t) * t;
    case 2:
      return 0.36165255935630175090e-2 + (0.74182092323555510862e-3 + (0.37948319957528242260e-5 + (0.18771627021793087350e-7 + (0.89484715122415089123e-10 + (0.40935858517772440862e-12 + 0.17872061464888888889e-14 * t) * t) * t) * t) * t) * t;
    case 3:
      return 0.51154983860031979264e-2 + (0.75722840734791660540e-3 + (0.39096425726735703941e-5 + (0.19504168704300468210e-7 + (0.93687503063178993915e-10 + (0.43143925959079664747e-12 + 0.18939926435555555556e-14 * t) * t) * t) * t) * t) * t;
    case 4:
      return 0.66457513172673049824e-2 + (0.77310406054447454920e-3 + (0.40289510589399439385e-5 + (0.20271233238288381092e-7 + (0.98117631321709100264e-10 + (0.45484207406017752971e-12 + 0.20076352213333333333e-14 * t) * t) * t) * t) * t) * t;
    case 5:
      return 0.82082389970241207883e-2 + (0.78946629611881710721e-3 + (0.41529701552622656574e-5 + (0.21074693344544655714e-7 + (0.10278874108587317989e-9 + (0.47965201390613339638e-12 + 0.21285907413333333333e-14 * t) * t) * t) * t) * t) * t;
    case 6:
      return 0.98039537275352193165e-2 + (0.80633440108342840956e-3 + (0.42819241329736982942e-5 + (0.21916534346907168612e-7 + (0.10771535136565470914e-9 + (0.50595972623692822410e-12 + 0.22573462684444444444e-14 * t) * t) * t) * t) * t) * t;
    case 7:
      return 0.11433927298290302370e-1 + (0.82372858383196561209e-3 + (0.44160495311765438816e-5 + (0.22798861426211986056e-7 + (0.11291291745879239736e-9 + (0.53386189365816880454e-12 + 0.23944209546666666667e-14 * t) * t) * t) * t) * t) * t;
    case 8:
      return 0.13099232878814653979e-1 + (0.84167002467906968214e-3 + (0.45555958988457506002e-5 + (0.23723907357214175198e-7 + (0.11839789326602695603e-9 + (0.56346163067550237877e-12 + 0.25403679644444444444e-14 * t) * t) * t) * t) * t) * t;
    case 9:
      return 0.14800987015587535621e-1 + (0.86018092946345943214e-3 + (0.47008265848816866105e-5 + (0.24694040760197315333e-7 + (0.12418779768752299093e-9 + (0.59486890370320261949e-12 + 0.26957764568888888889e-14 * t) * t) * t) * t) * t) * t;
    case 10:
      return 0.16540351739394069380e-1 + (0.87928458641241463952e-3 + (0.48520195793001753903e-5 + (0.25711774900881709176e-7 + (0.13030128534230822419e-9 + (0.62820097586874779402e-12 + 0.28612737351111111111e-14 * t) * t) * t) * t) * t) * t;
    case 11:
      return 0.18318536789842392647e-1 + (0.89900542647891721692e-3 + (0.50094684089553365810e-5 + (0.26779777074218070482e-7 + (0.13675822186304615566e-9 + (0.66358287745352705725e-12 + 0.30375273884444444444e-14 * t) * t) * t) * t) * t) * t;
    case 12:
      return 0.20136801964214276775e-1 + (0.91936908737673676012e-3 + (0.51734830914104276820e-5 + (0.27900878609710432673e-7 + (0.14357976402809042257e-9 + (0.70114790311043728387e-12 + 0.32252476000000000000e-14 * t) * t) * t) * t) * t) * t;
    case 13:
      return 0.21996459598282740954e-1 + (0.94040248155366777784e-3 + (0.53443911508041164739e-5 + (0.29078085538049374673e-7 + (0.15078844500329731137e-9 + (0.74103813647499204269e-12 + 0.34251892320000000000e-14 * t) * t) * t) * t) * t) * t;
    case 14:
      return 0.23898877187226319502e-1 + (0.96213386835900177540e-3 + (0.55225386998049012752e-5 + (0.30314589961047687059e-7 + (0.15840826497296335264e-9 + (0.78340500472414454395e-12 + 0.36381553564444444445e-14 * t) * t) * t) * t) * t) * t;
    case 15:
      return 0.25845480155298518485e-1 + (0.98459293067820123389e-3 + (0.57082915920051843672e-5 + (0.31613782169164830118e-7 + (0.16646478745529630813e-9 + (0.82840985928785407942e-12 + 0.38649975768888888890e-14 * t) * t) * t) * t) * t) * t;
    case 16:
      return 0.27837754783474696598e-1 + (0.10078108563256892757e-2 + (0.59020366493792212221e-5 + (0.32979263553246520417e-7 + (0.17498524159268458073e-9 + (0.87622459124842525110e-12 + 0.41066206488888888890e-14 * t) * t) * t) * t) * t) * t;
    case 17:
      return 0.29877251304899307550e-1 + (0.10318204245057349310e-2 + (0.61041829697162055093e-5 + (0.34414860359542720579e-7 + (0.18399863072934089607e-9 + (0.92703227366365046533e-12 + 0.43639844053333333334e-14 * t) * t) * t) * t) * t) * t;
    case 18:
      return 0.31965587178596443475e-1 + (0.10566560976716574401e-2 + (0.63151633192414586770e-5 + (0.35924638339521924242e-7 + (0.19353584758781174038e-9 + (0.98102783859889264382e-12 + 0.46381060817777777779e-14 * t) * t) * t) * t) * t) * t;
    case 19:
      return 0.34104450552588334840e-1 + (0.10823541191350532574e-2 + (0.65354356159553934436e-5 + (0.37512918348533521149e-7 + (0.20362979635817883229e-9 + (0.10384187833037282363e-11 + 0.49300625262222222221e-14 * t) * t) * t) * t) * t) * t;
    case 20:
      return 0.36295603928292425716e-1 + (0.11089526167995268200e-2 + (0.67654845095518363577e-5 + (0.39184292949913591646e-7 + (0.21431552202133775150e-9 + (0.10994259106646731797e-11 + 0.52409949102222222221e-14 * t) * t) * t) * t) * t) * t;
    case 21:
      return 0.38540888038840509795e-1 + (0.11364917134175420009e-2 + (0.70058230641246312003e-5 + (0.40943644083718586939e-7 + (0.22563034723692881631e-9 + (0.11642841011361992885e-11 + 0.55721092871111111110e-14 * t) * t) * t) * t) * t) * t;
    case 22:
      return 0.40842225954785960651e-1 + (0.11650136437945673891e-2 + (0.72569945502343006619e-5 + (0.42796161861855042273e-7 + (0.23761401711005024162e-9 + (0.12332431172381557035e-11 + 0.59246802364444444445e-14 * t) * t) * t) * t) * t) * t;
    case 23:
      return 0.43201627431540222422e-1 + (0.11945628793917272199e-2 + (0.75195743532849206263e-5 + (0.44747364553960993492e-7 + (0.25030885216472953674e-9 + (0.13065684400300476484e-11 + 0.63000532853333333334e-14 * t) * t) * t) * t) * t) * t;
    case 24:
      return 0.45621193513810471438e-1 + (0.12251862608067529503e-2 + (0.77941720055551920319e-5 + (0.46803119830954460212e-7 + (0.26375990983978426273e-9 +     (0.13845421370977119765e-11 + 0.66996477404444444445e-14 * t) * t) * t) * t) * t) * t;
    case 25:
      return 0.48103121413299865517e-1 + (0.12569331386432195113e-2 + (0.80814333496367673980e-5 + (0.48969667335682018324e-7 + (0.27801515481905748484e-9 + (0.14674637611609884208e-11 + 0.71249589351111111110e-14 * t) * t) * t) * t) * t) * t;
    case 26:
      return 0.50649709676983338501e-1 + (0.12898555233099055810e-2 + (0.83820428414568799654e-5 + (0.51253642652551838659e-7 + (0.29312563849675507232e-9 + (0.15556512782814827846e-11 + 0.75775607822222222221e-14 * t) * t) * t) * t) * t) * t;
    case 27:
      return 0.53263363664388864181e-1 + (0.13240082443256975769e-2 + (0.86967260015007658418e-5 + (0.53662102750396795566e-7 + (0.30914568786634796807e-9 + (0.16494420240828493176e-11 + 0.80591079644444444445e-14 * t) * t) * t) * t) * t) * t;
    case 28:
      return 0.55946601353500013794e-1 + (0.13594491197408190706e-2 + (0.90262520233016380987e-5 + (0.56202552975056695376e-7 + (0.32613310410503135996e-9 + (0.17491936862246367398e-11 + 0.85713381688888888890e-14 * t) * t) * t) * t) * t) * t;
    case 29:
      return 0.58702059496154081813e-1 + (0.13962391363223647892e-2 + (0.93714365487312784270e-5 + (0.58882975670265286526e-7 + (0.34414937110591753387e-9 + (0.18552853109751857859e-11 + 0.91160736711111111110e-14 * t) * t) * t) * t) * t) * t;
    case 30:
      return 0.61532500145144778048e-1 + (0.14344426411912015247e-2 + (0.97331446201016809696e-5 + (0.61711860507347175097e-7 + (0.36325987418295300221e-9 + (0.19681183310134518232e-11 + 0.96952238400000000000e-14 * t) * t) * t) * t) * t) * t;
    case 31:
      return 0.64440817576653297993e-1 + (0.14741275456383131151e-2 + (0.10112293819576437838e-4 + (0.64698236605933246196e-7 + (0.38353412915303665586e-9 + (0.20881176114385120186e-11 + 0.10310784480000000000e-13 * t) * t) * t) * t) * t) * t;
    case 32:
      return 0.67430045633130393282e-1 + (0.15153655418916540370e-2 + (0.10509857606888328667e-4 + (0.67851706529363332855e-7 + (0.40504602194811140006e-9 + (0.22157325110542534469e-11 + 0.10964842115555555556e-13 * t) * t) * t) * t) * t) * t;
    case 33:
      return 0.70503365513338850709e-1 + (0.15582323336495709827e-2 + (0.10926868866865231089e-4 + (0.71182482239613507542e-7 + (0.42787405890153386710e-9 + (0.23514379522274416437e-11 + 0.11659571751111111111e-13 * t) * t) * t) * t) * t) * t;
    case 34:
      return 0.73664114037944596353e-1 + (0.16028078812438820413e-2 + (0.11364423678778207991e-4 + (0.74701423097423182009e-7 + (0.45210162777476488324e-9 + (0.24957355004088569134e-11 + 0.12397238257777777778e-13 * t) * t) * t) * t) * t) * t;
    case 35:
      return 0.76915792420819562379e-1 + (0.16491766623447889354e-2 + (0.11823685320041302169e-4 + (0.78420075993781544386e-7 + (0.47781726956916478925e-9 + (0.26491544403815724749e-11 + 0.13180196462222222222e-13 * t) * t) * t) * t) * t) * t;
    case 36:
      return 0.80262075578094612819e-1 + (0.16974279491709504117e-2 + (0.12305888517309891674e-4 + (0.82350717698979042290e-7 + (0.50511496109857113929e-9 + (0.28122528497626897696e-11 + 0.14010889635555555556e-13 * t) * t) * t) * t) * t) * t;
    case 37:
      return 0.83706822008980357446e-1 + (0.17476561032212656962e-2 + (0.12812343958540763368e-4 + (0.86506399515036435592e-7 + (0.53409440823869467453e-9 + (0.29856186620887555043e-11 + 0.14891851591111111111e-13 * t) * t) * t) * t) * t) * t;
    case 38:
      return 0.87254084284461718231e-1 + (0.17999608886001962327e-2 + (0.13344443080089492218e-4 + (0.90900994316429008631e-7 + (0.56486134972616465316e-9 + (0.31698707080033956934e-11 + 0.15825697795555555556e-13 * t) * t) * t) * t) * t) * t;
    case 39:
      return 0.90908120182172748487e-1 + (0.18544478050657699758e-2 + (0.13903663143426120077e-4 + (0.95549246062549906177e-7 + (0.59752787125242054315e-9 + (0.33656597366099099413e-11 + 0.16815130613333333333e-13 * t) * t) * t) * t) * t) * t;
    case 40:
      return 0.94673404508075481121e-1 + (0.19112284419887303347e-2 + (0.14491572616545004930e-4 + (0.10046682186333613697e-6 + (0.63221272959791000515e-9 + (0.35736693975589130818e-11 + 0.17862931591111111111e-13 * t) * t) * t) * t) * t) * t;
    case 41:
      return 0.98554641648004456555e-1 + (0.19704208544725622126e-2 + (0.15109836875625443935e-4 + (0.10567036667675984067e-6 + (0.66904168640019354565e-9 + (0.37946171850824333014e-11 + 0.18971959040000000000e-13 * t) * t) * t) * t) * t) * t;
    case 42:
      return 0.10255677889470089531e0 + (0.20321499629472857418e-2 + (0.15760224242962179564e-4 + (0.11117756071353507391e-6 + (0.70814785110097658502e-9 + (0.40292553276632563925e-11 + 0.20145143075555555556e-13 * t) * t) * t) * t) * t) * t;
    case 43:
      return 0.10668502059865093318e0 + (0.20965479776148731610e-2 + (0.16444612377624983565e-4 + (0.11700717962026152749e-6 + (0.74967203250938418991e-9 + (0.42783716186085922176e-11 + 0.21385479360000000000e-13 * t) * t) * t) * t) * t) * t;
    case 44:
      return 0.11094484319386444474e0 + (0.21637548491908170841e-2 + (0.17164995035719657111e-4 + (0.12317915750735938089e-6 + (0.79376309831499633734e-9 + (0.45427901763106353914e-11 + 0.22696025653333333333e-13 * t) * t) * t) * t) * t) * t;
    case 45:
      return 0.11534201115268804714e0 + (0.22339187474546420375e-2 + (0.17923489217504226813e-4 + (0.12971465288245997681e-6 + (0.84057834180389073587e-9 + (0.48233721206418027227e-11 + 0.24079890062222222222e-13 * t) * t) * t) * t) * t) * t;
    case 46:
      return 0.11988259392684094740e0 + (0.23071965691918689601e-2 + (0.18722342718958935446e-4 + (0.13663611754337957520e-6 + (0.89028385488493287005e-9 + (0.51210161569225846701e-11 + 0.25540227111111111111e-13 * t) * t) * t) * t) * t) * t;
    case 47:
      return 0.12457298393509812907e0 + (0.23837544771809575380e-2 + (0.19563942105711612475e-4 + (0.14396736847739470782e-6 + (0.94305490646459247016e-9 + (0.54366590583134218096e-11 + 0.27080225920000000000e-13 * t) * t) * t) * t) * t) * t;
    case 48:
      return 0.12941991566142438816e0 + (0.24637684719508859484e-2 + (0.20450821127475879816e-4 + (0.15173366280523906622e-6 + (0.99907632506389027739e-9 +  (0.57712760311351625221e-11 + 0.28703099555555555556e-13 * t) * t) * t) * t) * t) * t;
    case 49:
      return 0.13443048593088696613e0 + (0.25474249981080823877e-2 + (0.21385669591362915223e-4 + (0.15996177579900443030e-6 + (0.10585428844575134013e-8 + (0.61258809536787882989e-11 + 0.30412080142222222222e-13 * t) * t) * t) * t) * t) * t;
    case 50:
      return 0.13961217543434561353e0 + (0.26349215871051761416e-2 + (0.22371342712572567744e-4 + (0.16868008199296822247e-6 + (0.11216596910444996246e-8 + (0.65015264753090890662e-11 + 0.32210394506666666666e-13 * t) * t) * t) * t) * t) * t;
    case 51:
      return 0.14497287157673800690e0 + (0.27264675383982439814e-2 + (0.23410870961050950197e-4 + (0.17791863939526376477e-6 + (0.11886425714330958106e-8 + (0.68993039665054288034e-11 + 0.34101266222222222221e-13 * t) * t) * t) * t) * t) * t;
    case 52:
      return 0.15052089272774618151e0 + (0.28222846410136238008e-2 + (0.24507470422713397006e-4 + (0.18770927679626136909e-6 + (0.12597184587583370712e-8 + (0.73203433049229821618e-11 + 0.36087889048888888890e-13 * t) * t) * t) * t) * t) * t;
    case 53:
      return 0.15626501395774612325e0 + (0.29226079376196624949e-2 + (0.25664553693768450545e-4 + (0.19808568415654461964e-6 + (0.13351257759815557897e-8 + (0.77658124891046760667e-11 + 0.38173420035555555555e-13 * t) * t) * t) * t) * t) * t;
    case 54:
      return 0.16221449434620737567e0 + (0.30276865332726475672e-2 + (0.26885741326534564336e-4 + (0.20908350604346384143e-6 + (0.14151148144240728728e-8 + (0.82369170665974313027e-11 + 0.40360957457777777779e-13 * t) * t) * t) * t) * t) * t;
    case 55:
      return 0.16837910595412130659e0 + (0.31377844510793082301e-2 + (0.28174873844911175026e-4 + (0.22074043807045782387e-6 + (0.14999481055996090039e-8 + (0.87348993661930809254e-11 + 0.42653528977777777779e-13 * t) * t) * t) * t) * t) * t;
    case 56:
      return 0.17476916455659369953e0 + (0.32531815370903068316e-2 + (0.29536024347344364074e-4 + (0.23309632627767074202e-6 + (0.15899007843582444846e-8 + (0.92610375235427359475e-11 + 0.45054073102222222221e-13 * t) * t) * t) * t) * t) * t;
    case 57:
      return 0.18139556223643701364e0 + (0.33741744168096996041e-2 + (0.30973511714709500836e-4 + (0.24619326937592290996e-6 + (0.16852609412267750744e-8 + (0.98166442942854895573e-11 + 0.47565418097777777779e-13 * t) * t) * t) * t) * t) * t;
    case 58:
      return 0.18826980194443664549e0 + (0.35010775057740317997e-2 + (0.32491914440014267480e-4 + (0.26007572375886319028e-6 + (0.17863299617388376116e-8 + (0.10403065638343878679e-10 + 0.50190265831111111110e-13 * t) * t) * t) * t) * t) * t;
    case 59:
      return 0.19540403413693967350e0 + (0.36342240767211326315e-2 + (0.34096085096200907289e-4 + (0.27479061117017637474e-6 + (0.18934228504790032826e-8 + (0.11021679075323598664e-10 + 0.52931171733333333334e-13 * t) * t) * t) * t) * t) * t;
    case 60:
      return 0.20281109560651886959e0 + (0.37739673859323597060e-2 + (0.35791165457592409054e-4 + (0.29038742889416172404e-6 + (0.20068685374849001770e-8 + (0.11673891799578381999e-10 + 0.55790523093333333334e-13 * t) * t) * t) * t) * t) * t;
    case 61:
      return 0.21050455062669334978e0 + (0.39206818613925652425e-2 + (0.37582602289680101704e-4 + (0.30691836231886877385e-6 + (0.21270101645763677824e-8 + (0.12361138551062899455e-10 + 0.58770520160000000000e-13 * t) * t) * t) * t) * t) * t;
    case 62:
      return 0.21849873453703332479e0 + (0.40747643554689586041e-2 + (0.39476163820986711501e-4 + (0.32443839970139918836e-6 + (0.22542053491518680200e-8 + (0.13084879235290858490e-10 + 0.61873153262222222221e-13 * t) * t) * t) * t) * t) * t;
    case 63:
      return 0.22680879990043229327e0 + (0.42366354648628516935e-2 + (0.41477956909656896779e-4 + (0.34300544894502810002e-6 + (0.23888264229264067658e-8 + (0.13846596292818514601e-10 + 0.65100183751111111110e-13 * t) * t) * t) * t) * t) * t;
    case 64:
      return 0.23545076536988703937e0 + (0.44067409206365170888e-2 + (0.43594444916224700881e-4 + (0.36268045617760415178e-6 + (0.25312606430853202748e-8 + (0.14647791812837903061e-10 + 0.68453122631111111110e-13 * t) * t) * t) * t) * t) * t;
    case 65:
      return 0.24444156740777432838e0 + (0.45855530511605787178e-2 + (0.45832466292683085475e-4 + (0.38352752590033030472e-6 + (0.26819103733055603460e-8 + (0.15489984390884756993e-10 + 0.71933206364444444445e-13 * t) * t) * t) * t) * t) * t;
    case 66:
      return 0.25379911500634264643e0 + (0.47735723208650032167e-2 + (0.48199253896534185372e-4 + (0.40561404245564732314e-6 + (0.28411932320871165585e-8 + (0.16374705736458320149e-10 + 0.75541379822222222221e-13 * t) * t) * t) * t) * t) * t;
    case 67:
      return 0.26354234756393613032e0 + (0.49713289477083781266e-2 + (0.50702455036930367504e-4 + (0.42901079254268185722e-6 + (0.30095422058900481753e-8 + (0.17303497025347342498e-10 + 0.79278273368888888890e-13 * t) * t) * t) * t) * t) * t;
    case 68:
      return 0.27369129607732343398e0 + (0.51793846023052643767e-2 + (0.53350152258326602629e-4 + (0.45379208848865015485e-6 + (0.31874057245814381257e-8 + (0.18277905010245111046e-10 + 0.83144182364444444445e-13 * t) * t) * t) * t) * t) * t;
    case 69:
      return 0.28426714781640316172e0 + (0.53983341916695141966e-2 + (0.56150884865255810638e-4 + (0.48003589196494734238e-6 + (0.33752476967570796349e-8 + (0.19299477888083469086e-10 + 0.87139049137777777779e-13 * t) * t) * t) * t) * t) * t;
    case 70:
      return 0.29529231465348519920e0 + (0.56288077305420795663e-2 + (0.59113671189913307427e-4 + (0.50782393781744840482e-6 + (0.35735475025851713168e-8 + (0.20369760937017070382e-10 + 0.91262442613333333334e-13 * t) * t) * t) * t) * t) * t;
    case 71:
      return 0.30679050522528838613e0 + (0.58714723032745403331e-2 + (0.62248031602197686791e-4 + (0.53724185766200945789e-6 + (0.37827999418960232678e-8 + (0.21490291930444538307e-10 + 0.95513539182222222221e-13 * t) * t) * t) * t) * t) * t;
    case 72:
      return 0.31878680111173319425e0 + (0.61270341192339103514e-2 + (0.65564012259707640976e-4 + (0.56837930287837738996e-6 + (0.40035151353392378882e-8 + (0.22662596341239294792e-10 + 0.99891109760000000000e-13 * t) * t) * t) * t) * t) * t;
    case 73:
      return 0.33130773722152622027e0 + (0.63962406646798080903e-2 + (0.69072209592942396666e-4 + (0.60133006661885941812e-6 + (0.42362183765883466691e-8 + (0.23888182347073698382e-10 + 0.10439349811555555556e-12 * t) * t) * t) * t) * t) * t;
    case 74:
      return 0.34438138658041336523e0 + (0.66798829540414007258e-2 + (0.72783795518603561144e-4 + (0.63619220443228800680e-6 + (0.44814499336514453364e-8 + (0.25168535651285475274e-10 + 0.10901861383111111111e-12 * t) * t) * t) * t) * t) * t;
    case 75:
      return 0.35803744972380175583e0 + (0.69787978834882685031e-2 + (0.76710543371454822497e-4 + (0.67306815308917386747e-6 + (0.47397647975845228205e-8 + (0.26505114141143050509e-10 + 0.11376390933333333333e-12 * t) * t) * t) * t) * t) * t;
    case 76:
      return 0.37230734890119724188e0 + (0.72938706896461381003e-2 + (0.80864854542670714092e-4 + (0.71206484718062688779e-6 + (0.50117323769745883805e-8 + (0.27899342394100074165e-10 + 0.11862637614222222222e-12 * t) * t) * t) * t) * t) * t;
    case 77:
      return 0.38722432730555448223e0 + (0.76260375162549802745e-2 + (0.85259785810004603848e-4 + (0.75329383305171327677e-6 + (0.52979361368388119355e-8 + (0.29352606054164086709e-10 + 0.12360253370666666667e-12 * t) * t) * t) * t) * t) * t;
    case 78:
      return 0.40282355354616940667e0 + (0.79762880915029728079e-2 + (0.89909077342438246452e-4 + (0.79687137961956194579e-6 + (0.55989731807360403195e-8 + (0.30866246101464869050e-10 + 0.12868841946666666667e-12 * t) * t) * t) * t) * t) * t;
    case 79:
      return 0.41914223158913787649e0 + (0.83456685186950463538e-2 + (0.94827181359250161335e-4 + (0.84291858561783141014e-6 + (0.59154537751083485684e-8 + (0.32441553034347469291e-10 + 0.13387957943111111111e-12 * t) * t) * t) * t) * t) * t;
    case 80:
      return 0.43621971639463786896e0 + (0.87352841828289495773e-2 + (0.10002929142066799966e-3 + (0.89156148280219880024e-6 + (0.62480008150788597147e-8 + (0.34079760983458878910e-10 + 0.13917107176888888889e-12 * t) * t) * t) * t) * t) * t;
    case 81:
      return 0.45409763548534330981e0 + (0.91463027755548240654e-2 + (0.10553137232446167258e-3 + (0.94293113464638623798e-6 + (0.65972492312219959885e-8 + (0.35782041795476563662e-10 + 0.14455745872000000000e-12 * t) * t) * t) * t) * t) * t;
    case 82:
      return 0.47282001668512331468e0 + (0.95799574408860463394e-2 + (0.11135019058000067469e-3 + (0.99716373005509038080e-6 + (0.69638453369956970347e-8 + (0.37549499088161345850e-10 + 0.15003280712888888889e-12 * t) * t) * t) * t) * t) * t;
    case 83:
      return 0.49243342227179841649e0 + (0.10037550043909497071e-1 + (0.11750334542845234952e-3 + (0.10544006716188967172e-5 + (0.73484461168242224872e-8 + (0.39383162326435752965e-10 + 0.15559069118222222222e-12 * t) * t) * t) * t) * t) * t;
    case 84:
      return 0.51298708979209258326e0 + (0.10520454564612427224e-1 + (0.12400930037494996655e-3 + (0.11147886579371265246e-5 + (0.77517184550568711454e-8 + (0.41283980931872622611e-10 + 0.16122419680000000000e-12 * t) * t) * t) * t) * t) * t;
    case 85:
      return 0.53453307979101369843e0 + (0.11030120618800726938e-1 + (0.13088741519572269581e-3 + (0.11784797595374515432e-5 + (0.81743383063044825400e-8 + (0.43252818449517081051e-10 + 0.16692592640000000000e-12 * t) * t) * t) * t) * t) * t;
    case 86:
      return 0.55712643071169299478e0 + (0.11568077107929735233e-1 + (0.13815797838036651289e-3 + (0.12456314879260904558e-5 + (0.86169898078969313597e-8 + (0.45290446811539652525e-10 + 0.17268801084444444444e-12 * t) * t) * t) * t) * t) * t;
    case 87:
      return 0.58082532122519320968e0 + (0.12135935999503877077e-1 + (0.14584223996665838559e-3 + (0.13164068573095710742e-5 + (0.90803643355106020163e-8 + (0.47397540713124619155e-10 + 0.17850211608888888889e-12 * t) * t) * t) * t) * t) * t;
    case 88:
      return 0.60569124025293375554e0 + (0.12735396239525550361e-1 + (0.15396244472258863344e-3 + (0.13909744385382818253e-5 + (0.95651595032306228245e-8 + (0.49574672127669041550e-10 + 0.18435945564444444444e-12 * t) * t) * t) * t) * t) * t;
    case 89:
      return 0.63178916494715716894e0 + (0.13368247798287030927e-1 + (0.16254186562762076141e-3 + (0.14695084048334056083e-5 + (0.10072078109604152350e-7 + (0.51822304995680707483e-10 + 0.19025081422222222222e-12 * t) * t) * t) * t) * t) * t;
    case 90:
      return 0.65918774689725319200e0 + (0.14036375850601992063e-1 + (0.17160483760259706354e-3 + (0.15521885688723188371e-5 + (0.10601827031535280590e-7 + (0.54140790105837520499e-10 + 0.19616655146666666667e-12 * t) * t) * t) * t) * t) * t;
    case 91:
      return 0.68795950683174433822e0 + (0.14741765091365869084e-1 + (0.18117679143520433835e-3 + (0.16392004108230585213e-5 + (0.11155116068018043001e-7 + (0.56530360194925690374e-10 + 0.20209663662222222222e-12 * t) * t) * t) * t) * t) * t;
    case 92:
      return 0.71818103808729967036e0 + (0.15486504187117112279e-1 + (0.19128428784550923217e-3 + (0.17307350969359975848e-5 + (0.11732656736113607751e-7 + (0.58991125287563833603e-10 + 0.20803065333333333333e-12 * t) * t) * t) * t) * t) * t;
    case 93:
      return 0.74993321911726254661e0 + (0.16272790364044783382e-1 + (0.20195505163377912645e-3 + (0.18269894883203346953e-5 + (0.12335161021630225535e-7 + (0.61523068312169087227e-10 + 0.21395783431111111111e-12 * t) * t) * t) * t) * t) * t;
    case 94:
      return 0.78330143531283492729e0 + (0.17102934132652429240e-1 + (0.21321800585063327041e-3 + (0.19281661395543913713e-5 + (0.12963340087354341574e-7 + (0.64126040998066348872e-10 + 0.21986708942222222222e-12 * t) * t) * t) * t) * t) * t;
    case 95:
      return 0.81837581041023811832e0 + (0.17979364149044223802e-1 + (0.22510330592753129006e-3 + (0.20344732868018175389e-5 + (0.13617902941839949718e-7 + (0.66799760083972474642e-10 + 0.22574701262222222222e-12 * t) * t) * t) * t) * t) * t;
    case 96:
      return 0.85525144775685126237e0 + (0.18904632212547561026e-1 + (0.23764237370371255638e-3 + (0.21461248251306387979e-5 + (0.14299555071870523786e-7 + (0.69543803864694171934e-10 + 0.23158593688888888889e-12 * t) * t) * t) * t) * t) * t;
    case 97:
      return 0.89402868170849933734e0 + (0.19881418399127202569e-1 + (0.25086793128395995798e-3 + (0.22633402747585233180e-5 + (0.15008997042116532283e-7 + (0.72357609075043941261e-10 + 0.23737194737777777778e-12 * t) * t) * t) * t) * t) * t;
    case 98:
      return 0.93481333942870796363e0 + (0.20912536329780368893e-1 + (0.26481403465998477969e-3 + (0.23863447359754921676e-5 + (0.15746923065472184451e-7 + (0.75240468141720143653e-10 + 0.24309291271111111111e-12 * t) * t) * t) * t) * t) * t;
    case 99:
      return 0.97771701335885035464e0 + (0.22000938572830479551e-1 + (0.27951610702682383001e-3 + (0.25153688325245314530e-5 + (0.16514019547822821453e-7 + (0.78191526829368231251e-10 + 0.24873652355555555556e-12 * t) * t) * t) * t) * t) * t;
  }
  /* we only get here if y = 1, i.e. |x| < 4*eps, in which case erfcx is within 1e-15 of 1. */
  return 1.0;
}


/* Scaled complementary error function erfcx(x) = exp(x^2)*erfc(x), 2.21.6.
   x in [0, 50]:  delegated to erfcx_y100 with argument 400/(4 + x).
   x > 50:        continued-fraction expansion (a single term once x > 5e7).
   x < 0:         2*exp(x^2) minus erfcx_y100(400/(4 - x)); the subtrahend is left out
                  below -6.1, and HUGE_VAL is returned below -26.7. */
LUALIB_API double tools_erfcx (double x) {
  double xsq, numer, denom, twice_exp;
  if (!(x >= 0)) {  /* negative argument; an unordered (NaN) x also takes this path */
    if (x < -26.7)
      return HUGE_VAL;
    twice_exp = 2*sun_exp(x*x);
    if (x < -6.1)
      return twice_exp;
    return twice_exp - erfcx_y100(400/(4 - x));
  }
  if (!(x > 50))  /* 0 <= x <= 50 */
    return erfcx_y100(400/(4 + x));
  /* from here on x > 50: continued-fraction expansion is faster */
  if (x > 5e7)  /* 1-term expansion, important to avoid overflow */
    return INVSQRTPI/x;
  /* 5-term expansion, simplified from: ispi/(x+0.5/(x+1/(x+1.5/(x+2/x)))) */
  xsq = x*x;
  numer = xsq*(xsq + 4.5) + 2;
  denom = x*(xsq*(xsq + 5) + 3.75);
  return INVSQRTPI*numer/denom;
}


/* Compute a scaled Dawson integral
            FADDEEVA(w_im)(x) = 2*Dawson(x)/sqrt(pi)
   equivalent to the imaginary part w(x) for real x.

   Uses methods similar to the erfcx calculation above: continued fractions
   for large |x|, a lookup table of Chebyshev polynomials for smaller |x|,
   and finally a Taylor expansion for |x|<0.01.

   Written by Steven G. Johnson, October 2012, modified by A. Walz. MIT licence.

   Taken from: http://ab-initio.mit.edu/Faddeeva.cc

   Given y100=100*y, where y = 1/(1+x) for x >= 0, compute w_im(x).

   Uses a look-up table of 100 different Chebyshev polynomials
   for y intervals [0,0.01], [0.01,0.02], ..., [0.99,1], generated
   with the help of Maple and a little shell script. This allows
   the Chebyshev polynomials to be of significantly lower degree (about 1/30)
   compared to fitting the whole [0,1] interval with a single polynomial. */

static double w_im_y100 (double y100, double x) {
  int q = luai_numint(y100);
  double t = 2*y100 - (2*q + 1);
  switch (q) {
    case 0:
      return 0.28351593328822191546e-2 + (0.28494783221378400759e-2 + (0.14427470563276734183e-4 + (0.10939723080231588129e-6 + (0.92474307943275042045e-9 + (0.89128907666450075245e-11 + 0.92974121935111111110e-13 * t) * t) * t) * t) * t) * t;
    case 1:
      return 0.85927161243940350562e-2 + (0.29085312941641339862e-2 + (0.15106783707725582090e-4 + (0.11716709978531327367e-6 + (0.10197387816021040024e-8 + (0.10122678863073360769e-10 + 0.10917479678400000000e-12 * t) * t) * t) * t) * t) * t;
    case 2:
      return 0.14471159831187703054e-1 + (0.29703978970263836210e-2 + (0.15835096760173030976e-4 + (0.12574803383199211596e-6 + (0.11278672159518415848e-8 + (0.11547462300333495797e-10 + 0.12894535335111111111e-12 * t) * t) * t) * t) * t) * t;
    case 3:
      return 0.20476320420324610618e-1 + (0.30352843012898665856e-2 + (0.16617609387003727409e-4 + (0.13525429711163116103e-6 + (0.12515095552507169013e-8 + (0.13235687543603382345e-10 + 0.15326595042666666667e-12 * t) * t) * t) * t) * t) * t;
    case 4:
      return 0.26614461952489004566e-1 + (0.31034189276234947088e-2 + (0.17460268109986214274e-4 + (0.14582130824485709573e-6 + (0.13935959083809746345e-8 + (0.15249438072998932900e-10 + 0.18344741882133333333e-12 * t) * t) * t) * t) * t) * t;
    case 5:
      return 0.32892330248093586215e-1 + (0.31750557067975068584e-2 + (0.18369907582308672632e-4 + (0.15761063702089457882e-6 + (0.15577638230480894382e-8 + (0.17663868462699097951e-10 + (0.22126732680711111111e-12 + 0.30273474177737853668e-14 * t) * t) * t) * t) * t) * t) * t;
    case 6:
      return 0.39317207681134336024e-1 + (0.32504779701937539333e-2 + (0.19354426046513400534e-4 + (0.17081646971321290539e-6 + (0.17485733959327106250e-8 + (0.20593687304921961410e-10 + (0.26917401949155555556e-12 + 0.38562123837725712270e-14 * t) * t) * t) * t) * t) * t) * t;
    case 7:
      return 0.45896976511367738235e-1 + (0.33300031273110976165e-2 + (0.20423005398039037313e-4 + (0.18567412470376467303e-6 + (0.19718038363586588213e-8 + (0.24175006536781219807e-10 + (0.33059982791466666666e-12 + 0.49756574284439426165e-14 * t) * t) * t) * t) * t) * t) * t;
    case 8:
      return 0.52640192524848962855e-1 + (0.34139883358846720806e-2 + (0.21586390240603337337e-4 + (0.20247136501568904646e-6 + (0.22348696948197102935e-8 + (0.28597516301950162548e-10 + (0.41045502119111111110e-12 + 0.65151614515238361946e-14 * t) * t) * t) * t) * t) * t) * t;
    case 9:
      return 0.59556171228656770456e-1 + (0.35028374386648914444e-2 + (0.22857246150998562824e-4 + (0.22156372146525190679e-6 + (0.25474171590893813583e-8 + (0.34122390890697400584e-10 + (0.51593189879111111110e-12 + 0.86775076853908006938e-14 * t) * t) * t) * t) * t) * t) * t;
    case 10:
      return 0.66655089485108212551e-1 + (0.35970095381271285568e-2 + (0.24250626164318672928e-4 + (0.24339561521785040536e-6 + (0.29221990406518411415e-8 + (0.41117013527967776467e-10 + (0.65786450716444444445e-12 + 0.11791885745450623331e-13 * t) * t) * t) * t) * t) * t) * t;
    case 11:
      return 0.73948106345519174661e-1 + (0.36970297216569341748e-2 + (0.25784588137312868792e-4 + (0.26853012002366752770e-6 + (0.33763958861206729592e-8 + (0.50111549981376976397e-10 + (0.85313857496888888890e-12 + 0.16417079927706899860e-13 * t) * t) * t) * t) * t) * t) * t;
    case 12:
      return 0.81447508065002963203e-1 + (0.38035026606492705117e-2 + (0.27481027572231851896e-4 + (0.29769200731832331364e-6 + (0.39336816287457655076e-8 + (0.61895471132038157624e-10 + (0.11292303213511111111e-11 + 0.23558532213703884304e-13 * t) * t) * t) * t) * t) * t) * t;
    case 13:
      return 0.89166884027582716628e-1 + (0.39171301322438946014e-2 + (0.29366827260422311668e-4 + (0.33183204390350724895e-6 + (0.46276006281647330524e-8 + (0.77692631378169813324e-10 + (0.15335153258844444444e-11 + 0.35183103415916026911e-13 * t) * t) * t) * t) * t) * t) * t;
    case 14:
      return 0.97121342888032322019e-1 + (0.40387340353207909514e-2 + (0.31475490395950776930e-4 + (0.37222714227125135042e-6 + (0.55074373178613809996e-8 + (0.99509175283990337944e-10 + (0.21552645758222222222e-11 + 0.55728651431872687605e-13 * t) * t) * t) * t) * t) * t) * t;
    case 15:
      return 0.10532778218603311137e0 + (0.41692873614065380607e-2 + (0.33849549774889456984e-4 + (0.42064596193692630143e-6 + (0.66494579697622432987e-8 + (0.13094103581931802337e-9 + (0.31896187409777777778e-11 + 0.97271974184476560742e-13 * t) * t) * t) * t) * t) * t) * t;
    case 16:
      return 0.11380523107427108222e0 + (0.43099572287871821013e-2 + (0.36544324341565929930e-4 + (0.47965044028581857764e-6 + (0.81819034238463698796e-8 + (0.17934133239549647357e-9 + (0.50956666166186293627e-11 + (0.18850487318190638010e-12 + 0.79697813173519853340e-14 * t) * t) * t) * t) * t) * t) * t) * t;
    case 17:
      return 0.12257529703447467345e0 + (0.44621675710026986366e-2 + (0.39634304721292440285e-4 + (0.55321553769873381819e-6 + (0.10343619428848520870e-7 + (0.26033830170470368088e-9 + (0.87743837749108025357e-11 + (0.34427092430230063401e-12 + 0.10205506615709843189e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 18:
      return 0.13166276955656699478e0 + (0.46276970481783001803e-2 + (0.43225026380496399310e-4 + (0.64799164020016902656e-6 + (0.13580082794704641782e-7 + (0.39839800853954313927e-9 + (0.14431142411840000000e-10 + 0.42193457308830027541e-12 * t) * t) * t) * t) * t) * t) * t;
    case 19:
      return 0.14109647869803356475e0 + (0.48088424418545347758e-2 + (0.47474504753352150205e-4 + (0.77509866468724360352e-6 + (0.18536851570794291724e-7 + (0.60146623257887570439e-9 + (0.18533978397305276318e-10 + (0.41033845938901048380e-13 - 0.46160680279304825485e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 20:
      return 0.15091057940548936603e0 + (0.50086864672004685703e-2 + (0.52622482832192230762e-4 + (0.95034664722040355212e-6 + (0.25614261331144718769e-7 + (0.80183196716888606252e-9 + (0.12282524750534352272e-10 + (-0.10531774117332273617e-11 - 0.86157181395039646412e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 21:
      return 0.16114648116017010770e0 + (0.52314661581655369795e-2 + (0.59005534545908331315e-4 + (0.11885518333915387760e-5 + (0.33975801443239949256e-7 + (0.82111547144080388610e-9 + (-0.12357674017312854138e-10 + (-0.24355112256914479176e-11 - 0.75155506863572930844e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 22:
      return 0.17185551279680451144e0 + (0.54829002967599420860e-2 + (0.67013226658738082118e-4 + (0.14897400671425088807e-5 + (0.40690283917126153701e-7 + (0.44060872913473778318e-9 + (-0.52641873433280000000e-10 - 0.30940587864543343124e-11 * t) * t) * t) * t) * t) * t) * t;
    case 23:
      return 0.18310194559815257381e0 + (0.57701559375966953174e-2 + (0.76948789401735193483e-4 + (0.18227569842290822512e-5 + (0.41092208344387212276e-7 + (-0.44009499965694442143e-9 + (-0.92195414685628803451e-10 + (-0.22657389705721753299e-11 + 0.10004784908106839254e-12 * t) * t) * t) * t) * t) * t) * t) * t;
    case 24:
      return 0.19496527191546630345e0 + (0.61010853144364724856e-2 + (0.88812881056342004864e-4 + (0.21180686746360261031e-5 + (0.30652145555130049203e-7 + (-0.16841328574105890409e-8 + (-0.11008129460612823934e-9 + (-0.12180794204544515779e-12 + 0.15703325634590334097e-12 * t) * t) * t) * t) * t) * t) * t) * t;
    case 25:
      return 0.20754006813966575720e0 + (0.64825787724922073908e-2 + (0.10209599627522311893e-3 + (0.22785233392557600468e-5 + (0.73495224449907568402e-8 + (-0.29442705974150112783e-8 + (-0.94082603434315016546e-10 + (0.23609990400179321267e-11 + 0.14141908654269023788e-12 * t) * t) * t) * t) * t) * t) * t) * t;
    case 26:
      return 0.22093185554845172146e0 + (0.69182878150187964499e-2 + (0.11568723331156335712e-3 + (0.22060577946323627739e-5 + (-0.26929730679360840096e-7 + (-0.38176506152362058013e-8 + (-0.47399503861054459243e-10 + (0.40953700187172127264e-11 + 0.69157730376118511127e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 27:
      return 0.23524827304057813918e0 + (0.74063350762008734520e-2 + (0.12796333874615790348e-3 + (0.18327267316171054273e-5 + (-0.66742910737957100098e-7 + (-0.40204740975496797870e-8 + (0.14515984139495745330e-10 + (0.44921608954536047975e-11 - 0.18583341338983776219e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 28:
      return 0.25058626331812744775e0 + (0.79377285151602061328e-2 + (0.13704268650417478346e-3 + (0.11427511739544695861e-5 + (-0.10485442447768377485e-6 + (-0.34850364756499369763e-8 + (0.72656453829502179208e-10 + (0.36195460197779299406e-11 - 0.84882136022200714710e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 29:
      return 0.26701724900280689785e0 + (0.84959936119625864274e-2 + (0.14112359443938883232e-3 + (0.17800427288596909634e-6 + (-0.13443492107643109071e-6 + (-0.23512456315677680293e-8 + (0.11245846264695936769e-9 + (0.19850501334649565404e-11 - 0.11284666134635050832e-12 * t) * t) * t) * t) * t) * t) * t) * t;
    case 30:
      return 0.28457293586253654144e0 + (0.90581563892650431899e-2 + (0.13880520331140646738e-3 + (-0.97262302362522896157e-6 + (-0.15077100040254187366e-6 + (-0.88574317464577116689e-9 + (0.12760311125637474581e-9 + (0.20155151018282695055e-12 - 0.10514169375181734921e-12 * t) * t) * t) * t) * t) * t) * t) * t;
    case 31:
      return 0.30323425595617385705e0 + (0.95968346790597422934e-2 + (0.12931067776725883939e-3 + (-0.21938741702795543986e-5 + (-0.15202888584907373963e-6 + (0.61788350541116331411e-9 + (0.11957835742791248256e-9 + (-0.12598179834007710908e-11 - 0.75151817129574614194e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 32:
      return 0.32292521181517384379e0 + (0.10082957727001199408e-1 + (0.11257589426154962226e-3 + (-0.33670890319327881129e-5 + (-0.13910529040004008158e-6 + (0.19170714373047512945e-8 + (0.94840222377720494290e-10 + (-0.21650018351795353201e-11 - 0.37875211678024922689e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 33:
      return 0.34351233557911753862e0 + (0.10488575435572745309e-1 + (0.89209444197248726614e-4 + (-0.43893459576483345364e-5 + (-0.11488595830450424419e-6 + (0.28599494117122464806e-8 + (0.61537542799857777779e-10 - 0.24935749227658002212e-11 * t) * t) * t) * t) * t) * t) * t;
    case 34:
      return 0.36480946642143669093e0 + (0.10789304203431861366e-1 + (0.60357993745283076834e-4 + (-0.51855862174130669389e-5 + (-0.83291664087289801313e-7 + (0.33898011178582671546e-8 + (0.27082948188277716482e-10 + (-0.23603379397408694974e-11 + 0.19328087692252869842e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 35:
      return 0.38658679935694939199e0 + (0.10966119158288804999e-1 + (0.27521612041849561426e-4 + (-0.57132774537670953638e-5 + (-0.48404772799207914899e-7 + (0.35268354132474570493e-8 + (-0.32383477652514618094e-11 + (-0.19334202915190442501e-11 + 0.32333189861286460270e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 36:
      return 0.40858275583808707870e0 + (0.11006378016848466550e-1 + (-0.76396376685213286033e-5 + (-0.59609835484245791439e-5 + (-0.13834610033859313213e-7 + (0.33406952974861448790e-8 + (-0.26474915974296612559e-10 + (-0.13750229270354351983e-11 + 0.36169366979417390637e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 37:
      return 0.43051714914006682977e0 + (0.10904106549500816155e-1 + (-0.43477527256787216909e-4 + (-0.59429739547798343948e-5 + (0.17639200194091885949e-7 + (0.29235991689639918688e-8 + (-0.41718791216277812879e-10 + (-0.81023337739508049606e-12 + 0.33618915934461994428e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 38:
      return 0.45210428135559607406e0 + (0.10659670756384400554e-1 + (-0.78488639913256978087e-4 + (-0.56919860886214735936e-5 + (0.44181850467477733407e-7 + (0.23694306174312688151e-8 + (-0.49492621596685443247e-10 + (-0.31827275712126287222e-12 + 0.27494438742721623654e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 39:
      return 0.47306491195005224077e0 + (0.10279006119745977570e-1 + (-0.11140268171830478306e-3 + (-0.52518035247451432069e-5 + (0.64846898158889479518e-7 + (0.17603624837787337662e-8 + (-0.51129481592926104316e-10 + (0.62674584974141049511e-13 + 0.20055478560829935356e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 40:
      return 0.49313638965719857647e0 + (0.97725799114772017662e-2 + (-0.14122854267291533334e-3 + (-0.46707252568834951907e-5 + (0.79421347979319449524e-7 + (0.11603027184324708643e-8 + (-0.48269605844397175946e-10 + (0.32477251431748571219e-12 + 0.12831052634143527985e-13 * t) * t) * t) * t) * t) * t) * t) * t;
    case 41:
      return 0.51208057433416004042e0 + (0.91542422354009224951e-2 + (-0.16726530230228647275e-3 + (-0.39964621752527649409e-5 + (0.88232252903213171454e-7 + (0.61343113364949928501e-9 + (-0.42516755603130443051e-10 + (0.47910437172240209262e-12 + 0.66784341874437478953e-14 * t) * t) * t) * t) * t) * t) * t) * t;
    case 42:
      return 0.52968945458607484524e0 + (0.84400880445116786088e-2 + (-0.18908729783854258774e-3 + (-0.32725905467782951931e-5 + (0.91956190588652090659e-7 + (0.14593989152420122909e-9 + (-0.35239490687644444445e-10 + 0.54613829888448694898e-12 * t) * t) * t) * t) * t) * t) * t;
    case 43:
      return 0.54578857454330070965e0 + (0.76474155195880295311e-2 + (-0.20651230590808213884e-3 + (-0.25364339140543131706e-5 + (0.91455367999510681979e-7 + (-0.23061359005297528898e-9 + (-0.27512928625244444444e-10 + 0.54895806008493285579e-12 * t) * t) * t) * t) * t) * t) * t;
    case 44:
      return 0.56023851910298493910e0 + (0.67938321739997196804e-2 + (-0.21956066613331411760e-3 + (-0.18181127670443266395e-5 + (0.87650335075416845987e-7 + (-0.51548062050366615977e-9 + (-0.20068462174044444444e-10 + 0.50912654909758187264e-12 * t) * t) * t) * t) * t) * t) * t;
    case 45:
      return 0.57293478057455721150e0 + (0.58965321010394044087e-2 + (-0.22841145229276575597e-3 + (-0.11404605562013443659e-5 + (0.81430290992322326296e-7 + (-0.71512447242755357629e-9 + (-0.13372664928000000000e-10 + 0.44461498336689298148e-12 * t) * t) * t) * t) * t) * t) * t;
    case 46:
      return 0.58380635448407827360e0 + (0.49717469530842831182e-2 + (-0.23336001540009645365e-3 + (-0.51952064448608850822e-6 + (0.73596577815411080511e-7 + (-0.84020916763091566035e-9 + (-0.76700972702222222221e-11 + 0.36914462807972467044e-12 * t) * t) * t) * t) * t) * t) * t;
    case 47:
      return 0.59281340237769489597e0 + (0.40343592069379730568e-2 + (-0.23477963738658326185e-3 + (0.34615944987790224234e-7 + (0.64832803248395814574e-7 + (-0.90329163587627007971e-9 + (-0.30421940400000000000e-11 + 0.29237386653743536669e-12 * t) * t) * t) * t) * t) * t) * t;
    case 48:
      return 0.59994428743114271918e0 + (0.30976579788271744329e-2 + (-0.23308875765700082835e-3 + (0.51681681023846925160e-6 + (0.55694594264948268169e-7 + (-0.91719117313243464652e-9 + (0.53982743680000000000e-12 + 0.22050829296187771142e-12 * t) * t) * t) * t) * t) * t) * t;
    case 49:
      return 0.60521224471819875444e0 + (0.21732138012345456060e-2 + (-0.22872428969625997456e-3 + (0.92588959922653404233e-6 + (0.46612665806531930684e-7 + (-0.89393722514414153351e-9 + (0.31718550353777777778e-11 + 0.15705458816080549117e-12 * t) * t) * t) * t) * t) * t) * t;
    case 50:
      return 0.60865189969791123620e0 + (0.12708480848877451719e-2 + (-0.22212090111534847166e-3 + (0.12636236031532793467e-5 + (0.37904037100232937574e-7 + (-0.84417089968101223519e-9 + (0.49843180828444444445e-11 + 0.10355439441049048273e-12 * t) * t) * t) * t) * t) * t) * t;
    case 51:
      return 0.61031580103499200191e0 + (0.39867436055861038223e-3 + (-0.21369573439579869291e-3 + (0.15339402129026183670e-5 + (0.29787479206646594442e-7 + (-0.77687792914228632974e-9 + (0.61192452741333333334e-11 + 0.60216691829459295780e-13 * t) * t) * t) * t) * t) * t) * t;
    case 52:
      return 0.61027109047879835868e0 + (-0.43680904508059878254e-3 + (-0.20383783788303894442e-3 + (0.17421743090883439959e-5 + (0.22400425572175715576e-7 + (-0.69934719320045128997e-9 + (0.67152759655111111110e-11 + 0.26419960042578359995e-13 * t) * t) * t) * t) * t) * t) * t;
    case 53:
      return 0.60859639489217430521e0 + (-0.12305921390962936873e-2 + (-0.19290150253894682629e-3 + (0.18944904654478310128e-5 + (0.15815530398618149110e-7 + (-0.61726850580964876070e-9 + 0.68987888999111111110e-11 * t) * t) * t) * t) * t) * t;
    case 54:
      return 0.60537899426486075181e0 + (-0.19790062241395705751e-2 + (-0.18120271393047062253e-3 + (0.19974264162313241405e-5 + (0.10055795094298172492e-7 + (-0.53491997919318263593e-9 + (0.67794550295111111110e-11 - 0.17059208095741511603e-13 * t) * t) * t) * t) * t) * t) * t;
    case 55:
      return 0.60071229457904110537e0 + (-0.26795676776166354354e-2 + (-0.16901799553627508781e-3 + (0.20575498324332621581e-5 + (0.51077165074461745053e-8 + (-0.45536079828057221858e-9 + (0.64488005516444444445e-11 - 0.29311677573152766338e-13 * t) * t) * t) * t) * t) * t) * t;
    case 56:
      return 0.59469361520112714738e0 + (-0.33308208190600993470e-2 + (-0.15658501295912405679e-3 + (0.20812116912895417272e-5 + (0.93227468760614182021e-9 + (-0.38066673740116080415e-9 + (0.59806790359111111110e-11 - 0.36887077278950440597e-13 * t) * t) * t) * t) * t) * t) * t;
    case 57:
      return 0.58742228631775388268e0 + (-0.39321858196059227251e-2 + (-0.14410441141450122535e-3 + (0.20743790018404020716e-5 + (-0.25261903811221913762e-8 + (-0.31212416519526924318e-9 + (0.54328422462222222221e-11 - 0.40864152484979815972e-13 * t) * t) * t) * t) * t) * t) * t;
    case 58:
      return 0.57899804200033018447e0 + (-0.44838157005618913447e-2 + (-0.13174245966501437965e-3 + (0.20425306888294362674e-5 + (-0.53330296023875447782e-8 + (-0.25041289435539821014e-9 + (0.48490437205333333334e-11 - 0.42162206939169045177e-13 * t) * t) * t) * t) * t) * t) * t;
    case 59:
      return 0.56951968796931245974e0 + (-0.49864649488074868952e-2 + (-0.11963416583477567125e-3 + (0.19906021780991036425e-5 + (-0.75580140299436494248e-8 + (-0.19576060961919820491e-9 + (0.42613011928888888890e-11 - 0.41539443304115604377e-13 * t) * t) * t) * t) * t) * t) * t;
    case 60:
      return 0.55908401930063918964e0 + (-0.54413711036826877753e-2 + (-0.10788661102511914628e-3 + (0.19229663322982839331e-5 + (-0.92714731195118129616e-8 + (-0.14807038677197394186e-9 + (0.36920870298666666666e-11 - 0.39603726688419162617e-13 * t) * t) * t) * t) * t) * t) * t;
    case 61:
      return 0.54778496152925675315e0 + (-0.58501497933213396670e-2 + (-0.96582314317855227421e-4 + (0.18434405235069270228e-5 + (-0.10541580254317078711e-7 + (-0.10702303407788943498e-9 + (0.31563175582222222222e-11 - 0.36829748079110481422e-13 * t) * t) * t) * t) * t) * t) * t;
    case 62:
      return 0.53571290831682823999e0 + (-0.62147030670760791791e-2 + (-0.85782497917111760790e-4 + (0.17553116363443470478e-5 + (-0.11432547349815541084e-7 + (-0.72157091369041330520e-10 + (0.26630811607111111111e-11 - 0.33578660425893164084e-13 * t) * t) * t) * t) * t) * t) * t;
    case 63:
      return 0.52295422962048434978e0 + (-0.65371404367776320720e-2 + (-0.75530164941473343780e-4 + (0.16613725797181276790e-5 + (-0.12003521296598910761e-7 + (-0.42929753689181106171e-10 + (0.22170894940444444444e-11 - 0.30117697501065110505e-13 * t) * t) * t) * t) * t) * t) * t;
    case 64:
      return 0.50959092577577886140e0 + (-0.68197117603118591766e-2 + (-0.65852936198953623307e-4 + (0.15639654113906716939e-5 + (-0.12308007991056524902e-7 + (-0.18761997536910939570e-10 + (0.18198628922666666667e-11 - 0.26638355362285200932e-13 * t) * t) * t) * t) * t) * t) * t;
    case 65:
      return 0.49570040481823167970e0 + (-0.70647509397614398066e-2 + (-0.56765617728962588218e-4 + (0.14650274449141448497e-5 + (-0.12393681471984051132e-7 + (0.92904351801168955424e-12 + (0.14706755960177777778e-11 - 0.23272455351266325318e-13 * t) * t) * t) * t) * t) * t) * t;
    case 66:
      return 0.48135536250935238066e0 + (-0.72746293327402359783e-2 + (-0.48272489495730030780e-4 + (0.13661377309113939689e-5 + (-0.12302464447599382189e-7 + (0.16707760028737074907e-10 + (0.11672928324444444444e-11 - 0.20105801424709924499e-13 * t) * t) * t) * t) * t) * t) * t;
    case 67:
      return 0.46662374675511439448e0 + (-0.74517177649528487002e-2 + (-0.40369318744279128718e-4 + (0.12685621118898535407e-5 + (-0.12070791463315156250e-7 + (0.29105507892605823871e-10 + (0.90653314645333333334e-12 - 0.17189503312102982646e-13 * t) * t) * t) * t) * t) * t) * t;
    case 68:
      return 0.45156879030168268778e0 + (-0.75983560650033817497e-2 + (-0.33045110380705139759e-4 + (0.11732956732035040896e-5 + (-0.11729986947158201869e-7 + (0.38611905704166441308e-10 + (0.68468768305777777779e-12 - 0.14549134330396754575e-13 * t) * t) * t) * t) * t) * t) * t;
    case 69:
      return 0.43624909769330896904e0 + (-0.77168291040309554679e-2 + (-0.26283612321339907756e-4 + (0.10811018836893550820e-5 + (-0.11306707563739851552e-7 + (0.45670446788529607380e-10 + (0.49782492549333333334e-12 - 0.12191983967561779442e-13 * t) * t) * t) * t) * t) * t) * t;
    case 70:
      return 0.42071877443548481181e0 + (-0.78093484015052730097e-2 + (-0.20064596897224934705e-4 + (0.99254806680671890766e-6 + (-0.10823412088884741451e-7 + (0.50677203326904716247e-10 + (0.34200547594666666666e-12 - 0.10112698698356194618e-13 * t) * t) * t) * t) * t) * t) * t;
    case 71:
      return 0.40502758809710844280e0 + (-0.78780384460872937555e-2 + (-0.14364940764532853112e-4 + (0.90803709228265217384e-6 + (-0.10298832847014466907e-7 + (0.53981671221969478551e-10 + (0.21342751381333333333e-12 - 0.82975901848387729274e-14 * t) * t) * t) * t) * t) * t) * t;
    case 72:
      return 0.38922115269731446690e0 + (-0.79249269708242064120e-2 + (-0.91595258799106970453e-5 + (0.82783535102217576495e-6 + (-0.97484311059617744437e-8 + (0.55889029041660225629e-10 + (0.10851981336888888889e-12 - 0.67278553237853459757e-14 * t) * t) * t) * t) * t) * t) * t;
    case 73:
      return 0.37334112915460307335e0 + (-0.79519385109223148791e-2 + (-0.44219833548840469752e-5 + (0.75209719038240314732e-6 + (-0.91848251458553190451e-8 + (0.56663266668051433844e-10 + (0.23995894257777777778e-13 - 0.53819475285389344313e-14 * t) * t) * t) * t) * t) * t) * t;
    case 74:
      return 0.35742543583374223085e0 + (-0.79608906571527956177e-2 + (-0.12530071050975781198e-6 + (0.68088605744900552505e-6 + (-0.86181844090844164075e-8 + (0.56530784203816176153e-10 + (-0.43120012248888888890e-13 - 0.42372603392496813810e-14 * t) * t) * t) * t) * t) * t) * t;
    case 75:
      return 0.34150846431979618536e0 + (-0.79534924968773806029e-2 + (0.37576885610891515813e-5 + (0.61419263633090524326e-6 + (-0.80565865409945960125e-8 + (0.55684175248749269411e-10 + (-0.95486860764444444445e-13 - 0.32712946432984510595e-14 * t) * t) * t) * t) * t) * t) * t;
    case 76:
      return 0.32562129649136346824e0 + (-0.79313448067948884309e-2 + (0.72539159933545300034e-5 + (0.55195028297415503083e-6 + (-0.75063365335570475258e-8 + (0.54281686749699595941e-10 - 0.13545424295111111111e-12 * t) * t) * t) * t) * t) * t;
    case 77:
      return 0.30979191977078391864e0 + (-0.78959416264207333695e-2 + (0.10389774377677210794e-4 + (0.49404804463196316464e-6 + (-0.69722488229411164685e-8 + (0.52469254655951393842e-10 - 0.16507860650666666667e-12 * t) * t) * t) * t) * t) * t;
    case 78:
      return 0.29404543811214459904e0 + (-0.78486728990364155356e-2 + (0.13190885683106990459e-4 + (0.44034158861387909694e-6 + (-0.64578942561562616481e-8 + (0.50354306498006928984e-10 - 0.18614473550222222222e-12 * t) * t) * t) * t) * t) * t;
    case 79:
      return 0.27840427686253660515e0 + (-0.77908279176252742013e-2 + (0.15681928798708548349e-4 + (0.39066226205099807573e-6 + (-0.59658144820660420814e-8 + (0.48030086420373141763e-10 - 0.20018995173333333333e-12 * t) * t) * t) * t) * t) * t;
    case 80:
      return 0.26288838011163800908e0 + (-0.77235993576119469018e-2 + (0.17886516796198660969e-4 + (0.34482457073472497720e-6 + (-0.54977066551955420066e-8 + (0.45572749379147269213e-10 - 0.20852924954666666667e-12 * t) * t) * t) * t) * t) * t;
    case 81:
      return 0.24751539954181029717e0 + (-0.76480877165290370975e-2 + (0.19827114835033977049e-4 + (0.30263228619976332110e-6 + (-0.50545814570120129947e-8 + (0.43043879374212005966e-10 - 0.21228012028444444444e-12 * t) * t) * t) * t) * t) * t;
    case 82:
      return 0.23230087411688914593e0 + (-0.75653060136384041587e-2 + (0.21524991113020016415e-4 + (0.26388338542539382413e-6 + (-0.46368974069671446622e-8 + (0.40492715758206515307e-10 - 0.21238627815111111111e-12 * t) * t) * t) * t) * t) * t;
    case 83:
      return 0.21725840021297341931e0 + (-0.74761846305979730439e-2 + (0.23000194404129495243e-4 + (0.22837400135642906796e-6 + (-0.42446743058417541277e-8 + (0.37958104071765923728e-10 - 0.20963978568888888889e-12 * t) * t) * t) * t) * t) * t;
    case 84:
      return 0.20239979200788191491e0 + (-0.73815761980493466516e-2 + (0.24271552727631854013e-4 + (0.19590154043390012843e-6 + (-0.38775884642456551753e-8 + (0.35470192372162901168e-10 - 0.20470131678222222222e-12 * t) * t) * t) * t) * t) * t;
    case 85:
      return 0.18773523211558098962e0 + (-0.72822604530339834448e-2 + (0.25356688567841293697e-4 + (0.16626710297744290016e-6 + (-0.35350521468015310830e-8 + (0.33051896213898864306e-10 - 0.19811844544000000000e-12 * t) * t) * t) * t) * t) * t;
    case 86:
      return 0.17327341258479649442e0 + (-0.71789490089142761950e-2 + (0.26272046822383820476e-4 + (0.13927732375657362345e-6 + (-0.32162794266956859603e-8 + (0.30720156036105652035e-10 - 0.19034196304000000000e-12 * t) * t) * t) * t) * t) * t;
    case 87:
      return 0.15902166648328672043e0 + (-0.70722899934245504034e-2 + (0.27032932310132226025e-4 + (0.11474573347816568279e-6 + (-0.29203404091754665063e-8 + (0.28487010262547971859e-10 - 0.18174029063111111111e-12 * t) * t) * t) * t) * t) * t;
    case 88:
      return 0.14498609036610283865e0 + (-0.69628725220045029273e-2 + (0.27653554229160596221e-4 + (0.92493727167393036470e-7 + (-0.26462055548683583849e-8 + (0.26360506250989943739e-10 - 0.17261211260444444444e-12 * t) * t) * t) * t) * t) * t;
    case 89:
      return 0.13117165798208050667e0 + (-0.68512309830281084723e-2 + (0.28147075431133863774e-4 + (0.72351212437979583441e-7 + (-0.23927816200314358570e-8 + (0.24345469651209833155e-10 - 0.16319736960000000000e-12 * t) * t) * t) * t) * t) * t;
    case 90:
      return 0.11758232561160626306e0 + (-0.67378491192463392927e-2 + (0.28525664781722907847e-4 + (0.54156999310046790024e-7 + (-0.21589405340123827823e-8 + (0.22444150951727334619e-10 - 0.15368675584000000000e-12 * t) * t) * t) * t) * t) * t;
    case 91:
      return 0.10422112945361673560e0 + (-0.66231638959845581564e-2 + (0.28800551216363918088e-4 + (0.37758983397952149613e-7 + (-0.19435423557038933431e-8 + (0.20656766125421362458e-10 - 0.14422990012444444444e-12 * t) * t) * t) * t) * t) * t;
    case 92:
      return 0.91090275493541084785e-1 + (-0.65075691516115160062e-2 + (0.28982078385527224867e-4 + (0.23014165807643012781e-7 + (-0.17454532910249875958e-8 + (0.18981946442680092373e-10 - 0.13494234691555555556e-12 * t) * t) * t) * t) * t) * t;
    case 93:
      return 0.78191222288771379358e-1 + (-0.63914190297303976434e-2 + (0.29079759021299682675e-4 + (0.97885458059415717014e-8 + (-0.15635596116134296819e-8 + (0.17417110744051331974e-10 - 0.12591151763555555556e-12 * t) * t) * t) * t) * t) * t;
    case 94:
      return 0.65524757106147402224e-1 + (-0.62750311956082444159e-2 + (0.29102328354323449795e-4 + (-0.20430838882727954582e-8 + (-0.13967781903855367270e-8 + (0.15958771833747057569e-10 - 0.11720175765333333333e-12 * t) * t) * t) * t) * t) * t;
    case 95:
      return 0.53091065838453612773e-1 + (-0.61586898417077043662e-2 + (0.29057796072960100710e-4 + (-0.12597414620517987536e-7 + (-0.12440642607426861943e-8 + (0.14602787128447932137e-10 - 0.10885859114666666667e-12 * t) * t) * t) * t) * t) * t;
    case 96:
      return 0.40889797115352738582e-1 + (-0.60426484889413678200e-2 + (0.28953496450191694606e-4 + (-0.21982952021823718400e-7 + (-0.11044169117553026211e-8 + (0.13344562332430552171e-10 - 0.10091231402844444444e-12 * t) * t) * t) * t) * t) * t;
    case 97: case 98: case 99: case 100: {  /* use Taylor expansion for small x (|x| <= 0.0309...) */
      /* (2/sqrt(pi)) * (x - 2/3 x^3  + 4/15 x^5  - 8/105 x^7 + 16/945 x^9) */
      double x2 = x*x;
      return x*(1.1283791670955125739
                  - x2*(0.75225277806367504925
                          - x2*(0.30090111122547001970
                                  - x2*(0.085971746064420005629
                                          - x2 * 0.016931216931216931217))));
    }
  }
  /* Since 0 <= y100 < 101, this is only reached if x is NaN, in which case we should return NaN. */
  return AGN_NAN;
}

/* Scaled Dawson Integral w_im(x) = 2*calc.dawson(x)/sqrt(pi), 2.21.6.
   For |x| <= 45 the value is taken from w_im_y100, negative arguments being mapped onto
   positive ones via w_im(x) = -w_im(-x). For larger |x| a continued-fraction expansion is
   evaluated directly: one term beyond |x| > 5e7 (which avoids overflow in x^4), five terms
   otherwise. */
LUALIB_API double tools_w_im (double x) {
  double xsq;
  if (x >= 0) {
    if (x <= 45) return w_im_y100(100/(1 + x), x);
  } else if (!(x < -45)) {  /* -45 <= x < 0; also taken for NaN, as both comparisons fail */
    return -w_im_y100(100/(1 - x), -x);
  }
  /* here |x| > 45: the continued-fraction expansion is faster */
  if (x > 5e7 || x < -5e7)  /* 1-term expansion, important to avoid overflow */
    return INVSQRTPI/x;
  /* 5-term expansion, simplified from: ispi/(x-0.5/(x-1/(x-1.5/(x-2/x)))) */
  xsq = x*x;
  return INVSQRTPI*(xsq*(xsq - 4.5) + 2)/(x*(xsq*(xsq - 5) + 3.75));
}


/* Faddeeva / scaled complex error function w(z)
   Written by Steven G. Johnson, October 2012, modified by a walz. MIT licence.
   Taken from: http://ab-initio.mit.edu/Faddeeva.cc */
/* C(a, b) builds the complex value with real part a and imaginary part b. Without PROPCMPLX
   it is a macro on top of the compiler's complex type; with PROPCMPLX it is a function that
   fills the two-element cmplx array of a PROPCMPLX_CMPLX. */
#ifndef PROPCMPLX
#define C(a,b) ((a) + I*(b))
#else
PROPCMPLX_CMPLX C (double a, double b) {
  PROPCMPLX_CMPLX z;
  z.cmplx[0] = a;  /* real part */
  z.cmplx[1] = b;  /* imaginary part */
  return z;
}
#endif
/* Both name-mapping macros paste the tools_ prefix in front of the given name,
   e.g. FADDEEVA(w_im) expands to tools_w_im. */
#define FADDEEVA(name)    tools_ ## name
#define FADDEEVA_RE(name) tools_ ## name

/* sinc(x) = sin(x)/x for a caller that already holds sinx = sin(x).
   Below |x| = 1e-4 the quotient is replaced by the two-term series 1 - x^2/6, which also
   covers x = 0; in that range sinx is not looked at. */
static inline double aux_sinc (double x, double sinx) {
  if (fabs(x) < 1e-4)
    return 1 - 0.1666666666666666666667*x*x;
  return sinx/x;
}

/* Three-term Taylor polynomial of sinh: x + x^3/6 + x^5/120, in nested form.
   Accurate to machine precision for |x| < 1e-2. */
static inline double aux_sinh_taylor (double x) {
  const double xsq = x*x;
  const double tail = 0.1666666666666666666667 + 0.00833333333333333333333*xsq;
  return x*(1 + xsq*tail);
}

/* precomputed table of expa2n2[n-1] = exp(-a2*n*n) for double-precision a2 = 0.26865... in FADDEEVA(w), below.
   The table holds the values for n = 1 .. 51 followed by a terminating 0.0; tools_w indexes it
   with expa2n2[n - 1] only when relerr == DBL_EPSILON. */
static const double expa2n2[] = {
  7.64405281671221563e-01,
  3.41424527166548425e-01,
  8.91072646929412548e-02,
  1.35887299055460086e-02,
  1.21085455253437481e-03,
  6.30452613933449404e-05,
  1.91805156577114683e-06,
  3.40969447714832381e-08,
  3.54175089099469393e-10,
  2.14965079583260682e-12,
  7.62368911833724354e-15,
  1.57982797110681093e-17,
  1.91294189103582677e-20,
  1.35344656764205340e-23,
  5.59535712428588720e-27,
  1.35164257972401769e-30,
  1.90784582843501167e-34,
  1.57351920291442930e-38,
  7.58312432328032845e-43,
  2.13536275438697082e-47,
  3.51352063787195769e-52,
  3.37800830266396920e-57,
  1.89769439468301000e-62,
  6.22929926072668851e-68,
  1.19481172006938722e-73,
  1.33908181133005953e-79,
  8.76924303483223939e-86,
  3.35555576166254986e-92,
  7.50264110688173024e-99,
  9.80192200745410268e-106,
  7.48265412822268959e-113,
  3.33770122566809425e-120,
  8.69934598159861140e-128,
  1.32486951484088852e-135,
  1.17898144201315253e-143,
  6.13039120236180012e-152,
  1.86258785950822098e-160,
  3.30668408201432783e-169,
  3.43017280887946235e-178,
  2.07915397775808219e-187,
  7.36384545323984966e-197,
  1.52394760394085741e-206,
  1.84281935046532100e-216,
  1.30209553802992923e-226,
  5.37588903521080531e-237,
  1.29689584599763145e-247,
  1.82813078022866562e-258,
  1.50576355348684241e-269,
  7.24692320799294194e-281,
  2.03797051314726829e-292,
  3.34880215927873807e-304,
  0.0  /* underflow (also prevents reads past array end, below) */
};

/* Accessors for the real and imaginary part of a complex value that work in both build
   variants: creal/cimag on the compiler's complex type, or the cmplx[0]/cmplx[1] members
   when PROPCMPLX is defined. */
#ifndef PROPCMPLX
#define cmplxreal(z) creal(z)
#define cmplximag(z) cimag(z)
#else
#define cmplxreal(z) (z).cmplx[0]
#define cmplximag(z) (z).cmplx[1]
#endif

/* Faddeeva / scaled complex error function w(z) for complex z = x + I*y.
   relerr is the requested relative error: values <= DBL_EPSILON are replaced by DBL_EPSILON
   (precomputed constants a, c, a2 and the expa2n2 table are then used), values > 0.1 are
   clamped to 0.1.
   Outline:
   - Re z == 0 and Im z == 0 are delegated to erfcx and exp/w_im, respectively;
   - for large |z| (see the condition below) a continued fraction is evaluated for z or, if
     y < 0, for -z, followed by the reflection w(z) = 2*exp(-z*z) - w(-z);
   - for |x| < 10 the five series sum1 .. sum5 are accumulated;
   - otherwise only sum3 and sum5 are accumulated, summing outwards from n0 ~ x/a.
   The series branches meet at the label finish, where the sums are added to ret. */
#ifndef PROPCMPLX
LUALIB_API agn_Complex tools_w (agn_Complex z, double relerr) {
#else
LUALIB_API PROPCMPLX_CMPLX tools_w (PROPCMPLX_CMPLX z, double relerr) {
#endif
  double a, a2, c, sum1, sum2, sum3, sum4, sum5;
#ifndef PROPCMPLX
  agn_Complex ret;
#else
  PROPCMPLX_CMPLX ret;
#endif
  if (cmplxreal(z) == 0.0)
    return C(FADDEEVA_RE(erfcx)(cmplximag(z)), cmplxreal(z));  /* give correct sign of 0 in cimag(w) */
  else if (cmplximag(z) == 0)
    return C(sun_exp(-tools_square(cmplxreal(z))), FADDEEVA(w_im)(cmplxreal(z)));
  if (relerr <= DBL_EPSILON) {
    relerr = DBL_EPSILON;
    a = 0.518321480430085929872;   /* pi/sqrt(-log(eps*0.5)) */
    c = 0.329973702884629072537;   /* (2/pi)*a */
    a2 = 0.268657157075235951582;  /* a^2 */
  } else {
    if (relerr > 0.1) relerr = 0.1;  /* not sensible to compute < 1 digit */
    a = PI/sqrt(-sun_log(relerr*0.5));
    c = (2/PI)*a;
    a2 = a*a;
  }
  const double x = fabs(cmplxreal(z));
  const double y = cmplximag(z), ya = fabs(y);
  ret = C(0.0, 0.0);  /* return value */
  sum1 = 0; sum2 = 0; sum3 = 0; sum4 = 0; sum5 = 0;
  if (ya > 7 || (x > 6  /* continued fraction is faster.
                    As pointed out by M. Zaghloul, the continued
                    fraction seems to give a large relative error in
                    Re w(z) for |x| ~ 6 and small |y|, so use
                    algorithm 816 in this region: */
                 && (ya > 0.1 || (x > 8 && ya > 1e-10) || x > 28))) {
    /* Poppe & Wijers suggest using a number of terms nu = 3 + 1442 / (26*rho + 77)
       where rho = sqrt((x/x0)^2 + (y/y0)^2) where x0=6.3, y0=4.4.
       (They only use this expansion for rho >= 1, but rho a little less
        than 1 seems okay too.)
       Instead, I did my own fit to a slightly different function
       that avoids the hypotenuse calculation, using NLopt to minimize
       the sum of the squares of the errors in nu with the constraint
       that the estimated nu be >= minimum nu to attain machine precision.
       I also separate the regions where nu == 2 and nu == 1. */
    double xs = y < 0 ? -cmplxreal(z) : cmplxreal(z);  /* compute for -z if y < 0 */
    if (x + ya > 4000) {   /* nu <= 2 */
      if (x + ya > 1e7) {  /* nu == 1, w(z) = i/sqrt(pi)/z */
        if (x > ya) {      /* scale to avoid overflow */
          double yax = ya/xs;
          double denom = INVSQRTPI/(xs + yax*ya);
          ret = C(denom*yax, denom);
        } else if (isinf(ya))
          return ((isnan(x) || y < 0) ? C(AGN_NAN, AGN_NAN) : C(0, 0));
        else {
          double xya = xs / ya;
          double denom = INVSQRTPI / (xya*xs + ya);
          ret = C(denom, denom*xya);
        }
      }
      else {  /* nu == 2, w(z) = i/sqrt(pi) * z / (z*z - 0.5) */
        double dr = xs*xs - ya*ya - 0.5, di = 2*xs*ya;
        double denom = INVSQRTPI / (dr*dr + di*di);
        ret = C(denom*(xs*di - ya*dr), denom*(xs*dr + ya*di));
      }
    } else {  /* compute nu(z) estimate and do general continued fraction */
      const double c0 = 3.9, c1 = 11.398, c2 = 0.08254, c3 = 0.1421, c4 = 0.2023;  /* fit */
      double nu = sun_floor(c0 + c1/(c2*x + c3*ya + c4));
      double wr = xs, wi = ya;
      /* nu runs downwards in steps of 0.5, i.e. the fraction is evaluated from the innermost term outwards */
      for (nu = 0.5*(nu - 1); nu > 0.4; nu -= 0.5) {  /* w <- z - nu/w: */
        double denom = nu/(wr*wr + wi*wi);
        wr = xs - wr*denom;
        wi = ya + wi*denom;
      }
      {  /* w(z) = i/sqrt(pi)/w: */
        double denom = INVSQRTPI/(wr*wr + wi*wi);
        ret = C(denom*wi, denom*wr);
      }
    }
    if (y < 0) {
      /* use w(z) = 2.0*exp(-z*z) - w(-z),
         but be careful of overflow in exp(-z*z) = exp(-(xs*xs-ya*ya) -2*i*xs*ya) */
#ifndef PROPCMPLX
      agn_Complex z = C( (ya - xs)*(xs + ya), 2*xs*y );  /* note: shadows the parameter z within this block */
      return 2.0*cexp(z) - ret;
#else
      double z[2];  /* note: shadows the parameter z within this block */
      agnc_exp(z, (ya - xs)*(xs + ya), 2*xs*y);
      agnc_mul(z, z[0], z[1], 2.0, 0);
      agnc_sub(z, z[0], z[1], ret.cmplx[0], ret.cmplx[1]);
      return C(z[0], z[1]);
#endif
    } else
      return ret;
  }
  /* Note: The test that seems to be suggested in the paper is x <
     sqrt(-log(DBL_MIN)), about 26.6, since otherwise exp(-x^2)
     underflows to zero and sum1,sum2,sum4 are zero.  However, long
     before this occurs, the sum1,sum2,sum4 contributions are
     negligible in double precision; I find that this happens for x >
     about 6, for all y.  On the other hand, I find that the case
     where we compute all of the sums is faster (at least with the
     precomputed expa2n2 table) until about x=10.  Furthermore, if we
     try to compute all of the sums for x > 20, I find that we
     sometimes run into numerical problems because underflow/overflow
     problems start to appear in the various coefficients of the sums,
     below.  Therefore, we use x < 10 here. */
  else if (x < 10) {
    double expx2;
    double prod2ax = 1, prodm2ax = 1;  /* running powers exp(2*a*x)^n and exp(-2*a*x)^n */
    if (isnan(y)) return C(y, y);
    /* Somewhat ugly copy-and-paste duplication here, but I see significant
       speedups from using the special-case code with the precomputed
       exponential, and the x < 5e-4 special case is needed for accuracy. */
    if (relerr == DBL_EPSILON) { // use precomputed exp(-a2*(n*n)) table
      if (x < 5e-4) {  /* compute sum4 and sum5 together as sum5-sum4 */
        int n;
        const double x2 = x*x;
        expx2 = 1 - x2*(1 - 0.5*x2);  /* exp(-x*x) via Taylor */
        /* compute exp(2*a*x) and exp(-2*a*x) via Taylor, to double precision */
        const double ax2 = 1.036642960860171859744*x;  /* 2*a*x */
        const double exp2ax =
          1 + ax2*(1 + ax2*(0.5 + 0.166666666666666666667*ax2));
        const double expm2ax =
          1 - ax2*(1 - ax2*(0.5 - 0.166666666666666666667*ax2));
        for (n=1; 1; ++n) {
          const double coef = expa2n2[n - 1]*expx2/(a2*(n*n) + y*y);
          prod2ax *= exp2ax;
          prodm2ax *= expm2ax;
          sum1 += coef;
          sum2 += coef * prodm2ax;
          sum3 += coef * prod2ax;
          /* really = sum5 - sum4 */
          sum5 += coef*(2*a)*n*aux_sinh_taylor((2*a)*n*x);
          /* test convergence via sum3 */
          if (coef*prod2ax < relerr*sum3) break;
        }
      } else {  /* x > 5e-4, compute sum4 and sum5 separately */
        int n;
        expx2 = tools_expx2(x, -1);  /* = sun_exp(-x*x); 2.21.7 */
        const double exp2ax = sun_exp((2*a)*x), expm2ax = 1/exp2ax;
        for (n=1; 1; ++n) {
          const double coef = expa2n2[n - 1]*expx2/(a2*(n*n) + y*y);
          prod2ax *= exp2ax;
          prodm2ax *= expm2ax;
          sum1 += coef;
          sum2 += coef*prodm2ax;
          sum4 += (coef*prodm2ax)*(a*n);
          sum3 += coef*prod2ax;
          sum5 += (coef*prod2ax)*(a*n);
          /* test convergence via sum5, since this sum has the slowest decay */
          if ((coef*prod2ax)*(a*n) < relerr*sum5) break;
        }
      }
    } else {  /* relerr != DBL_EPSILON, compute exp(-a2*(n*n)) on the fly */
      int n;
      const double exp2ax = sun_exp((2*a)*x), expm2ax = 1/exp2ax;
      if (x < 5e-4) {  /* compute sum4 and sum5 together as sum5-sum4 */
        const double x2 = x*x;
        expx2 = 1 - x2*(1 - 0.5*x2);  /* exp(-x*x) via Taylor */
        for (n=1; 1; ++n) {
          const double coef = sun_exp(-a2*(n*n))*expx2/(a2*(n*n) + y*y);
          prod2ax *= exp2ax;
          prodm2ax *= expm2ax;
          sum1 += coef;
          sum2 += coef*prodm2ax;
          sum3 += coef*prod2ax;
          /* really = sum5 - sum4 */
          sum5 += coef*(2*a)*n*aux_sinh_taylor((2*a)*n*x);
          /* test convergence via sum3 */
          if (coef*prod2ax < relerr*sum3) break;
        }
      } else {  /* x > 5e-4, compute sum4 and sum5 separately */
        expx2 = tools_expx2(x, -1);  /* = sun_exp(-x*x); 2.21.7 */
        for (n=1; 1; ++n) {
          const double coef = sun_exp(-a2*(n*n))*expx2/(a2*(n*n) + y*y);
          prod2ax *= exp2ax;
          prodm2ax *= expm2ax;
          sum1 += coef;
          sum2 += coef*prodm2ax;
          sum4 += (coef*prodm2ax)*(a*n);
          sum3 += coef*prod2ax;
          sum5 += (coef*prod2ax)*(a*n);
          /* test convergence via sum5, since this sum has the slowest decay */
          if ((coef*prod2ax)*(a*n) < relerr*sum5) break;
        }
      }
    }
    const double expx2erfcxy =  /* avoid spurious overflow for large negative y */
      y > -6  /* for y < -6, erfcx(y) = 2*exp(y*y) to double precision */
      ? expx2*FADDEEVA_RE(erfcx)(y) : 2*sun_exp(y*y - x*x);
    if (y > 5) {  /* imaginary terms cancel */
      const double sinxy = sun_sin(x*y);
      ret = C((expx2erfcxy - c*y*sum1)*sun_cos(2*x*y)
          + (c*x*expx2)*sinxy*aux_sinc(x*y, sinxy), 0);
    } else {
      double xs = cmplxreal(z);  /* signed real part, unlike x = |Re z| */
      const double sinxy = sin(xs*y);
      const double sin2xy = sin(2*xs*y), cos2xy = cos(2*xs*y);
      const double coef1 = expx2erfcxy - c*y*sum1;
      const double coef2 = c*xs*expx2;
      ret = C(coef1*cos2xy + coef2*sinxy*aux_sinc(xs*y, sinxy),
              coef2*aux_sinc(2*xs*y, sin2xy) - coef1 * sin2xy);
    }
  } else {  /* x large: only sum3 & sum5 contribute (see above note) */
    int dn;
    if (isnan(x)) return C(x,x);
    if (isnan(y)) return C(y,y);
    ret = C(sun_exp(-x*x), 0);  /* |y| < 1e-10, so we only need exp(-x*x) term */
    /* (round instead of ceil as in original paper; note that x/a > 1 here) */
    double n0 = sun_floor(x/a + 0.5);  /* sum in both directions, starting at n0; 2.41.0 tweak */
    double dx = a*n0 - x;
    sum3 = sun_exp(-dx*dx)/(a2*(n0*n0) + y*y);
    sum5 = a*n0*sum3;
    double exp1 = sun_exp(4*a*dx), exp1dn = 1;
    for (dn=1; n0 - dn > 0; ++dn) {  /* loop over n0-dn and n0+dn terms */
      double np = n0 + dn, nm = n0 - dn;
      double tp = sun_exp(-tools_square(a*dn + dx));
      double tm = tp*(exp1dn *= exp1);  /* trick to get tm from tp */
      tp /= (a2*(np*np) + y*y);
      tm /= (a2*(nm*nm) + y*y);
      sum3 += tp + tm;
      sum5 += a*(np*tp + nm*tm);
      if (a*(np*tp + nm*tm) < relerr*sum5) goto finish;
    }
    while (1) {  /* loop over n0+dn terms only (since n0-dn <= 0) */
      double np = n0 + dn++;
      double tp = sun_exp(-tools_square(a*dn + dx))/(a2*(np*np) + y*y);
      sum3 += tp;
      sum5 += a*np*tp;
      if (a*np*tp < relerr*sum5) goto finish;
    }
  }
finish:
  /* add the series contributions; the imaginary part takes the sign of Re z */
#ifndef PROPCMPLX
  return ret + C((0.5*c)*y*(sum2 + sum3),
                 (0.5*c)*copysign(sum5 - sum4, cmplxreal(z)));
#else
  {
    double zz[2];
    agnc_add(zz, cmplxreal(ret), cmplximag(ret), (0.5*c)*y*(sum2 + sum3), (0.5*c)*copysign(sum5 - sum4, cmplxreal(z)));
    return C(zz[0], zz[1]);
  }
#endif
}


#ifndef PROPCMPLX

/* Old implementation just for reference:
LUALIB_API double tools_cabs (agn_Complex z) {
  lua_Number a, b, t;
  a = creal(z); b = cimag(z);
  if ( tools_isnan(a) || tools_isnan(b) ) return AGN_NAN;
  if ( tools_isinf(a) != 0 || tools_isinf(b) != 0 ) return HUGE_VAL;
  if (a < 0) a = -a;
  if (b < 0) b = -b;
  if (b > a) { SWAP(a, b, t) } //  now b is the smaller values
  if (a + b == a) return (a);  // -> any a && b == 0
  t = b/a;  // minimum divided by maximum
  return a * sqrt(1 + t*t);
} */

/* Modulus |z| of a complex number, 2.11.1; changed 4.5.6 but this is just a very minor tweak.
   Works on the absolute values of both parts (sign bits are cleared in the high words) and
   divides the smaller by the larger part before squaring. */
LUALIB_API double tools_cabs (agn_Complex z) {
  int32_t hre, lre, him, lim;
  lua_Number re, im, q;
  re = creal(z);
  EXTRACT_WORDS(hre, lre, re);
  hre &= 0x7fffffff;
  SET_HIGH_WORD(re, hre);  /* re = |re| */
  im = cimag(z);
  EXTRACT_WORDS(him, lim, im);
  him &= 0x7fffffff;
  SET_HIGH_WORD(im, him);  /* im = |im| */
  /* im == 0 or re in { inf, nan }: the result is |re| */
  if (hre >= 0x7ff00000 || (him | lim) == 0) return re;
  /* re == 0 or im in { inf, nan }: the result is |im| */
  if (him >= 0x7ff00000 || (hre | lre) == 0) return im;
  if (him > hre) { SWAP(re, im, q); }  /* decided on the high words only */
  q = im/re;  /* minimum divided by maximum */
  return re * sqrt(1 + q*q);
}

/* Reciprocal 1/x of a complex number, 2.35.3.
   Returns AGN_NAN for x = 0 and 0 - I/Im(x) for a vanishing real part; otherwise
   conj(x) scaled by 1/sun_pytha(Re x, Im x). */
LUALIB_API agn_Complex tools_crecip (agn_Complex x) {  /* 2.35.3 */
  agn_Complex r;
  lua_Number re, im;
  re = creal(x); im = cimag(x);
  if (re != 0.0) {  /* also taken if re is NaN */
    lua_Number inv = 1/sun_pytha(re, im);  /* 4.5.6 optimisation */
    __real__ r = re*inv;
    __imag__ r = (im == 0) ? 0.0 : -im*inv;  /* a zero imaginary part stays +0, never -0 */
  } else if (im != 0.0) {
    __real__ r = 0.0;
    __imag__ r = -1/im;
  } else {
    (r) = AGN_NAN;
  }
  return r;
}


/* Complex quotient x/y, 2.11.1: x*conj(y) scaled by 1/(c^2 + d^2) with y = c + I*d.
   A divisor of exactly 0 yields AGN_NAN.
   NOTE(review): c*c + d*d is formed without scaling, so very large or very small divisors
   may overflow or underflow here - confirm whether callers can pass such values. */
LUALIB_API agn_Complex tools_cdiv (agn_Complex x, agn_Complex y) {  /* 2.11.1 */
  agn_Complex r;
  lua_Number a, b, c, d, inv;
  c = creal(y); d = cimag(y);
  if (c == 0 && d == 0) {
    (r) = AGN_NAN;
    return r;
  }
  a = creal(x); b = cimag(x);
  inv = 1/(c*c + d*d);
  __real__ r = a*c*inv + b*d*inv;
  __imag__ r = b*c*inv - a*d*inv;
  return r;
}


/* Complex exponential, 2.11.1: exp(a + I*b) = exp(a)*(cos(b) + I*sin(b)). */
LUALIB_API agn_Complex tools_cexp (agn_Complex z) {  /* 2.11.1 */
  agn_Complex r;
  lua_Number mag, si, co;
  mag = sun_exp(__real__ z);
  sun_sincos(__imag__ z, &si, &co);  /* sine and cosine of the imaginary part in one call */
  __real__ r = mag*co;
  __imag__ r = mag*si;
  return r;
}


/* Computes exp(z) scaled by 2**expt. The exponential of the real part is obtained from
   ds_frexp_exp together with a binary exponent, which is added to expt; the combined power
   of two is then applied as two separate factors. */
LUALIB_API agn_Complex ds_ldexp_cexp (agn_Complex z, int expt) {
  double re, im, mant, scale1, scale2, co, si;
  int frexp_expt, first, second;
  re = creal(z);
  im = cimag(z);
  mant = ds_frexp_exp(re, &frexp_expt);
  expt += frexp_expt;
  /* Arrange so that scale1 * scale2 == 2**expt. We use this to compensate for scalbn being horrendously slow. */
  first = expt/2;  /* truncates towards zero */
  second = expt - first;
  INSERT_WORDS(scale1, (0x3ff + first) << 20, 0);
  INSERT_WORDS(scale2, (0x3ff + second) << 20, 0);
  co = cos(im);
  si = sun_sin(im);
  return (cpack(co*mant*scale1*scale2, si*mant*scale1*scale2));
}


/* Complex power a^z, 2.11.1, 13 % faster than GCC's implementation.
   - a = 0 with Re z <= 0: AGN_NAN if Im z == 0, else 0 (modified);
   - z = 0: 1;
   - real integral z with |z| <= AGN_MAXIPOWITER: repeated multiplication, followed by
     tools_crecip for negative exponents;
   - otherwise the polar form |a|^z * exp(I*z*arg(a)) is evaluated. */
LUALIB_API agn_Complex tools_cpow (agn_Complex a, agn_Complex z) {  /* 2.11.1, 13 % faster than GCC's implementation */
  agn_Complex w;
  double ere, eim, radius, angle, absa, arga, si, co, bre, bim;
  bre = creal(a); bim = cimag(a);  /* base */
  ere = creal(z); eim = cimag(z);  /* exponent */
  if (bre == 0 && bim == 0 && ere <= 0) {  /* modified */
    return (eim == 0) ? AGN_NAN : 0;
  }
  if (eim == 0.0) {    /* real power ? */
    if (ere == 0.0) {  /* raise to the power of zero ? */
      return (bre == 0 && bim == 0) ? AGN_NAN : 1 + 0*I;
    }
    if (tools_isint(ere) && fabs(ere) <= AGN_MAXIPOWITER) {
      /* 2.35.3 extension to save computation time */
      int k, count, negative;
      lua_Number fre, fim, keep;
      negative = ere < 0;
      if (negative) ere = -ere;
      count = (int)ere;
      fre = bre; fim = bim;  /* constant factor; bre/bim accumulate the product */
      for (k=1; k < count; k++) {  /* the loop is faster than the tools_intpow solution */
        keep = bre;
        bre = bre*fre - bim*fim;
        bim = keep*fim + bim*fre;
      }
      if (negative) return tools_crecip(bre + bim*I);
      return bre + bim*I;
    }
  }
  absa = sun_hypot(bre, bim);
  if (absa == 0.0) {  /* 2.35.2 addition */
    return 0.0 + 0.0*I;
  }
  arga = sun_atan2(bim, bre);
  radius = sun_pow(absa, ere, 0);
  angle = ere*arga;
  if (eim != 0.0) {  /* contribution of the imaginary part of the exponent */
    radius = radius*sun_exp(-eim*arga);
    angle = angle + eim*sun_log(absa);
  }
  sun_sincos(angle, &si, &co);
  __real__ w = radius*co;
  __imag__ w = radius*si;  /* we deliberately do not zero values close to 0 ! */
  return w;
}


/* Complex natural logarithm, 2.11.1, 25 % faster than GCC's clog.
   Real part: half the logarithm of sun_pytha(a, b); imaginary part: atan2(b, a).
   For z = 0 the result is AGN_NAN + 0*I. */
LUALIB_API agn_Complex tools_clog (agn_Complex z) {  /* 2.11.1, 25 % faster than GCC's clog */
  agn_Complex r;
  lua_Number re, im;
  re = __real__ z;
  im = __imag__ z;
  if (re != 0 || im != 0) {
    /* sun_pytha replaces the former a*a + b*b: 3.16.4 protection against underflow and overflow */
    lua_Number sq = sun_pytha(re, im);
    __real__ r = sun_ldexp(sun_log(sq), -1);  /* log(sq)/2 */
    __imag__ r = sun_atan2(im, re);
  } else {
    __real__ r = AGN_NAN;
    __imag__ r = 0;
  }
  return r;
}


/* 2.11.2, this Maple-based implementation is 12 % faster than
   https://sourceforge.net/p/mingw-w64/mingw-w64/ci/2bdcae2a1af9ee60c5e9ee1c5d80553295c092b4/tree/mingw-w64-crt/complex/csqrt.c#l60
   the latter 8 % faster than David Schultz's 2007 FreeBSD version.

   Principal square root w of z = a + I*b:
     Re w = sqrt(2*|z| + 2*a)/2,   Im w = csgn(b - I*a)*sqrt(2*|z| - 2*a)/2.
   Depending on the sign of a, one of the two radicands subtracts nearly equal numbers when
   |b| is much smaller than |a|, which used to wipe out the small component (e.g. the real
   part of sqrt(-1 + 1e-10*I) came out as 0). Therefore only the radicand that adds is
   evaluated directly; the other root follows from the identity
     sqrt(2*|z| + 2*a) * sqrt(2*|z| - 2*a) = 2*|b|.
   If the directly computed root is zero, infinite or NaN, the original formula is used, so
   results for such arguments are unchanged.
   |z| is still formed as sqrt(a*a + b*b) without scaling, i.e. it can overflow or underflow
   for extreme arguments; slm_csqrt rescales its argument to avoid that. */
LUALIB_API agn_Complex tools_csqrt (agn_Complex z) {
  agn_Complex r;
  lua_Number a, b, t4, t6, t10;
  a = creal(z); b = cimag(z);
  if (b == 0) {  /* real argument */
    if (a < 0) {
      __real__ r = 0;
      __imag__ r = sqrt(-a);
    } else {
      __real__ r = sqrt(a);
      __imag__ r = 0;
    }
  } else {
    t4 = sqrt(a*a + b*b);  /* |z| */
    if (a < 0) {
      /* 2*|Im w|: both terms are positive, no cancellation */
      t10 = sqrt(2.0*t4 - 2.0*a);
      /* 2*Re w from the product identity; original formula as fallback */
      t6 = (t10 > 0 && t10 <= DBL_MAX) ? 2.0*fabs(b)/t10 : sqrt(2.0*t4 + 2.0*a);
    } else {  /* a >= 0 or a is NaN */
      /* 2*Re w: both terms are non-negative, no cancellation */
      t6 = sqrt(2.0*t4 + 2.0*a);
      /* 2*|Im w| from the product identity; original formula as fallback */
      t10 = (t6 > 0 && t6 <= DBL_MAX) ? 2.0*fabs(b)/t6 : sqrt(2.0*t4 - 2.0*a);
    }
    __real__ r = t6/2;
    __imag__ r = tools_csgn(b, -a)*t10/2;
  }
  return r;
}


/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*							csqrt()
 *
 *	Complex square root
 *
 * SYNOPSIS:
 *
 * agn_Complex csqrt();
 * agn_Complex z, w;
 *
 * w = csqrt (z);
 *
 * DESCRIPTION:
 *
 * If z = x + iy,  r = |z|, then
 *
 *                       1/2
 * Re w  =  [ (r + x)/2 ]   ,
 *
 *                       1/2
 * Im w  =  [ (r - x)/2 ]   .
 *
 * Cancellation error in r-x or r+x is avoided by using the identity  2 Re w Im w  =  y.
 *
 * Note that -w is also a square root of z.  The root chosen is always in the right half plane
 * and Im w has the same sign as y.
 *
 * ACCURACY:
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    DEC       -10,+10     25000       3.2e-17     9.6e-18
 *    IEEE      -10,+10   1,000,000     2.9e-16     6.1e-17
 */

/* Complex square root after S. L. Moshier. Real and purely imaginary arguments are handled
   up front; all other arguments are rescaled, the larger component is taken from
   sqrt((|z| +- x)/2) and the other one from y/(2*component). */
LUALIB_API agn_Complex slm_csqrt (agn_Complex z) {
  double x, y, modulus, wre, wim, scale;
  x = creal(z);
  y = cimag(z);
  if (y == 0.0) {  /* real argument */
    double root;
    if (x == 0.0)
      return (0.0 + y * I);
    root = sqrt(fabs(x));
    if (x < 0.0)
      return (0.0 + I*copysign(root, y));
    return (root + I*y);
  }
  if (x == 0.0) {  /* purely imaginary argument: both parts have magnitude sqrt(|y|/2) */
    double root = sqrt(0.5*fabs(y));
    return ((y > 0) ? root + root*I : root - root*I);
  }
  /* Rescale to avoid internal overflow or underflow.  */
  if ((fabs(x) > 4.0) || (fabs(y) > 4.0)) {
    scale = 2.0;
    x *= 0.25;
    y *= 0.25;
  } else {
    scale = 7.450580596923828125e-9; /* 2^-27 */
    x *= 1.8014398509481984e16;  /* 2^54 */
    y *= 1.8014398509481984e16;
  }
  modulus = cabs(x + y*I);
  if (x > 0) {  /* real part first, imaginary part via 2 Re w Im w = y */
    wre = sqrt(0.5*modulus + 0.5*x);
    wim = scale*fabs((0.5*y)/wre);
    wre *= scale;
  } else {      /* imaginary part first, real part via 2 Re w Im w = y */
    wim = sqrt(0.5*modulus - 0.5*x);
    wre = scale*fabs((0.5*y)/wim);
    wim *= scale;
  }
  /* Im w carries the sign of y */
  return ((y < 0) ? wre - wim*I : wre + wim*I);
}


/* Reciprocal complex square root 1/slm_csqrt(z), 2.35.3; AGN_NAN if the root is zero. */
LUALIB_API agn_Complex slm_crsqrt (agn_Complex z) {  /* 2.35.3 */
  agn_Complex root = slm_csqrt(z);
  if (root == 0.0+I*0.0)
    return AGN_NAN;
  return 1/root;
}


/* Source: https://sourceforge.net/p/mingw-w64/mingw-w64/ci/2bdcae2a1af9ee60c5e9ee1c5d80553295c092b4/tree/mingw-w64-crt/complex/casin.c
 *
 * This file has no copyright assigned and is placed in the Public Domain.
 *
 * casin.c
 * Contributed by Danny Smith
 * 2003-10-20

 MapleV R4:

 > z := a + I*b;

                             z := a + I b

 > convert(arcsin(x), ln);

                                   2 1/2
                       -I ln((1 - x )    + I x)
*/
/* Complex arc sine, 2.11.2: arcsin(z) = -I*ln(sqrt(1 - z^2) + I*z).
   Real arguments with |x| <= 1 are passed to sun_asin instead. */
LUALIB_API agn_Complex tools_casin (agn_Complex z) {  /* 2.11.2 */
  agn_Complex res, w;
  double re, im;
  re = __real__ z;
  im = __imag__ z;
  /* 2.17.5 fix for any imag(z) = 0 and |x| > 1: only |x| <= 1 takes the real shortcut
     (the comparison is false for NaN) */
  if (im == 0 && fabs(re) <= 1) {
    __real__ res = sun_asin(re);  /* we use Sun's implementation since GCC 3.4.6's asin is buggy with 0 + I*b */
    __imag__ res = 0;
    return res;
  }
  /* w = 1 - z^2 */
  __real__ w = 1 - (re - im)*(re + im);
  __imag__ w = -2*re*im;
  /* w = sqrt(1 - z^2) + I*z */
  w = tools_csqrt(w);
  __real__ w -= im;
  __imag__ w += re;
  /* res = -I*ln(w) */
  w = tools_clog(w);
  __real__ res =   __imag__ w;
  __imag__ res = - __real__ w;
  return res;
}


/*
for i from -10 to 10 by 0.1 do
   for j from -10 to 10 by 0.1 do
      xx := arccos((i!j));
      yy := math.cacos((i!j));
      if xx ~<> yy then
         if real(xx) |- real(yy) > 1e-15 or imag(xx) |- imag(yy) > 1e-15 then
            print(i, j, xx, yy, real(xx) |- real(yy), imag(xx) |- imag(yy))
         fi
      fi;
   od
od;
*/

/* based on: https://sourceforge.net/p/mingw-w64/mingw-w64/ci/2bdcae2a1af9ee60c5e9ee1c5d80553295c092b4/tree/mingw-w64-crt/complex/cacos.c
 * This file has no copyright assigned and is placed in the Public Domain.
 * This file is part of the w64 mingw-runtime package.
 * No warranty is given; refer to the file DISCLAIMER.PD within this package. */
/* Complex arc cosine, 2.40.0, computed as PIO2 - tools_casin(z). */
LUALIB_API agn_Complex tools_cacos (agn_Complex z) {  /* 2.40.0 */
  agn_Complex res;
  agn_Complex asn = tools_casin(z);
  __real__ res = PIO2 - __real__ asn;
  __imag__ res = - __imag__ asn;
  return res;
}


/* Complex hyperbolic tangent, 2.11.2, 30 % faster than GCC's ctanh; this is 5 % faster than
   evalc((exp(z) - exp(-z))/(exp(z) + exp(-z))).
   With z = x + I*y: tanh(z) = (sinh(2x) + I*sin(2y))/(cosh(2x) + cos(2y)). */
LUALIB_API agn_Complex tools_ctanh (agn_Complex z) {
  agn_Complex res;
  double x2, y2, den, sin2y, cos2y, sinh2x, cosh2x;
  x2 = 2.0 * __real__ z;
  y2 = 2.0 * __imag__ z;
  sun_sincos(y2, &sin2y, &cos2y);
  sun_sinhcosh(x2, &sinh2x, &cosh2x);
  den = cosh2x + cos2y;
  if (den == 0.0) {  /* pole */
    __real__ res = HUGE_VAL;
    __imag__ res = HUGE_VAL;
    return res;
  }
  if (tools_isinf(den)) {  /* cosh(2x) overflowed: the result is +1 or -1 */
    __real__ res = x2 > 0 ? 1.0 : -1.0;
    __imag__ res = 0.0;
    return res;
  }
  __real__ res = sinh2x/den;
  __imag__ res = sin2y/den;
  return res;
}


/* Source: https://sourceforge.net/p/mingw-w64/mingw-w64/ci/2bdcae2a1af9ee60c5e9ee1c5d80553295c092b4/tree/mingw-w64-crt/complex/catan.c */
/* Complex arc tangent, 2.11.2, 15 percent faster than GCC's catan.
   General case: arctan(z) = -I/2 * ln((x + I*(y + 1))/(-x + I*(1 - y))). */
LUALIB_API agn_Complex tools_catan (agn_Complex z) {
  agn_Complex res;
  double x, y;
  x = __real__ z;
  y = __imag__ z;
  if (y == 0) {  /* 2.11.2, added to prevent round-off errors */
    __real__ res = sun_atan(x);
    __imag__ res = 0;
    return res;
  }
  if (x == 0 && (fabs(y)) == 1) {  /* z = I or z = -I */
    __real__ res = AGN_NAN;  /* modified */
    __imag__ res = AGN_NAN;  /* modified */
    return res;
  }
  if (tools_isinf(sun_hypot(x, y))) {
    __real__ res = (x > 0 ? PIO2 : -PIO2);
    __imag__ res = 0.0;
    return res;
  }
  {
    agn_Complex num, den, lg;
    __real__ den = -x;
    __imag__ den = 1 - y;
    __real__ num = x;
    __imag__ num = y + 1;
    lg = tools_clog(tools_cdiv(num, den));
    /* multiply the logarithm by -I/2 */
    __real__ res = -0.5 * __imag__ lg;
    __imag__ res =  0.5 * __real__ lg;
    if (x == 0 && y > 1)  /* 2.11.2, added to prevent wrong results */
      __real__ res = - __real__ res;
  }
  return res;
}


/* Complex inverse hyperbolic tangent, 2.40.0: arctanh(z) = -I*arctan(I*z). */
LUALIB_API agn_Complex tools_catanh (agn_Complex z) {  /* 2.40.0 */
  agn_Complex rotated = z*I;
  agn_Complex at = tools_catan(rotated);
  return -1.0*I*at;
}

#else  /* PROPCMPLX */

/* Modulus of the complex number a + I*b, 2.21.11; changed 4.5.6 (a very minor tweak only).
   Both components are made non-negative by clearing their sign bits. If one component is zero,
   infinite or NaN the answer is picked directly; otherwise the larger component (judged by the
   high words) is factored out, so the result is max*sqrt(1 + (min/max)^2). */
LUALIB_API double tools_cabs (double a, double b) {
  int32_t hi_a, lo_a, hi_b, lo_b;
  lua_Number ratio;
  EXTRACT_WORDS(hi_a, lo_a, a);
  hi_a &= 0x7fffffff;
  SET_HIGH_WORD(a, hi_a);  /* a = |a| */
  EXTRACT_WORDS(hi_b, lo_b, b);
  hi_b &= 0x7fffffff;
  SET_HIGH_WORD(b, hi_b);  /* b = |b| */
  if (hi_a >= 0x7ff00000 || ((hi_b | lo_b) == 0)) {  /* b == 0 || a in { inf, nan } */
    return a;
  }
  if (hi_b >= 0x7ff00000 || ((hi_a | lo_a) == 0)) {  /* a == 0 || b in { inf, nan } */
    return b;
  }
  if (hi_b > hi_a) { SWAP(a, b, ratio); }  /* make a the larger one */
  ratio = b/a;  /* minimum divided by maximum */
  return a * sqrt(1 + ratio*ratio);
}


/* Reciprocal of the complex number x + I*y, 2.35.3; the result is stored to *re and *im.
   Both components are divided by sun_pytha(x, y) (4.5.6 optimisation; presumably x^2 + y^2).
   For a zero divisor the imaginary part is AGN_NAN; for y == 0 it is +0, because -0 confuses
   the inverse trigonometric functions. */
LUALIB_API void tools_crecip (double x, double y, double *re, double *im) {
  const lua_Number norm = sun_pytha(x, y);
  *re = x/norm;
  if (norm == 0.0) {
    *im = AGN_NAN;
  } else if (y != 0.0) {
    *im = -y/norm;
  } else {
    *im = 0.0;
  }
}


/* Principal square root of the complex number x + I*y, 2.35.2; the result is stored to *re and *im.
   Purely real and purely imaginary arguments are answered directly; all other arguments are
   rescaled first so that the intermediate modulus neither overflows nor underflows. */
LUALIB_API void slm_csqrt (double x, double y, double *re, double *im) {
  double mag, half, factor;
  if (y == 0.0) {  /* argument on the real axis */
    if (x == 0.0) {
      *re = 0; *im = y;
      return;
    }
    mag = sqrt(fabs(x));
    if (x < 0.0) {  /* negative real: root lies on the imaginary axis, keeping the sign of y */
      *re = 0.0; *im = copysign(mag, y);
    } else {
      *re = mag; *im = y;
    }
    return;
  }
  if (x == 0.0) {  /* argument on the imaginary axis */
    mag = sqrt(0.5*fabs(y));
    *re = mag;
    *im = (y > 0) ? mag : -mag;
    return;
  }
  /* Rescale to avoid internal overflow or underflow. */
  if ((fabs(x) > 4.0) || (fabs(y) > 4.0)) {
    factor = 2.0;
    x *= 0.25;
    y *= 0.25;
  } else {
    factor = 7.450580596923828125e-9;  /* 2^-27 */
    x *= 1.8014398509481984e16;  /* 2^54 */
    y *= 1.8014398509481984e16;
  }
  mag = sun_hypot(x, y);
  if (x > 0) {
    half = sqrt(0.5*mag + 0.5*x);
    mag = factor*fabs((0.5*y)/half);
    half *= factor;
  } else {
    mag = sqrt(0.5*mag - 0.5*x);
    half = factor*fabs((0.5*y)/mag);
    mag *= factor;
  }
  *re = half;
  *im = (y < 0) ? -mag : mag;
}


/* Reciprocal square root of the complex number x + I*y, 2.35.3: takes the square root with
   slm_csqrt and inverts it with tools_crecip; the result is stored to *re and *im. */
LUALIB_API void slm_crsqrt (double x, double y, double *re, double *im) {
  double root_re, root_im;
  slm_csqrt(x, y, &root_re, &root_im);
  tools_crecip(root_re, root_im, re, im);
}


/* Natural logarithm of the complex number a + I*b, 2.40.0; the result is stored to *re and *im.
   The real part is half the logarithm of sun_pytha(a, b), which replaced a*a + b*b in 3.16.4 as a
   protection against underflow and overflow; the imaginary part is the argument atan2(b, a).
   For a = b = 0 the real part is AGN_NAN and the imaginary part 0. */
LUALIB_API void tools_clog (double a, double b, double *re, double *im) {
  lua_Number sumsq;
  if (a != 0 || b != 0) {
    sumsq = sun_pytha(a, b);
    *re = sun_ldexp(sun_log(sumsq), -1);  /* halve the logarithm */
    *im = sun_atan2(b, a);
  } else {
    *re = AGN_NAN;
    *im = 0;
  }
}


/* Inverse hyperbolic sine of the complex number a + I*b; the result is stored to *outa (real part)
   and *outb (imaginary part). A purely real argument is delegated to sun_asinh. Otherwise the two
   roots sqrt(a^2 + (b + 1)^2) and sqrt(a^2 + (b - 1)^2) are formed; the logarithm built from their
   half sum yields the real part and the arc sine of their half difference the imaginary part. */
LUALIB_API void tools_casinh (double a, double b, double *outa, double *outb) {
  double asq, bsq, base, root_plus, root_minus, half_plus, half_minus;
  double half_sum, sum_sq, inner, half_diff, ln_part, asin_part;
  if (b == 0) {
    *outa = sun_asinh(a);
    *outb = 0;
    return;
  }
  asq = a*a;
  bsq = b*b;
  base = asq + bsq + 1.0;  /* 3.5.0 tweak */
  root_plus = sqrt(base + 2.0*b);
  root_minus = sqrt(base - 2.0*b);
  half_plus = 0.5*root_plus;    /* 3.5.0 tweak */
  half_minus = 0.5*root_minus;  /* ditto */
  half_sum = half_plus + half_minus;  /* ditto */
  sum_sq = tools_square(half_sum);
  inner = sqrt(sum_sq - 1.0);
  ln_part = tools_csgn(a, b)*sun_log(half_sum + inner);
  half_diff = half_plus - half_minus;  /* 3.5.0 tweak */
  tools_adjust(half_diff, 1, AGN_EPSILON, -1);
  asin_part = sun_asin(half_diff);  /* arcsin is defined in [-1, 1] */
  *outa = copysign(ln_part, a);
  *outb = copysign(asin_part, b);
}


/* Complex arc sine of x + I*y, using two doubles instead of a complex type; the result is stored
   to *outx (real part) and *outy (imaginary part). Computed as casinh(CMPLX(cimag(z), creal(z)))
   with the components of the result swapped back. */
LUALIB_API void tools_casin (double x, double y, double *outx, double *outy) {
  lua_Number sh_re, sh_im;
  tools_casinh(y, x, &sh_re, &sh_im);
  *outx = sh_im;
  *outy = (y == 0 && x > 1) ? -sh_re : sh_re;  /* sign flip for real x > 1: 2.17.6 fix */
}


/* Complex arc cosine of x + I*y, using two doubles instead of a complex type; the result is stored
   to *outx (real part) and *outy (imaginary part). Based on tools_casinh with swapped components;
   the real part is PIO2 minus the arc sine part. */
LUALIB_API void tools_cacos (double x, double y, double *outx, double *outy) {
  lua_Number sh_re, sh_im;
  tools_casinh(y, x, &sh_re, &sh_im);
  *outx = PIO2 - sh_im;
  *outy = (y != 0 || x <= 1) ? -sh_re : sh_re;  /* 2.17.6 fix */
}


/* Complex arc secant of x + I*y, using two doubles instead of a complex type; the result is stored
   to *outx (real part) and *outy (imaginary part). The argument is inverted first and then treated
   like in the arc cosine, apart from the condition that decides on the sign of the imaginary part. */
LUALIB_API void tools_casec (double x, double y, double *outx, double *outy) {
  lua_Number sh_re, sh_im, norm;
  norm = x*x + y*y;
  /* reciprocal of x + I*y */
  x = x/norm;
  y = (-y)/norm;
  tools_casinh(y, x, &sh_re, &sh_im);
  *outx = PIO2 - sh_im;
  *outy = (y != 0 || x > 1) ? -sh_re : sh_re;  /* 2.17.6 fix */
}


/*
Maple V R4 (Maple 7):
> y := a + I*b: x := c + I*d:
> f := evalc(arctan(y, x)):
> readlib(C):  # with(codegen, C):
> C(f, optimized);
*/
/* ! a = y0, b = y1, c = x0, d = x1 !
Rewritten 4.5.5 to fix various issues */
/* Complex two-argument arc tangent arctan(y, x) with y = a + I*b and x = c + I*d. The real and the
   imaginary part of the result are stored to *real and *imag. A few arguments with a = c = 0 are
   answered directly; everything else runs through the optimised expression sequence that Maple
   generated for evalc(arctan(y, x)). */
void tools_catan2 (double a, double b, double c, double d, double *real, double *imag) {
  lua_Number t1, t2, t3, t4, t6, t8, t10, t13, t15, t16, t17, t19,
    t20, t22, t24, t28, t30, t31, t32, t42, t44, t47, t49, t51, t54,
    t60;
  /* shortcuts for the case that both real parts vanish */
  if (a == 0.0 && c == 0.0) {
    if (b == d) {  /* 2.1.8 patch: equal imaginary parts */
      if (b > 0.0) {
        *real = PI*0.25;  /* 2.17.7 optimisation */
        *imag = 0;
        return;
      } else if (b < 0.0) {
        *real = -PI*0.75;  /* 2.17.7 optimisation */
        *imag = 0;
        return;
      } else if (b == 0.0) {  /* all four components are zero */
        *real = 0.0;
        *imag = 0.0;
        return;
      }
    } else if (b == 0.0 && d < 0.0) {  /* 4.5.5 tuning: y = 0, x on the negative imaginary axis */
      *real = PI;
      *imag = 0.0;
      return;
    }
  }
  /* general case: Maple-generated common subexpressions; the order of evaluation matters */
  t1 = d*0.5 + a*0.5;
  t2 = a*a;
  t3 = t2*t2;
  t4 = b*b;
  t6 = c*c;
  t8 = d*d;
  t10 = t4*t4;
  t13 = t6*t6;
  t15 = t8*t8;
  t16 = a*b;
  t17 = c*d;
  t19 = t3 + 2.0*t2*t4 + 2.0*t2*t6 - 2.0*t2*t8 + t10 - 2.0*t4*t6 + 2.0*t4*t8 + t13 + 2.0*t6*t8 + t15 + 8.0*t16*t17;
  t20 = sqrt(t19);
  t22 = sqrt(2.0*(t20 + t2 - t4 + t6 - t8));
  t24 = 1/t20;
  t28 = tools_csgn(2.0*(t16 + t17), -t2 + t4 - t6 + t8);
  t30 = 2.0*(t20 - t2 + t4 - t6 + t8);
  t31 = sqrt(t30);
  t32 = t31*t24;
  t42 = c - b;
  t44 = t28*t28;
  t47 = 1/(t20*0.5 + t2*0.5 - t4*0.5 + t6*0.5 - t8*0.5 + t44*t30/4);
  t49 = d+a;
  t51 = t31*t47;
  t54 = tools_square(t42*t22*t47*0.5 + t49*t28*t51*0.5);
  t60 = tools_square(t49*t22*t47*0.5 - t42*t28*t51*0.5);
  /* real part: an ordinary two-argument arc tangent; imaginary part: minus half the logarithm of t54 + t60 */
  *real = sun_atan2(t1*t22*t24 + (-c*0.5 + b*0.5)*t28*t32, (c*0.5 - b*0.5)*t22*t24 + t1*t28*t32);
  *imag = -0.5*sun_log(t54 + t60);
}
#endif  /* of PROPCMPLX */


/* cpsi.cpp -- Complex psi function.
        Algorithms and coefficient values from "Computation of Special
        Functions", Zhang and Jin, John Wiley and Sons, 1996.

   (C) 2003, C. Bond. All rights reserved.

   Taken from: http://www.crbond.com/math.html, 3.1.4 */

/* Complex psi (digamma) function of z = x + I*y; the real and the imaginary part of the result are
   stored to *re and *im.
   Method (Zhang and Jin): arguments with a negative real part are mirrored to -z and corrected by a
   reflection term at the end; real parts below 8 are shifted upwards by the integer n so that the
   asymptotic series can be used, and the n recurrence terms are subtracted afterwards.
   At the poles (y = 0 and x a non-positive integer) both components are set to HUGE_VAL. */
LUALIB_API void tools_cpsi (double x, double y, double *re, double *im) {
  double x0, x1, psr, psi;
  double q0, q2, rr, ri, th, tn, tm, ct2, term, denom;
  int n, k;
  /* series coefficients -B(2k)/(2k), k = 1 .. 8, with B the Bernoulli numbers */
  static const double a[] = {
    -0.83333333333333333e-01,  /* -1/12, now given to full double precision */
     0.83333333333333333e-02,
    -0.39682539682539683e-02,
     0.41666666666666667e-02,
    -0.75757575757575758e-02,
     0.21092796092796093e-01,
    -0.83333333333333333e-01,
     0.4432598039215686};
  x1 = 0.0;
  n = 0;
  th = 0;  /* to prevent compiler warnings */
  /* pole test: floor() instead of an int cast, which is undefined for x outside the int range */
  if ((y == 0.0) && (x <= 0.0) && (x == floor(x))) {
    *re = HUGE_VAL; *im = HUGE_VAL;
    return;
  }
  if (x < 0.0) {  /* mirror to the right half-plane, remember the original real part */
    x1 = x;
    x = -x;
    y = -y;
  }
  x0 = x;
  if (x < 8.0) {  /* shift the real part upwards for the asymptotic series; 0 <= x < 8 here */
    n = 8 - (int)x;
    x0 = x + n;
  }
  if ((x0 == 0.0) && (y != 0.0))
    th = 0.5*M_PI;
  if (x0 != 0.0)
    th = sun_atan(y/x0);
  q2 = x0*x0 + y*y;
  q0 = sqrt(q2);
  psr = sun_log(q0) - 0.5*x0/q2;
  psi = th + 0.5*y/q2;
  for (k=1; k <= 8; k++) {
    term = a[k-1]*cephes_powi(q2, -k);  /* tuned 3.16.3; shared by both components */
    psr += (term*sun_cos(2.0*k*th));
    psi -= (term*sun_sin(2.0*k*th));
  }
  if (x < 8.0) {  /* undo the shift: subtract the n recurrence terms */
    rr = 0.0;
    ri = 0.0;
    for (k=1; k <= n; k++) {
      denom = tools_square(x0 - k) + y*y;
      rr += ((x0 - k)/denom);
      ri += (y/denom);
    }
    psr -= rr;
    psi += ri;
  }
  if (x1 < 0.0) {  /* reflection term for an originally negative real part */
    tn = sun_tan(M_PI*x);
    tm = sun_tanh(M_PI*y);
    ct2 = tn*tn + tm*tm;
    psr = psr + x/(x*x + y*y) + M_PI*(tn - tn*tm*tm)/ct2;
    psi = psi - y/(x*x + y*y) - M_PI*tm*(1.0 + tn*tn)/ct2;
  }
  *re = psr; *im = psi;
}


/* rsqrt algorithm taken from document:
   FAST COMPENSATED ALGORITHMS FOR THE RECIPROCAL SQUARE ROOT, THE RECIPROCAL HYPOTENUSE, AND GIVENS ROTATIONS
   CARLOS F. BORGES
   DEPARTMENT OF APPLIED MATHEMATICS
   NAVAL POSTGRADUATE SCHOOL
   MONTEREY CA 93943

   This implementation is 30 % SLOWER than the naive 1/sqrt approach; 2.35.3 */
/* Compensated reciprocal square root 1/sqrt(x) after Borges: a first approximation sqrt(1/x) is
   refined by one correction step whose residuals are computed with fused multiply-adds. */
LUALIB_API double tools_invsqrt (double x) {
  const double recip = 1/x;
  const double approx = sqrt(recip);
  const double recip_err = fma(-recip, x, 1);         /* 1 - recip*x */
  const double sq_err = fma(-approx, approx, recip);  /* recip - approx^2 */
  const double corr = 0.5*fma(x, sq_err, recip_err);
  return fma(approx, corr, approx);
}


/* rhypot algorithm taken from document:
   FAST COMPENSATED ALGORITHMS FOR THE RECIPROCAL SQUARE ROOT, THE RECIPROCAL HYPOTENUSE, AND GIVENS ROTATIONS
   CARLOS F. BORGES
   DEPARTMENT OF APPLIED MATHEMATICS
   NAVAL POSTGRADUATE SCHOOL
   MONTEREY CA 93943

   This implementation is 35 % FASTER than the naive 1/hypot approach; 2.35.3; patched 4.5.6 */
/* Compensated reciprocal hypotenuse 1/sqrt(x^2 + y^2) after Borges. Before the computation the sign
   bits of both arguments are cleared and the arguments are ordered so that x carries the larger
   high word (preparation added by awalz). */
LUALIB_API double tools_invhypot (double x, double y) {
  int32_t hi_x, hi_y, hi_tmp;
  double tmp, sq_x, sq_y, sum, sum_err, recip, recip_err, approx, sq_err, corr;
  GET_HIGH_WORD(hi_x, x);
  GET_HIGH_WORD(hi_y, y);
  hi_x &= 0x7fffffff;
  hi_y &= 0x7fffffff;
  if (hi_y > hi_x) {
    SWAP(x, y, tmp);
    SWAP(hi_x, hi_y, hi_tmp);
  }
  SET_HIGH_WORD(x, hi_x);  /* x = |x| */
  SET_HIGH_WORD(y, hi_y);  /* y = |y| */
  sq_x = x*x;
  sq_y = y*y;
  sum = sq_x + sq_y;
  recip = 1/sum;
  approx = sqrt(recip);
  sq_err = fma(-approx, approx, recip);  /* recip - approx^2 */
  /* rounding error of the sum plus the rounding errors of the two squares */
  sum_err = sq_y - (sum - sq_x) + fma(x, x, -sq_x) + fma(y, y, -sq_y);
  recip_err = fma(-recip, sum_err, fma(-recip, sum, 1));
  corr = 0.5*fma(sum, sq_err, recip_err);
  return fma(approx, corr, approx);
}


/* Reciprocal of the sum of squares, 1/(x^2 + y^2), 4.5.6: runs the same compensated computation as
   tools_invhypot and squares the resulting reciprocal hypotenuse. */
LUALIB_API double tools_invpytha (double x, double y) {
  int32_t hi_x, hi_y, hi_tmp;
  double tmp, sq_x, sq_y, sum, sum_err, recip, recip_err, approx, sq_err, corr, rhypot;
  GET_HIGH_WORD(hi_x, x);
  GET_HIGH_WORD(hi_y, y);
  hi_x &= 0x7fffffff;
  hi_y &= 0x7fffffff;
  if (hi_y > hi_x) {
    SWAP(x, y, tmp);
    SWAP(hi_x, hi_y, hi_tmp);
  }
  SET_HIGH_WORD(x, hi_x);  /* x = |x| */
  SET_HIGH_WORD(y, hi_y);  /* y = |y| */
  sq_x = x*x;
  sq_y = y*y;
  sum = sq_x + sq_y;
  recip = 1/sum;
  approx = sqrt(recip);
  sq_err = fma(-approx, approx, recip);  /* recip - approx^2 */
  /* rounding error of the sum plus the rounding errors of the two squares */
  sum_err = sq_y - (sum - sq_x) + fma(x, x, -sq_x) + fma(y, y, -sq_y);
  recip_err = fma(-recip, sum_err, fma(-recip, sum, 1));
  corr = 0.5*fma(sum, sq_err, recip_err);
  rhypot = fma(approx, corr, approx);
  return rhypot*rhypot;
}


/* @(#)e_acos.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __ieee754_acos(x)
 * Method :
 *  acos(x)  = pi/2 - asin(x)
 *  acos(-x) = pi/2 + asin(x)
 * For |x|<=0.5
 *  acos(x) = pi/2 - (x + x*x^2*R(x^2))  (see asin.c)
 * For x>0.5
 *   acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
 *    = 2asin(sqrt((1-x)/2))
 *    = 2s + 2s*z*R(z)   ...z=(1-x)/2, s=sqrt(z)
 *    = 2f + (2c + 2s*z*R(z))
 *     where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
 *     for f so that f+c ~ sqrt(z).
 * For x<-0.5
 *  acos(x) = pi - 2asin(sqrt((1-|x|)/2))
 *    = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
 *
 * Special cases:
 *  if x is NaN, return x itself;
 *  if |x|>1, return NaN with invalid signal.
 *
 * Function needed: sqrt
 */

/* High and low part of Pi/2, followed by the numerator (pS*) and denominator (qS*) coefficients of
   the rational approximation R used by both sun_acos and sun_asin. */
static const double pio2_hi =  1.57079632679489655800e+00;  /* 0x3FF921FB, 0x54442D18 */
static const double pio2_lo =  6.12323399573676603587e-17;  /* 0x3C91A626, 0x33145C07 */
static const double pS0 =  1.66666666666666657415e-01;  /* 0x3FC55555, 0x55555555 */
static const double pS1 = -3.25565818622400915405e-01;  /* 0xBFD4D612, 0x03EB6F7D */
static const double pS2 =  2.01212532134862925881e-01;  /* 0x3FC9C155, 0x0E884455 */
static const double pS3 = -4.00555345006794114027e-02;  /* 0xBFA48228, 0xB5688F3B */
static const double pS4 =  7.91534994289814532176e-04;  /* 0x3F49EFE0, 0x7501B288 */
static const double pS5 =  3.47933107596021167570e-05;  /* 0x3F023DE1, 0x0DFDF709 */
static const double qS1 = -2.40339491173441421878e+00;  /* 0xC0033A27, 0x1C8A2D4B */
static const double qS2 =  2.02094576023350569471e+00;  /* 0x40002AE5, 0x9C598AC8 */
static const double qS3 = -6.88283971605453293030e-01;  /* 0xBFE6066C, 0x1B8D0159 */
static const double qS4 =  7.70381505559019352791e-02;  /* 0x3FB3B8C5, 0xB12E9282 */

LUALIB_API double sun_acos (double x) {  /* as fast as GCC's acos implementation */
  double z, p, q, r, w, s, c, df;
  int32_t hx, lx, ix;
  GET_HIGH_WORD(hx, x);
  ix = hx & 0x7fffffff;
  if (ix > 0x3ff00000) return AGN_NAN;  /* |x| > 1 ? modified */
  GET_LOW_WORD(lx, x);
  if (((ix - 0x3ff00000) | lx) == 0) {  /* |x|==1 */
    if (hx > 0) return 0.0;    /* acos(1) = 0  */
    else return pi + 2.0*pio2_lo;  /* acos(-1)= pi */
  }
  if (ix < 0x3fe00000) {  /* |x| < 0.5 */
    if (ix <= 0x3c600000) return pio2_hi + pio2_lo;/*if|x|<2**-57*/
    z = x*x;
    p = z*(pS0 + z*(pS1 + z*(pS2 + z*(pS3 + z*(pS4 + z*pS5)))));
    q = one + z*(qS1 + z*(qS2 + z*(qS3 + z*qS4)));
    r = p/q;
    return pio2_hi - (x - (pio2_lo - x*r));
  } else if (hx < 0) {    /* x < -0.5 */
    z = (one + x)*0.5;
    p = z*(pS0 + z*(pS1 + z*(pS2 + z*(pS3 + z*(pS4 + z*pS5)))));
    q = one + z*(qS1 + z*(qS2 + z*(qS3 + z*qS4)));
    s = sqrt(z);
    r = p/q;
    w = r*s - pio2_lo;
    return pi - 2.0*(s+w);
  } else {      /* x > 0.5 */
    z = (one - x)*0.5;
    s = sqrt(z);
    df = s;
    SET_LOW_WORD(df, 0);
    c  = (z - df*df)/(s + df);
    p = z*(pS0 + z*(pS1 + z*(pS2 + z*(pS3 + z*(pS4 + z*pS5)))));
    q = one + z*(qS1 + z*(qS2 + z*(qS3 + z*qS4)));
    r = p/q;
    w = r*s + c;
    return 2.0*(df + w);
  }
}


/* @(#)e_asin.c 1.4 96/03/07 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/* __ieee754_asin(x)
 * Method :
 *  Since  asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
 *  we approximate asin(x) on [0,0.5] by
 *    asin(x) = x + x*x^2*R(x^2)
 *  where
 *    R(x^2) is a rational approximation of (asin(x)-x)/x^3
 *  and its Remes error is bounded by
 *    |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
 *
 *  For x in [0.5,1]
 *    asin(x) = pi/2-2*asin(sqrt((1-x)/2))
 *  Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
 *  then for x>0.98
 *    asin(x) = pi/2 - 2*(s+s*z*R(z))
 *      = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
 *  For x<=0.98, let pio4_hi = pio2_hi/2, then
 *    f = hi part of s;
 *    c = sqrt(z) - f = (z-f*f)/(s+f)   ...f+c=sqrt(z)
 *  and
 *    asin(x) = pi/2 - 2*(s+s*z*R(z))
 *      = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
 *      = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
 *
 * Special cases:
 *  if x is NaN, return x itself;
 *  if |x|>1, return NaN with invalid signal.
 *
 */
/* High part of Pi/4, used by sun_asin */
static const double pio4_hi = 7.85398163397448278999e-01;  /* 0x3FE921FB, 0x54442D18 */

/* Arc sine of x, based on fdlibm's __ieee754_asin; according to the original author as fast as
   GCC's asin implementation. Returns AGN_NAN if the high word of |x| lies above that of 1.0 (which
   includes NaN). For |x| < 0.5 the rational approximation is applied to x directly, otherwise to
   (1 - |x|)/2, and the sign of x is restored at the end. */
LUALIB_API double sun_asin (double x) {
  double t, w, num, den, c, r, s;
  int32_t hx, lx, ix;
  GET_HIGH_WORD(hx, x);
  ix = hx & 0x7fffffff;
  if (ix > 0x3ff00000) return AGN_NAN;  /* |x| > 1 ? modified */
  GET_LOW_WORD(lx, x);
  if (((ix - 0x3ff00000) | lx) == 0) {  /* |x| == 1 */
    /* asin(1) = +-pi/2 with inexact */
    return x*pio2_hi + x*pio2_lo;
  }
  if (ix < 0x3fe00000) {  /* |x| < 0.5 */
    if (ix < 0x3e400000) {  /* if |x| < 2**-27 */
      if (huge + x > one) return x;  /* return x with inexact if x != 0 */
      t = 0;  /* avoid compiler warning */
    } else {
      t = x*x;
    }
    num = t*(pS0 + t*(pS1 + t*(pS2 + t*(pS3 + t*(pS4 + t*pS5)))));
    den = one + t*(qS1 + t*(qS2 + t*(qS3 + t*qS4)));
    w = num/den;
    return x + x*w;
  }
  /* 1 > |x| >= 0.5 */
  w = one - fabs(x);
  t = w*0.5;
  num = t*(pS0 + t*(pS1 + t*(pS2 + t*(pS3 + t*(pS4 + t*pS5)))));
  den = one + t*(qS1 + t*(qS2 + t*(qS3 + t*qS4)));
  s = sqrt(t);
  if (ix >= 0x3FEF3333) {  /* if |x| > 0.975 */
    w = num/den;
    t = pio2_hi - (2.0*(s + s*w) - pio2_lo);
  } else {
    w  = s;
    SET_LOW_WORD(w, 0);  /* high part of s */
    c  = (t - w*w)/(s + w);
    r  = num/den;
    num  = 2.0*s*r - (pio2_lo - 2.0*c);
    den  = pio4_hi - 2.0*w;
    t  = pio4_hi - (num - den);
  }
  return (hx > 0) ? t : -t;
}


/* @(#)e_pow.c 1.5 04/04/22 SMI */
/*
 * ====================================================
 * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/* __ieee754_pow(x,y) return x**y
 *
 *          n
 * Method:  Let x =  2   * (1+f)
 *  1. Compute and return log2(x) in two pieces:
 *    log2(x) = w1 + w2,
 *     where w1 has 53-24 = 29 bit trailing zeros.
 *  2. Perform y*log2(x) = n+y' by simulating multi-precision
 *     arithmetic, where |y'|<=0.5.
 *  3. Return x**y = 2**n*exp(y'*log2)
 *
 * Special cases:
 *  1.  (anything) ** 0  is 1
 *  2.  (anything) ** 1  is itself
 *  3.  (anything) ** NAN is NAN
 *  4.  NAN ** (anything except 0) is NAN
 *  5.  +-(|x| > 1) **  +INF is +INF
 *  6.  +-(|x| > 1) **  -INF is +0
 *  7.  +-(|x| < 1) **  +INF is +0
 *  8.  +-(|x| < 1) **  -INF is +INF
 *  9.  +-1         ** +-INF is 1 (historic fdlibm returned NAN; the code below returns one)
 *  10. +0 ** (+anything except 0, NAN)               is +0
 *  11. -0 ** (+anything except 0, NAN, odd integer)  is +0
 *  12. +0 ** (-anything except 0, NAN)               is +INF
 *  13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
 *  14. -0 ** (odd integer) = -( +0 ** (odd integer) )
 *  15. +INF ** (+anything except 0,NAN) is +INF
 *  16. +INF ** (-anything except 0,NAN) is +0
 *  17. -INF ** (anything)  = -0 ** (-anything)
 *  18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
 *  19. (-anything except 0 and inf) ** (non-integer) is NAN
 *
 * Accuracy:
 *  pow(x,y) returns x**y nearly rounded. In particular
 *      pow(integer,integer)
 *  always returns the correct integer provided it is
 *  representable.
 *
 * Constants :
 * The hexadecimal values are the intended ones for the following
 * constants. The decimal values may be used, provided that the
 * compiler will convert from decimal to binary accurately enough
 * to produce the hexadecimal values shown.
 */

/* Tables and constants of sun_pow. The hexadecimal values are the intended ones. */
const double bp[] = {1.0, 1.5,};
const double dp_h[] = { 0.0, 5.84962487220764160156e-01,};  /* 0x3FE2B803, 0x40000000 */
const double dp_l[] = { 0.0, 1.35003920212974897128e-08,};  /* 0x3E4CFDEB, 0x43CFD006 */
const double two53 = 9007199254740992.0;  /* 0x43400000, 0x00000000 */
/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
const double L1 = 5.99999999999994648725e-01;  /* 0x3FE33333, 0x33333303 */
const double L2 = 4.28571428578550184252e-01;  /* 0x3FDB6DB6, 0xDB6FABFF */
const double L3 = 3.33333329818377432918e-01;  /* 0x3FD55555, 0x518F264D */
const double L4 = 2.72728123808534006489e-01;  /* 0x3FD17460, 0xA91D4101 */
const double L5 = 2.30660745775561754067e-01;  /* 0x3FCD864A, 0x93C9DB65 */
const double L6 = 2.06975017800338417784e-01;  /* 0x3FCA7E28, 0x4A454EEF */
const double lg2 = 6.93147180559945286227e-01;  /* 0x3FE62E42, 0xFEFA39EF */
const double lg2_h = 6.93147182464599609375e-01;  /* 0x3FE62E43, 0x00000000 */
const double lg2_l = -1.90465429995776804525e-09;  /* 0xBE205C61, 0x0CA86C39 */
const double ovt = 8.0085662595372944372e-0017;  /* -(1024-log2(ovfl+.5ulp)) */
const double cp = 9.61796693925975554329e-01;  /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
const double cp_h = 9.61796700954437255859e-01;  /* 0x3FEEC709, 0xE0000000 =(float)cp */
const double cp_l = -7.02846165095275826516e-09;  /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h */
const double ivln2 = 1.44269504088896338700e+00;  /* 0x3FF71547, 0x652B82FE =1/ln2 */
const double ivln2_h = 1.44269502162933349609e+00;  /* 0x3FF71547, 0x60000000 =24b 1/ln2 */
const double ivln2_l = 1.92596299112661746887e-08;  /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail */

/* Upper bound (exclusive) for the unbiased binary exponent of y up to which sun_pow, in `optimised`
   mode, computes integral powers by repeated squaring, i.e. |y| < 2^6 = 64. */
#define INTMATHTHRESHOLD 6

/* Returns x**y, based on fdlibm's __ieee754_pow.
   optimised: if non-zero, y is tested for being integral for every x (otherwise for negative x
     only), and integral powers with |y| < 2^INTMATHTHRESHOLD are computed by repeated squaring.
   Deviations from the fdlibm special cases that are visible in the code below:
   - +0 ** 0 returns AGN_NAN, any other x ** 0 returns 1,
   - 1 ** y returns 1 even if y is NaN,
   - +0 ** y returns AGN_NAN or NaN for negative y,
   - (+-1) ** +-inf returns 1. */
LUALIB_API double sun_pow (double x, double y, int optimised) {  /* 1.3 times faster */
  double z, ax, z_h, z_l, p_h, p_l, y1, t1, t2, r, s, t, u, v, w;
  int32_t i, j, k, yisint, n, hx, hy, sx, sy, ix, iy;
  uint32_t lx, ly;
  EXTRACT_WORDS(hx, lx, x);
  EXTRACT_WORDS(hy, ly, y);
  sx = hx & 0x80000000;  /* sign of x */
  ix = hx ^ sx;  /* high word of |x| */
  /* ix = hx & 0x7fffffff;  */
  sy = hy & 0x80000000;  /* sign of y */
  iy = hy ^ sy;  /* high word of |y| */
  /* iy = hy & 0x7fffffff; */ /* |y| */
  /* y==zero: x**0 = 1 */
  if ((iy | ly) == 0)
    /* 0^0 -> nan, else 1; 2.29.5 modification: behave like Maple does */
    return (hx | lx) == 0 ? AGN_NAN : one;
  /* x==1: 1**y = 1, even if y is NaN */
  if (hx == 0x3ff00000 && lx == 0) return one;
    /* y!=zero: result is NaN if either arg is NaN */
  if (ix > 0x7ff00000 || ((ix == 0x7ff00000) && (lx != 0)) ||
      iy > 0x7ff00000 || ((iy == 0x7ff00000) && (ly != 0)))
    return (x + 0.0) + (y + 0.0);
  /* determine if y is an odd int when x < 0
   * yisint = 0  ... y is not an integer
   * yisint = 1  ... y is an odd int
   * yisint = 2  ... y is an even int */
  yisint = 0;
  /* if (hx < 0) { */
  if (sx || optimised) {
    /* we check for integral y either for all x, or for x < 0 only, depending on the switch */
    if (iy >= 0x43400000) yisint = 2;  /* |y| >= 2^53: even integer y */
    else if (iy >= 0x3ff00000) {
      k = (iy >> 20) - 0x3ff;  /* exponent */
      if (k > 20) {  /* the integer part reaches into the low word */
        j = ly >> (52 - k);
        if ((j << (52 - k)) == ly) yisint = 2 - (j & 1);
      } else if (ly == 0) {  /* the integer part lies in the high word only */
        j = iy >> (20 - k);
        if ((j << (20 - k)) == iy) yisint = 2 - (j & 1);
      }
    }
  }
  /* special value of y including integral y */
  if (ly == 0) {
    /* x == 0 && y is a negative integer ? -> return undefined instead of infinity, 2.32.2 fix */
    if ((hx | lx) == 0 && sy) return AGN_NAN;
    if (unlikely(iy == 0x7ff00000)) {  /* y is +-inf, 2.5.15 optimisation */
      if (((ix - 0x3ff00000) | lx) == 0)
        return one;  /* (+-1)**+-inf returns 1 here (the comment inherited from fdlibm said NaN) */
      else if (ix >= 0x3ff00000)  /* (|x|>1)**+-inf = inf,0 */
        /* return (hy >= 0) ? y : zero; */
        return (!sy) ? y : zero;
      else  /* (|x|<1)**-,+inf = inf,0 */
        /* return (hy < 0) ? -y : zero; */
        return (sy) ? -y : zero;
    }
    if (iy == 0x3ff00000) {  /* y is +-1 */
      /* NOTE(review): (hx | hy) == 0 implies hy == 0, so the following condition can never hold;
         +0 ** -1 is already answered by the `(hx | lx) == 0 && sy` test above. */
      if ((hx | hy) == 0 && hy == -0x40100000) return AGN_NAN;  /* x = 0, y = -1 -> 0^(-1) = nan, 2.29.5 modification: behave like Maple does */
      /* return (hy < 0) ? one/x : x; */
      return (sy) ? one/x : x;
    }
    if (hy == 0x40000000) return x*x;   /* y is 2 */
    if (hy == 0x40080000) return x*x*x; /* y is 3 */
    if (hy == 0x40100000) {             /* y is 4 */
      u = x*x;
      return u*u;
    }
    if (hy == 0x3fe00000) {             /* y is 0.5 */
      /* if (hx >= 0) */ /* x >= +0 */
      if (!sx)  /* x >= +0 */
        return sqrt(x);
    }
    if (optimised && yisint) {
      int32_t uy = ((((uint32_t)hy) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
      if (uy < INTMATHTHRESHOLD) {  /* |y| < 2^6 = 64 */
        double r;
        int n = (int)y;
        r = 1.0;
        if (sy) {  /* negative power: work on the reciprocal with the positive exponent */
          x = 1.0/x;
          n = -n;
        }
        /* 2.29.1 change, see https://stackoverflow.com/questions/213042/how-do-you-do-exponentiation-in-c by user kallikak */
        while (n) {  /* square-and-multiply over the bits of n */
          if (n & 1) r *= x;  /* power is odd ? */
          x *= x;
          n >>= 1;
        }
        return r;
      }
    }
  }
  ax = fabs(x);
  /* special value of x */
  if (lx == 0) {
    if (ix == 0x7ff00000 || ix == 0|| ix == 0x3ff00000) {
      z = ax;      /* x is +-0,+-inf,+-1 */
      /* if (hy < 0) z = one/z; */ /* z = (1/|x|) */
      if (sy) z = one/z;  /* y < 0, z = (1/|x|) */
      /* if (hx < 0) { */ /* x < 0 */
      if (sx) {  /* x < 0 */
        if (((ix - 0x3ff00000) | yisint) == 0) {
          z = (z - z)/(z - z);  /* (-1)**non-int is NaN */
        } else if (yisint == 1)
          z = -z;  /* (x<0)**odd = -(|x|**odd) */
      } else if (hx == 0) {  /* x = 0; 2.29.5 modification: behave like Maple does */
        if (hy <= 0) z = (z - z)/(z - z);  /* 0^(y <= 0) -> nan */
      }
      return z;
    }
  }
  /* CYGNUS LOCAL + fdlibm-5.3 fix: This used to be
  n = (hx >> 31) + 1;
       but ANSI C says a right shift of a signed negative quantity is implementation defined.  */
  n = ((uint32_t)hx >> 31) - 1;  /* 0 for negative x, -1 otherwise */
  /* (x<0)**(non-int) is NaN */
  if ((n | yisint) == 0) return (x - x)/(x - x);
  s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
  if ((n | (yisint - 1)) == 0) s = -one; /* (-ve)**(odd int) */
  /* |y| is huge */
  if (iy > 0x41e00000) { /* if |y| > 2**31 */
    if (iy > 0x43f00000) {  /* if |y| > 2**64, must o/uflow */
      if (ix <= 0x3fefffff) return (hy < 0) ? huge*huge : tiny*tiny;
      if (ix >= 0x3ff00000) return (hy > 0) ? huge*huge : tiny*tiny;
    }
    /* over/underflow if x is not close to one */
    if (ix < 0x3fefffff) return (hy < 0) ? s*huge*huge : s*tiny*tiny;
    if (ix > 0x3ff00000) return (hy > 0) ? s*huge*huge : s*tiny*tiny;
    /* now |1-x| is tiny <= 2**-20, suffice to compute
     log(x) by x-x^2/2+x^3/3-x^4/4 */
    t = ax - one;    /* t has 20 trailing zeros */
    w = (t*t)*(0.5 - t*(0.3333333333333333333333 - t*0.25));
    u = ivln2_h*t;  /* ivln2_h has 21 sig. bits */
    v = t*ivln2_l - w*ivln2;
    t1 = u + v;
    SET_LOW_WORD(t1, 0);
    t2 = v - (t1 - u);
  } else {
    double ss, s2, s_h, s_l, t_h, t_l;
    n = 0;
    /* take care subnormal number */
    if (ix < 0x00100000) {
      ax *= two53; n -= 53; GET_HIGH_WORD(ix, ax);
    }
    n += ((ix) >> 20) - 0x3ff;
    j = ix & 0x000fffff;
    /* determine interval */
    ix = j | 0x3ff00000;    /* normalise ix */
    if (j <= 0x3988E) k = 0;    /* |x|<sqrt(3/2) */
    else if (j < 0xBB67A) k = 1;  /* |x|<sqrt(3)   */
    else {
      k = 0; n += 1; ix -= 0x00100000;
    }
    SET_HIGH_WORD(ax, ix);
    /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
    u = ax - bp[k];    /* bp[0]=1.0, bp[1]=1.5 */
    v = one/(ax + bp[k]);
    ss = u*v;
    s_h = ss;
    SET_LOW_WORD(s_h, 0);
    /* t_h=ax+bp[k] High */
    t_h = zero;
    SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18));
    t_l = ax - (t_h - bp[k]);
    s_l = v*((u - s_h*t_h) - s_h*t_l);
    /* compute log(ax) */
    s2 = ss*ss;
    r = s2*s2*(L1 + s2*(L2 + s2*(L3 + s2*(L4 + s2*(L5 + s2*L6)))));
    r += s_l*(s_h + ss);
    s2 = s_h*s_h;
    t_h = 3.0 + s2 + r;
    SET_LOW_WORD(t_h, 0);
    t_l = r - ((t_h - 3.0) - s2);
    /* u+v = ss*(1+...) */
    u = s_h*t_h;
    v = s_l*t_h + t_l*ss;
    /* 2/(3log2)*(ss+...) */
    p_h = u + v;
    SET_LOW_WORD(p_h, 0);
    p_l = v - (p_h - u);
    z_h = cp_h*p_h;    /* cp_h+cp_l = 2/(3*log2) */
    z_l = cp_l*p_h + p_l*cp + dp_l[k];
    /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
    t = (double)n;
    t1 = (((z_h + z_l) + dp_h[k]) + t);
    SET_LOW_WORD(t1, 0);
    t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
  }
  /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
  y1  = y;
  SET_LOW_WORD(y1, 0);
  p_l = (y - y1)*t1 + y*t2;
  p_h = y1*t1;
  z = p_l + p_h;
  EXTRACT_WORDS(j, i, z);
  if (j >= 0x40900000) {  /* z >= 1024 */
    if (((j - 0x40900000) | i) != 0)  /* if z > 1024 */
      return s*huge*huge;  /* overflow */
    else {
      if (p_l + ovt > z - p_h) return s*huge*huge;  /* overflow */
    }
  } else if ((j & 0x7fffffff) >= 0x4090cc00 ) {  /* z <= -1075 */
    if (((j - 0xc090cc00) | i) != 0)  /* z < -1075 */
      return s*tiny*tiny;  /* underflow */
    else {
      if (p_l <= z - p_h) return s*tiny*tiny;  /* underflow */
    }
  }
  /* compute 2**(p_h + p_l) */
  i = j & 0x7fffffff;
  k = (i >> 20) - 0x3ff;
  n = 0;
  if (i > 0x3fe00000) {  /* if |z| > 0.5, set n = [z+0.5] */
    n = j + (0x00100000 >> (k + 1));
    k = ((n & 0x7fffffff) >> 20) - 0x3ff;  /* new k for n */
    t = zero;
    SET_HIGH_WORD(t, n &~ (0x000fffff >> k));
    n = ((n & 0x000fffff) | 0x00100000) >> (20 - k);
    if (j < 0) n = -n;
    p_h -= t;
  }
  t = p_l + p_h;
  SET_LOW_WORD(t, 0);
  u = t*lg2_h;
  v = (p_l - (t - p_h))*lg2 + t*lg2_l;
  z = u + v;
  w = v - (z - u);
  t  = z*z;
  t1  = z - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5))));
  r  = (z*t1)/(t1 - two) - (w + z*w);
  z  = one - (r - z);
  GET_HIGH_WORD(j, z);
  j += (n << 20);  /* scale by 2**n through the exponent field */
  if ((j >> 20) <= 0) z = sun_ldexp(z, n);  /* subnormal output */
  else SET_HIGH_WORD(z, j);
  return s*z;
}


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * ceil(x)
 * Return x rounded toward +inf to integral value
 * Method:
 *  Bit twiddling.
 * Exception:
 *  Inexact flag raised if x not equal to ceil(x).
 */

/* Returns the smallest integral value that is not less than x (rounding towards +infinity), by
   manipulating the two 32-bit words of the double directly.
   - Integral x is returned unchanged, infinity and NaN are returned as x + x.
   - For -1 < x < 0 the result is -0, for 0 < x < 1 it is 1.
   - The additions `huge + x` exist to raise the inexact flag for non-integral x (huge is a
     file-level constant defined elsewhere).
   The carry from the low into the high word is computed in unsigned arithmetic, since the signed
   form `i1 + (1 << (52 - j0))` can overflow, which is undefined behaviour in C. */
LUALIB_API double sun_ceil (double x) {
  int32_t i0, i1, j0;
  uint32_t i, j;
  EXTRACT_WORDS(i0, i1, x);
  j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (j0 < 20) {  /* integer part in high x */
    if (j0 < 0) {  /* |x| < 1, raise inexact if x != 0 */
      if (huge + x > 0.0) {  /* return 0*sign(x) if |x| < 1 */
        if (i0 < 0) { i0 = 0x80000000; i1 = 0; }  /* negative: -0 */
        else if ((i0 | i1) != 0) { i0 = 0x3ff00000; i1 = 0; }  /* positive and non-zero: 1 */
      }
    } else {
      i = (0x000fffff) >> j0;  /* mask of the fraction bits in the high word */
      if (((i0 & i) | i1) == 0) return x;  /* x is integral */
      if (huge + x > 0.0) {                /* raise inexact flag */
        if (i0 > 0) i0 += (0x00100000) >> j0;  /* positive x: step up to the next integer */
        i0 &= (~i); i1 = 0;
      }
    }
  } else if (j0 > 51) {  /* no fraction part */
    if (j0 == 0x400) return x + x;  /* inf or NaN */
    else return x;                  /* x is integral */
  } else {  /* fraction part in low x */
    i = ((uint32_t)(0xffffffff)) >> (j0 - 20);  /* mask of the fraction bits in the low word */
    if ((i1 & i) == 0) return x;    /* x is integral */
    if (huge + x > 0.0) {           /* raise inexact flag */
      if (i0 > 0) {
        if (j0 == 20) i0 += 1;
        else {
          j = (uint32_t)i1 + ((uint32_t)1 << (52 - j0));  /* unsigned: wraps instead of overflowing */
          if (j < (uint32_t)i1) i0 += 1;  /* got a carry */
          i1 = j;
        }
      }
      i1 &= (~i);
    }
  }
  INSERT_WORDS(x, i0, i1);
  return x;
}


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
 * floor(x)
 * Return x rounded toward -inf to integral value
 * Method:
 *  Bit twiddling.
 * Exception:
 *  Inexact flag raised if x not equal to floor(x).
 */

/* Returns the largest integral value that is not greater than x (rounding towards -infinity), by
   manipulating the two 32-bit words of the double directly.
   - Integral x is returned unchanged, infinity and NaN are returned as x + x.
   - For 0 <= x < 1 the result is +0, for -1 < x < 0 it is -1; -0 stays -0.
   - The additions `huge + x` exist to raise the inexact flag for non-integral x (huge is a
     file-level constant defined elsewhere).
   The carry from the low into the high word is computed in unsigned arithmetic, since the signed
   form `i1 + (1 << (52 - j0))` can overflow, which is undefined behaviour in C. */
LUALIB_API double sun_floor (double x) {
  int32_t i0, i1, j0;
  uint32_t i, j;
  EXTRACT_WORDS(i0, i1, x);
  j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (j0 < 20) {             /* integer part in high x */
    if (j0 < 0) {            /* |x| < 1, raise inexact if x != 0 */
      if (huge + x > 0.0) {  /* return 0*sign(x) if |x|<1 */
        if (i0 >= 0) {  /* non-negative: +0 */
          i0 = i1 = 0;
        } else if (((i0 & 0x7fffffff) | i1) != 0) {  /* negative and non-zero: -1 */
          i0 = 0xbff00000; i1 = 0;
        }
      }
    } else {
      i = (0x000fffff) >> j0;  /* mask of the fraction bits in the high word */
      if (((i0 & i) | i1) == 0) return x;  /* x is integral */
      if (huge + x > 0.0) {                /* raise inexact flag */
        if (i0 < 0) i0 += (0x00100000 >> j0);  /* negative x: step down to the next integer */
        i0 &= (~i); i1 = 0;
      }
    }
  } else if (j0 > 51) {  /* no fraction part */
    if (j0 == 0x400) return x + x;  /* inf or NaN */
    else return x;                  /* x is integral */
  } else {  /* fraction part in low x */
    i = ((uint32_t)(0xffffffff)) >> (j0 - 20);  /* mask of the fraction bits in the low word */
    if ((i1 & i) == 0) return x;   /* x is integral */
    if (huge + x > 0.0) {          /* raise inexact flag */
      if (i0 < 0) {
        if (j0 == 20) i0 += 1;
        else {
          j = (uint32_t)i1 + ((uint32_t)1 << (52 - j0));  /* unsigned: wraps instead of overflowing */
          if (j < (uint32_t)i1) i0 += 1;  /* got a carry */
          i1 = j;
        }
      }
      i1 &= (~i);
    }
  }
  INSERT_WORDS(x, i0, i1);
  return x;
}


/* Rounds x to the next integral towards 0. If x is integral, just returns x. 4 % slower than GCC's trunc,
   15 % faster than a (x < 0) ? ceil(x) : floor(x) macro. Contraction of sun_floor and sun_ceil. 2.29.2,
   patched 2.29.3 */
/* Returns x rounded toward zero to an integral value; integral x, inf and NaN (as x + x) are passed
   through. Works on the two 32-bit words of the double. The `huge + x > 0.0` tests only serve to
   raise the inexact flag. */
LUALIB_API double sun_trunc (double x) {
  int32_t hx, lx, j0;
  uint32_t i;
  EXTRACT_WORDS(hx, lx, x);
  j0 = ((hx >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: returns math.exponent(x) - 1, except 0 -> -1023 */
  if (j0 < 0) {  /* |x| < 1 */
    /* Keep only the sign bit. This must not depend on hx being non-zero: positive subnormals whose
       set bits all live in the low word have hx == 0 and would otherwise be returned unchanged
       instead of +0. For x = +-0 the assignment is a no-op. */
    if (huge + x > 0.0) {   /* raise inexact if x != 0 */
      hx &= 0x80000000;     /* return 0*sign(x) if |x| < 1 */
      lx = 0;
    }
  } else if (j0 < 20) {  /* integer part in high x */
    i = (0x000fffff) >> j0;  /* fraction bits located in the high word */
    if (((hx & i) | lx) == 0) return x;  /* x is integral */
    if (huge + x > 0.0) {                /* raise inexact flag */
      hx &= (~i); lx = 0;
    }
  } else if (j0 < 52) {  /* fraction part in low x */
    i = ((uint32_t)(0xffffffff)) >> (j0 - 20);  /* fraction bits located in the low word */
    if ((lx & i) == 0) return x;  /* x is integral */
    if (huge + x > 0.0) {         /* raise inexact flag */
      lx &= (~i);
    }
  } else {  /* j0 > 51: no fraction part */
    if (j0 == 0x400) return x + x;  /* inf or NaN */
    else return x;                  /* x is integral */
  }
  INSERT_WORDS(x, hx, lx);
  return x;
}


/* Returns ((b) != 0) ? (tools_sign((a))*tools_sign((b))*sun_floor(fabs((a)/(b)))) : (AGN_NAN); more than 6 % faster than luai_numintdiv macro; tuned by 10 % 2.29.5 */
/* Integer division: divides x by y and chops the fractional bits off the quotient, i.e. rounds the
   quotient toward zero. Yields AGN_NAN for y = 0; an infinite or NaN quotient is handed back as is. */
LUALIB_API double sun_intdiv (double x, double y) {
  int32_t hi, lo, expo;
  uint32_t fracmask;
  if (y == 0.0) return AGN_NAN;
  x /= y;
  EXTRACT_WORDS(hi, lo, x);
  expo = ((hi >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent of the quotient */
  if (expo < 0) {
    /* magnitude below 1: only the sign survives */
    INSERT_WORDS(x, hi & 0x80000000, 0);
    return x;
  }
  if (expo < 20) {
    /* the integer part ends inside the high word */
    fracmask = (0x000fffff) >> expo;
    if (((hi & fracmask) | lo) != 0) {  /* there are fraction bits to clear */
      INSERT_WORDS(x, hi & (~fracmask), 0);
    }
    return x;
  }
  if (expo <= 51) {
    /* the integer part ends inside the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    if ((lo & fracmask) != 0) {  /* there are fraction bits to clear */
      INSERT_WORDS(x, hi, lo & (~fracmask));
    }
    return x;
  }
  /* no fraction bits at all; inf/NaN are left untouched */
  if (unlikely(expo != 0x400)) {
    x *= one;  /* create necessary signal */
  }
  return x;
}


/* Computes both the integer quotient and the integer remainder of the number a divided by the number b. 2.29.5 */
/* #include "lobject.h"
   #define ispow2(x) (!(!(int64_t)(x)) & !((int64_t)(x) & ((int64_t)(x) - 1))) */
/* Returns the integer quotient of x and y (the quotient with its fraction bits chopped off) and
   stores the matching remainder x - q*y in *remainder. If y is zero or either argument has a
   fractional part, both results are AGN_NAN, mimicking Maple's irem and iquo.
   A fast path for power-of-two denominators was tried and dropped as it slowed the function down
   significantly. */
LUALIB_API double sun_iqr (double x, double y, double *remainder) {
  int32_t hi, lo, expo;
  uint32_t fracmask;
  double quot;
  if (y == 0.0 || tools_isfrac(x) || tools_isfrac(y) ) {
    *remainder = AGN_NAN;
    return AGN_NAN;
  }
  /* both operands are integral from here on */
  quot = x/y;
  EXTRACT_WORDS(hi, lo, quot);
  expo = ((hi >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent of the quotient */
  if (expo < 0) {
    /* magnitude below 1: quotient is +-0 */
    INSERT_WORDS(quot, hi & 0x80000000, 0);
  } else if (expo < 20) {
    /* integer part ends inside the high word */
    fracmask = (0x000fffff) >> expo;
    if (((hi & fracmask) | lo) != 0) {
      INSERT_WORDS(quot, hi & (~fracmask), 0);
    }
  } else if (likely(expo <= 51)) {
    /* integer part ends inside the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    if ((lo & fracmask) != 0) {
      INSERT_WORDS(quot, hi, lo & (~fracmask));
    }
  } else if (likely(expo != 0x400)) {
    /* no fraction bits and neither inf nor NaN */
    quot *= one;  /* create necessary signal */
  }
  *remainder = x - quot*y;  /* bit operations are in no way faster */
  return quot;
}


/* Adapted for Newlib, 2009.  (Allow for int < 32 bits; return *quo=0 during
 * errors to make test scripts easier.)
 * Taken from file: newlib-4.2.0.20211231/newlib/libm/common/s_remquo.c, modified.
 * @(#)e_fmod.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
FUNCTION
  <<remquo>>, ---remainder and part of quotient

INDEX
  remquo

SYNOPSIS
  double sun_remquo(double <[x]>, double <[y]>, int *<[quo]>);

DESCRIPTION
  This function works like sun_iqr; it does NOT return the following:

  Deactivated GCC mode: <<remquo>> computes the same remainder as the <<remainder>>
  functions; this value is in the range -<[y]>/2 ... +<[y]>/2.  In the object pointed to
  by <<quo>> it stores a value whose sign is the sign of <<x>>/<<y>> and whose magnitude
  is congruent modulo 2**n to the magnitude of the integral quotient of <<x>>/<<y>>.
  (That is, <<quo>> is given the n lsbs of the quotient, not counting the sign.)  This
  implementation uses n=31 if int is 32 bits or more, otherwise, n is 1 less than the
  width of int.

  Deactivated GCC mode: For example:
  . remquo(-29.0, 3.0, &<[quo]>)
  returns -1.0 and sets <[quo]>=10, and
  . remquo(-98307.0, 3.0, &<[quo]>)
  returns -0.0 and sets <[quo]>=-32769, although for 16-bit int, <[quo]>=-1.
  In the latter case, the actual quotient of -(32769=0x8001) is reduced to -1
  because of the 15-bit limitation for the quotient.

RETURNS
  Deactivated GCC mode: When either argument is NaN, NaN is returned.  If <[y]> is 0 or
  <[x]> is infinite (and neither is NaN), a domain error occurs (i.e. the "invalid"
  floating point exception is raised or errno is set to EDOM), and NaN is returned.
  Otherwise, <<remquo>> returns <[x]> REM <[y]>, which is different from sun_iqr !

BUGS
  IEEE754-2008 calls for <<remquo>>(subnormal, inf) to cause the "underflow"
  floating-point exception.  This implementation does not.

SPEED
  In `Maple mode` the function is as fast as sun_iqr, and in deactivated `GCC mode`
  ten percent faster than GCC's built-in remquo.

PORTABILITY
  C99, POSIX. */
/* Deactivated GCC mode: For the quotient, return either all 31 bits that can come from the calculation
 * (using int32_t), or as many as can fit into an int that is smaller than 32 bits. */

/* QUO_MASK is ANDed onto a computed quotient before it is returned: it keeps the low 31 bits when
   int is wider than 32 bits, and INT_MAX (all value bits of an int) otherwise. */
#if INT_MAX > 0x7FFFFFFFL
  #define QUO_MASK 0x7FFFFFFF
# else
  #define QUO_MASK INT_MAX
#endif

/*
 * Return the IEEE remainder and set *quo to the last n bits of the
 * quotient, rounded to the nearest integer.  We choose n=31--if that many fit--
 * because we wind up computing all the integer bits of the quotient anyway as
 * a side-effect of computing the remainder by the shift and subtract
 * method.  In practice, this is far more bits than are needed to use
 * remquo in reduction algorithms.
 */
/* #define REMQUOGCCMODE 1 */

/* Shift-and-subtract remainder of x and y that also delivers quotient bits in *quo.
   Two variants are selected at compile time by REMQUOGCCMODE:
   - undefined ("Maple mode"): the result of the fixed-point fmod loop is returned with the sign
     of x re-applied; no round-to-nearest correction is made and the code path contains no
     subnormal normalisation;
   - defined ("GCC mode"): subnormals are normalised and the result is corrected to the IEEE
     remainder (nearest multiple of y).
   In both variants *quo receives the accumulated quotient bits masked with QUO_MASK and negated if
   x and y differ in sign. For y = 0, non-finite x or NaN y, *quo is set to 0 and (x*y)/(x*y) is
   returned. */
LUALIB_API double sun_remquo (double x, double y, int *quo) {
  int32_t n, hx, hy, hz, ix, iy, sx;
#ifdef REMQUOGCCMODE
  int32_t i;
#endif
  uint32_t lx, ly, lz, q, sxy;
  EXTRACT_WORDS(hx, lx, x);
  EXTRACT_WORDS(hy, ly, y);
#ifndef REMQUOGCCMODE
  /* NOTE: the leading `0 &&` disables this check, so non-integral arguments are NOT rejected here */
  if (0 && (tools_isfracwords(hx, lx) || tools_isfracwords(hy, ly))) {  /* behave like Maple's irem and iquo functions */
    *quo = 0;
    return AGN_NAN;
  }
#endif
  sxy = (hx ^ hy) & 0x80000000;  /* non-zero if x and y have different signs */
  sx = hx & 0x80000000;  /* sign of x */
  hx ^= sx;  /* |x| */
  hy &= 0x7fffffff;  /* |y| */
  /* purge off exception values */
  if ((hy | ly) == 0 || (hx >= 0x7ff00000) ||  /* y=0,or x not finite */
    ((hy | ((ly | -ly) >> 31)) > 0x7ff00000))  {  /* or y is NaN */
    *quo = 0;  /* Not necessary, but return consistent value */
    return (x*y)/(x*y);
  }
  if (hx <= hy) {
    if ((hx < hy) || (lx < ly)) {
      q = 0;
      goto fixup;  /* |x|<|y| return x or x-y */
    }
    if (lx == ly) {
      *quo = (sxy) ? -1 : 1;
      return Zero[(uint32_t)sx >> 31];  /* |x|=|y| return x*0 */
    }
  }
#ifdef REMQUOGCCMODE
  /* determine ix = ilogb(x) */
  if (hx < 0x00100000) {  /* subnormal x */
    if (hx == 0) {
      for (ix = -1043, i=lx; i > 0; i <<= 1) ix -= 1;
    } else {
      for (ix = -1022, i=(hx << 11); i > 0; i <<= 1) ix -= 1;
    }
  } else ix = (hx >> 20) - 1023;
  /* determine iy = ilogb(y) */
  if (hy < 0x00100000) {  /* subnormal y */
    if (hy == 0) {
      for (iy = -1043, i=ly; i > 0; i <<= 1) iy -=1;
    } else {
      for (iy = -1022, i=(hy << 11); i > 0; i <<= 1) iy -= 1;
    }
  } else iy = (hy >> 20) - 1023;
  /* set up {hx,lx}, {hy,ly} and align y to x */
  if (ix >= -1022)
    hx = 0x00100000 | (0x000fffff & hx);
  else {  /* subnormal x, shift x to normal */
    n = -1022 - ix;
    if (n <= 31) {
      hx = (hx << n) | (lx >> (32 - n));
      lx <<= n;
    } else {
      hx = lx << (n - 32);
      lx = 0;
    }
  }
  if (iy >= -1022)
    hy = 0x00100000 | (0x000fffff & hy);
  else {  /* subnormal y, shift y to normal */
    n = -1022 - iy;
    if (n <= 31) {
      hy = (hy << n) | (ly >> (32 - n));
      ly <<= n;
    } else {
      hy = ly << (n - 32);
      ly = 0;
    }
  }
#else
  /* this variant takes the exponents straight from the exponent fields and always sets the
     implicit mantissa bit; there is no special treatment of subnormal operands */
  /* determine ix = ilogb(x) */
  ix = (hx >> 20) - 1023;
  /* determine iy = ilogb(y) */
  iy = (hy >> 20) - 1023;
  /* set up {hx,lx}, {hy,ly} and align y to x */
  hx = 0x00100000 | (0x000fffff & hx);
  hy = 0x00100000 | (0x000fffff & hy);
#endif
  /* fix point fmod */
  n = ix - iy;
  q = 0;
  /* one quotient bit per exponent step: subtract y's mantissa if it fits, then double the rest */
  while (n--) {
    hz = hx - hy; lz = lx - ly;
    if (lx < ly) hz -= 1;  /* borrow from the high word */
    if (hz < 0) { hx = hx + hx + (lx >> 31); lx = lx + lx; }
    else { hx = hz + hz + (lz >> 31); lx = lz + lz; q++; }
    q <<= 1;
  }
  /* last quotient bit, without the final doubling */
  hz = hx - hy; lz = lx - ly;
  if (lx < ly) hz -= 1;
  if (hz >= 0) { hx = hz; lx = lz; q++; }
    /* convert back to floating value and restore the sign */
  if ((hx | lx) == 0) {  /* return sign(x)*0 */
    q &= QUO_MASK;
    *quo = (sxy) ? -q : q;
    return Zero[(uint32_t)sx >> 31];
  }
  while (hx < 0x00100000) {  /* normalize x */
    hx = hx + hx + (lx >> 31); lx = lx + lx;
    iy -= 1;
  }
#ifdef REMQUOGCCMODE
  if (iy >= -1022) {  /* normalize output */
    hx = ((hx - 0x00100000) | ((iy + 1023) << 20));
  } else {  /* subnormal output */
    n = -1022 - iy;
    if (n <= 20) {
      lx = (lx >> n) | ((uint32_t)hx << (32 - n));
      hx >>= n;
    } else if (n <= 31) {
      lx = (hx << (32 - n)) | (lx >> n); hx = sx;
    } else {
      lx = hx >> (n - 32); hx = sx;
    }
  }
#else
  /* re-insert the biased exponent; no subnormal-output branch in this variant */
  hx = ((hx - 0x00100000) | ((iy + 1023) << 20));
#endif
fixup:
#ifdef REMQUOGCCMODE
  /* round to nearest: move the remainder into [-|y|/2, |y|/2], ties going to an even quotient */
  INSERT_WORDS(x, hx, lx);
  y = fabs(y);
  if (y < 0x1p-1021) {
    if (x + x > y || (x + x == y && (q & 1))) {
      q++;
      x -= y;
    }
  } else if (x > 0.5*y || (x == 0.5*y && (q & 1))) {
    q++;
    x -= y;
  }
#else
  /* return Maple iquo and irem results, not GCC's. */
  INSERT_WORDS(x, hx ^ sx, lx);  /* put the sign of the original x back */
#endif
  /* with non-integral x or y, the many quotients would be wrong */
  q &= QUO_MASK;
  *quo = (sxy) ? -q : q;
  return x;
}


/* Adapted for Newlib, 2009.  (Allow for int < 32 bits.)
 * Taken from file: newlib-4.2.0.20211231/newlib/libm/common/s_remquo.c, modified.
 * @(#)e_fmod.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
FUNCTION
  <<sun_quotient>>, ---integral quotient

INDEX
  quo

SYNOPSIS
  double sun_quotient(double <[x]>, double <[y]>);

DESCRIPTION
  This function works like sun_iqr but returns the integer quotient only. x, y should be
  integral, otherwise the function returns undefined.

BUGS
  IEEE754-2008 calls for <<quo>>(subnormal, inf) to cause the "underflow" floating-point
  exception. This implementation does not.

SPEED
  Ten percent slower than sun_intdiv.

PORTABILITY
  C99, POSIX. */
/*
 * Return quotient set to the last n bits, rounded to the nearest integer. We choose n=31
 * - if that many fit - because we wind up computing all the integer bits of the quotient
 * anyway. In practice, this is far more bits than are needed.
 */

/* Returns the integer quotient of x and y, computed with the same shift-and-subtract scheme as
   sun_remquo in its non-GCC variant. Returns AGN_NAN if either argument has a fractional part, and
   (x*y)/(x*y) for y = 0, non-finite x or NaN y. Only the quotient bits selected by QUO_MASK are
   returned, with the sign taken from the signs of x and y.

   The borrow from the low into the high mantissa word must be propagated on every comparison:
   integral doubles of magnitude >= 2^21 do carry bits in the low word, so lx < ly can occur.
   Without the borrow e.g. 4194305/2097153 yields 2 instead of 1. */
LUALIB_API double sun_quotient (double x, double y) {
  int32_t n, hx, hy, hz, ix, iy, sx;
  uint32_t lx, ly, lz, q, sxy;
  EXTRACT_WORDS(hx, lx, x);
  EXTRACT_WORDS(hy, ly, y);
  if (tools_isfracwords(hx, lx) || tools_isfracwords(hy, ly)) {  /* behave like Maple's irem and iquo functions */
    return AGN_NAN;  /* we cannot check for floats later !!! */
  }
  sxy = (hx ^ hy) & 0x80000000;  /* non-zero if x and y have different signs */
  sx = hx & 0x80000000;  /* sign of x */
  hx ^= sx;  /* |x| */
  hy &= 0x7fffffff;  /* |y| */
  /* purge off exception values */
  if ((hy | ly) == 0 || (hx >= 0x7ff00000) ||  /* y=0,or x not finite */
    ((hy | ((ly | -ly) >> 31)) > 0x7ff00000))  {  /* or y is NaN */
    return (x*y)/(x*y);
  }
  if (hx <= hy) {  /* with integral x, y we have both hx <= hy and hx > hy */
    if ((hx < hy) || (lx < ly)) {
      q = 0;
      goto fixup;  /* |x| < |y|: quotient is zero */
    }
    if (lx == ly) {  /* |x| = |y| */
      return (sxy) ? -1.0 : 1.0;
    }
  }
  /* determine ix = ilogb(x) */
  ix = (hx >> 20) - 1023;
  /* determine iy = ilogb(y) */
  iy = (hy >> 20) - 1023;
  /* set up {hx,lx}, {hy,ly} and align y to x */
  hx = 0x00100000 | (0x000fffff & hx);
  hy = 0x00100000 | (0x000fffff & hy);
  /* fix point fmod */
  n = ix - iy;
  q = 0;
  /* one quotient bit per exponent step: subtract y's mantissa if it fits, then double the rest */
  while (n--) {
    hz = hx - hy; lz = lx - ly;
    if (lx < ly) hz -= 1;  /* borrow from the high word */
    if (hz < 0) { hx = hx + hx + (lx >> 31); lx = lx + lx; }
    else { hx = hz + hz + (lz >> 31); lx = lz + lz; q++; }
    q <<= 1;
  }
  /* last quotient bit; the borrow is needed here as well */
  hz = hx - hy;
  if (lx < ly) hz -= 1;
  q += (hz >= 0);
fixup:
  /* return Maple iquo result, not GCC's. */
  /* with non-integral x or y, the many quotients would be wrong */
  q &= QUO_MASK;
  return (sxy) ? -(double)q : (double)q;
}


/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/* ilogb(double x)
 * return the binary exponent of x
 * ilogb(+-0) = undefined (formerly FP_ILOGB0)
 * ilogb(+-inf) = HUGE_VAL (formerly INT_MAX)
 * ilogb(NaN) = AGN_NAN (formerly FP_ILOGBNAN, no signal is raised)
 */
/* Returns the unbiased binary exponent of x as a double:
     +-0    -> AGN_NAN (undefined)
     +-inf  -> HUGE_VAL
     NaN    -> AGN_NAN
   Subnormals are handled by counting the leading zero bits of the mantissa.
   The result for +-inf must not depend on the platform's FP_ILOGBNAN value (which may be INT_MAX
   or INT_MIN): since this function no longer returns FP_ILOGBNAN, infinity is always tested for. */
LUALIB_API double sun_ilogb (double x) {
  int32_t hx, ix;
  uint32_t lx;
  GET_HIGH_WORD(hx, x);
  hx &= 0x7fffffff;       /* |x| */
  if (hx < 0x00100000) {  /* zero or subnormal ? */
    GET_LOW_WORD(lx, x);
    if ((hx | lx) == 0) return AGN_NAN;  /* 2.11.1 fix, +-0, ilogb(0) = undefined */
    /* subnormal x */
    ix = -1043;
    if (hx != 0) {
      ix = -1022;
      lx = ((uint32_t)hx << 11);
    }
    /* each leading zero mantissa bit makes exponent smaller; lx is non-zero here, so the top bit
       is reached eventually. Done on an unsigned word to avoid shifting into a sign bit. */
    for (; (lx & 0x80000000u) == 0; lx <<= 1) ix--;
    return ix;
  }
  if (hx < 0x7ff00000)  /* normal x */
    return (hx >> 20) - 1023;
  GET_LOW_WORD(lx, x);
  if (hx == 0x7ff00000 && lx == 0)  /* +-inf */
    return HUGE_VAL;  /* 2.11.1 fix */
  /* NaN */
  return AGN_NAN;  /* 2.11.1 fix, FP_ILOGBNAN; */
}


/* Taken from https://chromium.googlesource.com/external/github.com/kripken/emscripten/+/refs/tags/1.37.5/system/lib/libc/musl/src/math/ilogbl.c
   The SunPro implementation is the same. */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Variant for platforms where long double has the format of double (LDBL_MANT_DIG == 53):
   the argument is simply passed on to sun_ilogb. */
LUALIB_API long double tools_ilogbl (long double x) {
  return sun_ilogb(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
LUALIB_API long double tools_ilogbl (long double x) {
  union ldshape u = {x};
  uint64_t m = u.i.m;
  int e = u.i.se & 0x7fff;
  if (!e) {
    if (m == 0) return AGN_NAN;   /* awalz, changed */
    /* subnormal x */
    for (e = -0x3fff + 1; m >> 63 == 0; e--, m <<= 1);
    return e;
  }
  if (e == 0x7fff) {
    return m << 1 ? AGN_NAN : HUGE_VAL;  /* dito */
  }
  return (double)(e - 0x3fff);
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
LUALIB_API long double tools_ilogbl (long double x) {
  union ldshape u = {x};
  int e = u.i.se & 0x7fff;
  if (!e) {
    if (x == 0) return AGN_NAN;  /* dito */
    /* subnormal x */
    x *= 0x1p120;
    return tools_ilogbl(x) - 120;
  }
  if (e == 0x7fff) {
    u.i.se = 0;
    return u.f ? AGN_NAN : HUGE_VAL;  /* dito */
  }
  return (double)(e - 0x3fff);
}
#endif


/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 *      Base 2 logarithm, long double precision
 *
 *
 * SYNOPSIS:
 *
 * long double x, y, log2l();
 *
 * y = log2l( x );
 *
 *
 * DESCRIPTION:
 *
 * Returns the base 2 logarithm of x.
 *
 * The argument is separated into its exponent and fractional
 * parts.  If the exponent is between -1 and +1, the (natural)
 * logarithm of the fraction is approximated by
 *
 *     log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).
 *
 * Otherwise, setting  z = 2(x-1)/x+1),
 *
 *     log(x) = z + z**3 P(z)/Q(z).
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE      0.5, 2.0     30000      9.8e-20     2.7e-20
 *    IEEE     exp(+-10000)  70000      5.4e-20     2.3e-20
 *
 * In the tests over the interval exp(+-10000), the logarithms
 * of the random arguments were uniformly distributed over
 * [-10000, +10000].
 */

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Variant for platforms where long double has the format of double (LDBL_MANT_DIG == 53):
   forwards to the double-precision log2. */
LUALIB_API long double tools_log2l (long double x) {
  return log2(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients for ln(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 6.2e-22
 */
/* Numerator coefficients P (7 values, evaluated as a degree-6 polynomial below). */
static const long double log2lP[] = {
   4.9962495940332550844739E-1L,
   1.0767376367209449010438E1L,
   7.7671073698359539859595E1L,
   2.5620629828144409632571E2L,
   4.2401812743503691187826E2L,
   3.4258224542413922935104E2L,
   1.0747524399916215149070E2L,
};
/* Denominator coefficients Q; the leading coefficient 1.0 is implied and therefore commented out. */
static const long double log2lQ[] = {
/* 1.0000000000000000000000E0,*/
   2.3479774160285863271658E1L,
   1.9444210022760132894510E2L,
   7.7952888181207260646090E2L,
   1.6911722418503949084863E3L,
   2.0307734695595183428202E3L,
   1.2695660352705325274404E3L,
   3.2242573199748645407652E2L,
};

/* Coefficients for log(x) = z + z^3 P(z^2)/Q(z^2),
 * where z = 2(x-1)/(x+1)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 6.16e-22
 */
static const long double log2lR[4] = {
   1.9757429581415468984296E-3L,
  -7.1990767473014147232598E-1L,
   1.0777257190312272158094E1L,
  -3.5717684488096787370998E1L,
};

/* Only three initialisers are given for the four slots (the leading 1.0 is implied and commented
   out), so the last element is zero; the evaluator below is called with N = 3. */
static const long double log2lS[4] = {
/* 1.00000000000000000000E0L,*/
  -2.6201045551331104417768E1L,
   1.9361891836232102174846E2L,
  -4.2861221385716144629696E2L,
};
/* log2(e) - 1 */
/* NOTE(review): the constant this comment refers to (presumably LOG2EA, used below) is not defined
   in this part of the file - confirm where it lives. SQRTH comes from elsewhere as well. */

/* Base-2 logarithm for the 80-bit extended format.
   Returns x unchanged for NaN and +infinity, -1/(x*x) for x = 0 and 0/0.0f for negative x. */
LUALIB_API long double tools_log2l (long double x) {
  long double y, z;
  int e;
  if (tools_fpisnanl(x)) return x;
  if (x == INFINITY) return x;
  if (x <= 0.0) {
    if (x == 0.0) return -1/(x*x); /* -inf with divbyzero */
    return 0/0.0f; /* nan with invalid */
  }
  /* separate mantissa from exponent */
  /* Note, frexp is used so that denormal numbers will be handled properly. */
  x = frexpl(x, &e);
  /* logarithm using log(x) = z + z**3 P(z)/Q(z), where z = 2(x-1)/x+1) */
  if (e > 2 || e < -2) {
    if (x < SQRTH) {  /* 2(2x-1)/(2x+1) */
      e -= 1;
      z = x - 0.5;
      y = 0.5*z + 0.5;
    } else {  /* 2 (x-1)/(x+1) */
      z = x - 0.5;
      z -= 0.5;
      y = 0.5*x + 0.5;
    }
    x = z/y;
    z = x*x;
    /* the two branches differ only in the names of the polynomial evaluators used */
#ifndef __ARMCPU
    y = x*(z*__polevll(z, log2lR, 3)/__p1evll(z, log2lS, 3));
#else
    y = x*(z*polevl(z, log2lR, 3)/p1evl(z, log2lS, 3));
#endif
    goto done;
  }
  /* logarithm using log(1+x) = x - .5x**2 + x**3 P(x)/Q(x) */
  if (x < SQRTH) {
    e -= 1;
    x = 2.0*x - 1.0;
  } else {
    x = x - 1.0;
  }
  z = x*x;
#ifndef __ARMCPU
  y = x*(z*__polevll(x, log2lP, 6)/__p1evll(x, log2lQ, 7));
#else
  y = x*(z*polevl(x, log2lP, 6)/p1evl(x, log2lQ, 7));
#endif
  y = y - 0.5*z;
done:
  /* Multiply log of fraction by log2(e) and base 2 exponent by 1
   * ***CAUTION*** This sequence of operations is critical and it may be horribly defeated by some compiler optimizers. */
  /* do not reorder or merge the following additions */
  z = y*LOG2EA;
  z += x*LOG2EA;
  z += y;
  z += x;
  z += e;
  return z;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile */
long double tools_log2l (long double x) {
  return log2(x);
}
#endif


/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 *      Common logarithm, long double precision
 *
 *
 * SYNOPSIS:
 *
 * long double x, y, log10l();
 *
 * y = log10l( x );
 *
 *
 * DESCRIPTION:
 *
 * Returns the base 10 logarithm of x.
 *
 * The argument is separated into its exponent and fractional
 * parts.  If the exponent is between -1 and +1, the logarithm
 * of the fraction is approximated by
 *
 *     log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).
 *
 * Otherwise, setting  z = 2(x-1)/x+1),
 *
 *     log(x) = z + z**3 P(z)/Q(z).
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE      0.5, 2.0     30000      9.0e-20     2.6e-20
 *    IEEE     exp(+-10000)  30000      6.0e-20     2.3e-20
 *
 * In the tests over the interval exp(+-10000), the logarithms
 * of the random arguments were uniformly distributed over
 * [-10000, +10000].
 *
 * ERROR MESSAGES:
 *
 * log singularity:  x = 0; the 80-bit implementation below returns -infinity (not MINLOG)
 * log domain:       x < 0; the 80-bit implementation below returns NaN (not MINLOG)
 */

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* Variant for platforms where long double has the format of double (LDBL_MANT_DIG == 53):
   forwards to the double-precision log10. */
LUALIB_API long double tools_log10l (long double x) {
	return log10(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 6.2e-22
 */
/* Numerator coefficients P (7 values, evaluated as a degree-6 polynomial below). */
static const long double log10lP[] = {
   4.9962495940332550844739E-1L,
   1.0767376367209449010438E1L,
   7.7671073698359539859595E1L,
   2.5620629828144409632571E2L,
   4.2401812743503691187826E2L,
   3.4258224542413922935104E2L,
   1.0747524399916215149070E2L,
};

/* Denominator coefficients Q; the leading coefficient 1.0 is implied and therefore commented out. */
static const long double log10lQ[] = {
/* 1.0000000000000000000000E0,*/
   2.3479774160285863271658E1L,
   1.9444210022760132894510E2L,
   7.7952888181207260646090E2L,
   1.6911722418503949084863E3L,
   2.0307734695595183428202E3L,
   1.2695660352705325274404E3L,
   3.2242573199748645407652E2L,
};

/* Coefficients for log(x) = z + z^3 P(z^2)/Q(z^2),
 * where z = 2(x-1)/(x+1)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 6.16e-22
 */
static const long double log10lR[4] = {
   1.9757429581415468984296E-3L,
  -7.1990767473014147232598E-1L,
   1.0777257190312272158094E1L,
  -3.5717684488096787370998E1L,
};

/* Only three initialisers are given for the four slots (the leading 1.0 is implied and commented
   out), so the last element is zero; the evaluator below is called with N = 3. */
static const long double log10lS[4] = {
/* 1.00000000000000000000E0L,*/
  -2.6201045551331104417768E1L,
   1.9361891836232102174846E2L,
  -4.2861221385716144629696E2L,
};

/* Each constant is split into two parts (A + B) that are multiplied in separately at the end. */
/* log10(2) */
#define L102A 0.3125L
#define L102B -1.1470004336018804786261e-2L
/* log10(e) */
#define L10EA 0.5L
#define L10EB -6.5705518096748172348871e-2L

/* Base-10 logarithm for the 80-bit extended format.
   Returns x unchanged for NaN, -1.0/(x*x) for x = 0, (x - x)/0.0 for negative x and INFINITY
   for +infinity. SQRTH and the polynomial evaluators are defined elsewhere. */
LUALIB_API long double tools_log10l (long double x) {
	long double y, z;
	int e;
	if (tools_fpisnanl(x)) return x;
	if (x <= 0.0) {
		if (x == 0.0) return -1.0/(x*x);
    return (x - x)/0.0;
  }
  if (x == INFINITY) return INFINITY;
  /* separate mantissa from exponent. Note, frexp is used so that denormal numbers will be handled properly. */
  x = frexpl(x, &e);
  /* logarithm using log(x) = z + z**3 P(z)/Q(z), where z = 2(x-1)/x+1) */
  if (e > 2 || e < -2) {
    if (x < SQRTH) {  /* 2(2x-1)/(2x+1) */
      e -= 1;
      z = x - 0.5;
      y = 0.5*z + 0.5;
    } else {  /*  2 (x-1)/(x+1)   */
      z = x - 0.5;
      z -= 0.5;
      y = 0.5*x  + 0.5;
    }
    x = z/y;
    z = x*x;
    /* the two branches differ only in the names of the polynomial evaluators used */
#ifndef __ARMCPU
    y = x*(z*__polevll(z, log10lR, 3)/__p1evll(z, log10lS, 3));
#else
    y = x*(z*polevl(z, log10lR, 3)/p1evl(z, log10lS, 3));
#endif
    goto done;
  }
  /* logarithm using log(1+x) = x - .5x**2 + x**3 P(x)/Q(x) */
  if (x < SQRTH) {
    e -= 1;
    x = 2.0*x - 1.0;
  } else {
    x = x - 1.0;
  }
  z = x*x;
#ifndef __ARMCPU
  y = x*(z*__polevll(x, log10lP, 6)/__p1evll(x, log10lQ, 7));
#else
  y = x*(z*polevl(x, log10lP, 6)/p1evl(x, log10lQ, 7));
#endif
  y = y - 0.5*z;
done:
  /* Multiply log of fraction by log10(e) and base 2 exponent by log10(2).
   * ***CAUTION*** This sequence of operations is critical and it may be horribly defeated by some compiler optimizers. */
  /* the B parts of the split constants are added first, then the A parts; do not reorder */
  z = y*(L10EB);
  z += x*(L10EB);
  z += e*(L102B);
  z += y*(L10EA);
  z += x*(L10EA);
  z += e*(L102A);
  return z;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile */
LUALIB_API long double log10l (long double x) {
  return log10(x);
}
#endif


/* @(#)s_logb.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * double logb(x) = entier(log2(x)) = floor(log2(x)); Contrary to ilog2, returns -infinity with x = 0.
 * IEEE 754 logb. Included to pass IEEE test suite. Not recommended. Use ilogb instead.
 */
/* IEEE 754 logb: the unbiased binary exponent of x as a double. Gives -infinity for x = 0 and
   x*x for infinity or NaN. Not faster than GCC's logb. */
LUALIB_API double sun_logb (double x) {
  int32_t lo, hi;
  int32_t bias = 1023;
  EXTRACT_WORDS(hi, lo, x);
  hi &= 0x7fffffff;  /* high word of |x| */
  if ((hi | lo) == 0) return -1.0/fabs(x);     /* x == 0 */
  if (unlikely(hi >= 0x7ff00000)) return x*x;  /* inf, nan */
  if (unlikely(hi < 0x00100000)) {
    /* subnormal: scale into the normal range and account for the scaling in the bias */
    x *= two54;
    GET_HIGH_WORD(hi, x);
    hi &= 0x7fffffff;
    bias += 54;
  }
  return (double) ((hi >> 20) - bias);
}


/* @(#)s_cbrt.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * Optimized by Bruce D. Evans.
 *
 * cbrt(x)
 * Return cube root of x
 */
/* Offsets added to the high word after the integer division by 3 that yields the first rough
   estimate: B1 for normal arguments, B2 for subnormal arguments that were scaled by 2**54 first. */
static const uint32_t
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */

/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
static const double
PP0 =  1.87595182427177009643,    /* 0x3ffe03e6, 0x0f61e692 */
PP1 = -1.88497979543377169875,    /* 0xbffe28e0, 0x92f02420 */
PP2 =  1.621429720105354466140,    /* 0x3ff9f160, 0x4a49d6c2 */
PP3 = -0.758397934778766047437,    /* 0xbfe844cb, 0xbee751d9 */
PP4 =  0.145996192886612446982;    /* 0x3fc2b000, 0xd4e4edd7 */

/* Returns the cube root of x. NaN and infinity are returned as x + x, zero is returned unchanged.
   Three stages: a rough estimate from integer arithmetic on the high word, a polynomial refinement
   to about 23 bits, and one Newton step to full precision. */
LUALIB_API double sun_cbrt (double x) {
  int32_t hx;
  union {
    double value;
    uint64_t bits;
  } u;  /* gives access to all 64 bits of t for the rounding step below */
  double r, s, t = 0.0, w;
  uint32_t sign, high, low;
  EXTRACT_WORDS(hx, low, x);
  sign = hx & 0x80000000;  /* sign = sign(x) */
  hx ^= sign;              /* high word of |x| */
  if (unlikely(hx >= 0x7ff00000)) return x + x;  /* cbrt(NaN,INF) is itself, 2.5.15 optimisation */
  /*
   * Rough cbrt to 5 bits:
   *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
   * where e is integral and >= 0, m is real and in [0, 1), and "/" and
   * "%" are integer division and modulus with rounding towards minus
   * infinity.  The RHS is always >= the LHS and has a maximum relative
   * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
   * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
   * floating point representation, for finite positive normal values,
   * ordinary integer divison of the value in bits magically gives
   * almost exactly the RHS of the above provided we first subtract the
   * exponent bias (1023 for doubles) and later add it back.  We do the
   * subtraction virtually to keep e >= 0 so that ordinary integer
   * division rounds towards minus infinity; this is also efficient.
   */
  if (hx < 0x00100000) {      /* zero or subnormal? */
    if ((hx | low) == 0)
      return x;              /* cbrt(0) is itself */
    SET_HIGH_WORD(t, 0x43500000);  /* set t= 2**54 */
    t *= x;                        /* scale the subnormal into the normal range */
    GET_HIGH_WORD(high, t);
    INSERT_WORDS(t, sign | ((high & 0x7fffffff)/3 + B2), 0);
  } else
    INSERT_WORDS(t, sign | (hx/3 + B1), 0);
  /*
   * New cbrt to 23 bits:
   *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
   * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
   * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
   * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
   * gives us bounds for r = t**3/x.
   *
   * Try to optimize for parallel evaluation as in k_tanf.c.
   */
  r = (t*t)*(t/x);
  t = t*((PP0 + r*(PP1 + r*PP2)) + ((r*r)*r)*(PP3 + r*PP4));
  /*
   * Round t away from zero to 23 bits (sloppily except for ensuring that
   * the result is larger in magnitude than cbrt(x) but not much more than
   * 2 23-bit ulps larger).  With rounding towards zero, the error bound
   * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
   * in the rounded t, the infinite-precision error in the Newton
   * approximation barely affects third digit in the final error
   * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
   * before the final error is larger than 0.667 ulps.
   */
  u.value = t;
  u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;  /* add 2**31, then clear the low 30 bits */
  t = u.value;
  /* one step Newton iteration to 53 bits with error < 0.667 ulps */
  s = t*t;        /* t*t is exact */
  r = x/s;        /* error <= 0.5 ulps; |r| < |t| */
  w = t + t;      /* t+t is exact */
  r = (r - t)/(w + r);  /* r-t is exact; w+r ~= 3*t */
  t = t + t*r;    /* error <= 0.5 + 0.5/3 + epsilon */
  return t;
}


/* Processes int's ONLY. 20 % faster than the `iqr` function. d must not be 0 !
   Returns both quotient = n / d and remainder = math.modulus(n, d) for integers n and d. If n is non-negative and the computed
   remainder n % d is negative, then quotient = quotient + 1 and remainder = remainder - d. Likewise, if n is negative and the
   computed remainder is positive, then quotient = quotient - 1 and remainder = remainder + d. */
/* Integer quotient and remainder of n by d in one call; d must not be 0. A remainder whose sign
   contradicts the sign of n is corrected together with the quotient. 2.11.3 */
LUALIB_API div_t tools_div (int n, int d) {
  div_t res;
  int q = n/d;
  int m = n%d;
  if (n >= 0) {
    if (m < 0) {  /* non-negative n with negative remainder */
      q++;
      m -= d;
    }
  } else if (m > 0) {  /* negative n with positive remainder */
    q--;
    m += d;
  }
  res.quot = q;
  res.rem = m;
  return res;
}


/* Taken from: https://graphics.stanford.edu/~seander/bithacks.html
   four percent faster than graphics.stanford.edu's version without table look-up
   'Find the log base 2 of an N-bit integer in O(lg(N)) operations'. */
static const int MultiplyDeBruijnBitPosition[32] =  {
  0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
  8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
};

LUALIB_API int tools_uintlog2 (uint32_t v) {  /* 2.14.0 */
  v |= v >> 1;  /* first round down to one less than a power of 2 */
  v |= v >> 2;
  v |= v >> 4;
  v |= v >> 8;
  v |= v >> 16;
  return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27];
}


/* Fast approximation of the base-2 logarithm of a float, derived from its bit pattern.
   NOTE(review): no special handling of x <= 0, subnormals, inf or nan is visible here - confirm that callers only
   pass positive finite values. */
LUALIB_API float tools_lbfast (float x) {  /* 2.14.6, 18 percent faster than sun_log. */
  ieee_float_shape_type p, q;
  p.value = x;  /* gives access to the raw bits of x through p.word */
  q.word = (p.word & 0x007FFFFF) | 0x3f000000;  /* keep the 23 mantissa bits of x and force the exponent field to 0x7E, so q.value lies in [0.5, 1) */
  x = (float)p.word * 1.1920928955078125e-7f;  /* raw bit pattern of x scaled by 2^-23 */
  return x - 124.22551499f - 1.498030302f * q.value  - 1.72587999f / (0.3520887068f + q.value);
  /* multiply result by 0.69314718f = (float)LN2 to get natural logarithm ln(x) */
}

/* Population count (popcount): returns the number of one bits in x.
   The portable fallback has been taken from http://aggregate.org/MAGIC/; with GCC the compiler built-in is used. */
LUALIB_API uint32_t tools_onebits (register uint32_t x) {
#ifdef __GNUC__
  const int ones = __builtin_popcount(x);  /* 2.14.12 */
  return ones;
#else
  x = x - ((x >> 1) & 0x55555555);                 /* 2-bit partial sums */
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);  /* 4-bit partial sums */
  x = (x + (x >> 4)) & 0x0f0f0f0f;                 /* 8-bit partial sums */
  x = x + (x >> 8);
  x = x + (x >> 16);
  return x & 0x0000003f;  /* the total is at most 32 and fits into the lowest six bits */
#endif
}


/* Reverses the order of all 32 bits (not bytes) in n: bit 0 becomes bit 31, bit 1 becomes bit 30, and so on.
   The accumulator is shifted BEFORE the next input bit is appended, so that exactly 32 bits are collected and the
   lowest input bit ends up in bit 31 of the result. An unsigned accumulator is used so that no signed shift
   overflow can occur. */
LUALIB_API uint32_t tools_flip (register uint32_t n) {
  uint32_t r = 0;
  unsigned int i;
  for (i = 0; i < CHAR_BIT * sizeof(uint32_t); i++) {
    r = (r << 1) | (n & 1u);  /* make room first, then take over the current lowest bit of n */
    n >>= 1;
  }
  return r;
}

/* Passes n through tools_flip and then shifts all trailing zero bits out of the flipped value, 2.11.3
   Source: https://stackoverflow.com/questions/10343730/strip-leading-zeroes-in-binary-number-in-c-c
   The flipped value is kept in an unsigned variable: with a signed int, a set top bit would be replicated by
   each right shift (sign extension on common compilers) and ruin the result.
   If the flipped value is 0 there is no one bit to stop at, so 0 is returned instead of looping forever. */
LUALIB_API int tools_remtrailzeros (register uint32_t n) {
  uint32_t flipped = tools_flip(n);
  if (flipped == 0) return 0;
  /* shift to remove trailing zeroes */
  while (!(flipped & 1u))
    flipped >>= 1;
  return (int)flipped;
}


/* Count number of leading zeros (clz32). 2.11.3
   Source: http://aggregate.org/MAGIC/#Leading%20Zero%20Count
   x is smeared to the right so that every bit below its highest set bit is set, too; the number of leading zeros
   is then the word size minus the number of one bits. The smeared value is additionally stored to *y.
   For x = 0 the function returns 32 and stores 0 to *y. */
#define WORDBITSUINT32   (CHAR_BIT*sizeof(uint32_t))  /* WORDBITS is the number of bits in a word, usually 32 here */
LUALIB_API uint32_t tools_clz32 (register uint32_t x, uint32_t *y) {  /* 2.16.8 fix */
  unsigned int step;
  for (step = 1; step < WORDBITSUINT32; step <<= 1)  /* shifts by 1, 2, 4, 8 and 16 */
    x |= (x >> step);
  *y = x;
#ifdef __GNUC__
  return (WORDBITSUINT32 - __builtin_popcount(x));
#else
  return (WORDBITSUINT32 - tools_onebits(x));
#endif
}


/* Count number of trailing zeros (ctz32). 2.14.10
   Source: musl-1.1.22/src/internal/atomic.h
   The lowest set bit of x is isolated and handed to tools_clz32; its position follows from the number of leading
   zeros in front of it. *y receives whatever tools_clz32 stores for the isolated bit, i.e. that bit with all
   lower bits filled in. For x = 0 the function returns 32. */
LUALIB_API uint32_t tools_ctz32 (register uint32_t x, uint32_t *y) {  /* 2.16.8 fix */
  const uint32_t lowest = x & -x;
  const uint32_t lead = tools_clz32(lowest, y);  /* called for x = 0 as well, so that *y is always written */
  if (x == 0)
    return WORDBITSUINT32;
  return WORDBITSUINT32 - (1 + lead);
}


/* Count of trailing zero bits, portable variant for compilers other than GCC; returns 32 for x = 0.
   Taken from:
   https://stackoverflow.com/questions/18622130/how-to-count-trailing-zeros-in-a-binary-representation-of-an-integer-number-with
   by user DosMan; reported to be the fastest portable version on the net, with __builtin_ctz still faster. 2.38.2;
   the GCC alias to the built-in version is in agnhlps.h */
#ifndef __GNUC__
LUALIB_API int tools_ctz (register uint32_t x) {
  uint32_t upper, count;
  if (x == 0) return WORDBITSUINT32;
  count = 0;
  x &= ~x + 1;  /* clears all but the lowest bit of x, changed 2.38.2 */
  /* binary search for the remaining bit: halve the window as long as the bit sits in the upper part */
  upper = x >> 16;
  if (upper != 0) { x = upper; count += 16; }
  upper = x >> 8;
  if (upper != 0) { x = upper; count += 8; }
  upper = x >> 4;
  if (upper != 0) { x = upper; count += 4; }
  /* x is now 1, 2, 4 or 8; the expression maps it to 0, 1, 2 or 3 */
  return count + ((x >> 1) - (x >> 3));
}
#endif


/* Returns the one-based position of the most significant set bit of x (1 for bit 0, 32 for bit 31), or 0 if
   x is 0. Taken from
   https://stackoverflow.com/questions/671815/what-is-the-fastest-most-efficient-way-to-find-the-highest-set-bit-msb-in-an-i,
   7 percent faster than the former implementation; use uint8_t, not uint32_t 2.38.2 */
LUALIB_API uint8_t tools_msb (register uint32_t x) {
#if 0 && defined(__GNUC__) && defined(__INTEL)  /* there is no gain in speed with the asm call */
  register int bitpos = 0;
  asm ("bsrl %1,%0": "+r" (bitpos):"r" (x));  /* bit scan reverse, _most_ significant set bit (1 bit) */
  return (x != 0)*(bitpos + 1);
#else
  uint8_t pos;
  if (x == 0) return 0;
  pos = 1;
  /* binary search: whenever the upper half of the current window is occupied, continue there */
  if (x >> 16) { x >>= 16; pos += 16; }
  if (x >> 8)  { x >>= 8;  pos += 8;  }
  if (x >> 4)  { x >>= 4;  pos += 4;  }
  if (x >> 2)  { x >>= 2;  pos += 2;  }
  if (x >> 1)  pos += 1;
  return pos;
#endif
}

/* Returns the one-based position of the least significant set bit of x, or 0 if x is 0;
   __builtin_ffsll or a non-table-based solution are not faster */
LUALIB_API uint8_t tools_lsb (register uint32_t x) {  /* 2.12.3, least-significant bit  */
  uint32_t lowest;
  if (x == 0) return 0;
  lowest = x & (~x + 1);  /* keeps only the lowest set bit of x */
  return tools_uintlog2(lowest) + 1;
}


/* number of bits to consider in a number */
#ifndef LUA_NBITS
#define LUA_NBITS  32
#endif

/* A lua_Unsigned with its first LUA_NBITS bits equal to 1. (Shift must be made in two parts to avoid problems
   when LUA_NBITS is equal to the number of bits in a lua_Unsigned.) */
#ifndef ALLONES
#define ALLONES    (~(((~(uint32_t)0UL) << (LUA_NBITS - 1)) << 1UL))
#endif

/* macro to trim extra bits */
#ifndef trim
#define trim(x)    ((x) & ALLONES)
#endif

/* Arithmetic shift for unsigned 4-byte integers, blend of Lua 5.3.5 C functions src/lbitlib.c b_arshift & b_shift, 2.21.7
   x is shifted n bits to the RIGHT; a negative n shifts to the LEFT by -n bits.
   - n = 0: x is returned unchanged.
   - n < 0: left shift, vacant bits on the right are filled with zeros; 0 if all bits are shifted out.
   - n > 0 and the top bit of x is clear: plain right shift filling with zeros; 0 if n >= LUA_NBITS.
   - n > 0 and the top bit of x is set: right shift filling the vacant bits on the left with ones; ALLONES if
     n >= LUA_NBITS.
   The shift direction is decided before any shift is issued, so no shift by a negative count and no negation of
   the smallest int32_t can take place. */
LUALIB_API uint32_t tools_uarshift32 (uint32_t x, int32_t n) {
  if (n == 0) return x;
  if (n < 0) {  /* left shift by -n */
    if (n <= -(LUA_NBITS)) return 0;  /* everything shifted out; also covers n = INT32_MIN without negating it */
    return trim(x << -n);
  }
  if (!(x & ((uint32_t)1UL << (LUA_NBITS - 1)))) {  /* top bit clear: a logical right shift is sufficient */
    return (n >= LUA_NBITS) ? 0 : (trim(x) >> n);
  }
  /* top bit set: right shift that replicates the top bit */
  if (n >= LUA_NBITS) return ALLONES;
  return trim((x >> n) | ~(trim(~(uint32_t)0) >> n));  /* add signal bit */
}


/* Rotates the 32 bits of n to the left by c positions; c is reduced modulo 32, so any count is accepted.
   See: https://stackoverflow.com/questions/776508/best-practices-for-circular-shift-rotate-operations-in-c, 2.27.0 */
LUALIB_API uint32_t tools_rotl32 (uint32_t n, unsigned int c) {
  const unsigned int lastbit = CHAR_BIT*sizeof(n) - 1;
  const unsigned int up = c & lastbit;
  const unsigned int down = (0u - up) & lastbit;  /* complementary count, 0 if up is 0, hence never a shift by 32 */
  return (n << up) | (n >> down);
}


/* Rotates the 32 bits of n to the right by c positions; c is reduced modulo 32, so any count is accepted. 2.27.0 */
LUALIB_API uint32_t tools_rotr32 (uint32_t n, unsigned int c) {
  const unsigned int lastbit = CHAR_BIT*sizeof(n) - 1;
  const unsigned int down = c & lastbit;
  const unsigned int up = (0u - down) & lastbit;  /* complementary count, 0 if down is 0, hence never a shift by 32 */
  return (n >> down) | (n << up);
}


/* Right shift of a signed value carried out on its unsigned counterpart. Taken from Rupert Tombs' saru.c,
   MIT LICENSE Copyright (c) 2020 Rupert Tombs; taken from:
   https://github.com/Rupt/c-arithmetic-right-shift; see also:
   https://stackoverflow.com/questions/7622/are-the-shift-operators-arithmetic-or-logical-in-c
   The cast to the unsigned type forces the underlying `>>` to be a LOGICAL shift; for negative m the n vacated
   top bits are then filled with ones again, so the value returned is that of an ARITHMETIC right shift of m.
   The macro expects the enclosing function to provide the operands under the names m (value) and n (shift
   count); n is not checked and must lie in the range 0 .. number of bits - 1. */
#define SARUBODY(type, utype) \
  const int sarulogical = ((utype)-1 >> 1) > 0; \
  const utype sarufill = (sarulogical && (m < 0)) ? (utype)-1 : (utype)0; \
  const utype sarubits = ((utype)m >> n) | (sarufill ^ (sarufill >> n)); \
  return *(const type *)&sarubits;

LUALIB_API int32_t tools_slo32 (int32_t m, char n) {
  SARUBODY(int32_t, uint32_t);
}

/* Arithmetic right shift of a signed value that does not rely on the compiler's treatment of `>>` for negative
   operands. Taken from Rupert Tombs' sar.c, MIT LICENSE Copyright (c) 2020 Rupert Tombs, see
   https://github.com/Rupt/c-arithmetic-right-shift
   It is first probed whether `>>` on the signed type shifts logically; only in that case the n vacated top bits
   of a negative m are filled with ones, otherwise the fill value is 0 and the plain shift result is returned.
   The macro expects the enclosing function to provide the operands under the names m (value) and n (shift
   count); n is not checked and must lie in the range 0 .. number of bits - 1. */
#define SARBODY(type, utype) \
    const int sarlogical = ((type)-1 >> 1) > 0; \
    const utype sarfillu = (sarlogical && (m < 0)) ? (utype)-1 : (utype)0; \
    const type sarfill = *(const type *)&sarfillu; \
    return (m >> n) | (sarfill ^ (sarfill >> n))

LUALIB_API int32_t tools_sar32 (int32_t m, char n) {
  SARBODY(int32_t, uint32_t);
}


/* Modulo operator for unsigned ints that gets along without division, slower than C's `%' on x86 CPUs, 2.12.0 RC 4
   Source: https://stackoverflow.com/questions/24219482/alternative-of-modulo-operator-in-embedded-c, 2.27.0
   Returns num modulo div; for div = 0 the largest unsigned int (i.e. (unsigned int)-1) is returned.
   The divisor is doubled recursively as long as it fits at least twice into num, then the remaining multiples are
   subtracted one by one. num < div is answered right away: in unsigned arithmetic `num - div` would wrap around
   to a huge value, which would drive the recursion on until the doubled divisor overflows and a wrong remainder
   is delivered (e.g. 3 instead of 1 for num = 1, div = 4). */
LUALIB_API unsigned int tools_modulo (unsigned int num, unsigned int div) {
  if (div == 0) return (unsigned int)-1;  /* avoid stack overflows, i.e. segfaults */
  if (num < div) return num;              /* nothing to reduce, and keeps num - div below from wrapping */
  if (num - div >= div)                   /* num >= 2*div, so div + div cannot overflow */
    num = tools_modulo(num, div + div);
  while (num >= div)
    num -= div;
  return num;
}


#define _PHIM1       (PHI - 1.0)      /* fractional part of PHI (PHI is defined elsewhere) */
#define _W           (4294967296.0)   /* 2^32 */
#define _SI          (_W * _PHIM1)
const uint32_t FIBMODSEED = 2654435769UL;  /* = floor(2^32/golden_ratio) and rounding towards the nearest odd integer so that the bottom bit is set */

/* Multiplicative (Fibonacci) hashing: maps the key k to a slot number for a table of m slots.
   See: https://de.wikipedia.org/wiki/Multiplikative_Methode, citing from Wikipedia (translated):
   "The multiplicative method is a scheme for developing hash functions. The product of the key and a number A is
   formed and its integer part is cut off, so that the key is mapped into the interval [0,1]. The result is
   multiplied by the number of hash table addresses m and rounded down."
   See also:
   https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo
   With a non-zero `exact`, the result is computed by integer arithmetic if m is a power of two and with
   fmod/floor otherwise; with exact = 0, a faster floating-point shortcut is taken.
   NOTE(review): the behaviour for m = 0 in the exact branch depends on what tools_ispow2_uint32 reports for 0 -
   confirm. */
LUALIB_API uint32_t tools_fibmod (uint32_t k, uint32_t m, int exact) {  /* 2.14.8 */
  if (exact) {
    if (tools_ispow2_uint32(m)) {  /* 25 % faster, see bottom of https://de.wikipedia.org/wiki/Multiplikative_Methode, 2.38.2 */
      uint32_t i = tools_uintlog2(m);
      uint64_t t = (FIBMODSEED * (uint64_t)k) & 0x00000000FFFFFFFF;  /* low 32 bits of the 64-bit product */
      return t >> (WORDBITSUINT32 - i);  /* returns a value in the uint32_t domain: the top i bits of t; for m = 1 (i = 0) the 64-bit t is shifted by 32, giving 0 */
    } else
      return sun_floor( ( m * fmod(_SI*k, _W) )/_W);
  } else {  /* "double p = k*_PHIM1; floor(m*(p - floor(p)))" is slower */
    double p = (double)k * _PHIM1;
    return m*(p - (uint32_t)p);  /* 63 % faster but result may vary across platforms */
  }
  /* result is in the range 0 .. m - 1 if m is odd, and in 1 .. m - 1 otherwise, unless m = 0 or k = 0 where 0 is returned */
}


/* MurmurHash3 (32-bit variant), taken from https://github.com/jonahharris/libcuckoofilter/tree/master, MIT licence
   Hashes the key_length_in_bytes bytes starting at key, using seed as the initial hash state, and returns the
   32-bit hash value.
   The input is consumed in 32-bit words in the byte order of the host. Each word is fetched with memcpy so that
   keys at any address can be hashed without misaligned or type-punned reads. The one to three bytes left over
   after the last complete word are mixed in separately. */
LUALIB_API uint32_t tools_murmurhash3 (const void *key, uint32_t key_length_in_bytes, uint32_t seed) {
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;
  const uint8_t *data = (const uint8_t *)key;
  const uint32_t nwords = key_length_in_bytes/sizeof(uint32_t);
  const uint8_t *tail = data + (size_t)nwords*sizeof(uint32_t);  /* first byte after the last complete word */
  uint32_t h, k, i;
  h = seed;
  /* body: mix in all complete words */
  for (i = 0; i < nwords; i++) {
    memcpy(&k, data + (size_t)i*sizeof(uint32_t), sizeof(k));
    k *= c1;
    k = (k << 15) | (k >> (32 - 15));
    k *= c2;
    h ^= k;
    h = (h << 13) | (h >> (32 - 13));
    h = h*5 + 0xe6546b64;
  }
  /* tail: mix in the remaining bytes, if any */
  k = 0;
  switch (key_length_in_bytes & 3) {
    case 3: k ^= ((uint32_t)tail[2] << 16);  /* fall through */
    case 2: k ^= ((uint32_t)tail[1] << 8);   /* fall through */
    case 1:
      k ^= tail[0];
      k *= c1;
      k = (k << 15) | (k >> (32 - 15));
      k *= c2;
      h ^= k;
      break;
    default:  /* length is a multiple of four, nothing left */
      break;
  }
  /* finalisation: force all bits of the hash state to avalanche */
  h ^= key_length_in_bytes;
  h ^= (h >> 16);
  h *= 0x85ebca6b;
  h ^= (h >> 13);
  h *= 0xc2b2ae35;
  h ^= (h >> 16);
  return h;
}


/* Converts a 32-bit IEEE float that is given as four bytes in network byte order - passed in as an unsigned
   32-bit integer - to a float in host byte order.
   Taken from: https://www.ridgesolutions.ie/index.php/2018/07/27/code-for-ntohf/ */
LUALIB_API float tools_ntohf (uint32_t net32) {
  float result;
  /* swap bytes if necessary */
  const uint32_t host32 = tools_ntohl(net32);
  /* reinterpret the very same four bytes as a float */
  memcpy(&result, &host32, sizeof(result));
  return result;
}

/* Host-to-network conversion of 16-bit (tools_htons) and 32-bit (tools_htonl) unsigned integers.
   On little-endian hosts the bytes are reversed, otherwise the argument is returned as it is.
   NOTE(review): if neither BYTE_ORDER nor LITTLE_ENDIAN is defined at this point, the preprocessor evaluates
   both to 0 and the little-endian branch is chosen - confirm that the macros are always available here. */
#if BYTE_ORDER == LITTLE_ENDIAN
LUALIB_API uint16_t tools_htons (uint16_t net16) {
  return (uint16_t)((net16 >> 8) | (net16 << 8));  /* swap the two bytes; bits above bit 15 are cut off */
}

LUALIB_API uint32_t tools_htonl (uint32_t net32) {
  /* swap the bytes within each half, then swap the halves; the halves are widened to uint32_t BEFORE shifting,
     an int-promoted uint16_t shifted left by 16 could otherwise run into the sign bit */
  const uint32_t upper = tools_htons((uint16_t)(net32 >> 16));
  const uint32_t lower = tools_htons((uint16_t)net32);
  return upper | (lower << 16);
}
#else
LUALIB_API uint16_t tools_htons (uint16_t net16) {
  return net16;
}

LUALIB_API uint32_t tools_htonl (uint32_t net32) {
  return net32;
}
#endif

/* Network-to-host conversion of a 16-bit unsigned integer; the operation is its own inverse, so the work is
   simply delegated to tools_htons. */
LUALIB_API uint16_t tools_ntohs (uint16_t net16) {
  const uint16_t host16 = tools_htons(net16);
  return host16;
}

/* Network-to-host conversion of a 32-bit unsigned integer.
   Taken from https://codereview.stackexchange.com/questions/149717/implementation-of-c-standard-library-function-ntohl
   by user Matthieu M.
   The four bytes of net32 are read in the order they have in memory and put together with the first byte as the
   most significant one, so no knowledge of the host's byte order is needed. */
LUALIB_API uint32_t tools_ntohl (uint32_t const net32) {
  uint8_t data[4] = { 0 };  /* `{ 0 }` rather than `{}`: empty initializer braces are not ISO C before C23 */
  memcpy(data, &net32, sizeof(data));
  return ((uint32_t)data[3] << 0)
       | ((uint32_t)data[2] << 8)
       | ((uint32_t)data[1] << 16)
       | ((uint32_t)data[0] << 24);
}

/* RFC 1071 Internet Checksum over `length` bytes starting at vdata.
   Taken from: https://stackoverflow.com/questions/54796296/rfc-1071-checksum-for-char-array
   Posted by Barmar. Contrary to other implementations, Barmar's is safe with objects of odd size.
   See: RFC 1071 - http://tools.ietf.org/html/rfc1071
   Many, many collisions ! */

LUALIB_API uint16_t tools_rfc1071 (void *vdata, size_t length) {
  const char *bytes = (const char *)vdata;  /* byte-wise view of the data */
  uint32_t sum = 0xffff;                    /* the accumulator */
  uint16_t word;
  size_t pos;
  /* all complete 16-bit blocks */
  for (pos = 0; pos + 1 < length; pos += 2) {
    memcpy(&word, bytes + pos, 2);
    sum += tools_ntohs(word);
    if (sum > 0xffff) sum -= 0xffff;  /* fold the carry back into the lower 16 bits */
  }
  /* a single byte may be left over: it is put into an otherwise zeroed block */
  if (length & 1) {
    word = 0;
    memcpy(&word, bytes + length - 1, 1);
    sum += tools_ntohs(word);
    if (sum > 0xffff) sum -= 0xffff;
  }
  /* the checksum is the complement of the sum, delivered in network byte order */
  return tools_htons(~sum);
}


/* Greatest common divisor of two doubles that hold integral values, computed by shifting and subtracting.
   Core algorithm found at:
   https://hbfs.wordpress.com/2013/12/10/the-speed-of-gcd/
   written by Steven Pigeon
   Returns 1 if x or y has a fractional part. The signs of x and y are ignored. If one of the arguments is zero,
   the other one is returned.
   NOTE(review): |x| and |y| are converted to uint32_t without a range check. Values above the uint32_t range as
   well as inf and nan (which tools_isfracwords in this file reports as non-fractional) do not fit into the
   target type - confirm that callers rule them out. */
LUALIB_API long int tools_gcd (double x, double y) {
  int32_t hx, hy;
  uint32_t lx, ly, t, u, v, shift;  /* sxy; */
  EXTRACT_WORDS(hx, lx, x);
  EXTRACT_WORDS(hy, ly, y);
  if (tools_isfracwords(hx, lx) || tools_isfracwords(hy, ly)) return 1;
  /* sxy = (hx ^ hy) & 0x80000000; sign of x*y product */
  hx &= 0x7fffffff;  /* |x| */
  hy &= 0x7fffffff;  /* |y| */
  INSERT_WORDS(x, hx, lx);
  INSERT_WORDS(y, hy, ly);
  u = (uint32_t)x;
  v = (uint32_t)y;
  t = u | v;
  if (u == 0 || v == 0) return t;  /* return (v) or (u), resp. */
  shift = tools_ctz(t);  /* number of factors of two that u and v have in common */
  do {
    v >>= tools_ctz(v);  /* drop all factors of two from v; v is non-zero here */
    if (u > v) SWAP(u, v, t);  /* presumably exchanges u and v using t as scratch, so that u <= v - see the SWAP macro */
    v -= u;
  } while (v);
  return u << shift; /* scale by common factor. */
}


/* Checks whether a key has been pressed and returns the result of kbhit().
   On Windows, a pending key is additionally read with getch() so that it does not remain in the input buffer.
   On platforms other than Windows, OS/2, UNIX and macOS, -1 is returned. */
LUALIB_API int tools_anykey (void) {
#if defined(_WIN32)
  const int hit = kbhit();
  if (hit) getch();  /* 0.31.4, `clear the buffer` */
  return hit;
#elif defined(__OS2__) || defined(__unix__) || defined(__APPLE__)
  return kbhit();
#else
  return -1;
#endif
}


/* Converts the integer val to a string in the given base; suited for negative values as well, 2.12.6,
   taken from: http://www.strudel.org.uk/itoa/
   base must lie in the range 2 .. 16 (digits above 9 are written as lower-case letters), and the magnitude of val
   must not exceed INT_MAX; otherwise NULL is returned.
   The result points into a static buffer: it is overwritten by the next call and must not be freed; the function
   is therefore not reentrant. */
LUALIB_API char *tools_itoa (int64_t val, int base) {
  static const char digits[] = "0123456789abcdef";
  static char buf[33] = { 0 };  /* NULL terminator, changed 2.12.7 for possible minus */
  int i, isneg;
  /* base 1 would never reduce val, bases above 16 would read beyond the digit table */
  if (base < 2 || base > 16) return NULL;
  /* checked BEFORE negating, so that the smallest int64_t is never negated */
  if (val > INT_MAX || val < -(int64_t)INT_MAX) return NULL;  /* 2.12.7 extension */
  isneg = 0;
  i = 31;  /* changed 2.12.7 for possible minus; buf[32] always stays the terminator */
  if (val == 0) buf[i--] = '0';  /* 2.12.7 extension */
  else if (val < 0) { val = -val; isneg = 1; }  /* 2.12.7 extension */
  for (; val && i ; --i, val /= base)  /* digits are produced from right to left */
    buf[i] = digits[val % base];
  if (isneg) buf[i--] = '-';  /* 2.12.7 extension */
  return &buf[i + 1];
}


/* Kahan-Ozawa Summation, 2.13.0
   One step of a compensated summation: adds the term x to the running sum s, taking into account the correction
   *q carried over from the previous step.
   Returns the new running sum; *q is overwritten with the correction to be passed to the next call.
   All operands are declared volatile; NOTE(review): presumably to keep the compiler from reordering or
   simplifying the compensation arithmetic - confirm. */
LUALIB_API double tools_koadd (volatile double s, volatile double x, volatile double *q) {
  volatile lua_Number sold, u, v, w, t;
  v = x - *q;   /* the term, reduced by the correction carried over */
  sold = s;     /* the sum before the addition */
  s += v;
  if (fabs(x) < fabs(*q)) {  /* swap roles so that the operand of larger magnitude is subtracted first below */
    t = x;
    x = -(*q);
    *q = t;
  }
  u = (v - x) + *q;  /* first error term, from forming v */
  if (fabs(sold) < fabs(v)) {  /* likewise order the old sum and v by magnitude */
    t = sold;
    sold = v;
    v = t;
  }
  w = (s - sold) - v;  /* second error term, from forming s */
  *q = u + w;
  return s;  /* s now contains sumdata */
}


/* Kahan-Babuska Summation, 3.7.2
   One step of a compensated summation with two correction accumulators: adds the term x to the running sum s.
   Returns the new raw sum; *cs is replaced by the updated first-order correction and the second-order correction
   is added to *ccs. Both accumulators should start at 0; after the last term the caller adds *cs and *ccs to
   the returned sum. */
LUALIB_API double tools_kbadd (volatile double s, volatile double x, volatile double *cs, volatile double *ccs) {
  volatile lua_Number t, c, cc;
  t = s + x;
  c = (fabs(s) >= fabs(x)) ? (s - t) + x : (x - t) + s;  /* error term of s + x, operand of larger magnitude first */
  s = t;
  t = *cs + c;
  cc = (fabs(*cs) >= fabs(c)) ? (*cs - t) + c : (c - t) + *cs;  /* error term of *cs + c, same scheme */
  *cs = t;
  *ccs += cc;
  return s;  /* s contains the raw, uncorrected sum, after the last summation add cs and ccs to s to get the corrected sum */
}


#ifndef __ARMCPU  /* 2.37.1 */
/* Long double variant of the Kahan-Ozawa summation step: adds the term x to the running sum s, taking into
   account the correction *q carried over from the previous step.
   Returns the new running sum; *q is overwritten with the correction to be passed to the next call.
   Not compiled if __ARMCPU is defined. */
LUALIB_API long double tools_koaddl (volatile long double s, volatile long double x, volatile long double *q) {  /* 2.34.10 */
  volatile long double sold, u, v, w, t;
  v = x - *q;   /* the term, reduced by the correction carried over */
  sold = s;     /* the sum before the addition */
  s += v;
  if (fabsl(x) < fabsl(*q)) {  /* swap roles so that the operand of larger magnitude is subtracted first below */
    t = x;
    x = -(*q);
    *q = t;
  }
  u = (v - x) + *q;  /* first error term, from forming v */
  if (fabsl(sold) < fabsl(v)) {  /* likewise order the old sum and v by magnitude */
    t = sold;
    sold = v;
    v = t;
  }
  w = (s - sold) - v;  /* second error term, from forming s */
  *q = u + w;
  return s;  /* s now contains sumdata */
}
#endif


/* The following algorithm has been taken from the presentation:
   `Techniques for Floating-Point Arithmetic` by Jeff Arnold, as of May 05, 2014
   WARNING: Note that when multiplying multiple values, you cannot just add the correction variable to one of the arguments
   of the next call to tools_examul (regardless of whether using Kahan summation or not), or add up the correction values
   independently during iteration as this would make the result much worse than just doing a plain multiplication. */

#define EFTDELTA   27  /* = ceil(53 bits in mantissa / 2) */

/* Precise Splitting EFT (error-free transformations) for Binary64: splits x into a high part xh and a low part xl
   with xl = x - xh, using the splitting constant c. xh and xl must be assignable; x and c are evaluated more
   than once. */
#define eftsplit64(x,xh,xl,c) { \
  xh = ((c)*(x)) + ((x) - (c*(x))); \
  xl = (x) - xh; \
}

static const double eftc = (double)((1UL << EFTDELTA) + 1);  /* splitting constant 2^EFTDELTA + 1 */

/* Multiplies a by b. Returns the rounded product a*b and stores a correction term in *q, which is accumulated
   from the products of the high and low parts of a and b minus the rounded product. */
LUALIB_API double tools_examul (const double a, const double b, double *q) {
  /* no unsafe optimizations ! */
  volatile double ah, al, bh, bl, r;
  eftsplit64(a, ah, al, eftc);
  eftsplit64(b, bh, bl, eftc);
  r = a*b;
  *q = -r + ah*bh;  /* the order of the following additions matters, do not rearrange */
  *q += ah*bl;
  *q += al*bh;
  *q += al*bl;
  return r;
}


/* Evaluate polynomial
 *
 * SYNOPSIS:
 *
 * int N;
 * double x, y, coef[N+1], polevl[];
 *
 * y = polevl( x, coef, N );
 *
 * DESCRIPTION:
 *
 * Evaluates polynomial of degree N:
 *
 *                     2          N
 * y  =  C  + C x + C x  +...+ C x
 *        0    1     2          N
 *
 * Coefficients are stored in reverse order:
 *
 * coef[0] = C  , ..., coef[N] = C  .
 *            N                   0
 *
 * SPEED:
 *
 * In the interest of speed, there are no checks for out
 * of bounds arithmetic.  This routine is used by most of
 * the functions in the library.  Depending on available
 * equipment features, the user may wish to rewrite the
 * program in microcode or assembly language.
 *
 */

/*
Cephes Math Library Release 2.1:  December, 1988
Copyright 1984, 1987, 1988 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140

Kahan-Ozawa extension and `secure fix` by alex walz, 2.13.0/2.35.1. n is the number of coefficients in the array and must be
at least 1; n <= 0 is not checked. The array must be in descending order, i.e. highest coefficient comes in first.
Example: 10*x^3 + 11*x^2 + 12*x + 13 -> [10, 11, 12, 13]. For everything else see tools_polyevalfma.

The polynomial is evaluated by Horner's scheme; each addition is carried out by tools_kbadd, and the two correction
accumulators are added to the result once, after the last step.
NOTE(review): the corrections collected in earlier steps are not multiplied by x in later steps - confirm that this
is intended. */
LUALIB_API double tools_polyeval (double x, double *coeffs, int n) {
  volatile double r, cs, ccs, *p;
  p = coeffs;
  r = *p++; n--;  /* start with the highest coefficient, n - 1 Horner steps remain */
  cs = ccs = 0;
  while (n--) {
    r = tools_kbadd(r*x, *p++, &cs, &ccs);  /* 3.7.2 change to Kahan-Babuska summation */
  }
  return r + cs + ccs;
}


LUALIB_API double tools_polyevals (double x, double *coeffs, int n) {  /* 2.41.3, simple version, for ZX */
  volatile double r, *p;
  p = coeffs;
  r = *p++; n--;
  while (n--) {  /* execute at least once */
    r = fma(r, x, *p++);
  }
  return r;
}


/* Polynomial evaluator:  P[0] x^(n-1)  +  P[1] x^(n-2)  +  ...  +  P[n-1]. 2.35.1
   All n coefficients must be given in the array; no implicit leading coefficient of 1.0 is assumed.
   The array must be in descending order, i.e. the highest coefficient comes in first.
   Example: 10*x^3 + 11*x^2 + 12*x + 13 -> [10, 11, 12, 13]. n > 0.
   The function is at least twice as fast as tools_polyeval. */

/* Maple V Release 4:
# conventional method:
deg := 20:
r := 0:
for i from deg to 0 by -1 do
   r := r + x^i*p[deg-i]
od:
convert(r, horner, [x], optimized);

# parallel version, split into even and odd degrees to finally be summed up:
deg := 21:
r := 0: s := 0:
for i from deg to 0 by -1 do
   if i mod 2 = 0 then
      r := r + x^i*p[deg-i]
   else
      s := s + x^i*p[deg-i]
   fi
od:
t := convert(r, horner, x): subs(x^2=xsq, t);
u := convert(s, horner, x): subs(x^2=xsq, u);

-----------------------------------------------

Agena test:

import calc, testlib;

for i to 40 do
   f := testlib.polygens(nseq(1, i));  # old
   watch();
   to 10m do
      x := f(Pi)
   od;
   t1 := watch();
   g := calc.polygen(nseq(1, i));  # new
   watch();
   to 10m do
      y := g(Pi)
   od;
   t2 := watch();
   print(i, t1/t2*100 - 100)
   if x <> y then
       print('ERROR', i, x, y)
   fi
od;

1       7.4550253005668
2       12.706499436369
3       18.851406014577
4       22.749366392495
5       31.021881720025
6       36.494616977522
7       42.266834619959
8       46.874964644239
9       53.363726941433
10      55.95102209681
11      65.256120759807
12      68.415321534175
13      74.730002579864
14      77.061287177921
15      82.072506907961
16      85.336020842929
17      89.167453025333
18      93.274001454094
19      97.460958372905
20      98.945357798174
21      97.163735514656
22      102.83104092243
23      106.7811535534
24      110.30197006597
25      112.34676334241
26      121.61454468992
27      125.62335280042
28      128.02710340805
29      130.40934298055
30      133.03130373133
31      136.17885580018
32      -0.40202083099375
33      -0.62172949742596
34      -0.60781480536588
35      -0.84138946390519
*/
/* Evaluates the polynomial p[0]*x^(n-1) + p[1]*x^(n-2) + ... + p[n-1] at x, where p = coeffs->v holds the n
   coefficients as long doubles in descending order. The computation is done in long double and the result is
   returned as a double.
   For n = 1 .. 31 (degrees 0 .. 30) hand-unrolled expressions are used: up to degree 3 a plain Horner form, from
   degree 4 on the polynomial is split into its terms of even and of odd power, both are evaluated in x*x and
   finally summed up. All other n are served by a generic Horner loop using fmal.
   n must be at least 1; n <= 0 is not checked and ends up in the generic loop. */
LUALIB_API double tools_polyevalfma (double x, LongDoubleArray *coeffs, int n) {  /* 2.35.1 */
  long double r, *p;
  p = coeffs->v;
  n--;  /* from here on n is the degree of the polynomial */
  switch (n) {  /* 3.1.2 speed upgrade from foggy +3.5 % (degree 0) to real +133 % (degree 30)
       We will split apart a polynomial into one of even degree and into one of odd degree, then simply summing the results up,
       the performance boost is 30 percentage points over the `unsplit hard-coded-non-loop method`.
       See https://en.wikipedia.org/wiki/Horner%27s_method, chapter `Parallel Evaluation`.
       Do not use `volatile` as it slows down the code significantly (at least -12 %) on MinGW/GCC 9.2.0 on Windows 8.1;
       On `What is the largest degree of polynomials most commonly used ?`, see:
       https://math.stackexchange.com/questions/2197171/when-where-and-how-often-do-you-find-polynomials-of-higher-degrees-than-two
       One answer, from mathreadler, is: `15`. */
    case 30: {  /* actually n = 31; from here on to maybe degree quite below 15, we are going esoteric, anyway: the speed plus is amazing. */
      long double s, xsq;
      xsq = x*x;
      s = p[30]+(p[28]+(p[26]+(p[24]+(p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[29]+(p[27]+(p[25]+(p[23]+(p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 29: {  /* actually n = 30 */
      long double s, xsq;
      xsq = x*x;
      s = (p[28]+(p[26]+(p[24]+(p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[29]+(p[27]+(p[25]+(p[23]+(p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 28: {  /* actually n = 29 */
      long double s, xsq;
      xsq = x*x;
      s = p[28]+(p[26]+(p[24]+(p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[27]+(p[25]+(p[23]+(p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 27: {  /* actually n = 28 */
      long double s, xsq;
      xsq = x*x;
      s = (p[26]+(p[24]+(p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[27]+(p[25]+(p[23]+(p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 26: {  /* actually n = 27 */
      long double s, xsq;
      xsq = x*x;
      s = p[26]+(p[24]+(p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[25]+(p[23]+(p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 25: {  /* actually n = 26 */
      long double s, xsq;
      xsq = x*x;
      s = (p[24]+(p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[25]+(p[23]+(p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 24: {  /* actually n = 25 */
      long double s, xsq;
      xsq = x*x;
      s = p[24]+(p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[23]+(p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 23: {  /* actually n = 24 */
      long double s, xsq;
      xsq = x*x;
      s = (p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[23]+(p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 22: {  /* actually n = 23 */
      long double s, xsq;
      xsq = x*x;
      s = p[22]+(p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 21: {  /* actually n = 22 */
      long double s, xsq;
      xsq = x*x;
      s = (p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[21]+(p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 20: {  /* actually n = 21 */
      /* r = p[20]+(p[19]+(p[18]+(p[17]+(p[16]+(p[15]+(p[14]+(p[13]+(p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[20]+(p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 19: {  /* actually n = 20 */
      /* r = p[19]+(p[18]+(p[17]+(p[16]+(p[15]+(p[14]+(p[13]+(p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[19]+(p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 18: {  /* actually n = 19 */
      /* r = p[18]+(p[17]+(p[16]+(p[15]+(p[14]+(p[13]+(p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[18]+(p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 17: {  /* actually n = 18 */
      /* r = p[17]+(p[16]+(p[15]+(p[14]+(p[13]+(p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[17]+(p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 16: {  /* actually n = 17 */
      /* r = p[16]+(p[15]+(p[14]+(p[13]+(p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[16]+(p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 15: {  /* actually n = 16 */
      /* r = p[15]+(p[14]+(p[13]+(p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[15]+(p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 14: {  /* actually n = 15 */
      /* r = p[14]+(p[13]+(p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[14]+(p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 13: {  /* actually n = 14 */
      /* r = p[13]+(p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[13]+(p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 12: {  /* actually n = 13 */
      /* r = p[12]+(p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[12]+(p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 11: {  /* actually n = 12 */
      /* r = p[11]+(p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq)*x;
      r = p[11]+(p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 10: {  /* actually n = 11 */
      /* r = p[10]+(p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[10]+(p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*xsq;
      r = (p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 9: {  /* actually n = 10 */
      /* r = p[9]+(p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq)*x;
      r = p[9]+(p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 8: {  /* actually n = 9 */
      /* r = p[8]+(p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[8]+(p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*xsq;
      r = (p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq)*x;
      r += s;
      break;
    }
    case 7: { /* actually n = 8 */
      /* r = p[7]+(p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq)*x;
      r = p[7]+(p[5]+(p[3]+p[1]*xsq)*xsq)*xsq;
      r += s;
      break;
    }
    case 6: {  /* actually n = 7 */
      /* r = p[6]+(p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[6]+(p[4]+(p[2]+p[0]*xsq)*xsq)*xsq;
      r = (p[5]+(p[3]+p[1]*xsq)*xsq)*x;
      r += s;
      break;
    }
    /* from here on any tuning does not make sense any longer */
    case 5: { /* actually n = 6 */
      /* r = p[5]+(p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = (p[4]+(p[2]+p[0]*xsq)*xsq)*x;
      r = p[5]+(p[3]+p[1]*xsq)*xsq;
      r += s;
      break;
    }
    case 4: {  /* actually n = 5, anything below this polynomial degree is futile and esoteric */
      /* r = p[4]+(p[3]+(p[2]+(p[1]+p[0]*x)*x)*x)*x; */
      long double s, xsq;
      xsq = x*x;
      s = p[4]+(p[2]+p[0]*xsq)*xsq;
      r = (p[3]+p[1]*xsq)*x;
      r += s;
      break;
    }
    case 3:  /* actually n = 4 */
      r = p[3]+(p[2]+(p[1]+p[0]*x)*x)*x;
      break;
    case 2:  /* actually n = 3 */
      r = p[2]+(p[1]+p[0]*x)*x;
      break;
    case 1:  /* actually n = 2 */
      r = p[1]+p[0]*x;
      break;
    case 0:  /* actually n = 1 */
      r = *p;
      break;
    default: {  /* any other degree: generic Horner loop, noticeably slower than the unrolled cases */
      r = *p++;
      while (n--) {
        r = fmal(r, x, *p++);
      }
    }
  }
  return r;
}


/* Polynomial evaluator:  P[0] x^(n-1)  +  P[1] x^(n-2)  +  ...  +  P[n-1]. 2.35.1
   Plain Horner loop in long double precision with one fmal call per step. coeffs->v holds all n coefficients in
   descending order, i.e. the highest coefficient comes in first; no implicit leading coefficient is assumed.
   n must be at least 1, which is not checked. The result is returned as a double. */
LUALIB_API double tools_polyevalfmas (double x, LongDoubleArray *coeffs, int n) {  /* 2.35.1 */
  long double *cur = coeffs->v;
  long double acc = *cur++;  /* start with the highest coefficient */
  int left;
  for (left = n - 1; left != 0; left--) {  /* one step for each remaining coefficient */
    acc = fmal(acc, x, *cur++);
  }
  return acc;
}


/* Checks whether a number has a fractional part: returns 1 if so and 0 otherwise, the latter including +/-inf
   and nan. 2.13.0, based on Sun's rint, s_rint.c; due to call overhead, only slightly faster than ISFLOAT macro. */
LUALIB_API int tools_isfrac (double x) {
  int32_t hx, ux;
  uint32_t lx, fracbits;
  EXTRACT_WORDS(hx, lx, x);
  ux = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(ux > 51))  /* too large to have any fractional bits; 2.5.15 optimisation */
    return 0;   /* covers +/-inf, nan, as well. */
  if (ux < 0)   /* |x| < 1: everything but zero is fractional */
    return ((hx & 0x7fffffff) | lx) != 0;
  if (ux < 20) {  /* fractional bits start in the high word and comprise the whole low word */
    fracbits = (0x000fffff) >> ux;
    return ((hx & fracbits) | lx) != 0;
  }
  /* fractional bits reside in the low word only */
  fracbits = ((uint32_t)(0xffffffff)) >> (ux - 20);
  return (lx & fracbits) != 0;
}


/* Like tools_isfrac, but takes the high word hx and the low word lx of a double instead of the double itself.
   Returns 1 if a fractional mantissa bit is set, 0 otherwise (also 0 for +/-inf and nan). */
static FORCE_INLINE int tools_isfracwords (int32_t hx, uint32_t lx) {
  uint32_t fracmask;
  int32_t expo = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(expo > 51))  /* |x| >= 2^52: no fractional bits left; covers +/-inf, nan, as well */
    return 0;
  if (expo < 0)  /* |x| < 1: fractional unless x is +/-0 */
    return ((hx & 0x7fffffff) | lx) != 0;
  if (expo >= 20) {  /* all fractional bits reside in the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    return (lx & fracmask) != 0;
  }
  /* fractional bits are spread over the high word's mantissa part and the entire low word */
  fracmask = (0x000fffff) >> expo;
  return ((hx & fracmask) | lx) != 0;
}


/* Checks whether x is integral. 2.13.0, based on Sun's rint, s_rint.c; due to call overhead, only one percent
   faster than the ISINT macro.
   Returns 1 for +/-0 and for every finite value without fractional mantissa bits (this includes all finite values
   with |x| >= 2^52), 0 for everything else including +/-inf and nan. */
LUALIB_API int tools_isint (double x) {
  int32_t hx, expo;
  uint32_t lx, fracmask;
  EXTRACT_WORDS(hx, lx, x);
  expo = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(expo > 51))  /* |x| >= 2^52, inf or nan: integral if the value is finite. 2.5.15 optimisation */
    return ((hx & 0x7fffffff) < 0x7ff00000);
  if (expo < 0)  /* |x| < 1: only +/-0 qualifies */
    return ((hx & 0x7fffffff) | lx) == 0;
  if (expo >= 20) {  /* all fractional bits reside in the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    return (lx & fracmask) == 0;
  }
  /* fractional bits are spread over the high word's mantissa part and the entire low word */
  fracmask = (0x000fffff) >> expo;
  return ((hx & fracmask) | lx) == 0;
}


/* Checks for an integer that is also correctly representable. 2.16.6
   Returns 1 if x is +/-0 or an integral value with |x| < 2^52, and 0 otherwise.
   Every value with an unbiased exponent above 51 - finite values with |x| >= 2^52, +/-inf and nan - is rejected.
   The former code ran three consecutive tests in that branch, but the first one already returned 0 for all finite
   values and the second one for +/-inf and nan, so the third was unreachable; the branch has been reduced to what
   it actually did.
   NOTE(review): integers in [2^52, 2^53) are exactly representable as doubles but are rejected here; this
   behaviour has been kept unchanged - confirm whether it is intended. */
LUALIB_API int tools_isregint (double x) {
  int32_t hx, ux;
  uint32_t i, lx;
  EXTRACT_WORDS(hx, lx, x);
  ux = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(ux > 51))  /* |x| >= 2^52, +/-inf or nan: never regarded as a regular integer */
    return 0;
  if (ux < 20) {
    if (ux < 0) {  /* |x| < 1: only +/-0 qualifies */
      return ((hx & 0x7fffffff) | lx) == 0;
    } else {  /* fractional bits in the high word's mantissa part and the entire low word */
      i = (0x000fffff) >> ux;
      return ((hx & i) | lx) == 0;
    }
  } else {  /* fractional bits in the low word only */
    i = ((uint32_t)(0xffffffff)) >> (ux - 20);
    return (lx & i) == 0;
  }
}


/* Like tools_isregint, but additionally stores the unbiased exponent of x in *ux (-1023 for +/-0 and subnormals,
   1024 for +/-inf and nan). 2.32.3
   Returns 1 if x is +/-0 or an integral value with |x| < 2^52, and 0 otherwise.
   Every value with *ux > 51 is rejected. The former code ran three consecutive tests in that branch, but the
   first one already returned 0 for all finite values and the second one for +/-inf and nan, so the third was
   unreachable; the branch has been reduced to what it actually did.
   NOTE(review): integers in [2^52, 2^53) are exactly representable as doubles but are rejected here; this
   behaviour has been kept unchanged - confirm whether it is intended. */
LUALIB_API int tools_isregintx (double x, int32_t *ux) {  /* 2.32.3 */
  int32_t hx;
  uint32_t i, lx;
  EXTRACT_WORDS(hx, lx, x);
  *ux = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(*ux > 51))  /* |x| >= 2^52, +/-inf or nan: never regarded as a regular integer */
    return 0;
  if (*ux < 20) {
    if (*ux < 0) {  /* |x| < 1: only +/-0 qualifies */
      return ((hx & 0x7fffffff) | lx) == 0;
    } else {  /* fractional bits in the high word's mantissa part and the entire low word */
      i = (0x000fffff) >> *ux;
      return ((hx & i) | lx) == 0;
    }
  } else {  /* fractional bits in the low word only */
    i = ((uint32_t)(0xffffffff)) >> (*ux - 20);
    return (lx & i) == 0;
  }
}


/* Checks for a non-negative integer below 2^52.
   Returns 1 if x is +0 or an integral value with 0 < x < 2^52, and 0 otherwise; -0 is rejected because its sign
   bit is set.
   Every value with an unbiased exponent above 51 - finite values with |x| >= 2^52, +/-inf and nan - is rejected.
   The former code ran three consecutive tests in that branch, but the first one already returned 0 for all finite
   values and the second one for +/-inf and nan, so the third was unreachable; the branch has been reduced to what
   it actually did.
   NOTE(review): integers in [2^52, 2^53) are exactly representable as doubles but are rejected here; this
   behaviour has been kept unchanged - confirm whether it is intended. */
LUALIB_API int tools_isregnonnegint (double x) {
  int32_t hx, ux;
  uint32_t i, lx;
  EXTRACT_WORDS(hx, lx, x);
  ux = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(ux > 51))  /* |x| >= 2^52, +/-inf or nan: never regarded as a regular integer */
    return 0;
  if (ux < 20) {
    if (ux < 0) {  /* |x| < 1: only +0 qualifies */
      return (((hx & 0x7fffffff) | lx) == 0) && !(hx >> 31);
    } else {  /* fractional bits in the high word's mantissa part and the entire low word */
      i = (0x000fffff) >> ux;
      return (((hx & i) | lx) == 0) && !(hx >> 31);
    }
  } else {  /* fractional bits in the low word only */
    i = ((uint32_t)(0xffffffff)) >> (ux - 20);
    return ((lx & i) == 0) && !(hx >> 31);
  }
}


/* Like tools_isregnonnegint, but additionally stores the unbiased exponent of x in *ux (-1023 for +/-0 and
   subnormals, 1024 for +/-inf and nan).
   Returns 1 if x is +0 or an integral value with 0 < x < 2^52, and 0 otherwise; -0 is rejected because its sign
   bit is set.
   Every value with *ux > 51 is rejected. The former code ran three consecutive tests in that branch, but the
   first one already returned 0 for all finite values and the second one for +/-inf and nan, so the third was
   unreachable; the branch has been reduced to what it actually did.
   NOTE(review): integers in [2^52, 2^53) are exactly representable as doubles but are rejected here; this
   behaviour has been kept unchanged - confirm whether it is intended. */
LUALIB_API int tools_isregnonnegintx (double x, int32_t *ux) {
  int32_t hx;
  uint32_t i, lx;
  EXTRACT_WORDS(hx, lx, x);
  *ux = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(*ux > 51))  /* |x| >= 2^52, +/-inf or nan: never regarded as a regular integer */
    return 0;
  if (*ux < 20) {
    if (*ux < 0) {  /* |x| < 1: only +0 qualifies */
      return (((hx & 0x7fffffff) | lx) == 0) && !(hx >> 31);
    } else {  /* fractional bits in the high word's mantissa part and the entire low word */
      i = (0x000fffff) >> *ux;
      return (((hx & i) | lx) == 0) && !(hx >> 31);
    }
  } else {  /* fractional bits in the low word only */
    i = ((uint32_t)(0xffffffff)) >> (*ux - 20);
    return ((lx & i) == 0) && !(hx >> 31);
  }
}


/* Checks whether x is a negative integer. 2.13.0, based on Sun's rint, s_rint.c; due to call overhead, only
   slightly faster than the ISINT macro.
   Returns 1 if the sign bit of x is set and x is a finite integral value with |x| >= 1, otherwise 0. */
LUALIB_API int tools_isnegint (double x) {
  int32_t hx, expo;
  uint32_t lx, fracmask;
  EXTRACT_WORDS(hx, lx, x);
  expo = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(expo > 51))  /* |x| >= 2^52, inf or nan: must be finite and carry the sign bit. 2.5.15 optimisation */
    return ((hx & 0x7fffffff) < 0x7ff00000) && (hx < 0);
  if (expo < 0)  /* |x| < 1, including +/-0 */
    return 0;
  if (expo >= 20) {  /* all fractional bits reside in the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    return ((lx & fracmask) == 0) && (hx < 0);
  }
  /* fractional bits are spread over the high word's mantissa part and the entire low word */
  fracmask = (0x000fffff) >> expo;
  return (((hx & fracmask) | lx) == 0) && (hx < 0);
}


/* Checks the sign of x; idea taken from: newlib-3.1.0/newlib/libm/mathfp/s_ispos.c
   Returns 0 if x is nan; otherwise returns 1 if the sign bit of x is clear and 0 if it is set. Thus +0, positive
   finite values and +inf yield 1, whereas -0, negative finite values and -inf yield 0. */
LUALIB_API int tools_ispositive (double x) {  /* 2.16.1 not exposed; 2.29.2 exposed */
  uint32_t hx, lx;
  int isnan;
  EXTRACT_WORDS(hx, lx, x);
  /* nan: exponent bits all set and at least one mantissa bit set in either word */
  isnan = ((hx & 0x7fffffff) | (uint32_t)(lx != 0)) > 0x7ff00000;
  return !isnan && (hx & 0x80000000) == 0;
}


/* Checks whether x >= 0. 2.29.2
   Returns 0 for nan, 1 for +0 and -0, and for every other value 1 if its sign bit is clear, else 0.
   The zero test now masks out the sign bit: formerly it compared the unmasked high word, so that -0 fell through
   to the sign test and was reported as not being >= 0. */
LUALIB_API int tools_isnonnegative (double x) {  /* x >= 0, 2.29.2 */
  uint32_t hx, lx;
  EXTRACT_WORDS(hx, lx, x);
  if (((hx & 0x7fffffff) | ((lx | -lx) >> 31)) > 0x7ff00000) return 0;  /* x is NaN */
  if (((hx & 0x7fffffff) | lx) == 0) return 1;  /* x = +/-0 */
  return !(hx & 0x80000000);  /* finite or +/-inf: decided by the sign bit */
}


/* Checks whether x is a positive integer. 2.13.0
   Returns 1 if the sign bit of x is clear and x is a finite integral value with |x| >= 1, otherwise 0. */
LUALIB_API int tools_isposint (double x) {  /* 2.13.0 */
  int32_t hx, expo;
  uint32_t lx, fracmask;
  EXTRACT_WORDS(hx, lx, x);
  expo = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(expo > 51))  /* |x| >= 2^52, inf or nan: must be finite with the sign bit clear. 2.16.6 fix */
    return ((hx & 0x7fffffff) < 0x7ff00000) && (hx >= 0);
  if (expo < 0)  /* |x| < 1, including +/-0 */
    return 0;
  if (expo >= 20) {  /* all fractional bits reside in the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    return ((lx & fracmask) == 0) && (hx >= 0);
  }
  /* fractional bits are spread over the high word's mantissa part and the entire low word */
  fracmask = (0x000fffff) >> expo;
  return (((hx & fracmask) | lx) == 0) && (hx >= 0);
}


/* Like tools_isposint, but takes the high word hx and the low word lx of a double, and additionally stores the
   unbiased exponent in *ux. 3.7.6
   Returns 1 if the sign bit is clear and the value is a finite integral one with magnitude >= 1, otherwise 0. */
LUALIB_API int tools_isposintwords (int32_t hx, uint32_t lx, int32_t *ux) {  /* 3.7.6 */
  uint32_t fracmask;
  int32_t expo = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  *ux = expo;
  if (unlikely(expo > 51))  /* |x| >= 2^52, inf or nan: must be finite with the sign bit clear. 2.16.6 fix */
    return ((hx & 0x7fffffff) < 0x7ff00000) && (hx >= 0);
  if (expo < 0)  /* |x| < 1, including +/-0 */
    return 0;
  if (expo >= 20) {  /* all fractional bits reside in the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    return ((lx & fracmask) == 0) && (hx >= 0);
  }
  /* fractional bits are spread over the high word's mantissa part and the entire low word */
  fracmask = (0x000fffff) >> expo;
  return (((hx & fracmask) | lx) == 0) && (hx >= 0);
}


/* Checks whether x >= 0 and x is integral. 2.14.13
   Returns 1 for +/-0 and for every finite integral value whose sign bit is clear, otherwise 0 (including +/-inf
   and nan).
   Fix: for |x| >= 2^52 the sign test was inverted, so that huge negative values were accepted and huge positive
   ones rejected; the test now requires a cleared sign bit like the other branches. */
LUALIB_API int tools_isnonnegint (double x) {  /* x >= 0 and integral x, 2.14.13 */
  int32_t hx, ux;
  uint32_t i, lx;
  EXTRACT_WORDS(hx, lx, x);
  ux = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(ux > 51))  /* |x| >= 2^52, inf or nan.  2.5.15 optimisation */
    /* (hx & 0x7fffffff) < 0x7ff00000) -> real number, but not +/-inf, nan
       (hx >> 31) -> signbit (1 = minus, 0 = zero or +) */
    return ((hx & 0x7fffffff) < 0x7ff00000) && !(hx >> 31);
  if (ux < 20) {
    if (((hx & 0x7fffffff) | lx) == 0)  /* x == +-0 */
      return 1;
    if (ux < 0) {  /* 0 < |x| < 1 */
      return 0;
    } else {  /* fractional bits in the high word's mantissa part and the entire low word */
      i = (0x000fffff) >> ux;
      return (((hx & i) | lx) == 0) && (hx >> 31 == 0);
    }
  } else {  /* fractional bits in the low word only */
    i = ((uint32_t)(0xffffffff)) >> (ux - 20);
    return ((lx & i) == 0) && (hx >> 31 == 0);
  }
}


/* Checks whether x is a non-zero integer. 4.11.0, based on Sun's rint, s_rint.c; due to call overhead, only one
   percent faster than the ISINT macro.
   Returns 1 for every finite integral value with |x| >= 1 (this includes all finite values with |x| >= 2^52),
   and 0 for +/-0, fractions, +/-inf and nan. */
LUALIB_API int tools_isnonzeroint (double x) {
  int32_t hx, expo;
  uint32_t lx, fracmask;
  EXTRACT_WORDS(hx, lx, x);
  expo = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(expo > 51))  /* |x| >= 2^52, inf or nan: integral if the value is finite. 2.5.15 optimisation */
    return ((hx & 0x7fffffff) < 0x7ff00000);
  if (expo < 0)  /* |x| < 1: either +/-0 or a pure fraction, neither qualifies */
    return 0;
  if (expo >= 20) {  /* all fractional bits reside in the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    return (lx & fracmask) == 0;
  }
  /* fractional bits are spread over the high word's mantissa part and the entire low word */
  fracmask = (0x000fffff) >> expo;
  return ((hx & fracmask) | lx) == 0;
}


/* Checks whether x <= 0 and x is integral. 2.29.2
   Returns 1 for +/-0 and for every finite integral value whose sign bit is set, otherwise 0 (including +/-inf
   and nan).
   FIXME (kept from the original author): there are some issues here. */
LUALIB_API int tools_isnonposint (double x) {
  int32_t hx, expo;
  uint32_t lx, fracmask;
  EXTRACT_WORDS(hx, lx, x);
  expo = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (unlikely(expo > 51))  /* |x| >= 2^52, inf or nan: must be finite and carry the sign bit. 2.5.15 optimisation */
    return ((hx & 0x7fffffff) < 0x7ff00000) && (hx < 0);
  if (expo < 0)  /* |x| < 1: only +/-0 qualifies */
    return ((hx & 0x7fffffff) | lx) == 0;
  if (expo >= 20) {  /* all fractional bits reside in the low word */
    fracmask = ((uint32_t)(0xffffffff)) >> (expo - 20);
    return ((lx & fracmask) == 0) && (hx < 0);  /* 3.7.7/8 fix for x < 0 */
  }
  /* fractional bits are spread over the high word's mantissa part and the entire low word */
  fracmask = (0x000fffff) >> expo;
  return (((hx & fracmask) | lx) == 0) && (hx < 0);  /* 3.7.7/8 fix for x < 0 */
}


/* Is x an unsigned int ?  2.16.11, bits: 16 = 2-byte uint, 32 = 4-byte uint, etc.
   Returns 1 if x is +/-0 or a finite integral value with cleared sign bit and x < 2^bits, otherwise 0.
   Fix: for |x| >= 2^52 the sign test was inverted, so that huge negative values were accepted for any `bits`
   and - with bits > 52 - positive values from 2^52 onwards were rejected; the test now requires a cleared sign
   bit like the other branches. */
LUALIB_API int tools_isuint (double x, int bits) {
  int32_t hx, ux;
  uint32_t i, lx;
  EXTRACT_WORDS(hx, lx, x);
  ux = ((((uint32_t)hx) >> 20) & 0x7ff) - 0x3ff;  /* unbiased exponent: math.exponent(x) - 1, except 0 -> -1023 */
  if (!(hx & 0x80000000) && ux > bits - 1) return 0;  /* non-negative, but x >= 2^bits (or +inf, nan) */
  if (unlikely(ux > 51))  /* |x| >= 2^52, inf or nan.  2.5.15 optimisation */
    /* (hx & 0x7fffffff) < 0x7ff00000) -> real number, but not +/-inf, nan
       (hx >> 31) -> signbit (1 = minus, 0 = zero or +) */
    return ((hx & 0x7fffffff) < 0x7ff00000) && !(hx >> 31);
  if (ux < 20) {
    if (((hx & 0x7fffffff) | lx) == 0)  /* x == +-0 */
      return 1;
    if (ux < 0) {  /* 0 < |x| < 1 */
      return 0;
    } else {  /* fractional bits in the high word's mantissa part and the entire low word */
      i = (0x000fffff) >> ux;
      return (((hx & i) | lx) == 0) && (hx >> 31 == 0);
    }
  } else {  /* fractional bits in the low word only */
    i = ((uint32_t)(0xffffffff)) >> (ux - 20);
    return ((lx & i) == 0) && (hx >> 31 == 0);
  }
}


/* Is number a power of base 2 ?  2.32.3
   Returns
     -1  if tools_isregnonnegintx rejects x and reports an unbiased exponent above 51,
      0  if x is negative, zero, or not a power of two,
      1  if x is a power of two; for fractional x this is decided by checking whether log2(x) is integral.
   Fix: the Intel bit-scan shortcut converted x to a 32-bit unsigned integer although x may be as large as 2^52 at
   that point, so that powers of two from 2^32 onwards were not recognised (and the conversion itself was out of
   range). The shortcut is now taken only if x fits into 32 bits and is non-zero - the bit-scan instructions leave
   their destination undefined for a zero operand -; all other values use the portable 64-bit test. */
LUALIB_API int tools_ispow2 (double x) {  /* is number a power of base 2 ?  2.32.3 */
  int32_t ux;
  if (!tools_isregnonnegintx(x, &ux)) {
    if (ux > 51)  /* value not correctly representable, not +/-inf, nan ? */
      return -1;
    /* x is negative or fractional: a fraction may still be a power of two, so check whether log2(x) is integral,
       see: https://www.ritambhara.in/check-if-number-is-a-power-of-2 */
    return x < 0 ? 0 : tools_isint(sun_log2(x));
  }
  /* from here on, x is a non-negative integer below 2^52 */
#if defined(__GNUC__) && defined(__INTEL)
  /* 2 % tweak on Intel platforms taken from:
     https://stackoverflow.com/questions/600293/how-to-check-if-a-number-is-a-power-of-2
     by user bugs king; 2.38.2 */
  if (x > 0 && x <= 4294967295.0) {
    register int bitpos1, bitpos2;
    uint32_t y = (uint32_t)x;
    bitpos1 = 0; bitpos2 = 32;
    asm ("bsrl %1,%0": "+r" (bitpos1):"r" (y));  /* bit scan reverse, _most_ significant set bit (1 bit) */
    asm ("bsfl %1,%0": "+r" (bitpos2):"r" (y));  /* bit scan forward, least significant bit */
    return bitpos1 == bitpos2;  /* if only one bit has been set, we have a power of two */
  }
#endif
  {
    int64_t y = (int64_t)x;  /* __builtin_popcount == 1 is slower */
    return !(!y) & !(y & (y - 1));  /* y is non-zero and has exactly one bit set */
  }
}


/* LUAI_UINT32 m_w = AGN_RANDOM_MW; */ /* must not be zero, nor 0x464fffff; for definition, see agnconf.h */
/* LUAI_UINT32 m_z = AGN_RANDOM_MZ; */ /* must not be zero, nor 0x9068ffff; for definition, see agnconf.h */

/* Scale factor 1/2^32 that maps a 32-bit unsigned integer to the range [0, 1). */
#define int2double  ( 1.0/(UINT32_MAX + 1.0) )  /* 2.10.1 fix */
/* Half of int2double; tools_random adds it to the scaled value. */
#define bias        ( int2double/2.0 )          /* 2.10.1 fix */

/* Returns a random number in the open interval (0, 1).
   The generator state is kept in the two variables m_w and m_z, which are defined outside of this function.
   If mode is non-zero, both state variables are first re-seeded from the current time, each to a value in 1 .. 113
   (suggested by Slobodan, 2.5.1). Then both are advanced and combined into one 32-bit quantity that is scaled by
   int2double and shifted by bias. */
LUALIB_API double tools_random (int mode) {
  if (mode != 0) {  /* re-seed from the clock */
    Time64_T now = time(NULL);
    m_w = (now + m_w) % 113 + 1;
    m_z = (now + m_z) % 113 + 1;
  }
  /* advance both state words: low half times a constant plus high half */
  m_z = 36969*(m_z & 0xffff) + (m_z >> 16);
  m_w = 18000*(m_w & 0xffff) + (m_w >> 16);
  /* 2.10.1 out-of-range fix, DO NOT CHANGE or divide by UINT32_MAX + x; no performance decrease */
  return ((m_z << 16) + m_w)*int2double + bias;
}


/* Returns an integer in the range [l, u]; mode = 1: create really random result (passed on to tools_random). 2.14.3
   The value delivered by tools_random is scrambled by twice scaling it by 10000 and dropping the integral part
   in between, passed through tools_reducerange with the bounds 0 and 1, and finally stretched to the u - l + 1
   possible results. */
LUALIB_API int tools_randomrange (int l, int u, int mode) {
  double scaled, unit;
  scaled = tools_random(mode) * 10000;
  scaled = (scaled - sun_trunc(scaled)) * 10000;  /* keep the fractional part only, scale again */
  unit = tools_reducerange(scaled, 0, 1);
  return sun_floor(unit*(u - l + 1)) + l;
}


/* Converts a relative string position into an absolute one. 2.14.4; Lua 5.1.3 patch 9
   A negative pos counts back from the end of a string of length len, i.e. -1 refers to the last character.
   Non-negative positions are returned unchanged. If a negative position still is negative after the conversion
   (it points before the start of the string), 0 is returned. */
LUALIB_API ptrdiff_t tools_posrelat (ptrdiff_t pos, size_t len) {  /* 2.14.4 */
  if (pos < 0) {
    pos += (ptrdiff_t)len + 1;  /* back from the end */
  }
  return (pos >= 0) ? pos : 0;
}


/* based on Sun's __ieee754_pow
 *
 * @(#)e_pow.c 5.1 93/09/24
 *
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/* Classifies x by parity, derived from the `yisint` logic of Sun's __ieee754_pow. Returns
     0  if x is not an integer (this includes +/-inf and nan),
     1  if x is an odd integer,
     2  if x is an even integer (this includes +/-0 and every finite x with |x| >= 2^53). */
LUALIB_API int tools_isevenorodd (double x) {
  int32_t hx, ix, expo, intpart;
  uint32_t lx;
  EXTRACT_WORDS(hx, lx, x);
  ix = hx & 0x7fffffff;  /* high word of |x| */
  if ((ix | lx) == 0) return 2;    /* x = 0, even, must be checked explicitly */
  if (unlikely(ix >= 0x7ff00000)) return 0;  /* +/-inf or nan, 2.5.15 optimisation */
  if (ix >= 0x43400000) return 2;  /* |x| >= 2^53: even integer */
  if (ix < 0x3ff00000) return 0;   /* 0 < |x| < 1: not an integer */
  expo = (ix >> 20) - 0x3ff;       /* exponent */
  if (expo > 20) {  /* the units bit resides in the low word */
    intpart = lx >> (52 - expo);
    /* integral only if shifting back reproduces the low word, i.e. no fractional bit has been dropped */
    return ((intpart << (52 - expo)) == lx) ? 2 - (intpart & 1) : 0;
  }
  if (lx != 0) return 0;  /* units bit in the high word, so any bit set in the low word is fractional */
  intpart = ix >> (20 - expo);
  return ((intpart << (20 - expo)) == ix) ? 2 - (intpart & 1) : 0;
}


/* Allocates a string buffer by internally determining its most efficient size, aligned along the "long" boundary. The
   return is a char* pointer to the beginning of the string, or NULL if memory could not be allocated or if len is so
   large that the padded size would not fit into a size_t. The function zeros only the last few bytes and assumes that
   the `leading` rest will be filled by real characters later on. Just pass len as the number of characters, excluding
   the terminating \0, and do not multiply it by sizeof(char). The function automatically adds a terminating \0.
   Works like lapi.c/agn_stralloc, 2.16.5. See also: tools_optstrlen. */
LUALIB_API char *tools_stralloc (size_t len) {
  char *buf;
  size_t rem, l;
  /* len + 1 + padding must not wrap around, otherwise too small a block would be allocated and the zeroing
     below would write far beyond it */
  if (len > SIZE_MAX - AGN_BLOCKSIZE) return NULL;
  l = len;
  /* determine optimal size, including terminating \0 */
  rem = (++l) & AGN_ALIGNMASK;  /* length including \0, 2.17.2 optimisation */
  l += (rem != 0)*(AGN_BLOCKSIZE - rem);  /* round up to the next multiple of the block size */
  /* now commit memory */
  buf = (char *)malloc(l*sizeof(char));
  if (buf == NULL) return NULL;
  tools_bzero(buf + len, l - len);  /* only zero the bytes that will not be filled by characters later on */
  return buf;
}


/* AGN_BLOCKSIZEGT32 is 1 if ULONG_MAX exceeds 4294967295, i.e. if unsigned long is wider than 32 bits, else 0. */
#if ULONG_MAX - 20L > 4294967275L
#define AGN_BLOCKSIZEGT32 1
#else
#define AGN_BLOCKSIZEGT32 0
#endif

/* XORs block p with mask and passes the result to tools_strisnull; presumably this is non-zero if at least one
   byte of p equals the byte replicated in mask - confirm against the definition of tools_strisnull. */
#define charinchunk(p, mask)   (tools_strisnull((p) ^ (mask)))
/* Replicates byte c0 into the four lowest bytes of mask. Not hygienic: expects variables named `mask` and `c0`
   to exist in the scope where the macro is expanded. */
#define createmask() { \
  mask = c0 << 8 | c0; \
  mask = mask << 16 | mask; \
}

/* similar to strpbrk, but only checks if any of the characters in p has been found in s; otherwise returns 0. Based on
   former tools_strpbrk which returns a pointer or NULL. does not work in 64-bit environments.
   1 % faster if there is a hit, 10 % faster if not. 2.16.12/2.26.3 */
#ifdef IS32BITALIGNED
/* Returns 1 if at least one of the characters in the \0-terminated set p occurs within the first src_len bytes of
   src, and 0 otherwise. For each character of p, src is scanned from its beginning. */
LUALIB_API int tools_hasstrchr (const char *src, const char *p, size_t src_len) {  /* p represents a SET of single chars */
#if AGN_BLOCKSIZEGT32 == 1
  int i;
#endif
  int32_t isunaligned;
  size_t l;
  unsigned char c0;
  BLOCK_T *s0, mask;
  const unsigned char *s, *c;
  s = (const unsigned char *)src;
  /* NOTE(review): the flag is determined once for the start of src and never refreshed; if it is non-zero, the
     byte loop below runs until a hit or until src is exhausted, so the block-wise path is never taken */
  isunaligned = tools_strisunaligned(s);
  c0 = 0;
  l = src_len;
  for (c=(const unsigned char *)p; *c; c++) {  /* iterate set of `needle` chars */
    c0 = uchar(*c);
    while (isunaligned) {  /* 2.26.2 optimisation */
      if (!l--) goto strpbrklabel;  /* proceed with next char in needle p, 2.16.12 patch */
      if (*s++ == c0) return 1;
    }
    if (tools_largeenough(l)) {  /* 4-/8-byte chunks instead of single chars can be compared */
      s0 = (BLOCK_T *)s;  /* 2.25.5 fix */
      createmask();  /* fills `mask` with copies of `c0`, both are referenced by name inside the macro */
#if AGN_BLOCKSIZEGT32 == 1
      /* widen the replicated pattern to the full block size */
      for (i=sizeof(BLOCK_T)*8; i < mul8(AGN_BLOCKSIZE); i <<= 1)  /* 2.17.8 tweak */
        mask = (mask << i) | mask;
#endif
      while (tools_largeenough(l)) {  /* 4/8 byte chunks can be compared */
        if (charinchunk(*s0, mask)) return 1;  /* c has been found */
        l -= AGN_BLOCKSIZE;
        s0++;
      }
      s = (unsigned char *)s0;  /* continue byte-wise behind the last full block */
    }
    while (l--) {  /* check the rest of the string, 2.16.12 patch */
      if (*s++ == c0) return 1;
    }
strpbrklabel:
    s = (const unsigned char *)src;  /* reset src pointer to its beginning */
    l = src_len;
  }  /* proceed with next char in needle p */
  return 0;
}

/* Searches character c in haystack src. n is the number of characters in haystack src to be searched.
   Returns a pointer to the first occurrence of c (converted to unsigned char) or NULL if there is none.
   May not work in 64-bit mode. Taken from musl-1.2.2, src/string/memchr.c, MIT licence; 2.26.2, 6 % faster
   than the previous implementation, 2 % faster than the built-in GCC version.
   Fix: the byte-wise lead-in loop tested the address against AGN_BLOCKSIZE, which is a size and not an alignment
   mask, so that it could stop at an address that is not aligned to a size_t word; the word-wise loop then read
   through a misaligned pointer. The lead-in now runs until the address is a multiple of sizeof(size_t). */
LUALIB_API void *tools_memchr (const void *src, int c, size_t n) {  /* 2.16.12 */
  const unsigned char *s = src;
  c = (unsigned char)c;
  /* lead-in: byte by byte until s is aligned to a size_t word */
  for (; ((uintptr_t)s & (sizeof(size_t) - 1)) && n && *s != c; s++, n--);
  if (n && *s != c) {
    typedef size_t __attribute__((__may_alias__)) word;
    const word *w;
    size_t k = ONES * c;  /* c replicated into every byte of a word */
    /* skip whole words as long as no byte of the current word equals c */
    for (w = (const void *)s; n >= SIZEST && NONULL(*w^k); w++, n -= SIZEST);
    s = (const void *)w;
  }
  /* tail, or the word that contains the hit: byte by byte */
  for (; n && *s != c; s++, n--);
  return n ? (void *)s : 0;
}

/* Compares the first l bytes of src and p. Returns 0 if they are equal, otherwise the difference of the first
   pair of differing bytes, both taken as unsigned char (src byte minus p byte). Does not work in 64-bit
   environments. */
LUALIB_API int tools_memcmp (const void *src, const void *p, size_t l) {  /* 2.16.12 */
  unsigned char *a, *b;
  a = (unsigned char *)src;
  b = (unsigned char *)p;
  if (tools_largeenough(l) && tools_stringsarealigned(a, b)) {
    /* both areas are aligned and long enough: skip equal 4- or 8-byte blocks first */
    BLOCK_T *wa, *wb;
    wa = (BLOCK_T *)a;  /* 2.25.5 fix */
    wb = (BLOCK_T *)b;  /* 2.25.5 fix */
    for (; tools_largeenough(l) && *wa == *wb; wa++, wb++) {
      l -= AGN_BLOCKSIZE;
    }
    /* either a mismatching block or the tail follows, it is examined byte-wise below */
    a = (unsigned char *)wa;
    b = (unsigned char *)wb;
  }
  for (; l--; a++, b++) {
    if (*a != *b) return *a - *b;  /* 2.26.0 change */
  }
  return 0;
}
#endif  /* of IS32BITALIGNED */


/* Search for pattern `s2' of size l2 inside haystack `s1` of size l1. Returns a pointer to the first match, s1
   itself if the pattern is empty, or NULL if there is no match, if the pattern is longer than the haystack or if
   s1 is NULL. 10 % faster than lmemfind in lstrlib.c 2.16.12; various libc memmem versions are not faster,
   including MUSL 1.2.2. */
LUALIB_API const char *tools_lmemfind (const char *s1, size_t l1, const char *s2, size_t l2) {
  const char *hit;
  size_t taillen;
  if (l2 == 0) return s1;  /* empty strings are everywhere */
  if (l2 > l1 || s1 == NULL) return NULL;  /* avoids a negative `l1' or end of string, 2.28.5 patch */
  taillen = l2 - 1;    /* the 1st char will be located by `memchr', only the rest is compared */
  l1 -= taillen;       /* `s2' cannot start after that */
  while (l1 > 0) {
    hit = (const char *)tools_memchr(s1, *s2, l1);
    if (hit == NULL) break;
    hit++;  /* 1st char is already checked */
    if (tools_memcmp(hit, s2 + 1, taillen) == 0)  /* compare the rest of the characters */
      return hit - 1;
    /* correct `l1' and `s1' to try again behind the candidate */
    l1 -= hit - s1;
    s1 = hit;
  }
  return NULL;  /* not found */
}


#define STRCHRLIMIT 64
/* Checks whether each character in p is part of alpha(bet), which represents a SET of single chars of length
   alphalen. Returns 1 if so, and 0 otherwise.
   - If IS32BITALIGNED is defined, p is processed up to its terminating \0 and plen is ignored; an empty p
     yields 0.
   - Otherwise plen characters of p are checked (one character if plen is 0), and an empty alphabet yields 0.
     Alphabets shorter than STRCHRLIMIT are searched with strchr, longer ones via a 256-entry lookup table.
   Fix: in the strchr variant, the offset of the hit was computed even if strchr returned NULL, which is undefined
   pointer arithmetic; a failed search is now detected explicitly. */
LUALIB_API int tools_strinalphabet (const char *p, const char *alpha, size_t plen, size_t alphalen) {
#ifdef IS32BITALIGNED
  /* p is the string to be checked if each character in it is part of alpha(bet); alpha(bet) represents a SET of single chars,
     plen is not used, only to provide compatibility with non-32-bit aligned platforms */
#if AGN_BLOCKSIZEGT32 == 1
  int i;
#endif
  int rc;
  int32_t isunaligned;
  size_t l;
  unsigned char c0;
  BLOCK_T *s0, mask;
  const unsigned char *s, *c;
  s = (const unsigned char *)alpha;
  isunaligned = tools_strisunaligned(s);  /* determined once for the start of alpha */
  c0 = 0;
  l = alphalen;
  rc = 0;
  for (c=(const unsigned char *)p; *c; c++) {  /* iterate set of `needle` chars */
    rc = 0;
    c0 = uchar(*c);
    while (isunaligned) {  /* byte-wise scan of the entire alphabet */
      if (!l--) {  /* alphabet exhausted without a hit */
        if (!rc) return 0;
        goto strpbrklabel;  /* proceed with next char in needle p */
      }
      if (*s++ == c0) {
        rc = 1;
        goto strpbrklabel;  /* proceed with next char in needle p */
      }
    }
    if (tools_largeenough(l)) {  /* 4-/8-byte chunks instead of single chars can be compared */
      s0 = (BLOCK_T *)s;  /* 2.25.5 fix */
      createmask();  /* fills `mask` with copies of `c0`, both are referenced by name inside the macro */
#if AGN_BLOCKSIZEGT32 == 1
      for (i=sizeof(BLOCK_T)*8; i < mul8(AGN_BLOCKSIZE); i <<= 1)
        mask = (mask << i) | mask;
#endif
      while (tools_largeenough(l)) {  /* 4/8 byte chunks can be compared */
        if (charinchunk(*s0, mask)) {  /* c has been found */
          rc = 1;
          goto strpbrklabel;
        }
        l -= AGN_BLOCKSIZE;
        s0++;
      }
      s = (unsigned char *)s0;
    }
    while (l--) {  /* check the rest of the string, 2.16.12 patch */
      if (*s++ == c0) {
        rc = 1;
        goto strpbrklabel;
      }
    }
    if (!rc) return 0;  /* current character is not part of the alphabet */
strpbrklabel:
    s = (const unsigned char *)alpha;  /* reset alpha pointer to its beginning */
    l = alphalen;
  }  /* proceed with next char in needle p */
  return rc;
}
#else
  if (plen == 0) plen++;  /* for embedded zeros or empty strings */
  if (alphalen == 0) return 0;
  if (alphalen < STRCHRLIMIT) {
    while (plen) {
      const char *hit = strchr(alpha, uchar(*p++));
      /* no hit at all, or a hit at or behind the end of the alphabet, e.g. its terminating \0 */
      if (hit == NULL || (size_t)(hit - alpha) >= alphalen) break;
      plen--;
    }
  } else {
    unsigned char in[256] = { 0 };  /* in[ch] is 1 if ch belongs to the alphabet */
    while (alphalen--) in[uchar(*alpha++)] = 1;
    while (plen && in[uchar(*p++)]) plen--;
  }
  return plen == 0;  /* all characters consumed means all of them are part of the alphabet */
}
#endif

/* Duplicates a string without crashing if the argument is NULL, which is undefined behaviour for C's strdup()
   function; in this case, NULL is returned. Otherwise the return is a new \0-terminated string whose allocated
   size has been rounded up to a multiple of AGN_BLOCKSIZE, with all bytes behind the copied characters set to
   zero, or NULL if memory could not be allocated. FREE IT !  2.17.1 */
LUALIB_API char *tools_strdup (const char *s) {
  size_t len, total, rem;
  char *copy;
  if (s == NULL) return NULL;
  len = tools_strlen(s);  /* 2.17.8 tweak */
  /* determine optimal size, including terminating \0 */
  total = len + 1;
  rem = total & AGN_ALIGNMASK;  /* 2.17.2 optimisation */
  if (rem != 0) total += AGN_BLOCKSIZE - rem;  /* round up to the next multiple of the block size */
  /* now commit memory */
  copy = (char *)malloc(total*sizeof(char));
  if (copy == NULL) return NULL;
  tools_memcpy(copy, s, len);  /* 2.26.1 fix */
  tools_bzero(copy + len, total - len);  /* set trailing zero bytes */
  return copy;
}


/* Taken from MUSL 1.2.3 library, file src/string/strstr.c */
/* Finds the two-character needle n in haystack h by sliding a 16-bit window over h. h must hold at least two
   characters in front of its terminating \0. Returns a pointer to the first match or 0 if there is none. */
static char *twobyte_strstr (const unsigned char *h, const unsigned char *n) {
  const uint16_t needle = n[0]<<8 | n[1];
  uint16_t window = h[0]<<8 | h[1];
  h++;  /* h always points to the most recent byte in the window */
  while (*h && window != needle) {
    h++;
    window = window<<8 | *h;  /* the oldest byte drops out by truncation to 16 bits */
  }
  return *h ? (char *)h - 1 : 0;
}

/* Finds the three-character needle n in haystack h by sliding a window over h that is kept in the upper three
   bytes of a 32-bit word. h must hold at least three characters in front of its terminating \0. Returns a
   pointer to the first match or 0 if there is none. */
static char *threebyte_strstr (const unsigned char *h, const unsigned char *n) {
  const uint32_t needle = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8;
  uint32_t window = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8;
  h += 2;  /* h always points to the most recent byte in the window */
  while (*h && window != needle) {
    h++;
    window = (window | *h)<<8;  /* new byte enters the vacant low byte, the oldest one is shifted out */
  }
  return *h ? (char *)h - 2 : 0;
}

/* Finds the four-character needle n in haystack h by sliding a 32-bit window over h. h must hold at least four
   characters in front of its terminating \0. Returns a pointer to the first match or 0 if there is none. */
static char *fourbyte_strstr (const unsigned char *h, const unsigned char *n) {
  const uint32_t needle = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
  uint32_t window = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
  h += 3;  /* h always points to the most recent byte in the window */
  while (*h && window != needle) {
    h++;
    window = window<<8 | *h;  /* the oldest byte is shifted out */
  }
  return *h ? (char *)h - 3 : 0;
}

/* Maximum and minimum of two values; note that each argument is evaluated more than once. */
#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))

/* Bit set helper: a is an array of words that serves as a bit set, b a bit number. The macro selects the word
   holding bit b and applies operator op (e.g. |= to set the bit, & to test it) with the mask of that bit. */
#define BITOP(a,b,op) \
 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))

/* Finds needle n in haystack h, both \0-terminated; fallback of tools_strstr for needles of more than four
   characters. Taken from the MUSL strstr implementation. Returns a pointer to the first match or 0 if there is
   none.
   The function first determines the needle length along with a set of the bytes occurring in the needle and a
   per-byte shift table, then computes a split position ms and a period p of the needle, and finally scans the
   haystack, comparing the part of the needle right of the split position before the part left of it. */
static char *twoway_strstr (const unsigned char *h, const unsigned char *n) {
  const unsigned char *z;
  size_t l, ip, jp, k, p, ms, p0, mem, mem0;
  size_t byteset[32 / sizeof(size_t)] = {0};  /* 256-bit set: which byte values occur in the needle */
  size_t shift[256];  /* for each byte value in the needle: position of its last occurrence, plus one */
  /* Computing length of needle and fill shift table */
  for (l=0; n[l] && h[l]; l++)
    BITOP(byteset, n[l], |=), shift[n[l]] = l + 1;
  if (n[l]) return 0; /* hit the end of h */
  /* Compute maximal suffix */
  ip = -1; jp = 0; k = p = 1;  /* ip is unsigned: -1 wraps around, ip + k wraps back to k - 1 */
  while (jp + k < l) {
    if (n[ip + k] == n[jp + k]) {
      if (k == p) {
        jp += p;
        k = 1;
      } else k++;
    } else if (n[ip + k] > n[jp + k]) {
      jp += k;
      k = 1;
      p = jp - ip;
    } else {
      ip = jp++;
      k = p = 1;
    }
  }
  ms = ip;
  p0 = p;
  /* And with the opposite comparison */
  ip = -1; jp = 0; k = p = 1;
  while (jp + k < l) {
    if (n[ip + k] == n[jp + k]) {
      if (k == p) {
        jp += p;
        k = 1;
      } else k++;
    } else if (n[ip + k] < n[jp + k]) {
      jp += k;
      k = 1;
      p = jp - ip;
    } else {
      ip = jp++;
      k = p = 1;
    }
  }
  /* keep the larger of the two split positions along with its period */
  if (ip + 1 > ms+1) ms = ip;
  else p = p0;
  /* Periodic needle? */
  if (tools_memcmp(n, n + p, ms + 1)) {
    /* not periodic: nothing can be remembered between attempts, use a larger shift */
    mem0 = 0;
    p = MAX(ms, l - ms - 1) + 1;
  } else mem0 = l - p;
  mem = 0;
  /* Initialize incremental end-of-haystack pointer */
  z = h;
  /* Search loop */
  for (;;) {
    /* Update incremental end-of-haystack pointer */
    if (z - h < l) {
      /* Fast estimate for MAX(l,63) */
      size_t grow = l | 63;
      const unsigned char *z2 = tools_memchr(z, 0, grow);
      if (z2) {
        z = z2;  /* terminating \0 of the haystack found */
        if (z - h < l) return 0;  /* remaining haystack is shorter than the needle */
      } else z += grow;
    }
    /* Check last byte first; advance by shift on mismatch */
    if (BITOP(byteset, h[l - 1], &)) {
      k = l-shift[h[l - 1]];
      if (k) {
        if (k < mem) k = mem;
        h += k;
        mem = 0;
        continue;
      }
    } else {
      /* byte does not occur in the needle at all: skip the whole window */
      h += l;
      mem = 0;
      continue;
    }
    /* Compare right half */
    for (k=MAX(ms + 1,mem); n[k] && n[k] == h[k]; k++);
    if (n[k]) {
      h += k-ms;
      mem = 0;
      continue;
    }
    /* Compare left half */
    for (k=ms + 1; k>mem && n[k - 1] == h[k - 1]; k--);
    if (k <= mem) return (char *)h;  /* both halves match */
    h += p;
    mem = mem0;
  }
}

/* Finds the first occurrence of needle n in haystack h, both \0-terminated. Returns a pointer to the match, h
   itself if the needle is empty, or NULL if there is no match. This MUSL 1.2.3 implementation is a little bit
   slower than GCC's implementation. Not exposed. 2.29.3 */
LUALIB_API char *tools_strstr (const char *h, const char *n) {
  const char *start;
  /* Return immediately on empty needle */
  if (n[0] == '\0') return (char *)h;
  /* Use faster algorithms for short needles: locate the first needle character, then dispatch on the needle
     length; the checks on `start` ensure that the haystack is at least as long as the needle */
  start = strchr(h, *n);
  if (start == NULL || n[1] == '\0') return (char *)start;
  if (start[1] == '\0') return 0;
  if (n[2] == '\0') return twobyte_strstr((void *)start, (void *)n);
  if (start[2] == '\0') return 0;
  if (n[3] == '\0') return threebyte_strstr((void *)start, (void *)n);
  if (start[3] == '\0') return 0;
  if (n[4] == '\0') return fourbyte_strstr((void *)start, (void *)n);
  return twoway_strstr((void *)start, (void *)n);
}


/* Finds the first occurrence of the \0-terminated pattern p within the first slen characters of s. Returns a
   pointer to the match, s itself if the pattern is empty, or NULL if there is no match. The search is bounded by
   slen only and does not stop at a \0 in s.
   Taken from https://stackoverflow.com/questions/23999797/implementing-strnstr, written by chqrlie; 2.37.7 */
LUALIB_API char *tools_strnstr (const char *s, const char *p, size_t slen) {
  /* simplistic algorithm with O(n2) worst case */
  size_t i, plen;
  const char first = *p;
  if (first == '\0') return (char *)s;  /* pattern is the empty string */
  plen = tools_strlen(p);
  while (plen <= slen) {  /* the pattern still fits into the rest of s */
    if (*s == first) {
      i = 1;
      while (i < plen && s[i] == p[i]) i++;
      if (i == plen) return (char *)s;  /* all characters matched */
    }
    slen--;
    s++;
  }
  return NULL;
}


/* Duplicates the first l bytes of s. 2.27.0
   Returns a new block whose size is l + 1 rounded up to a multiple of AGN_BLOCKSIZE; all bytes behind the l
   copied ones are set to zero, so the result is \0-terminated. Returns NULL if s is NULL, if memory could not be
   allocated, or if l is so large that the padded size would not fit into a size_t. FREE IT !
   Note that exactly l bytes are copied regardless of any \0 in s, so s must provide at least l readable bytes. */
LUALIB_API char *tools_strndup (const char *s, size_t l) {  /* 2.27.0 */
  char *buf;
  size_t rem, len;
  if (!s) return NULL;
  /* l + 1 + padding must not wrap around, otherwise too small a block would be allocated and overrun below */
  if (l > SIZE_MAX - AGN_BLOCKSIZE) return NULL;
  len = l;
  /* determine optimal size, including terminating \0 */
  rem = (++l) & AGN_ALIGNMASK;  /* `aligned` length including \0 */
  l += (rem != 0)*(AGN_BLOCKSIZE - rem);  /* round up to the next multiple of the block size */
  /* now commit memory */
  buf = (char *)malloc(l*sizeof(char));
  if (buf == NULL) return NULL;
  /* tools_memcpy definition in *.h assures that tools_memcpy defaults to memcpy on non-word-aligned platforms */
  tools_memcpy(buf, s, len);
  tools_bzero(buf + len, l - len);  /* set trailing zero bytes */
  return buf;
}


/* from http://www.manpagez.com/man/3/strsep/

   strsep -- separate strings

   The strsep() function locates, in the string referenced by *str, the
   first occurrence of any character in the string delim (or the terminating
   `\0' character) and replaces it with a `\0'.  The location of the next
   character after the delimiter character (or NULL, if the end of the string
   was reached) is stored in *str. The original value of *str is returned.

   An ``empty'' field (i.e., a character in the string delim occurs as the
   first character of *str) can be detected by comparing the location
   referenced by the returned pointer to `\0'.

   n will include the length of the token found, without any terminating \0.

   If *str is initially NULL, strsep() returns NULL.

   HISTORY

   The strsep() function is intended as a replacement for the strtok() function.
   While the strtok() function should be preferred for portability reasons (it
   conforms to ISO/IEC 9899:1990 (``ISO C90'')) it is unable to handle empty fields,
   i.e., detect fields delimited by two adjacent delimiter characters, or to be used
   for more than a single string at a time. The strsep() function first appeared in 4.4BSD.

   EXAMPLES

   The following uses strsep() to parse a string, and prints each token in separate line:

     char *token, *string, *tofree;
     size_t n;

     tofree = string = strdup("abc,def,ghi");  // chain to prevent memory leaks !
     assert(string != NULL);

     while ((token = strsep(&string, ",", &n)) != NULL)
       printf("%s\n", token);

     free(tofree);

   The following uses strsep() to parse a string, containing tokens delimited by white space, into an argument vector:

     char **ap, *argv[10], *inputstring;
     size_t n;

     for (ap = argv; (*ap = strsep(&inputstring, " \t", &n)) != NULL;)
       if (**ap != '\0')
         if (++ap >= &argv[10])
           break;
*/

/* strsep variant that also reports the token length in *l (without the terminating \0).
   - *str == NULL: returns NULL, *l = 0.
   - a delimiter is found: it is overwritten with \0, *str is advanced behind it and the token start is returned
     (an empty token is returned as a pointer to an empty string).
   - no delimiter is found: *str is set to NULL and the rest of the string is returned; contrary to BSD strsep, an
     EMPTY rest yields NULL instead of an empty token.
   An empty delim never matches anything. */
LUALIB_API char *tools_strsep (char **str, const char *delim, size_t *l) {
  char *token, *hit;
  *l = 0;
  token = *str;
  if (token == NULL) return NULL;
  if (delim[0] == '\0')
    hit = NULL;  /* nothing to search for */
  else if (delim[1] == '\0')
    hit = strchr(token, delim[0]);  /* single delimiter: cheaper than strpbrk */
  else
    hit = strpbrk(token, delim);
  if (hit != NULL) {
    *l = hit - token;
    *hit = '\0';
    *str = hit + 1;
    return token;
  }
  /* last field: consume the remainder */
  *str = NULL;
  *l = strlen(token);
  return (*l == 0) ? NULL : token;  /* empty remainder: we are really at the end */
}


/* Allocates a fresh block of at least newsize + CHARSIZE bytes, i.e. there is always room for a terminating \0.
   The byte count is padded up to the next AGN_BLOCKSIZE boundary (as determined by AGN_ALIGNMASK). If zero is
   non-zero, the complete block is cleared. Returns NULL if memory could not be allocated. */
LUALIB_API void *tools_malloc (size_t newsize, int zero) {  /* 2.27.0 */
  void *block;
  size_t excess;
  newsize += CHARSIZE;  /* reserve the slot for \0 */
  excess = newsize & AGN_ALIGNMASK;
  if (excess != 0) newsize += AGN_BLOCKSIZE - excess;  /* pad to block boundary */
  block = (void *)malloc(newsize);  /* newsize is never zero here */
  if (block == NULL) return NULL;
  /* a length of 0 is passed if no clearing has been requested, which leaves the block untouched:
     https://stackoverflow.com/questions/11402058/memset-used-with-zero-length-parameter-ignore-or-watch-out */
  tools_bzero(block, zero ? newsize : 0);
  return block;
}


/* Resizes the block buf to newsize bytes, padded up to the next AGN_BLOCKSIZE boundary (as determined by
   AGN_ALIGNMASK). Contrary to tools_malloc and tools_memalloc, NO extra byte for a terminating \0 is reserved;
   the caller has to include it in newsize if needed.

   buf      block to be resized; if NULL, nothing is done and NULL is returned
   oldsize  number of bytes of the old block that carry data
   newsize  requested new size in bytes
   zero     if non-zero and the block has grown, the bytes from offset oldsize up to the padded end are cleared

   Returns the (possibly moved) block, or NULL on failure, in which case buf is left untouched and still has to be
   freed by the caller. Caution: if the padded newsize is 0, realloc may free buf and return NULL. */
LUALIB_API void *tools_realloc (void *buf, size_t oldsize, size_t newsize, int zero) {  /* 2.27.0 */
  size_t rem;
  void *new;
  if (!buf) return NULL;
  /* determine optimal `aligned` size */
  rem = newsize & AGN_ALIGNMASK;
  newsize += (rem != 0)*(AGN_BLOCKSIZE - rem);
  /* now commit memory */
  new = realloc(buf, newsize);
  if (!new) return NULL;  /* do not destroy buf if something went wrong */
  if (zero && newsize > oldsize)  /* byte-wise offset: arithmetic on void * is a GNU extension, not ISO C */
    tools_bzero((char *)new + oldsize, newsize - oldsize);  /* set trailing zero bytes */
  return new;
}


/* Creates or resizes a memory block.

   If a new block shall be created (malloc), set buf to NULL, oldsize to 0 and newsize to the block size.
   If a block shall be resized (realloc), pass the block for buf, and the old and new sizes.
   The function always reserves CHARSIZE additional bytes for a terminating \0 and pads the total up to the next
   AGN_BLOCKSIZE boundary (as determined by AGN_ALIGNMASK).

   If zero is non-zero, a new block is cleared completely, and a grown block is cleared from offset oldsize up to
   its padded end. Any non-zero value of zero is treated alike. When a new block is created, oldsize is ignored.

   Returns the block, or NULL on failure; on a failed resize buf is left untouched and still has to be freed. */
LUALIB_API void *tools_memalloc (void *buf, size_t oldsize, size_t newsize, int zero) {  /* 2.27.0 */
  size_t rem;
  /* determine optimal `aligned` size, including terminating \0 */
  newsize += CHARSIZE;  /* space for terminating \0 */
  rem = newsize & AGN_ALIGNMASK;
  newsize += (rem != 0)*(AGN_BLOCKSIZE - rem);
  if (buf == NULL) {  /* malloc */
    buf = malloc(newsize);  /* newsize will always be non-zero */
    if (!buf) return NULL;
    oldsize = 0;  /* a fresh block has no old content; protects against newsize - oldsize wrapping around */
  } else {  /* realloc */
    void *new = realloc(buf, newsize);
    if (!new) return NULL;  /* do not destroy buf if something went wrong */
    buf = new;
    if (newsize <= oldsize) zero = 0;  /* block did not grow: nothing to clear */
  }
  /* zero is used as a flag only, never as a factor for the length; the offset is computed byte-wise since
     arithmetic on void * is a GNU extension, not ISO C */
  if (zero)
    tools_bzero((char *)buf + oldsize, newsize - oldsize);  /* set trailing zero bytes */
  return buf;
}


#if defined(__GNUC__) && defined(__INTEL)
/* memset replacement based on the x86 string instruction `rep stosb`.
   Taken from https://forum.osdev.org/viewtopic.php?f=1&t=32866; reported to be 10 % faster than MinGW GCC's memset
   on Win10 x64 (the size limit the original note referred to has been lost).

   d  destination, passed in the DI register (constraint "D")
   v  fill value, passed in the A register (constraint "a"); stosb stores its lowest byte only
   n  number of bytes to set, passed in the C register (constraint "c")

   Returns the original value of d. d and n are also declared as outputs since the instruction advances the
   destination register and counts the counter register down; the "memory" clobber tells the compiler that memory
   has been written. NOTE(review): presumably relies on the direction flag being clear on entry, as guaranteed by
   the usual x86 ABIs - confirm. */
LUALIB_API void *asm_memset (void *d, int v, size_t n) {
  void *t = d;  /* keep the start address, d is modified by the instruction */
  __asm__ volatile (
    "rep stosb"
    :"=D"(d),"=c"(n)
    :"0"(d),"a"(v),"1"(n)
    :"memory"
  );
  return t;
}
#endif


#define SZINT  ((int)sizeof(lua_Integer))
/* Decodes the size bytes starting at str into an integer.

   str       raw bytes, at least size of them must be readable
   islittle  non-zero: str[0] is the least significant byte; zero: str[size - 1] is the least significant byte
   size      number of encoded bytes; may be smaller or larger than sizeof(lua_Integer)
   issigned  non-zero: the encoded value is a two's complement number
   rc        status: set to 1 on success and to 0 if the encoded value does not fit into a lua_Integer

   If size is smaller than a lua_Integer and issigned is set, the result is sign-extended. If size is larger,
   only the SZINT least significant bytes are taken and all the remaining bytes must be pure padding (0, or
   all bits set for a negative signed value); otherwise the function fails and returns 0. */
LUALIB_API lua_Integer lua_unpackint (const char *str, int islittle, int size, int issigned, int *rc) {  /* 2.27.2 */
  lua_Unsigned res;
  int i, limit;
  limit = (size  <= SZINT) ? size : SZINT;
  res = 0;
  for (i = limit - 1; i >= 0; i--) {  /* from most to least significant byte */
    res <<= CHAR_BIT;  /* usually CHAR_BIT = 8 */
    res |= (lua_Unsigned)(unsigned char)str[islittle ? i : size - 1 - i];
  }
  if (size < SZINT) {  /* real size smaller than lua_Integer? */
    if (issigned) {  /* needs sign extension? */
      lua_Unsigned mask = (lua_Unsigned)1 << (size*CHAR_BIT - 1);
      res = ((res ^ mask) - mask);  /* do sign extension */
    }
  }
  else if (size > SZINT) {  /* must check unread bytes */
    int mask = (!issigned || (lua_Integer)res >= 0) ? 0 : (1 << CHAR_BIT) - 1;  /* usually unsigned mask (1 << CHAR_BIT) - 1 = 0xFF */
    for (i = limit; i < size; i++) {
      if (l_unlikely((unsigned char)str[islittle ? i : size - 1 - i] != mask)) {
        *rc = 0;  /* report the failure explicitly instead of leaving *rc untouched */
        return 0;
      }
    }
  }
  *rc = 1;
  return (lua_Integer)res;
}

#ifdef IS32BIT
/* 32-bit variant: converts the final chunk of a string - the one that contains the terminating \0 - into an
   unsigned 4-byte value. s is duplicated first so that lua_unpackint can safely read sizeof(uint32_t) bytes.
   tobigendian selects the byte order (passed on inverted as the islittle flag). Returns 0 if the temporary copy
   could not be allocated. */
static lua_Integer getlastchunk (const char *s, int tobigendian) {  /* changed 2.27.5 */
  char *new;
  lua_Integer q;
  int rc = 0;
  /* prevent invalid reads in lua_unpackint by ensuring a null-terminated string; 2.27.3 optimisation */
  new = tools_strdup((const char *)s);
  if (new == NULL) return 0;  /* out of memory: do not hand a NULL pointer to lua_unpackint */
  q = lua_unpackint((char *)new, !tobigendian, sizeof(uint32_t), 0, &rc);
  xfree(new);
  (void)rc;  /* with size == sizeof(uint32_t) the status carries no information that is needed here */
  return q;
}
#else
/* 64-bit variant: converts the final chunk of a string - the one that contains the terminating \0 - into two
   uint32_t's.

   p            start of the chunk; its bytes are read one by one up to the first \0, at most AGN_BLOCKSIZE of them
   tobigendian  requested byte order of the result
   low          receives the value that has to be pushed first
   rc           -1: the chunk has at least sizeof(uint32_t) padding bytes, only *low is significant and the
                    function returns 0;
                 0: both *low and the return value are significant.

   The scan is limited to AGN_BLOCKSIZE bytes: without the limit a chunk that contains no \0 would be read beyond
   its end, the number of padding bytes would wrap around and the subsequent shift count would be out of range. */
static lua_Integer getlastchunk (uint64_t *p, int tobigendian, uint32_t *low, int *rc) {
  unsigned char *s;
  uint32_t hx;
  uint64_t l, last, nzeros;
  ieee_uint_shape_type v;
  l = 0;
  s = (unsigned char*)p;
  last = 0;
  while (l < AGN_BLOCKSIZE && *s) {  /* first byte read ends up most significant */
    last = (last << 8) | *s++;  /* 8, not AGN_BLOCKSIZE ! */
    l++;
  }
  /* fill up uint32_t with zeros */
  nzeros = AGN_BLOCKSIZE - l;
  *rc = nzeros >= sizeof(uint32_t) ? -1 : 0;  /* -1: do not push second number, 0 push second number; 2.27.3a fix */
  if (*rc == -1) {
    if (tobigendian) {
      last = last << mul8(nzeros);  /* = x << 3 */
      *low = (uint32_t)((last & 0xFFFFFFFF00000000ULL) >> 32);
    } else {
       *low = (uint32_t)last;
    }
    return 0;
  }
  last = last << mul8(nzeros);  /* = x << 3 */
  /* represent the rest of the string with the same byte order as with *p above */
#if BYTE_ORDER != BIG_ENDIAN
  if (!tobigendian) tools_swapuint64_t(&last);
#endif
  v.val = last;
  if (tobigendian) {
    hx = v.parts.lx;
    *low = v.parts.hx;
  } else {
    hx = v.parts.hx;
    *low = v.parts.lx;
  }
  return hx;
}
#endif


#ifdef IS32BIT
/* 32-bit variant: converts the next (up to) four bytes of src into a uint32_t.
   rc = 1: a complete chunk has been converted, more to come; rc = 0: this was the last chunk, i.e. the one with the
   terminating \0; rc = -1: invalid call (src is NULL or empty), 0 is returned.
   l is the number of bytes still to be processed, tobigendian selects the byte order of the result. */
LUALIB_API uint32_t tools_strtouint32 (const char *src, size_t l, int *rc, int tobigendian) {  /* 2.25.2, extended 2.27.5 */
  int mustswap;
  if (src == NULL || tools_streq(src, "")) {
    *rc = -1;  /* 2.27.3 fix */
    return 0;
  }
  /* a word read from memory has to be byte-swapped if the requested order differs from the native one */
#if BYTE_ORDER == BIG_ENDIAN
  mustswap = !tobigendian;
#else
  mustswap = tobigendian;
#endif
  if (tools_strisaligned(src)) {
    uint32_t *word = (uint32_t *)(unsigned char *)src;
    if (tools_largeenough(l) && NONULL(*word)) {  /* complete 4-byte chunk ? */
      *rc = 1;  /* more to come */
      return mustswap ? tools_swapuint32(*word) : (uint32_t)*word;
    }
  }
  /* unaligned input, or chunk containing terminating \0 */
  *rc = 0;  /* last chunk */
  return getlastchunk(src, tobigendian);
}
#else
/* 64-bit variant: converts the next (up to) eight bytes of src into two uint32_t's; the function result and *low.
   rc = -1: do not push second number, 0: push second number, 1: nothing to push
   src must not be NULL, empty or unaligned (as reported by tools_strisunaligned), otherwise rc is set to 1, *low to 0
   and 0 is returned. l is the number of bytes still to be processed. */
LUALIB_API uint32_t tools_strtouint32 (const char *src, size_t l, int *rc, uint32_t *low, int tobigendian) {  /* 2.25.5, extended 2.27.5 */
  unsigned char *s;
  uint64_t *p;
  uint32_t hx, lx;
  ieee_uint_shape_type v;
  s = (unsigned char *)src;
  if (!src || tools_streq(src, "") || tools_strisunaligned(s)) {
    *low = 0;  /* 2.27.3 fix */
    *rc = 1;  /* push nothing */
    return 0;
  }
  p = (uint64_t *)s;
  if (tools_largeenough(l) && NONULL(*p)) {  /* complete 8-byte chunk ? */
    /* split the 8-byte word into its two 4-byte parts */
    v.val = *p;
    hx = v.parts.hx;
    lx = v.parts.lx;
#if BYTE_ORDER == BIG_ENDIAN
    tools_swapuint32_t(&hx);  /* convert to Little Endian */
    tools_swapuint32_t(&lx);  /* convert to Little Endian */
#endif
    /* NOTE(review): the swap for tobigendian is compiled on non-Big Endian hosts only, so on Big Endian hosts
       tobigendian has no effect in this branch; the 64-bit tools_strtouint32s applies it on all hosts - confirm
       which behaviour is intended. */
#if BYTE_ORDER != BIG_ENDIAN
    if (tobigendian) {
      tools_swapuint32_t(&hx);
      tools_swapuint32_t(&lx);
    }
#endif
    *low = lx;
    *rc = 0;  /* both numbers are significant */
  } else {  /* chunk containing terminating \0 */
    hx = getlastchunk(p, tobigendian, low, rc);
    /* rc = -1: do not push second number, 0 push second number */
  }
  return hx;
}
#endif


/* Converts a string src of length l to an array of uint32_t's; automatically word-aligns src. chunks
   is the number of word-aligned 4/8-byte chunks allocated. */
#ifdef IS32BIT
/* 32-bit variant. Returns a malloc'ed array of uint32_t's, or NULL if src is NULL, unaligned (as reported by
   tools_strisunaligned) or if memory could not be allocated; *chunks is set to 0 when NULL is returned because of
   an invalid src. On success *chunks is the number of array elements filled. FREE the result ! */
LUALIB_API uint32_t *tools_strtouint32s (const char *src, size_t l, size_t *chunks, int tobigendian) {  /* 2.17.2 */
  size_t c;
  unsigned char *s;
  uint32_t *res, *p;
  int rc;  /* not used, see the cast to void at the end */
#if BYTE_ORDER == BIG_ENDIAN
  uint32_t t;
#endif
  *chunks = 0;
  if (!src || tools_strisunaligned(src)) return NULL;
  s = (unsigned char *)src;
  /* NOTE(review): tools_optstrlen presumably stores the number of blocks needed for l bytes plus \0 in *chunks;
     its definition is not part of this section - confirm */
  tools_optstrlen(l, chunks);
  res = (uint32_t *)malloc((*chunks)*sizeof(uint32_t));
  if (res == NULL) return NULL;  /* 4.11.5 fix */
  if (l != 0 && (l & AGN_ALIGNMASK) == 0) (*chunks)--;  /* has no lasting effect: *chunks is overwritten below */
  c = 0;
  p = (uint32_t *)s;  /* 2.25.5 fix */
  while (tools_largeenough(l)) {  /* convert all the complete chunks, swapping bytes if the order requested differs */
#if BYTE_ORDER == BIG_ENDIAN
    res[c] = (tobigendian) ? (uint32_t)*p : tools_swapuint32(*p);
#else
    res[c] = (tobigendian) ? tools_swapuint32(*p) : (uint32_t)*p;
#endif
    p++; l -= AGN_BLOCKSIZE; c++;
  }
  /* the rest, including the terminating \0, always becomes a final element */
  src = (const char *)p;
  res[c] = getlastchunk(src, tobigendian);
  *chunks = c + 1;
  (void)rc;
  return res;
}
#else
/* 64-bit variant. The string is read in 8-byte words, each of which contributes two uint32_t's to the result.
   Returns a malloc'ed array, or NULL if src is NULL, unaligned (as reported by tools_strisunaligned) or if memory
   could not be allocated; *chunks is set to 0 when NULL is returned because of an invalid src. On success *chunks
   is the number of significant array elements. FREE the result ! */
LUALIB_API uint32_t *tools_strtouint32s (const char *src, size_t l, size_t *chunks, int tobigendian) {  /* 64-bit, 2.25.5 */
  size_t c, lrest;
  unsigned char *s;
  uint32_t *res;
  ieee_uint_shape_type v;
#if BYTE_ORDER == BIG_ENDIAN
  uint64_t t;
#endif
  uint64_t *p, last;
  *chunks = 0;  /* 2.27.3a fix */
  s = (unsigned char *)src;
  if (!src || tools_strisunaligned(s)) return NULL;
  /* NOTE(review): tools_optstrlen presumably stores the number of 8-byte blocks needed for l bytes plus \0 in
     *chunks; its definition is not part of this section - confirm */
  tools_optstrlen(l, chunks);
  /* two uint32_t's per block; one less if l is a multiple of the block size */
  *chunks = 2*(*chunks) - (l % AGN_BLOCKSIZE == 0);
  res = (uint32_t *)malloc((*chunks)*sizeof(uint32_t));
  if (res == NULL) return NULL;  /* 4.11.5 fix */
  if (l != 0 && (l & AGN_ALIGNMASK) == 0) (*chunks)--;  /* has no lasting effect: *chunks is overwritten below */
  c = 0;
  p = (uint64_t *)s;
  while (tools_largeenough(l)) {  /* complete 8-byte words: store the lx part first, then the hx part */
    v.val = *p++;
    res[c] = v.parts.lx;
#if BYTE_ORDER == BIG_ENDIAN
    res[c] = tools_swapuint32(res[c]);  /* convert to Little Endian */
#endif
    if (tobigendian) {
      res[c] = tools_swapuint32(res[c]);
    }
    res[++c] = v.parts.hx;
#if BYTE_ORDER == BIG_ENDIAN
    res[c] = tools_swapuint32(res[c]);  /* convert to Little Endian */
#endif
    if (tobigendian) {
      res[c] = tools_swapuint32(res[c]);
    }
    l -= AGN_BLOCKSIZE; c++;
  }
  lrest = AGN_BLOCKSIZE - l;  /* number of padding bytes needed to complete the last word */
  size_t offset = 0;
  if (lrest != AGN_BLOCKSIZE) {  /* we have a non 8-byte rest */
    s = (unsigned char*)p;
    last = 0;
    while (l--) {  /* collect the remaining bytes, the first byte read ends up most significant */
      last = (last << 8) | *s++;  /* 8, not AGN_BLOCKSIZE ! */
    }
    /* fill up uint64_t with zeros */
    last = last << mul8(lrest);
    /* represent the rest of the string with the same byte order as with *p above */
    tools_swapuint64_t(&last);
    v.val = last;
    res[c++] = v.parts.lx;
    res[c] = v.parts.hx;
    if (tobigendian) {
      res[c - 1] = tools_swapuint32(res[c - 1]);
      res[c] = tools_swapuint32(res[c]);
    }
    /* if at least four padding bytes were needed, the second number is not counted in *chunks */
    offset = (lrest >= sizeof(uint32_t)) && (lrest < AGN_BLOCKSIZE);
    c++;
  }
  *chunks = c - offset;
  return res;
}
#endif


#define ispathsep(c)   ((c) == '/' || (c) == '\\')

/* Returns the directory part of `path`, i.e. everything in front of the path separator that precedes the last path
   component, as a newly allocated string; trailing separators are ignored, so `dir/file` and `dir/file/` give the
   same result. Contrary to GCC's dirname(), the result is always a separate string, so subsequent calls do not
   overwrite earlier results. Note that str_charreplace is applied to `path` itself to turn backslashes into
   slashes. Returns NULL if str_substr reports an error, e.g. a memory allocation error. FREE IT ! */
LUALIB_API char *tools_getdirname (char *path) {  /* 2.17.2 */
  char *cur, *subs;
  size_t l;
  int e;
  l = tools_strlen(path);  /* 2.17.8 tweak */
  str_charreplace(path, '\\', '/', 0);
  /* set cursor to the last character; starting at the terminating '\0' - which is no path separator - would
     prevent the trailing slashes from ever being skipped */
  cur = (l != 0) ? path + l - 1 : path;
  while (path < cur && ispathsep(*cur)) cur--;   /* remove trailing slashes */
  while (path < cur && !ispathsep(*cur)) cur--;  /* remove file name */
  subs = str_substr(path, 0, cur - path - 1, &e);  /* everything in front of the separator found */
  return (e != 0) ? NULL: subs;  /* NULL: memory allocation error */
}


/* @(#)e_j1.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __ieee754_j1(x), __ieee754_y1(x)
 * Bessel function of the first and second kinds of order one.
 * Method -- j1(x):
 *  1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ...
 *  2. Reduce x to |x| since j1(x)=-j1(-x),  and
 *     for x in (0,2)
 *    j1(x) = x/2 + x*z*R0/S0,  where z = x*x;
 *     (precision:  |j1/x - 1/2 - R0/S0 |<2**-61.51 )
 *     for x in (2,inf)
 *     j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1))
 *     y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1))
 *      where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1)
 *     as follow:
 *    cos(x1) =  cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
 *      =  1/sqrt(2) * (sin(x) - cos(x))
 *    sin(x1) =  sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
 *      = -1/sqrt(2) * (sin(x) + cos(x))
 *      (To avoid cancellation, use
 *    sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
 *       to compute the worse one.)
 *
 *  3 Special cases
 *    j1(nan)= nan
 *    j1(0) = 0
 *    j1(inf) = 0
 *
 * Method -- y1(x):
 *  1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN
 *  2. For x<2.
 *     Since
 *    y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...)
 *     therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function.
 *     We use the following function to approximate y1,
 *    y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2
 *     where for x in [0,2] (abs err less than 2**-65.89)
 *    U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4
 *    V(z) = 1  + v0[0]*z + ... + v0[4]*z^5
 *     Note: For tiny x, 1/x dominate y1 and hence
 *    y1(tiny) = -2/pi/tiny, (choose tiny<2**-54)
 *  3. For x>=2.
 *     y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1))
 *      where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1)
 *     by method mentioned above.
 */

static double pone (double), qone (double);

/* Scalar constants shared by the Bessel functions of order zero and one in this file. The hexadecimal pairs in the
   comments are the high and low 32-bit words of the respective double. */
static const double
invsqrtpi=  5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */  /* 1/sqrt(pi) */
tpi      =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */  /* 2/pi */
     /* R0/S0 on [0, 2.00]; R02 .. S04 are evaluated by sun_j0 */
R02  =  1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
R03  = -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
R04  =  1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
R05  = -4.61832688532103189199e-09, /* 0xBE33D5E7, 0x73D63FCE */
S01  =  1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
S02  =  1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
S03  =  5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
S04  =  1.16614003333790000205e-09, /* 0x3E1408BC, 0xF4745D8F */
     /* r00 .. s05 are evaluated by sun_j1 for |x| < 2 */
r00  = -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
r01  =  1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
r02  = -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
r03  =  4.96727999609584448412e-08, /* 0x3E6AAAFA, 0x46CA0BD9 */
s01  =  1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
s02  =  1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
s03  =  1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
s04  =  5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
s05  =  1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */

/* Bessel function of the first kind of order one.
   |x| >= 2: asymptotic form invsqrtpi*(pone*cc - qone*ss)/sqrt(|x|), with the sign of x restored at the end;
   |x| <  2: x/2 plus a rational approximation in z = x*x;
   inf and NaN arguments return one/x. */
LUALIB_API double sun_j1 (double x) {  /* 2.17.4 */
  double ax, sn, cs, ss, cc, pu, qv, z, num, den;
  int32_t hx, ix;
  GET_HIGH_WORD(hx, x);
  ix = hx & 0x7fffffff;  /* high word of |x| */
  if (ix >= 0x7ff00000) return one/x;  /* x is inf or NaN */
  if (ix >= 0x40000000) {  /* |x| >= 2.0 */
    ax = fabs(x);
    sn = sun_sin(ax);
    cs = sun_cos(ax);
    ss = -sn - cs;
    cc = sn - cs;
    if (ix < 0x7fe00000) {  /* make sure ax+ax not overflow */
      /* recompute the term that suffers from cancellation from cos(2x) */
      z = sun_cos(ax + ax);
      if ((sn*cs) > zero)
        cc = z/ss;
      else
        ss = z/cc;
    }
    /*
     * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
     * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
     */
    if (ix > 0x48000000) {
      z = (invsqrtpi*cc)/sqrt(ax);
    } else {
      pu = pone(ax);
      qv = qone(ax);
      z = invsqrtpi*(pu*cc - qv*ss)/sqrt(ax);
    }
    return (hx < 0) ? -z : z;  /* j1 is an odd function */
  }
  if (ix < 0x3e400000 && huge + x > one)  /* |x|<2**-27; inexact if x!=0 necessary */
    return 0.5*x;
  z = x*x;
  num = z*(r00 + z*(r01 + z*(r02 + z*r03)));
  den = one + z*(s01 + z*(s02 + z*(s03 + z*(s04 + z*s05))));
  num *= x;
  return x*0.5 + num/den;
}

/* Coefficients of the rational approximation U(z)/V(z), z = x*x, evaluated by sun_y1 for |x| < 2:
   U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4 */
static const double U0[5] = {
 -1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */
  5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */
 -1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */
  2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */
 -9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */
};

/* V(z) = 1 + V0[0]*z + ... + V0[4]*z^5 */
static const double V0[5] = {
  1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */
  2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */
  1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */
  6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */
  1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */
};

/* Bessel function of the second kind of order one.
   Special cases: one/(x + x*x) for inf and NaN, -one/zero for x = 0, zero/zero for x < 0.
   x <  2: -tpi/x for tiny x, else x*U(z)/V(z) + tpi*(j1(x)*ln(x) - 1/x) with z = x*x;
   x >= 2: asymptotic form invsqrtpi*(pone*ss + qone*cc)/sqrt(x). */
LUALIB_API double sun_y1 (double x) {  /* 2.17.4 */
  double sn, cs, ss, cc, pu, qv, z, num, den;
  int hx, ix, lx;
  EXTRACT_WORDS(hx, lx, x);
  ix = 0x7fffffff & hx;
  /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
  if (ix >= 0x7ff00000) return one/(x + x*x);
  if ((ix | lx) == 0) return -one/zero;  /* x = 0 */
  if (hx < 0) return zero/zero;          /* x < 0 */
  if (ix < 0x40000000) {  /* x < 2.0 */
    if (ix <= 0x3c900000)  /* x < 2**-54 */
      return -tpi/x;
    z = x*x;
    num = U0[0] + z*(U0[1] + z*(U0[2] + z*(U0[3] + z*U0[4])));
    den = one + z*(V0[0] + z*(V0[1] + z*(V0[2] + z*(V0[3] + z*V0[4]))));
    return x*(num/den) + tpi*(sun_j1(x)*sun_log(x) - one/x);
  }
  /* x >= 2.0 */
  sn = sun_sin(x);
  cs = sun_cos(x);
  ss = -sn - cs;
  cc = sn - cs;
  if (ix < 0x7fe00000) {  /* make sure x+x not overflow */
    z = sun_cos(x + x);
    if ((sn*cs) > zero)
      cc = z/ss;
    else
      ss = z/cc;
  }
  /* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
   * where x0 = x-3pi/4
   *      Better formula:
   *              cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
   *                      =  1/sqrt(2) * (sin(x) - cos(x))
   *              sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
   *                      = -1/sqrt(2) * (cos(x) + sin(x))
   * To avoid cancellation, use
   *              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
   * to compute the worse one.
   */
  if (ix > 0x48000000)
    return (invsqrtpi*ss)/sqrt(x);
  pu = pone(x);
  qv = qone(x);
  return invsqrtpi*(pu*ss + qv*cc)/sqrt(x);
}

/* For x >= 8, the asymptotic expansions of pone is
 *  1 + 15/128 s^2 - 4725/2^15 s^4 - ...,  where s = 1/x.
 * We approximate pone by
 *   pone(x) = 1 + (R/S)
 * where  R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10
 *     S = 1 + ps0*s^2 + ... + ps4*s^10
 * and
 *  | pone(x)-1-R/S | <= 2  ** ( -60.06)
 */

/* Coefficient tables for pone: prN holds the six numerator coefficients (R), psN the five denominator
   coefficients (S) of one argument range each; pone selects the pair by the high word of x. */
static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
  1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */
  1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */
  4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */
  3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */
  7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */
};

static const double ps8[5] = {
  1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */
  3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */
  3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */
  9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */
  3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */
};

static const double pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
  1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */
  1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */
  6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */
  1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */
  5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */
  5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */
};

static const double ps5[5] = {
  5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */
  9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */
  5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */
  7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */
  1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */
};

/* pr3/ps3: chosen by pone for the range between those of pr5 and pr2, i.e. for a high word of x
   in [0x4006DB6D, 0x40122E8B) */
static const double pr3[6] = {
  3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */
  1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */
  3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */
  3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */
  9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */
  4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */
};

static const double ps3[5] = {
  3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */
  3.36762458747825746741e+02, /* 0x40750C33, 0x07F1A75F */
  1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */
  8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */
  1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */
};

static const double pr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
  1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */
  1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */
  2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */
  1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */
  1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */
  5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */
};

static const double ps2[5] = {
  2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */
  1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */
  2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */
  1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */
  8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */
};

/* Evaluates pone(x) = 1 + R(z)/S(z) with z = 1/(x*x), see the description above. The coefficient pair is chosen
   by the high word of |x|. The function is meant for |x| >= 2 only - that is how sun_j1 and sun_y1 call it; the
   last pair is used as the fallback for anything smaller so that the table pointers can never be left NULL. */
static double pone (double x) {
  const double *p, *q;
  double z, r, s;
  int32_t hx, ix;
  GET_HIGH_WORD(hx, x);
  ix = 0x7fffffff & hx;
  if (ix >= 0x40200000)      { p = pr8; q = ps8; }  /* |x| >= 8 */
  else if (ix >= 0x40122E8B) { p = pr5; q = ps5; }
  else if (ix >= 0x4006DB6D) { p = pr3; q = ps3; }
  else                       { p = pr2; q = ps2; }  /* ix >= 0x40000000 expected */
  z = one/(x*x);
  r = p[0] + z*(p[1] + z*(p[2] + z*(p[3] + z*(p[4] + z*p[5]))));
  s = one + z*(q[0] + z*(q[1] + z*(q[2] + z*(q[3] + z*q[4]))));
  return one + r/s;
}


/* For x >= 8, the asymptotic expansions of qone is
 *  3/8 s - 105/1024 s^3 - ..., where s = 1/x.
 * We approximate qone by
 *   qone(x) = s*(0.375 + (R/S))
 * where  R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10
 *     S = 1 + qs1*s^2 + ... + qs6*s^12
 * and
 *  | qone(x)/s -0.375-R/S | <= 2  ** ( -61.13)
 */

/* Coefficient tables for qone: qrN holds the six numerator coefficients (R), qsN the six denominator
   coefficients (S) of one argument range each; qone selects the pair by the high word of x. */
static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
 -1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */
 -1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */
 -7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */
 -1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */
 -4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */
};

static const double qs8[6] = {
  1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */
  7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */
  1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */
  7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */
  6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */
 -2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */
};

static const double qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
 -2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */
 -1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */
 -8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */
 -1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */
 -1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */
 -2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */
};

static const double qs5[6] = {
  8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */
  1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */
  1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */
  4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */
  2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */
 -4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */
};

/* qr3/qs3: chosen by qone for the range between those of qr5 and qr2, i.e. for a high word of x
   in [0x4006DB6D, 0x40122E8B) */
static const double qr3[6] = {
 -5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */
 -1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */
 -4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */
 -5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA */
 -2.28244540737631695038e+02, /* 0xC06C87D3, 0x4718D55F */
 -2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */
};

static const double qs3[6] = {
  4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */
  6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */
  3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */
  5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */
  1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */
 -1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */
};

static const double qr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
 -1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */
 -1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */
 -2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */
 -1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */
 -4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */
 -2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */
};

static const double qs2[6] = {
  2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */
  2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */
  7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */
  7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */
  1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */
 -4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */
};

/* Evaluates qone(x) = (0.375 + R(z)/S(z))/x with z = 1/(x*x), see the description above. The coefficient pair is
   chosen by the high word of |x|. The function is meant for |x| >= 2 only - that is how sun_j1 and sun_y1 call it;
   the last pair is used as the fallback for anything smaller so that the table pointers can never be left NULL. */
static double qone (double x) {
  const double *p, *q;
  double s, r, z;
  int32_t hx, ix;
  GET_HIGH_WORD(hx, x);
  ix = 0x7fffffff & hx;
  if (ix >= 0x40200000)      { p = qr8; q = qs8; }  /* |x| >= 8 */
  else if (ix >= 0x40122E8B) { p = qr5; q = qs5; }
  else if (ix >= 0x4006DB6D) { p = qr3; q = qs3; }
  else                       { p = qr2; q = qs2; }  /* ix >= 0x40000000 expected */
  z = one/(x*x);
  r = p[0] + z*(p[1] + z*(p[2] + z*(p[3] + z*(p[4] + z*p[5]))));
  s = one + z*(q[0] + z*(q[1] + z*(q[2] + z*(q[3] + z*(q[4] + z*q[5])))));
  return (0.375 + r/s)/x;
}

/* @(#)e_j0.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/* __ieee754_j0(x), __ieee754_y0(x)
 * Bessel function of the first and second kinds of order zero.
 * Method -- j0(x):
 *  1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ...
 *  2. Reduce x to |x| since j0(x)=j0(-x),  and
 *     for x in (0,2)
 *    j0(x) = 1-z/4+ z^2*R0/S0,  where z = x*x;
 *     (precision:  |j0-1+z/4-z^2R0/S0 |<2**-63.67 )
 *     for x in (2,inf)
 *     j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0))
 *      where x0 = x-pi/4. It is better to compute sin(x0),cos(x0)
 *     as follow:
 *    cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
 *      = 1/sqrt(2) * (cos(x) + sin(x))
 *    sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
 *      = 1/sqrt(2) * (sin(x) - cos(x))
 *      (To avoid cancellation, use
 *    sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
 *       to compute the worse one.)
 *
 *  3 Special cases
 *    j0(nan)= nan
 *    j0(0) = 1
 *    j0(inf) = 0
 *
 * Method -- y0(x):
 *  1. For x<2.
 *     Since
 *    y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...)
 *     therefore y0(x)-2/pi*j0(x)*ln(x) is an even function.
 *     We use the following function to approximate y0,
 *    y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2
 *     where
 *    U(z) = u00 + u01*z + ... + u06*z^6
 *    V(z) = 1  + v01*z + ... + v04*z^4
 *     with absolute approximation error bounded by 2**-72.
 *     Note: For tiny x, U/V = u0 and j0(x)~1, hence
 *    y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27)
 *  2. For x>=2.
 *     y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0))
 *      where x0 = x-pi/4. It is better to compute sin(x0),cos(x0)
 *     by the method mentioned above.
 *  3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0.
 */

static double pzero(double), qzero(double);

/* Bessel function of the first kind of order zero.
   |x| >= 2: asymptotic form invsqrtpi*(pzero*cc - qzero*ss)/sqrt(|x|);
   |x| <  2: 1 - z/4 plus a rational approximation in z = x*x, with shortcuts for tiny |x|;
   inf and NaN arguments return one/(x*x). */
LUALIB_API double sun_j0 (double x) {  /* 2.17.4 */
  double sn, cs, ss, cc, pu, qv, z, num, den, half;
  int hx, ix;
  GET_HIGH_WORD(hx, x);
  ix = hx & 0x7fffffff;
  if (ix >= 0x7ff00000) return one/(x*x);  /* x == +/-inf or nan ? */
  SET_HIGH_WORD(x, ix);  /* x = fabs(x) by clearing the sign bit, 3.0.1 */
  if (ix >= 0x40000000) {  /* |x| >= 2.0 */
    sn = sun_sin(x);
    cs = sun_cos(x);
    ss = sn - cs;
    cc = sn + cs;
    if (ix < 0x7fe00000) {  /* make sure x+x not overflow */
      /* recompute the term that suffers from cancellation from cos(2x) */
      z = -sun_cos(x + x);
      if ((sn*cs) < zero)
        cc = z/ss;
      else
        ss = z/cc;
    }
    /*
     * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
     * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
     */
    if (ix > 0x48000000)
      return (invsqrtpi*cc)/sqrt(x);
    pu = pzero(x);
    qv = qzero(x);
    return invsqrtpi*(pu*cc - qv*ss)/sqrt(x);
  }
  if (ix < 0x3f200000 && huge + x > one)  /* |x| < 2**-13; raise inexact if x != 0 */
    return (ix < 0x3e400000) ? one : one - 0.25*x*x;  /* |x|<2**-27: result is one */
  z = x*x;
  num = z*(R02 + z*(R03 + z*(R04 + z*R05)));
  den = one + z*(S01 + z*(S02 + z*(S03 + z*S04)));
  if (ix < 0x3FF00000)  /* |x| < 1.00 */
    return one + z*(-0.25 + (num/den));
  half = 0.5*x;
  return (one + half)*(one - half) + z*(num/den);
}

/* Coefficients of the rational approximation U(z)/V(z), z = x^2, that sun_y0 evaluates
   for arguments below 2: u00..u06 are the numerator terms, v01..v04 the denominator
   terms (the denominator's constant term is one). The trailing hex pairs are the
   high and low 32-bit words of each double. */
static const double
u00  = -7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
u01  =  1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
u02  = -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
u03  =  3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
u04  = -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
u05  =  1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
u06  = -3.98205194132103398453e-11, /* 0xBDC5E43D, 0x693FB3C8 */
v01  =  1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
v02  =  7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
v03  =  2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
v04  =  4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */

/* Bessel function of the second kind, order zero.
 * Special cases: NaN -> NaN, -inf -> NaN, +inf -> 0, 0 -> -inf, x < 0 -> NaN. */
LUALIB_API double sun_y0 (double x) {  /* 2.17.4 */
  double sinx, cosx, sdiff, ssum, res, xsq, unum, vden, pval, qval;
  int hx, ix, lx;
  EXTRACT_WORDS(hx, lx, x);
  ix = 0x7fffffff & hx;
  /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0  */
  if (ix >= 0x7ff00000) return one/(x + x*x);
  if ((ix | lx) == 0) return -one/zero;
  if (hx < 0) return zero/zero;
  if (ix < 0x40000000) {  /* 0 < x < 2.0 */
    /* for tiny x, U/V reduces to u00 and j0(x) to 1 */
    if (ix <= 0x3e400000) return u00 + tpi*sun_log(x);
    xsq = x*x;
    unum = u00 + xsq*(u01 + xsq*(u02 + xsq*(u03 + xsq*(u04 + xsq*(u05 + xsq*u06)))));
    vden = one + xsq*(v01 + xsq*(v02 + xsq*(v03 + xsq*v04)));
    return unum/vden + tpi*(sun_j0(x)*sun_log(x));
  }
  /* x >= 2.0:
   * y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0)), where x0 = x-pi/4
   *      Better formula:
   *              cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
   *                      =  1/sqrt(2) * (sin(x) + cos(x))
   *              sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
   *                      =  1/sqrt(2) * (sin(x) - cos(x))
   * To avoid cancellation, use
   *              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
   * to compute the worse one.
   */
  sinx = sun_sin(x);
  cosx = sun_cos(x);
  sdiff = sinx - cosx;
  ssum = sinx + cosx;
  /*
   * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
   * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
   */
  if (ix < 0x7fe00000) {  /* x + x cannot overflow here */
    res = -sun_cos(x + x);
    if ((sinx*cosx) < zero) ssum = res/sdiff;
    else sdiff = res/ssum;
  }
  if (ix > 0x48000000) {
    res = (invsqrtpi*sdiff)/sqrt(x);
  } else {
    pval = pzero(x);
    qval = qzero(x);
    res = invsqrtpi*(pval*sdiff + qval*ssum)/sqrt(x);
  }
  return res;
}

/* The asymptotic expansions of pzero is
 *  1 - 9/128 s^2 + 11025/98304 s^4 - ...,  where s = 1/x.
 * For x >= 2, We approximate pzero by
 *   pzero(x) = 1 + (R/S)
 * where  R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10
 *     S = 1 + pS0*s^2 + ... + pS4*s^10
 * and
 *  | pzero(x)-1-R/S | <= 2  ** ( -60.26)
 */
/* Coefficient tables for pzero, one pair per argument interval: pzero reads the six
   pR* entries as numerator terms and the five ppS* entries as denominator terms
   (denominator constant term is one), both as polynomials in 1/x^2. */
static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
 -7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */
 -8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */
 -2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */
 -2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */
 -5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */
};

/* denominator terms selected together with pR8 */
static const double ppS8[5] = {
  1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */
  3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */
  4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */
  1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */
  4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */
};

static const double pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
 -1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */
 -7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */
 -4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */
 -6.76747652265167261021e+01, /* 0xC050EB2F, 0x5A7D1783 */
 -3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */
 -3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */
};

/* denominator terms selected together with pR5 */
static const double ppS5[5] = {
  6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */
  1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */
  5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */
  9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */
  2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */
};

static const double pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
 -2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */
 -7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */
 -2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */
 -2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */
 -5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */
 -3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */
};

/* denominator terms selected together with pR3 */
static const double ppS3[5] = {
  3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */
  3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */
  1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */
  1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */
  1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */
};

static const double pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
 -8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */
 -7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */
 -1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */
 -7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */
 -1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */
 -3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */
};

/* denominator terms selected together with pR2 */
static const double ppS2[5] = {
  2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */
  1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */
  2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */
  1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */
  1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */
};

/* Evaluates pzero(x) = 1 + R(z)/S(z) with z = 1/x^2, picking the coefficient tables
 * from the high word of |x|. The tables are fitted for |x| >= 2, which is the only
 * range the visible callers (sun_j0, sun_y0) use.
 *
 * The lowest interval is now an unconditional fallback: previously p and q stayed
 * NULL for |x| < 2 and were dereferenced. For such out-of-range arguments the result
 * is not an accurate approximation, but the call no longer crashes. */
static double pzero (double x) {
  const double *p, *q;
  double z, r, s;
  int32_t hx, ix;
  GET_HIGH_WORD(hx, x);
  ix = 0x7fffffff & hx;
  if (ix >= 0x40200000)      { p = pR8; q = ppS8; }  /* |x| >= 8 */
  else if (ix >= 0x40122E8B) { p = pR5; q = ppS5; }  /* |x| in [4.5454, 8) */
  else if (ix >= 0x4006DB6D) { p = pR3; q = ppS3; }  /* |x| in [2.8571, 4.5454) */
  else                       { p = pR2; q = ppS2; }  /* |x| in [2, 2.8571), and fallback below 2 */
  z = one/(x*x);
  r = p[0] + z*(p[1] + z*(p[2] + z*(p[3] + z*(p[4] + z*p[5]))));
  s = one + z*(q[0] + z*(q[1] + z*(q[2] + z*(q[3] + z*q[4]))));
  return one + r/s;
}

/* For x >= 8, the asymptotic expansion of qzero is
 *  -1/8 s + 75/1024 s^3 - ..., where s = 1/x.
 * We approximate qzero by
 *   qzero(x) = s*(-0.125 + (R/S))
 * where  R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10
 *     S = 1 + qS0*s^2 + ... + qS5*s^12
 * and
 *  | qzero(x)/s +0.125-R/S | <= 2  ** ( -61.22)
 */

/* Coefficient tables for qzero, one pair per argument interval: qzero reads the six
   qR* entries as numerator terms and the six qqS* entries as denominator terms
   (denominator constant term is one), both as polynomials in 1/x^2. */
static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
  7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */
  1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */
  5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */
  8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */
  3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */
};

/* denominator terms selected together with qR8 */
static const double qqS8[6] = {
  1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */
  8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */
  1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */
  8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */
  8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */
 -3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */
};

static const double qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
  1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */
  7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */
  5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */
  1.35111577286449829671e+02, /* 0x4060E392, 0x0A8788E9 */
  1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */
  1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */
};

/* denominator terms selected together with qR5 */
static const double qqS5[6] = {
  8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */
  2.07781416421392987104e+03, /* 0x40A03BA0, 0xDA21C0CE */
  1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */
  5.67511122894947329769e+04, /* 0x40EBB5E3, 0x97E02372 */
  3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */
 -5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */
};

static const double qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
  4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */
  7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */
  3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */
  4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */
  1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */
  1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */
};

/* denominator terms selected together with qR3 */
static const double qqS3[6] = {
  4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */
  7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */
  3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */
  6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */
  2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */
 -1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */
};

static const double qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
  1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */
  7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */
  1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */
  1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */
  3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */
  1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */
};

/* denominator terms selected together with qR2 */
static const double qqS2[6] = {
  3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */
  2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */
  8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */
  8.82935845112488550512e+02, /* 0x408B977C, 0x9C5CC214 */
  2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */
 -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */
};

/* Evaluates qzero(x) = (-0.125 + R(z)/S(z))/x with z = 1/x^2, picking the coefficient
 * tables from the high word of |x|. The tables are fitted for |x| >= 2, which is the
 * only range the visible callers (sun_j0, sun_y0) use.
 *
 * The lowest interval is now an unconditional fallback: previously p and q stayed
 * NULL for |x| < 2 and were dereferenced. For such out-of-range arguments the result
 * is not an accurate approximation, but the call no longer crashes. */
static double qzero (double x) {
  const double *p, *q;
  double s, r, z;
  int32_t hx, ix;
  GET_HIGH_WORD(hx, x);
  ix = 0x7fffffff & hx;
  if (ix >= 0x40200000)      { p = qR8; q = qqS8; }  /* |x| >= 8 */
  else if (ix >= 0x40122E8B) { p = qR5; q = qqS5; }  /* |x| in [4.5454, 8) */
  else if (ix >= 0x4006DB6D) { p = qR3; q = qqS3; }  /* |x| in [2.8571, 4.5454) */
  else                       { p = qR2; q = qqS2; }  /* |x| in [2, 2.8571), and fallback below 2 */
  z = one/(x*x);
  r = p[0] + z*(p[1] + z*(p[2] + z*(p[3] + z*(p[4] + z*p[5]))));
  s = one + z*(q[0] + z*(q[1] + z*(q[2] + z*(q[3] + z*(q[4] + z*q[5])))));
  return (-.125 + r/s)/x;
}

/* @(#)e_jn.c 1.4 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * __ieee754_jn(n, x), __ieee754_yn(n, x)
 * floating point Bessel's function of the 1st and 2nd kind
 * of order n
 *
 * Special cases:
 *  y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal;
 *  y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal.
 * Note 2. About jn(n,x), yn(n,x)
 *  For n=0, j0(x) is called,
 *  for n=1, j1(x) is called,
 *  for n<x, forward recursion is used starting
 *  from values of j0(x) and j1(x).
 *  for n>x, a continued fraction approximation to
 *  j(n,x)/j(n-1,x) is evaluated and then backward
 *  recursion is used starting from a supposed value
 *  for j(n,x). The resulting value of j(0,x) is
 *  compared with the actual value to correct the
 *  supposed value of j(n,x).
 *
 *  yn(n,x) is similar in all respects, except
 *  that forward recursion is used for all
 *  values of n>1.
 *
 */

/* Bessel function of the first kind of integer order n.
 *
 * n == 0 and n == 1 are delegated to sun_j0 and sun_j1. Otherwise:
 *  - x == 0 or x == +/-inf gives 0,
 *  - n <= |x| uses forward recurrence (or the asymptotic form for |x| > 2**302),
 *  - n >  |x| uses the leading Taylor term for tiny |x|, else a continued fraction
 *    followed by backward recurrence, normalised with sun_j0(x).
 * NaN input returns NaN. Negative n or x is folded onto the non-negative case via
 * J(-n,x) = J(n,-x) and the parity of n.
 *
 * The NaN test is carried out in unsigned arithmetic: negating the signed low word
 * overflowed (undefined behaviour) when that word was 0x80000000.
 * NOTE(review): n == INT_MIN still overflows in n = -n below; left as in the original.
 */
LUALIB_API double sun_jn (int n, double x) {  /* 2.17.4 */
  int32_t i, hx, ix, lx, sgn;
  double a, b, temp, di, z, w;
  /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
   * Thus, J(-n,x) = J(n,-x)
   */
  temp = 0;
  EXTRACT_WORDS(hx, lx, x);
  ix = 0x7fffffff & hx;
  /* if J(n,NaN) is NaN: a non-zero low word contributes one bit on top of the exponent */
  if ((ix | (((uint32_t)lx | (0u - (uint32_t)lx)) >> 31)) > 0x7ff00000) return x + x;
  if (n < 0) {
    n = -n;
    x = -x;
    hx ^= 0x80000000;
  }
  if (n == 0) return sun_j0(x);
  else if (n == 1) return sun_j1(x);
  sgn = (n & 1) & (hx >> 31);  /* even n -- 0, odd n -- sign(x) */
  SET_HIGH_WORD(x, ix);  /* x = fabs(x), 3.0.1 */
  if ((ix | lx) == 0 || ix >= 0x7ff00000)   /* if x is 0 or inf */
    b = zero;
  else if ((double)n <= x) {
    /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
    if (ix >= 0x52D00000) { /* x > 2**302 */
    /* (x >> n**2)
     *      Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
     *      Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
     *      Let s=sin(x), c=cos(x),
     *    xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
     *
     *       n  sin(xn)*sqt2  cos(xn)*sqt2
     *    ----------------------------------
     *       0   s-c     c+s
     *       1  -s-c     -c+s
     *       2  -s+c    -c-s
     *       3   s+c     c-s
     */
      switch (n & 3) {
        case 0: temp =  sun_cos(x) + sun_sin(x); break;
        case 1: temp = -sun_cos(x) + sun_sin(x); break;
        case 2: temp = -sun_cos(x) - sun_sin(x); break;
        case 3: temp =  sun_cos(x) - sun_sin(x); break;
      }
      b = invsqrtpi*temp/sqrt(x);
    } else {
      a = sun_j0(x);
      b = sun_j1(x);
      for (i=1; i < n; i++) {
        temp = b;
        b = b*((double)(i + i)/x) - a; /* avoid underflow */
        a = temp;
      }
    }
  } else {
    if (ix < 0x3e100000) {  /* x < 2**-29 */
    /* x is tiny, return the first Taylor expansion of J(n,x)
     * J(n,x) = 1/n!*(x/2)^n  - ...
     */
      if (n > 33)  /* underflow */
        b = zero;
      else {
        temp = x*0.5; b = temp;
        for (a=one, i=2; i <= n; i++) {
          a *= (double)i;    /* a = n! */
          b *= temp;    /* b = (x/2)^n */
        }
        b = b/a;
      }
    } else {
    /* use backward recurrence:
     *             x      x^2      x^2
     *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
     *            2n  - 2(n+1) - 2(n+2)
     *
     *             1      1        1
     *  (for large x)   =  ----  ------   ------   .....
     *            2n   2(n+1)   2(n+2)
     *            -- - ------ - ------ -
     *             x     x         x
     *
     * Let w = 2n/x and h=2/x, then the above quotient is equal to the continued fraction:
     *          1
     *  = -----------------------
     *           1
     *     w - -----------------
     *            1
     *           w+h - ---------
     *             w+2h - ...
     *
     * To determine how many terms needed, let
     * Q(0) = w, Q(1) = w(w+h) - 1,
     * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
     * When Q(k) > 1e4  good for single
     * When Q(k) > 1e9  good for double
     * When Q(k) > 1e17  good for quadruple
     */
     /* determine k */
      double t, v, q0, q1, h, tmp;
      int k, m;
      w  = (n + n)/(double)x; h = 2.0/(double)x;
      q0 = w;  z = w + h; q1 = w*z - 1.0; k = 1;
      while (q1 < 1.0e9) {
        k += 1; z += h;
        tmp = z*q1 - q0;
        q0 = q1;
        q1 = tmp;
      }
      m = n + n;
      /* evaluate the continued fraction from the innermost term outwards */
      for (t=zero, i = 2*(n + k); i >= m; i -= 2) t = one/(i/x - t);
      a = t;
      b = one;
      /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
       *  Hence, if n*(log(2n/x)) > ...
       *  single 8.8722839355e+01
       *  double 7.09782712893383973096e+02
       *  long double 1.1356523406294143949491931077970765006170e+04
       *  then recurrent value may overflow and the result is
       *  likely underflow to zero
       */
      tmp = n;
      v = two/x;
      tmp = tmp*sun_log(fabs(v*tmp));
      if (tmp < 7.09782712893383973096e+02) {
        for (i=n - 1, di=(double)(i + i); i > 0; i--) {
          temp = b;
          b *= di;
          b  = b/x - a;
          a = temp;
          di -= two;
        }
      } else {
        for (i=n-1, di=(double)(i + i); i > 0; i--) {
          temp = b;
          b *= di;
          b  = b/x - a;
          a = temp;
          di -= two;
          /* scale b to avoid spurious overflow */
          if (b > 1e100) {
            a /= b;
            t /= b;
            b  = one;
          }
        }
      }
      /* rescale the supposed J(n,x) with the true value of J(0,x) */
      b = t*sun_j0(x)/b;
    }
  }
  if (sgn == 1) return -b; else return b;
}

/* Bessel function of the second kind of integer order n; replaced by the SunPro
 * version found in musl-1.2.4/src/math/jn.c.
 * NaN input is returned unchanged, x <= 0 yields AGN_NAN and x == +inf yields 0.
 * Orders 0 and +/-1 are delegated to sun_y0 and sun_y1; higher orders use the
 * asymptotic form for x > 2**302 and forward recurrence otherwise. */
LUALIB_API double sun_yn (int n, double x) {
  uint32_t ix, lx, ib;
  int nm1, negate, k;
  double prev, cur, keep, sinx, cosx;
  EXTRACT_WORDS(ix, lx, x);
  negate = ix >> 31;
  ix &= 0x7fffffff;
  if ((ix | (lx | -lx) >> 31) > 0x7ff00000) return x;  /* nan */
  if (negate || (ix | lx) == 0) return AGN_NAN;  /* x <= 0 */
  if (ix == 0x7ff00000) return 0.0;
  if (n == 0) return sun_y0(x);
  /* work with |n| - 1; Y(-n,x) = (-1)^n * Y(n,x) decides the final sign */
  negate = (n < 0) ? (n & 1) : 0;
  nm1 = (n < 0) ? -(n + 1) : n - 1;
  if (nm1 == 0) {
    cur = sun_y1(x);
    return negate ? -cur : cur;
  }
  if (ix >= 0x52d00000) { /* x > 2**302 */
    /* (x >> n**2)
     *      Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
     *      Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
     *      Let s=sin(x), c=cos(x),
     *          xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
     *
     *             n    sin(xn)*sqt2    cos(xn)*sqt2
     *          ----------------------------------
     *             0     s-c             c+s
     *             1    -s-c            -c+s
     *             2    -s+c            -c-s
     *             3     s+c             c-s
     */
    sinx = sun_sin(x);
    cosx = sun_cos(x);
    switch (nm1 & 3) {
      case 0:  keep = -sinx - cosx; break;
      case 1:  keep = -sinx + cosx; break;
      case 2:  keep =  sinx + cosx; break;
      default: keep =  sinx - cosx; break;  /* nm1 & 3 == 3 */
    }
    cur = invsqrtpi*keep/sqrt(x);
  } else {
    prev = sun_y0(x);
    cur = sun_y1(x);
    /* quit if the running value is -inf */
    GET_HIGH_WORD(ib, cur);
    for (k = 1; k <= nm1 && ib != 0xfff00000; k++) {
      keep = cur;
      cur = (2.0*k/x)*cur - prev;
      GET_HIGH_WORD(ib, cur);
      prev = keep;
    }
  }
  return negate ? -cur : cur;
}


/* @(#)e_lgamma_r.c 1.3 95/01/18 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 */
/* __ieee754_lgamma_r(x, signgamp)
 * Reentrant version of the logarithm of the Gamma function
 * with user provide pointer for the sign of Gamma(x).
 *
 * Method:
 *   1. Argument Reduction for 0 < x <= 8
 *   Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
 *   reduce x to a number in [1.5,2.5] by
 *     lgamma(1+s) = log(s) + lgamma(s)
 *  for example,
 *    lgamma(7.3) = log(6.3) + lgamma(6.3)
 *          = log(6.3*5.3) + lgamma(5.3)
 *          = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
 *   2. Polynomial approximation of lgamma around its
 *  minimum ymin=1.461632144968362245 to maintain monotonicity.
 *  On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
 *    Let z = x-ymin;
 *    lgamma(x) = -0.1214862905358496078218 + z^2*poly(z)
 *  where
 *    poly(z) is a 14 degree polynomial.
 *   2. Rational approximation in the primary interval [2,3]
 *  We use the following approximation:
 *    s = x-2.0;
 *    lgamma(x) = 0.5*s + s*P(s)/Q(s)
 *  with accuracy
 *    |P/Q - (lgamma(x)-0.5s)| < 2**-61.71
 *  Our algorithms are based on the following observation
 *
 *                             zeta(2)-1    2    zeta(3)-1    3
 * lgamma(2+s) = s*(1-Euler) + --------- * s  -  --------- * s  + ...
 *                                 2                 3
 *
 *  where Euler = 0.5771... is the Euler constant, which is very
 *  close to 0.5.
 *
 *   3. For x>=8, we have
 *  lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
 *  (better formula:
 *     lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
 *  Let z = 1/x, then we approximate
 *    f(z) = lgamma(x) - (x-0.5)(log(x)-1)
 *  by
 *              3       5             11
 *    w = w0 + w1*z + w2*z  + w3*z  + ... + w6*z
 *  where
 *    |w - f(z)| < 2**-58.74
 *
 *   4. For negative x, since (G is gamma function)
 *    -x*G(-x)*G(x) = pi/sin(pi*x),
 *   we have
 *     G(x) = pi/(sin(pi*x)*(-x)*G(-x))
 *  since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
 *  Hence, for x<0, signgam = sign(sin(pi*x)) and
 *    lgamma(x) = log(|Gamma(x)|)
 *        = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
 *  Note: one should avoid compute pi*(-x) directly in the
 *        computation of sin(pi*(-x)).
 *
 *   5. Special Cases
 *    lgamma(2+s) ~ s*(1-Euler) for tiny s
 *    lgamma(1) = lgamma(2) = 0
 *    lgamma(x) ~ -log(|x|) for tiny x
 *    lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero
 *    lgamma(inf) = inf
 *    lgamma(-inf) = inf (bug for bug compatible with C99!?)
 *
 */

static const double
two52=  4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
a0  =  7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
a1  =  3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
a2  =  6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
a3  =  2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
a4  =  7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
a5  =  2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
a6  =  1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
a7  =  5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
a8  =  2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
a9  =  1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
a10 =  2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
a11 =  4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
tc  =  1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
tf  = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
/* tt = -(tail of tf) */
tt  = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
t0  =  4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
t1  = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
t2  =  6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
t3  = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
t4  =  1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
t5  = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
t6  =  6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
t7  = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
t8  =  2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
t9  = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
t10 =  8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
t12 =  3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
t14 =  3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
u0  = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
u1  =  6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
u2  =  1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
u3  =  9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
u4  =  2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
u5  =  1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
v1  =  2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
v2  =  2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
v3  =  7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
v4  =  1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
v5  =  3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
s0  = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
s1  =  2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
s2  =  3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
s3  =  1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
s4  =  2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
s5  =  1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
s6  =  3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
r1  =  1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
r2  =  7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
r3  =  1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
r4  =  1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
r5  =  7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
r6  =  7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
w0  =  4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
w1  =  8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
w2  = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
w3  =  7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
w4  = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
w5  =  8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
w6  = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */

/* Helper for the lgamma routines, which call it with negative x only. The value
 * returned is the negated kernel result for pi*|x| after reducing |x| modulo 2
 * (for |x| < 0.25 it is kernel_sin(pi*x) directly). */
static double sin_pi (double x) {
  double ax, whole, res;
  int octant, xhigh;
  GET_HIGH_WORD(xhigh, x);
  xhigh &= 0x7fffffff;
  if (xhigh < 0x3fd00000) return kernel_sin(pi*x, zero, 0);
  ax = -x;  /* x is assumed negative */
  /* argument reduction, make sure inexact flag not raised if input is an integer */
  whole = sun_floor(ax);  /* 2.35.1 tweak */
  if (whole != ax) {  /* inexact anyway */
    ax *= 0.5;
    ax = 2.0*(ax - sun_floor(ax));  /* ax = |x| mod 2.0; 2.35.1 tweak */
    octant = (int)(ax*4.0);
  } else if (xhigh >= 0x43400000) {
    /* integers this large must be even */
    ax = zero;
    octant = 0;
  } else {
    if (xhigh < 0x43300000) whole = ax + two52;  /* exact */
    /* the lowest bit of the low word now carries the parity of |x| */
    GET_LOW_WORD(octant, whole);
    octant &= 1;
    ax = octant;
    octant <<= 2;
  }
  if (octant == 0)
    res = kernel_sin(pi*ax, zero, 0);
  else if (octant == 1 || octant == 2)
    res = kernel_cos(pi*(0.5 - ax), zero);
  else if (octant == 3 || octant == 4)
    res = kernel_sin(pi*(one - ax), zero, 0);
  else if (octant == 5 || octant == 6)
    res = -kernel_cos(pi*(ax - 1.5), zero);
  else
    res = kernel_sin(pi*(ax - 2.0), zero, 0);
  return -res;
}


/* Reentrant log-gamma: returns log(|Gamma(x)|) and stores the sign of Gamma(x)
 * (+1 or -1) in *signgamp.
 *
 * Special cases: inf/NaN return x*x; +/-0 returns HUGE_VAL; negative integers
 * return +inf; other negative x is handled through the reflection formula with
 * sin_pi.
 *
 * Fix: for x == -0.0 the sign is now reported as -1 (Gamma(-0) = -inf); previously
 * *signgamp was left at +1 for both zeros. */
LUALIB_API double sun_lgamma_r (double x, int *signgamp) {  /* 2.17.4 */
  double t, y, z, nadj, p, p1, p2, p3, q, r, w;
  int32_t hx;
  int i, lx, ix;
  EXTRACT_WORDS(hx, lx, x);
  /* purge off +-inf, NaN, +-0, tiny and negative arguments */
  *signgamp = 1; nadj = 0;
  ix = hx & 0x7fffffff;
  if (ix >= 0x7ff00000) return x*x;  /* x is inf or NaN */
  if ((ix | lx) == 0) {  /* x is +0 or -0 */
    if (hx < 0) *signgamp = -1;
    return HUGE_VAL;  /* instead of return one/zero; */
  }
  /* we will ALLOW for negative x, as GCC's lgamma does */
  if (ix < 0x3b900000) {  /* |x|<2**-70, return -log(|x|) */
    if (hx < 0) {
      *signgamp = -1;
      return -sun_log(-x);
    } else return -sun_log(x);
  }
  if (hx < 0) {
    if (ix >= 0x43300000)   /* |x|>=2**52, must be -integer */
      return one/zero;
    t = sin_pi(x);
    if (t == zero) return one/zero; /* -integer */
    nadj = sun_log(pi/fabs(t*x));
    if (t < zero) *signgamp = -1;
    x = -x;
  }
  /* purge off 1 and 2 */
  if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0)) r = 0;
  /* for x < 2.0 */
  else if (ix < 0x40000000) {
    if (ix <= 0x3feccccc) {   /* lgamma(x) = lgamma(x+1)-log(x) */
      r = -sun_log(x);
      if (ix >= 0x3FE76944) { y = one - x; i = 0; }
      else if (ix >= 0x3FCDA661) { y = x - (tc - one); i = 1; }
      else { y = x; i = 2; }
    } else {
      r = zero;
      if (ix >= 0x3FFBB4C3) { y = 2.0 - x; i = 0; } /* [1.7316,2] */
      else if (ix >= 0x3FF3B4C4) { y = x - tc; i = 1; } /* [1.23,1.73] */
      else { y = x - one; i = 2; }
    }
    switch (i) {
      case 0:
        z = y*y;
        p1 = a0 + z*(a2 + z*(a4 + z*(a6 + z*(a8 + z*a10))));
        p2 = z*(a1 + z*(a3 + z*(a5 + z*(a7 + z*(a9 + z*a11)))));
        p  = y*p1 + p2;
        r  += (p - 0.5*y); break;
      case 1:
        z = y*y;
        w = z*y;
        p1 = t0 + w*(t3 + w*(t6 + w*(t9  + w*t12)));  /* parallel comp */
        p2 = t1 + w*(t4 + w*(t7 + w*(t10 + w*t13)));
        p3 = t2 + w*(t5 + w*(t8 + w*(t11 + w*t14)));
        p  = z*p1 - (tt - w*(p2 + y*p3));
        r += (tf + p); break;
      case 2:
        p1 = y*(u0 + y*(u1 + y*(u2 + y*(u3 + y*(u4 + y*u5)))));
        p2 = one + y*(v1 + y*(v2 + y*(v3 + y*(v4 + y*v5))));
        r += (-0.5*y + p1/p2);
    }
  } else if (ix < 0x40200000) {       /* x < 8.0 */
    i = (int)x;
    y = x - (double)i;
    p = y*(s0 + y*(s1 + y*(s2 + y*(s3 + y*(s4 + y*(s5 + y*s6))))));
    q = one + y*(r1 + y*(r2 + y*(r3 + y*(r4 + y*(r5 + y*r6)))));
    r = half*y + p/q;
    z = one;  /* lgamma(1+s) = log(s) + lgamma(s) */
    switch (i) {
      case 7: z *= (y + 6.0);  /* FALLTHRU */
      case 6: z *= (y + 5.0);  /* FALLTHRU */
      case 5: z *= (y + 4.0);  /* FALLTHRU */
      case 4: z *= (y + 3.0);  /* FALLTHRU */
      case 3: z *= (y + 2.0);  /* FALLTHRU */
      r += sun_log(z); break;
    }
    /* 8.0 <= x < 2**58 */
  } else if (ix < 0x43900000) {
    t = sun_log(x);
    z = one/x;
    y = z*z;
    w = w0 + z*(w1 + y*(w2 + y*(w3 + y*(w4 + y*(w5 + y*w6)))));
    r = (x - half)*(t - one) + w;
  } else /* 2**58 <= x <= inf */
    r =  x*(sun_log(x) - one);
  /* reflection: for negative input combine with the adjustment computed above */
  if (hx < 0) r = nadj - r;
  return r;
}


/* For a version that returns `undefined` for x <= 0 <=> (ix | lx) == 0 || (hx < 0), use luai_numlngamma;
   we deliberately return inf and allow negative x here to support `binomial` and related functions.
   Speed over size, 2.17.4 */
LUALIB_API double sun_lgamma (double x) {
  /* Returns log(|Gamma(x)|) without reporting the sign of Gamma(x). The body repeats
     the logic of sun_lgamma_r, minus the sign bookkeeping, plus a shortcut through
     tools_lnfactorial. inf/NaN return x*x, +/-0 returns HUGE_VAL and negative
     integers return +inf. */
  double t, y, z, nadj, p, p1, p2, p3, q, r, w;
  int32_t hx, ux;
  int i, lx, ix;
  nadj = 0;
  EXTRACT_WORDS(hx, lx, x);
  /* purge off +-inf, NaN, +-0, tiny and negative arguments */
  ix = hx & 0x7fffffff;
  if (ix >= 0x7ff00000) return x*x;
  /* if ((ix | lx) == 0 || (hx < 0)) return AGN_NAN; */ /* we won't use this. */
  if ((ix | lx) == 0) return HUGE_VAL;
  /* NOTE(review): tools_isposintwords presumably reports whether x is a positive integer and
     stores it in ux; such small arguments are answered via tools_lnfactorial - confirm */
  if (tools_isposintwords(hx, lx, &ux) && ux < AGN_LOG2LNFACTSIZE) return tools_lnfactorial(x - 1);  /* 3.7.6 40 percent tweak */
  /* we will ALLOW for negative x, as GCC's lgamma does */
  if (ix < 0x3b900000) {  /* |x| < 2**-70, return -log(|x|) */
    if (hx < 0) return -sun_log(-x);
    else return -sun_log(x);
  }
  if (hx < 0) {
    if (ix >= 0x43300000) return one/zero;  /* |x| >= 2**52, must be -integer */
    t = sin_pi(x);
    if (t == zero) return one/zero;  /* = -integer */
    /* reflection adjustment, combined with the result for -x at the end */
    nadj = sun_log(pi/fabs(t*x));
    x = -x;
  }
  /* purge off 1 and 2 */
  if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0)) r = 0;
  /* for x < 2.0 */
  else if (ix < 0x40000000) {
    /* choose the reduced argument y and the approximation i (0, 1 or 2) to apply */
    if (ix <= 0x3feccccc) {   /* lgamma(x) = lgamma(x + 1) - log(x) */
      r = -sun_log(x);
      if (ix >= 0x3FE76944) { y = one - x; i = 0; }
      else if (ix >= 0x3FCDA661) { y= x - (tc - one); i = 1; }
      else { y = x; i = 2; }
    } else {
      r = zero;
      if (ix >= 0x3FFBB4C3) { y = 2.0 - x; i = 0; } /* [1.7316,2] */
      else if (ix >= 0x3FF3B4C4) { y = x - tc; i = 1; } /* [1.23,1.73] */
      else { y = x - one; i = 2; }
    }
    switch (i) {
      case 0:  /* polynomial in y with coefficients a0..a11 */
        z = y*y;
        p1 = a0 + z*(a2 + z*(a4 + z*(a6 + z*(a8 + z*a10))));
        p2 = z*(a1 + z*(a3 + z*(a5 + z*(a7 + z*(a9 + z*a11)))));
        p  = y*p1 + p2;
        r  += (p - 0.5*y); break;
      case 1:  /* expansion about tc with coefficients t0..t14 */
        z = y*y;
        w = z*y;
        p1 = t0 + w*(t3 + w*(t6 + w*(t9  + w*t12)));  /* parallel comp */
        p2 = t1 + w*(t4 + w*(t7 + w*(t10 + w*t13)));
        p3 = t2 + w*(t5 + w*(t8 + w*(t11 + w*t14)));
        p  = z*p1 - (tt - w*(p2 + y*p3));
        r += (tf + p); break;
      case 2:  /* rational approximation with u0..u5 over v1..v5 */
        p1 = y*(u0 + y*(u1 + y*(u2 + y*(u3 + y*(u4 + y*u5)))));
        p2 = one + y*(v1 + y*(v2 + y*(v3 + y*(v4 + y*v5))));
        r += (-0.5*y + p1/p2);
    }
  } else if (ix < 0x40200000) {       /* x < 8.0 */
    /* split x into integer part i and fraction y, then undo the shift with log(z) */
    i = (int)x;
    y = x - (double)i;
    p = y*(s0 + y*(s1 + y*(s2 + y*(s3 + y*(s4 + y*(s5 + y*s6))))));
    q = one + y*(r1 + y*(r2 + y*(r3 + y*(r4 + y*(r5 + y*r6)))));
    r = half*y + p/q;
    z = one;  /* lgamma(1+s) = log(s) + lgamma(s) */
    switch (i) {
      case 7: z *= (y + 6.0);  /* FALLTHRU */
      case 6: z *= (y + 5.0);  /* FALLTHRU */
      case 5: z *= (y + 4.0);  /* FALLTHRU */
      case 4: z *= (y + 3.0);  /* FALLTHRU */
      case 3: z *= (y + 2.0);  /* FALLTHRU */
      r += sun_log(z); break;
    }
  /* 8.0 <= x < 2**58 */
  } else if (ix < 0x43900000) {
    t = sun_log(x);
    z = one/x;
    y = z*z;
    w = w0 + z*(w1 + y*(w2 + y*(w3 + y*(w4 + y*(w5 + y*w6)))));
    r = (x - half)*(t - one) + w;
  } else  /* 2**58 <= x <= inf */
     r =  x*(sun_log(x) - one);
  /* negative input: apply the reflection adjustment */
  if (hx < 0) r = nadj - r;
  return r;
}


/* Gamma function for C doubles, computed as exp(lgamma(x)) with the Sun/fdlibm log-gamma kernels
   (speed over size, 2.17.4).

   Argument handling, in this order:
   - NaN, +inf, -inf: returns x*x + x, i.e. NaN for NaN and -inf, and +inf for +inf;
   - x < 0 (including -0) and x = +0: delegated to cephes_gamma;
   - arguments accepted by tools_isposintwords() with ux < AGN_LOG2LNFACTSIZE: exp(tools_lnfactorial(x - 1)),
     3.7.6 18 percent tweak;
   - 0 < x < 2**-70: exp(-log(x)), i.e. roughly 1/x;
   - all remaining x > 0: piecewise evaluation of log-gamma followed by sun_exp.

   As every negative argument leaves the function before the kernels are reached, no reflection step (and no
   sign adjustment of the result) is needed here; the formerly present, unreachable code has been removed. */
LUALIB_API double tools_gamma (double x) {
  double t, y, z, p, p1, p2, p3, q, r, w;
  int32_t hx, ux;
  int i, lx, ix;
  EXTRACT_WORDS(hx, lx, x);
  ix = hx & 0x7fffffff;  /* high word of |x| */
  /* purge off +-inf and NaN; 2.17.4 patch taken from sun_sqrt */
  if ((ix & 0x7ff00000) == 0x7ff00000) return x*x + x;
  /* 2.17.4 patch: x <= 0 is left to the Cephes implementation */
  if ((hx < 0) || (hx | lx) == 0) return cephes_gamma(x);
  /* 3.7.6 18 percent tweak */
  if (tools_isposintwords(hx, lx, &ux) && ux < AGN_LOG2LNFACTSIZE) return sun_exp(tools_lnfactorial(x - 1));
  /* 0 < x < 2**-70: lgamma(x) ~ -log(x) */
  if (ix < 0x3b900000) return sun_exp(-sun_log(x));
  /* purge off 1 and 2 */
  if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0)) r = 0;
  /* for x < 2.0 */
  else if (ix < 0x40000000) {
    if (ix <= 0x3feccccc) {  /* x < 0.9: lgamma(x) = lgamma(x + 1) - log(x) */
      r = -sun_log(x);
      if (ix >= 0x3FE76944) { y = one - x; i = 0; }              /* x >= 0.7316 */
      else if (ix >= 0x3FCDA661) { y = x - (tc - one); i = 1; }  /* x >= 0.2316 */
      else { y = x; i = 2; }
    } else {
      r = zero;
      if (ix >= 0x3FFBB4C3) { y = 2.0 - x; i = 0; }      /* [1.7316,2] */
      else if (ix >= 0x3FF3B4C4) { y = x - tc; i = 1; }  /* [1.23, 1.73] */
      else { y = x - one; i = 2; }
    }
    switch (i) {
      case 0:
        z = y*y;
        p1 = a0 + z*(a2 + z*(a4 + z*(a6 + z*(a8 + z*a10))));
        p2 = z*(a1 + z*(a3 + z*(a5 + z*(a7 + z*(a9 + z*a11)))));
        p  = y*p1 + p2;
        r += (p - 0.5*y);
        break;
      case 1:
        z = y*y;
        w = z*y;
        p1 = t0 + w*(t3 + w*(t6 + w*(t9 + w*t12)));  /* parallel comp */
        p2 = t1 + w*(t4 + w*(t7 + w*(t10 + w*t13)));
        p3 = t2 + w*(t5 + w*(t8 + w*(t11 + w*t14)));
        p  = z*p1 - (tt - w*(p2 + y*p3));
        r += (tf + p);
        break;
      case 2:
        p1 = y*(u0 + y*(u1 + y*(u2 + y*(u3 + y*(u4 + y*u5)))));
        p2 = one + y*(v1 + y*(v2 + y*(v3 + y*(v4 + y*v5))));
        r += (-0.5*y + p1/p2);
        break;
      default:
        break;
    }
  } else if (ix < 0x40200000) {  /* x < 8.0 */
    i = (int)x;
    y = x - (double)i;
    p = y*(s0 + y*(s1 + y*(s2 + y*(s3 + y*(s4 + y*(s5 + y*s6))))));
    q = one + y*(r1 + y*(r2 + y*(r3 + y*(r4 + y*(r5 + y*r6)))));
    r = half*y + p/q;
    z = one;  /* lgamma(1+s) = log(s) + lgamma(s) */
    switch (i) {
      case 7: z *= (y + 6.0);  /* FALLTHRU */
      case 6: z *= (y + 5.0);  /* FALLTHRU */
      case 5: z *= (y + 4.0);  /* FALLTHRU */
      case 4: z *= (y + 3.0);  /* FALLTHRU */
      case 3: z *= (y + 2.0);
        r += sun_log(z);
        break;
      default:  /* i == 2: nothing to add */
        break;
    }
  } else if (ix < 0x43900000) {  /* 8.0 <= x < 2**58 */
    t = sun_log(x);
    z = one/x;
    y = z*z;
    w = w0 + z*(w1 + y*(w2 + y*(w3 + y*(w4 + y*(w5 + y*w6)))));
    r = (x - half)*(t - one) + w;
  } else {  /* 2**58 <= x < inf */
    r = x*(sun_log(x) - one);
  }
  return sun_exp(r);
}


/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_tgammal.c, 2.41.1 */
/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 *      Gamma function
 *
 * SYNOPSIS:
 *
 * long double x, y, tgammal();
 *
 * y = tgammal( x );
 *
 * DESCRIPTION:
 *
 * Returns gamma function of the argument.  The result is
 * correctly signed.
 *
 * Arguments |x| <= 13 are reduced by recurrence and the function
 * approximated by a rational function of degree 7/8 in the
 * interval (2,3).  Large arguments are handled by Stirling's
 * formula. Large negative arguments are made positive using
 * a reflection formula.
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE     -40,+40      10000       3.6e-19     7.9e-20
 *    IEEE    -1755,+1755   10000       4.8e-18     6.5e-19
 *
 * Accuracy for large arguments is dominated by error in powl().
 */

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* long double has the same format as double on this platform (53-bit mantissa), so the C library's tgamma
   is used as is. */
LUALIB_API long double tools_gammal (long double x) {
  const long double result = tgamma(x);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/*
Rational approximation tgamma(x+2) = P(x)/Q(x)
0 <= x <= 1
Relative error
n=7, d=8
Peak error =  1.83e-20
Relative error spread =  8.4e-23

tgP holds the 8 numerator and tgQ the 9 denominator coefficients. tools_gammal evaluates them with
__polevll (polevl on ARM) using degrees 7 and 8 after subtracting 2 from the reduced argument.
*/
static const long double tgP[8] = {
 4.212760487471622013093E-5L,
 4.542931960608009155600E-4L,
 4.092666828394035500949E-3L,
 2.385363243461108252554E-2L,
 1.113062816019361559013E-1L,
 3.629515436640239168939E-1L,
 8.378004301573126728826E-1L,
 1.000000000000000000009E0L,
};
static const long double tgQ[9] = {
-1.397148517476170440917E-5L,
 2.346584059160635244282E-4L,
-1.237799246653152231188E-3L,
-7.955933682494738320586E-4L,
 2.773706565840072979165E-2L,
-4.633887671244534213831E-2L,
-2.243510905670329164562E-1L,
 4.150160950588455434583E-1L,
 9.999999999999999999908E-1L,
};

/* Largest magnitude handled by tools_gammal: for x > LMAXGAM it returns x*0x1p16383L (overflow), and for
   x < -LMAXGAM the magnitude of the result is set to zero. */
#define LMAXGAM 1755.455L
/*static const long double LOGPI = 1.14472988584940017414L;*/

/* Stirling's formula for the gamma function
tgamma(x) = sqrt(2 pi) x^(x-.5) exp(-x) (1 + 1/x P(1/x))
z(x) = x
13 <= x <= 1024
Relative error
n=8, d=0
Peak error =  9.44e-21
Relative error spread =  8.8e-4

STIR is evaluated by stirfl at w = 1/x with degree 8 whenever x <= 1024.
*/
static const long double STIR[9] = {
 7.147391378143610789273E-4L,
-2.363848809501759061727E-5L,
-5.950237554056330156018E-4L,
 6.989332260623193171870E-5L,
 7.840334842744753003862E-4L,
-2.294719747873185405699E-4L,
-2.681327161876304418288E-3L,
 3.472222222230075327854E-3L,
 8.333333333333331800504E-2L,
};

/* Above this argument stirfl computes x^(x-0.5) as the square of x^(0.5*x-0.25) to avoid overflow in pow() */
#define AGN_MAXSTIR 1024.0L
/* The sqrt(2 pi) factor of Stirling's formula, applied by stirfl to its final result */
static const long double LSQTPI = 2.50662827463100050242E0L;

/* 1/tgamma(x) = z P(z)
 * z(x) = 1/x
 * 0 < x < 0.03125
 * Peak relative error 4.2e-23
 *
 * NOTE: tools_gammal evaluates this table at x itself, computing z/(x*P(x)) for 0 <= x <= 0.03125,
 * where z is the product accumulated by the recurrence; it does not substitute 1/x.
 */
static const long double tgS[9] = {
-1.193945051381510095614E-3L,
 7.220599478036909672331E-3L,
-9.622023360406271645744E-3L,
-4.219773360705915470089E-2L,
 1.665386113720805206758E-1L,
-4.200263503403344054473E-2L,
-6.558780715202540684668E-1L,
 5.772156649015328608253E-1L,
 1.000000000000000000000E0L,
};

/* 1/tgamma(-x) = z P(z)
 * z(x) = 1/x
 * 0 < x < 0.03125
 * Peak relative error 5.16e-23
 * Relative error spread =  2.5e-24
 *
 * NOTE: tools_gammal uses this table for small negative arguments: it negates x first and then computes
 * z/(x*P(x)), i.e. the polynomial is evaluated at the negated argument itself, not at its reciprocal.
 */
static const long double tgSN[9] = {
 1.133374167243894382010E-3L,
 7.220837261893170325704E-3L,
 9.621911155035976733706E-3L,
-4.219773343731191721664E-2L,
-1.665386113944413519335E-1L,
-4.200263503402112910504E-2L,
 6.558780715202536547116E-1L,
 5.772156649015328608727E-1L,
-1.000000000000000000000E0L,
};

/* pi as a long double; used by tools_gammal in the reflection step for arguments x < -13 */
static const long double LPI = 3.1415926535897932384626L;

/* Gamma function by Stirling's formula: sqrt(2 pi) * x^(x - 0.5) * exp(-x) * (1 + correction(1/x)).
   Called by tools_gammal for |x| > 13. */
static long double stirfl (long double x) {
  long double corr, scale, root;
  corr = 1.0/x;
  if (x > 1024.0) {
    /* for large x, use rational coefficients from the analytical expansion */
    corr = (((((6.97281375836585777429E-5L*corr
      + 7.84039221720066627474E-4L)*corr
      - 2.29472093621399176955E-4L)*corr
      - 2.68132716049382716049E-3L)*corr
      + 3.47222222222222222222E-3L)*corr
      + 8.33333333333333333333E-2L)*corr
      + 1.0;
  } else {
    /* fitted polynomial for x <= 1024 */
#ifndef __ARMCPU
    corr = 1.0 + corr*__polevll(corr, STIR, 8);
#else
    corr = 1.0 + corr*polevl(corr, STIR, 8);
#endif
  }
  scale = expl(x);
  if (x > AGN_MAXSTIR) {
    /* avoid overflow in pow(): take the square of x^(x/2 - 1/4) and divide by exp(x) in between */
    root = tools_powl(x, 0.5L*x - 0.25L);
    scale = root*(root/scale);
  } else {
    scale = tools_powl(x, x - 0.5L)/scale;
  }
  return LSQTPI*scale*corr;
}

/* Gamma function for 80-bit extended precision long doubles.

   - non-finite x: returns x + INFINITY;
   - |x| > 13: Stirling's formula (stirfl), combined with the reflection formula for negative x; negative
     integers yield 0/0, x > LMAXGAM overflows, x < -LMAXGAM yields a zero of the appropriate sign;
   - otherwise the argument is moved by recurrence either into [2,3), where the rational approximation
     tgP/tgQ is used, or into [-0.03125, 0.03125], where the tgS/tgSN expansions of 1/gamma are used. */
LUALIB_API long double tools_gammal (long double x) {
  long double num, den, res;
  const long double *small_tbl;
  if (!isfinite(x))
    return x + INFINITY;
  den = tools_fabsl(x);  /* |x| */
  if (den > 13.0) {
    if (!(x < 0.0)) {
      /* large positive argument: overflow or plain Stirling */
      if (x > LMAXGAM)
        return x*0x1p16383L;
      return stirfl(x);
    }
    /* large negative argument: reflection formula */
    num = sun_floorl(den);
    res = den - num;  /* fractional part of |x| */
    if (res == 0)
      return 0/res;   /* negative integer */
    if (den > LMAXGAM) {
      res = 0;
    } else {
      if (res > 0.5) {
        num += 1.0;
        res = den - num;
      }
      res = den*sun_sinl(LPI*res);
      res = tools_fabsl(res)*stirfl(den);
      res = LPI/res;
    }
    /* fix the sign according to the parity of the integer part */
    if (0.5*num == sun_floorl(den*0.5))
      res = -res;
    return res;
  }
  /* |x| <= 13: reduce by recurrence, res accumulates the factors */
  res = 1.0;
  while (x >= 3.0) {
    x -= 1.0;
    res *= x;
  }
  while (x < -0.03125L) {
    res /= x;
    x += 1.0;
  }
  if (x <= 0.03125L) {
    /* small argument; res==1 if x was originally +-0 */
    if (x == 0 && res != 1)
      return x/x;
    small_tbl = tgS;
    if (x < 0.0) {
      x = -x;
      small_tbl = tgSN;
    }
#ifndef __ARMCPU
    return res/(x*__polevll(x, small_tbl, 8));
#else
    return res/(x*polevl(x, small_tbl, 8));
#endif
  }
  while (x < 2.0) {
    res /= x;
    x += 1.0;
  }
  if (x == 2.0)
    return res;
  /* 2 < x < 3: rational approximation in x - 2 */
  x -= 2.0;
#ifndef __ARMCPU
  num = __polevll(x, tgP, 7);
  den = __polevll(x, tgQ, 8);
#else
  num = polevl(x, tgP, 7);
  den = polevl(x, tgQ, 8);
#endif
  return res*num/den;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile.
   With 113-bit long doubles the value is obtained from the C library's double-precision tgamma, so the
   result carries no more than double accuracy. */
LUALIB_API long double tools_gammal (long double x) {
  const long double result = tgamma(x);
  return result;
}
#endif


/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_lgammal.c, 2.41.1 */
/*
*====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
*====================================================
*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* lgammal(x)
* Reentrant version of the logarithm of the Gamma function
* with a user-provided pointer for the sign of Gamma(x).
*
* Method:
*  1. Argument Reduction for 0 < x <= 8
*     Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
*     reduce x to a number in [1.5,2.5] by
*             lgamma(1+s) = log(s) + lgamma(s)
*     for example,
*             lgamma(7.3) = log(6.3) + lgamma(6.3)
*                         = log(6.3*5.3) + lgamma(5.3)
*                         = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
*  2. Polynomial approximation of lgamma around its
*     minimum ymin=1.461632144968362245 to maintain monotonicity.
*     On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
*             Let z = x-ymin;
*             lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
*  2. Rational approximation in the primary interval [2,3]
*     We use the following approximation:
*             s = x-2.0;
*             lgamma(x) = 0.5*s + s*P(s)/Q(s)
*     Our algorithms are based on the following observation
*
*                            zeta(2)-1    2    zeta(3)-1    3
*lgamma(2+s) = s*(1-Euler) + ---------*s  -  ---------*s  + ...
*                                2                 3
*
*     where Euler = 0.5771... is the Euler constant, which is very
*     close to 0.5.
*
*  3. For x>=8, we have
*     lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
*     (better formula:
*        lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
*     Let z = 1/x, then we approximate
*             f(z) = lgamma(x) - (x-0.5)(log(x)-1)
*     by
*                                 3       5             11
*             w = w0 + w1*z + w2*z  + w3*z  + ... + w6*z
 *
*  4. For negative x, since (G is gamma function)
*             -x*G(-x)*G(x) = pi/sin(pi*x),
*     we have
*             G(x) = pi/(sin(pi*x)*(-x)*G(-x))
*     since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
*     Hence, for x<0, signgam = sign(sin(pi*x)) and
*             lgamma(x) = log(|Gamma(x)|)
*                       = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
*     Note: one should avoid compute pi*(-x) directly in the
*           computation of sin(pi*(-x)).
 *
*  5. Special Cases
*             lgamma(2+s) ~ s*(1-Euler) for tiny s
*             lgamma(1)=lgamma(2)=0
*             lgamma(x) ~ -log(x) for tiny x
*             lgamma(0) = lgamma(inf) = inf
*             lgamma(-integer) = +-inf
*
*/

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* long double has the same format as double on this platform (53-bit mantissa): forward to sun_lgamma_r,
   which also receives the pointer sg for the sign of Gamma(x). */
LUALIB_API long double tools_lgammal_r (long double x, int *sg) {
  const long double result = sun_lgamma_r(x, sg);
  return result;
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Constants for the 80-bit extended precision tools_lgammal_r and its helper sin_pil. All of them are
   declared in one single declaration; the comments name the approximation each group belongs to. Where a
   commented-out coefficient of exactly 1 is given, the code adds the bare argument instead of multiplying. */
static const long double lpi = 3.14159265358979323846264L,

/* lgam(1+x) = 0.5 x + x a(x)/b(x)
    -0.268402099609375 <= x <= 0
    peak relative error 6.6e-22 */
lga0 = -6.343246574721079391729402781192128239938E2L,
lga1 =  1.856560238672465796768677717168371401378E3L,
lga2 =  2.404733102163746263689288466865843408429E3L,
lga3 =  8.804188795790383497379532868917517596322E2L,
lga4 =  1.135361354097447729740103745999661157426E2L,
lga5 =  3.766956539107615557608581581190400021285E0L,
lgb0 =  8.214973713960928795704317259806842490498E3L,
lgb1 =  1.026343508841367384879065363925870888012E4L,
lgb2 =  4.553337477045763320522762343132210919277E3L,
lgb3 =  8.506975785032585797446253359230031874803E2L,
lgb4 =  6.042447899703295436820744186992189445813E1L,
/* lgb5 =  1.000000000000000000000000000000000000000E0 */

lgtc =  1.4616321449683623412626595423257213284682E0L,
lgtf = -1.2148629053584961146050602565082954242826E-1, /* double precision: the literal carries no L suffix */
/* tt = (tail of tf), i.e. tf + tt has extended precision. */
lgtt = 3.3649914684731379602768989080467587736363E-18L,
/* lgam ( 1.4616321449683623412626595423257213284682E0 ) =
-1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */

/* lgam (x + tc) = tf + tt + x g(x)/h(x)
    -0.230003726999612341262659542325721328468 <= x
       <= 0.2699962730003876587373404576742786715318
     peak relative error 2.1e-21 */
lgg0 = 3.645529916721223331888305293534095553827E-18L,
lgg1 = 5.126654642791082497002594216163574795690E3L,
lgg2 = 8.828603575854624811911631336122070070327E3L,
lgg3 = 5.464186426932117031234820886525701595203E3L,
lgg4 = 1.455427403530884193180776558102868592293E3L,
lgg5 = 1.541735456969245924860307497029155838446E2L,
lgg6 = 4.335498275274822298341872707453445815118E0L,
lgh0 = 1.059584930106085509696730443974495979641E4L,
lgh1 = 2.147921653490043010629481226937850618860E4L,
lgh2 = 1.643014770044524804175197151958100656728E4L,
lgh3 = 5.869021995186925517228323497501767586078E3L,
lgh4 = 9.764244777714344488787381271643502742293E2L,
lgh5 = 6.442485441570592541741092969581997002349E1L,
/* lgh6 = 1.000000000000000000000000000000000000000E0 */

/* lgam (x+1) = -0.5 x + x u(x)/v(x)
    -0.100006103515625 <= x <= 0.231639862060546875
    peak relative error 1.3e-21 */
lgu0 = -8.886217500092090678492242071879342025627E1L,
lgu1 =  6.840109978129177639438792958320783599310E2L,
lgu2 =  2.042626104514127267855588786511809932433E3L,
lgu3 =  1.911723903442667422201651063009856064275E3L,
lgu4 =  7.447065275665887457628865263491667767695E2L,
lgu5 =  1.132256494121790736268471016493103952637E2L,
lgu6 =  4.484398885516614191003094714505960972894E0L,
lgv0 =  1.150830924194461522996462401210374632929E3L,
lgv1 =  3.399692260848747447377972081399737098610E3L,
lgv2 =  3.786631705644460255229513563657226008015E3L,
lgv3 =  1.966450123004478374557778781564114347876E3L,
lgv4 =  4.741359068914069299837355438370682773122E2L,
lgv5 =  4.508989649747184050907206782117647852364E1L,
/* lgv6 =  1.000000000000000000000000000000000000000E0 */

/* lgam (x+2) = .5 x + x s(x)/r(x)
     0 <= x <= 1
     peak relative error 7.2e-22 */
lgs0 =  1.454726263410661942989109455292824853344E6L,
lgs1 = -3.901428390086348447890408306153378922752E6L,
lgs2 = -6.573568698209374121847873064292963089438E6L,
lgs3 = -3.319055881485044417245964508099095984643E6L,
lgs4 = -7.094891568758439227560184618114707107977E5L,
lgs5 = -6.263426646464505837422314539808112478303E4L,
lgs6 = -1.684926520999477529949915657519454051529E3L,
lgr0 = -1.883978160734303518163008696712983134698E7L,
lgr1 = -2.815206082812062064902202753264922306830E7L,
lgr2 = -1.600245495251915899081846093343626358398E7L,
lgr3 = -4.310526301881305003489257052083370058799E6L,
lgr4 = -5.563807682263923279438235987186184968542E5L,
lgr5 = -3.027734654434169996032905158145259713083E4L,
lgr6 = -4.501995652861105629217250715790764371267E2L,
/* lgr7 =  1.000000000000000000000000000000000000000E0 (implicit leading coefficient) */


/* lgam(x) = ( x - 0.5 )*log(x) - x + LS2PI + 1/x w(1/x^2)
    x >= 8
    Peak relative error 1.51e-21
lgw0 = LS2PI - 0.5 */
lgw0 =  4.189385332046727417803e-1L,
lgw1 =  8.333333333333331447505E-2L,
lgw2 = -2.777777777750349603440E-3L,
lgw3 =  7.936507795855070755671E-4L,
lgw4 = -5.952345851765688514613E-4L,
lgw5 =  8.412723297322498080632E-4L,
lgw6 = -1.880801938119376907179E-3L,
lgw7 =  4.885026142432270781165E-3L;

/* sin(pi*x) assuming x > 2^-1000, if sin(pi*x)==0 the sign is arbitrary.
   The argument is first reduced modulo 2, then shifted by a multiple of 0.5 so that sine or cosine is
   evaluated on a small argument. */
static long double sin_pil (long double x) {
  int step;
  /* spurious inexact if odd int */
  x *= 0.5;
  x = 2.0*(x - sun_floorl(x));  /* x mod 2.0 */
  step = (int)(x*4.0);
  step = (step + 1)/2;          /* nearest multiple of 0.5, 0 .. 4 */
  x -= step*0.5f;
  x *= lpi;
  if (step == 1) return sun_cosl(x);
  if (step == 2) return sun_sinl(-x);
  if (step == 3) return -sun_cosl(x);
  return sun_sinl(x);  /* step == 0 and any other value, e.g. 4 */
}

LUALIB_API long double tools_lgammal_r (long double x, int *sg) {
  long double t, y, z, nadj, p, p1, p2, q, r, w;
  union ldshape u = {x};
  uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
  int sign = u.i.se >> 15;
  int i;
  *sg = 1; nadj = 0;  /* 2.41.1 uninitialised warning fix */
  /* purge off +-inf, NaN, +-0, tiny and negative arguments */
  if (ix >= 0x7fff0000)
    return x*x;
  if (ix < 0x3fc08000) {  /* |x|<2**-63, return -log(|x|) */
    if (sign) {
      *sg = -1;
      x = -x;
    }
    return -tools_logl(x);
  }
  if (sign) {
    x = -x;
    t = sin_pil(x);
    if (t == 0.0)
      return 1.0/(x - x); /* -integer */
    if (t > 0.0)
      *sg = -1;
    else
      t = -t;
    nadj = tools_logl(lpi/(t*x));
  }
  /* purge off 1 and 2 (so the sign is ok with downward rounding) */
  if ((ix == 0x3fff8000 || ix == 0x40008000) && u.i.m == 0) {
    r = 0;
  } else if (ix < 0x40008000) {  /* x < 2.0 */
    if (ix <= 0x3ffee666) {  /* 8.99993896484375e-1 */
      /* lgamma(x) = lgamma(x+1) - log(x) */
      r = -tools_logl(x);
      if (ix >= 0x3ffebb4a) {  /* 7.31597900390625e-1 */
        y = x - 1.0;
        i = 0;
      } else if (ix >= 0x3ffced33) {  /* 2.31639862060546875e-1 */
        y = x - (lgtc - 1.0);
        i = 1;
      } else { /* x < 0.23 */
        y = x;
        i = 2;
      }
    } else {
      r = 0.0;
      if (ix >= 0x3fffdda6) {  /* 1.73162841796875 */
      /* [1.7316,2] */
        y = x - 2.0;
        i = 0;
      } else if (ix >= 0x3fff9da6) {  /* 1.23162841796875 */
        /* [1.23,1.73] */
        y = x - lgtc;
        i = 1;
      } else {
        /* [0.9, 1.23] */
        y = x - 1.0;
        i = 2;
      }
    }
    switch (i) {
      case 0:
        p1 = lga0 + y*(lga1 + y*(lga2 + y*(lga3 + y*(lga4 + y*lga5))));
        p2 = lgb0 + y*(lgb1 + y*(lgb2 + y*(lgb3 + y*(lgb4 + y))));
        r += 0.5*y + y*p1/p2;
        break;
      case 1:
        p1 = lgg0 + y*(lgg1 + y*(lgg2 + y*(lgg3 + y*(lgg4 + y*(lgg5 + y*lgg6)))));
        p2 = lgh0 + y*(lgh1 + y*(lgh2 + y*(lgh3 + y*(lgh4 + y*(lgh5 + y)))));
        p = lgtt + y*p1/p2;
        r += (lgtf + p);
        break;
      case 2:
        p1 = y*(lgu0 + y*(lgu1 + y*(lgu2 + y*(lgu3 + y*(lgu4 + y*(lgu5 + y*lgu6))))));
        p2 = lgv0 + y*(lgv1 + y*(lgv2 + y*(lgv3 + y*(lgv4 + y*(lgv5 + y)))));
        r += (-0.5*y + p1/p2);
    }
  } else if (ix < 0x40028000) {  /* 8.0 */
    /* x < 8.0 */
    i = (int)x;
    y = x - (double)i;
    p = y*(lgs0 + y*(lgs1 + y*(lgs2 + y*(lgs3 + y*(lgs4 + y*(lgs5 + y*lgs6))))));
    q = lgr0 + y*(lgr1 + y*(lgr2 + y*(lgr3 + y*(lgr4 + y*(lgr5 + y*(lgr6 + y))))));
    r = 0.5*y + p/q;
    z = 1.0;
    /* lgamma(1+s) = log(s) + lgamma(s) */
    switch (i) {
      case 7:
        z *= (y + 6.0); /* FALLTHRU */
      case 6:
        z *= (y + 5.0); /* FALLTHRU */
      case 5:
        z *= (y + 4.0); /* FALLTHRU */
      case 4:
        z *= (y + 3.0); /* FALLTHRU */
      case 3:
        z *= (y + 2.0); /* FALLTHRU */
        r += tools_logl(z);
        break;
    }
  } else if (ix < 0x40418000) {  /* 2^66 */
    /* 8.0 <= x < 2**66 */
    t = tools_logl(x);
    z = 1.0/x;
    y = z*z;
    w = lgw0 + z*(lgw1 + y*(lgw2 + y*(lgw3 + y*(lgw4 + y*(lgw5 + y*(lgw6 + y*lgw7))))));
    r = (x - 0.5)*(t - 1.0) + w;
  } else  /* 2**66 <= x <= inf */
    r = x*(tools_logl(x) - 1.0);
  if (sign)
    r = nadj - r;
  return r;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
/* TODO: broken implementation to make things compile.
   With 113-bit long doubles the call is simply forwarded to sun_lgamma_r, which also receives the pointer sg
   for the sign of Gamma(x). */
LUALIB_API long double tools_lgammal_r (long double x, int *sg) {
  const long double result = sun_lgamma_r(x, sg);
  return result;
}
#endif

/* Logarithm of the absolute value of the gamma function for long doubles. Thin wrapper around
   tools_lgammal_r that discards the sign of Gamma(x). */
LUALIB_API long double tools_lgammal (long double x) {
  int sign_unused;  /* receives the sign of Gamma(x); formerly named __signgam, a reserved identifier */
  return tools_lgammal_r(x, &sign_unused);
}


/*
Digits := 30:
for i from 0 to 511 do
   printf(`  %0.30eL,   // %3d! //\n`, evalf(ln(i!)), i)
od;
*/
/* Not static to prevent "warning: '...' is static but used in inline function which is not static" */

/* Lookup table of the natural logarithms of the factorials 0! through 511!, stored as doubles:
   entry [n] holds ln(n!), and the trailing comment on each line names the factorial whose logarithm it is.
   The literals are written with more decimal digits than a double can hold; the compiler rounds them
   to double precision. */
const double lnfactorials_lt_512_dbl[] = {  /* ln(n!) for n = 0 to 511 (not the factorials themselves). Improved precision 3.16.2 */
  0.000000000000000000000000000000e-01,   /*   0! */
  0.000000000000000000000000000000e-01,   /*   1! */
  6.931471805599453094172321214580e-01,   /*   2! */
  1.791759469228055000812477358380e+00,   /*   3! */
  3.178053830347945619646941601300e+00,   /*   4! */
  4.787491742782045994247700934520e+00,   /*   5! */
  6.579251212010100995060178292900e+00,   /*   6! */
  8.525161361065414300165531036350e+00,   /*   7! */
  1.060460290274525022841722740070e+01,   /*   8! */
  1.280182748008146961120771787460e+01,   /*   9! */
  1.510441257307551529522570932930e+01,   /*  10! */
  1.750230784587388583928765290720e+01,   /*  11! */
  1.998721449566188614951736238710e+01,   /*  12! */
  2.255216385312342288557084982860e+01,   /*  13! */
  2.519122118273868150009343469350e+01,   /*  14! */
  2.789927138384089156608943926370e+01,   /*  15! */
  3.067186010608067280375836774950e+01,   /*  16! */
  3.350507345013688888400790236740e+01,   /*  17! */
  3.639544520803305357621562496270e+01,   /*  18! */
  3.933988418719949403622465239460e+01,   /*  19! */
  4.233561646075348502965987597070e+01,   /*  20! */
  4.538013889847690802616047395110e+01,   /*  21! */
  4.847118135183522387963964965050e+01,   /*  22! */
  5.160667556776437357044640248230e+01,   /*  23! */
  5.478472939811231919009334408360e+01,   /*  24! */
  5.800360522298051993929486275010e+01,   /*  25! */
  6.126170176100200198476558231310e+01,   /*  26! */
  6.455753862700633105895131802390e+01,   /*  27! */
  6.788974313718153498289113501020e+01,   /*  28! */
  7.125703896716800901007440704260e+01,   /*  29! */
  7.465823634883016438548764373420e+01,   /*  30! */
  7.809222355331531063141680805870e+01,   /*  31! */
  8.155795945611503717850296866600e+01,   /*  32! */
  8.505446701758151741396015748090e+01,   /*  33! */
  8.858082754219767880362692422020e+01,   /*  34! */
  9.213617560368709248333303629690e+01,   /*  35! */
  9.571969454214320248495799101370e+01,   /*  36! */
  9.933061245478742692932608668470e+01,   /*  37! */
  1.029681986145138126987523462380e+02,   /*  38! */
  1.066317602606434591262010789170e+02,   /*  39! */
  1.103206397147573954290535346140e+02,   /*  40! */
  1.140342117814617032329202979870e+02,   /*  41! */
  1.177718813997450715388381280890e+02,   /*  42! */
  1.215330815154386339623109706020e+02,   /*  43! */
  1.253172711493568951252073784230e+02,   /*  44! */
  1.291239336391272148825986282300e+02,   /*  45! */
  1.329525750356163098828226131840e+02,   /*  46! */
  1.368027226373263684696435638530e+02,   /*  47! */
  1.406739236482342593987077375760e+02,   /*  48! */
  1.445657439463448860089184430630e+02,   /*  49! */
  1.484777669517730320675371938510e+02,   /*  50! */
  1.524095925844973578391819737060e+02,   /*  51! */
  1.563608363030787851940699253900e+02,   /*  52! */
  1.603311282166309070282143945290e+02,   /*  53! */
  1.643201122631951814118173623610e+02,   /*  54! */
  1.683274454484276523304800652730e+02,   /*  55! */
  1.723527971391628015638371143800e+02,   /*  56! */
  1.763958484069973517152413870490e+02,   /*  57! */
  1.804562914175437710518418912030e+02,   /*  58! */
  1.845338288614494905024579415770e+02,   /*  59! */
  1.886281734236715911872884103900e+02,   /*  60! */
  1.927390472878449024360397994930e+02,   /*  61! */
  1.968661816728899939913861959390e+02,   /*  62! */
  2.010093163992815266792820391570e+02,   /*  63! */
  2.051681994826411985357854318850e+02,   /*  64! */
  2.093425867525368356464396786600e+02,   /*  65! */
  2.135322414945632611913140995960e+02,   /*  66! */
  2.177369341139542272509841715930e+02,   /*  67! */
  2.219564418191303339500681704540e+02,   /*  68! */
  2.261905483237275933322701685220e+02,   /*  69! */
  2.304390435657769523213935127200e+02,   /*  70! */
  2.347017234428182677427229672530e+02,   /*  71! */
  2.389783895618343230537651540910e+02,   /*  72! */
  2.432688490029827141828572629490e+02,   /*  73! */
  2.475729140961868839366425907410e+02,   /*  74! */
  2.518904022097231943772393546440e+02,   /*  75! */
  2.562211355500095254560828463190e+02,   /*  76! */
  2.605649409718632093052501426410e+02,   /*  77! */
  2.649216497985528010421161074410e+02,   /*  78! */
  2.692910976510198225362890529820e+02,   /*  79! */
  2.736731242856937041485587408010e+02,   /*  80! */
  2.780675734403661429141397217490e+02,   /*  81! */
  2.824742926876303960274237172430e+02,   /*  82! */
  2.868931332954269939508991894670e+02,   /*  83! */
  2.913239500942703075662342516900e+02,   /*  84! */
  2.957666013507606240210845456410e+02,   /*  85! */
  3.002209486470141317539746202760e+02,   /*  86! */
  3.046868567656687154725531375450e+02,   /*  87! */
  3.091641935801469219448667774870e+02,   /*  88! */
  3.136528299498790617831845930280e+02,   /*  89! */
  3.181526396202093268499930749570e+02,   /*  90! */
  3.226634991267261768911519151420e+02,   /*  91! */
  3.271852877037752172007931322160e+02,   /*  92! */
  3.317178871969284731381175417780e+02,   /*  93! */
  3.362611819791984770343557245690e+02,   /*  94! */
  3.408150588707990178689655113340e+02,   /*  95! */
  3.453794070622668541074469171780e+02,   /*  96! */
  3.499541180407702369295636388000e+02,   /*  97! */
  3.545390855194408088491915764080e+02,   /*  98! */
  3.591342053695753987760440104600e+02,   /*  99! */
  3.637393755555634901440799933700e+02,   /* 100! */
  3.683544960724047495949641916370e+02,   /* 101! */
  3.729794688856890206760262036130e+02,   /* 102! */
  3.776141978739186564467948059280e+02,   /* 103! */
  3.822585887730600291110999897340e+02,   /* 104! */
  3.869125491232175524822013470470e+02,   /* 105! */
  3.915759882173296196257630483080e+02,   /* 106! */
  3.962488170517915257990674471250e+02,   /* 107! */
  4.009309482789157454920876470790e+02,   /* 108! */
  4.056222961611448891924649635310e+02,   /* 109! */
  4.103227765269373054205448985630e+02,   /* 110! */
  4.150323067282496395563082394710e+02,   /* 111! */
  4.197508055995447340990825207010e+02,   /* 112! */
  4.244781934182570746676646521940e+02,   /* 113! */
  4.292143918666515701284861569850e+02,   /* 114! */
  4.339593239950148201938936691500e+02,   /* 115! */
  4.387129141861211848399114054250e+02,   /* 116! */
  4.434750881209189409587553833400e+02,   /* 117! */
  4.482457727453846057187886658350e+02,   /* 118! */
  4.530248962384961351041435531970e+02,   /* 119! */
  4.578123879812781810983912541310e+02,   /* 120! */
  4.626081785268749221865151412870e+02,   /* 121! */
  4.674121995716081787446837625120e+02,   /* 122! */
  4.722243839269805962399457711220e+02,   /* 123! */
  4.770446654925856331047093996900e+02,   /* 124! */
  4.818729792298879342285116776890e+02,   /* 125! */
  4.867092611368394122258247530280e+02,   /* 126! */
  4.915534482232980034988721938360e+02,   /* 127! */
  4.964054784872176206647928186860e+02,   /* 128! */
  5.012652908915792927796609064360e+02,   /* 129! */
  5.061328253420348751997323853320e+02,   /* 130! */
  5.110080226652360267438818093440e+02,   /* 131! */
  5.158908245878223975981734624010e+02,   /* 132! */
  5.207811737160441513632878425770e+02,   /* 133! */
  5.256790135159950627323751466950e+02,   /* 134! */
  5.305842882944334921811616417390e+02,   /* 135! */
  5.354969431801695441896628727210e+02,   /* 136! */
  5.404169241059976691049780628490e+02,   /* 137! */
  5.453441777911548737965972930390e+02,   /* 138! */
  5.502786517242855655537860779580e+02,   /* 139! */
  5.552202941468948698523266542770e+02,   /* 140! */
  5.601690540372730381305428501840e+02,   /* 141! */
  5.651248810948742988612895368380e+02,   /* 142! */
  5.700877257251342061414049678580e+02,   /* 143! */
  5.750575390247102067618643868170e+02,   /* 144! */
  5.800342727671307811636484181830e+02,   /* 145! */
  5.850178793888391176021577591620e+02,   /* 146! */
  5.900083119756178539037637098860e+02,   /* 147! */
  5.950055242493819689669662698000e+02,   /* 148! */
  6.000094705553274281079586980750e+02,   /* 149! */
  6.050201058494236838579726940990e+02,   /* 150! */
  6.100373856862386081867689303990e+02,   /* 151! */
  6.150612662070848845750296541950e+02,   /* 152! */
  6.200917041284773200380696792870e+02,   /* 153! */
  6.251286567308909491966542077300e+02,   /* 154! */
  6.301720818478101958171841313880e+02,   /* 155! */
  6.352219378550597328634673283090e+02,   /* 156! */
  6.402781836604080409208917735450e+02,   /* 157! */
  6.453407786934350077244819512080e+02,   /* 158! */
  6.504096828956552392500216655840e+02,   /* 159! */
  6.554848567108890661717085855250e+02,   /* 160! */
  6.605662610758735291676206911000e+02,   /* 161! */
  6.656538574111059132426189041690e+02,   /* 162! */
  6.707476076119126755766838365350e+02,   /* 163! */
  6.758474740397368739993850641510e+02,   /* 164! */
  6.809534195136374546094430122990e+02,   /* 165! */
  6.860654073019939978423357166440e+02,   /* 166! */
  6.911834011144107529495954674210e+02,   /* 167! */
  6.963073650938140118743477617660e+02,   /* 168! */
  7.014372638087370853464547366490e+02,   /* 169! */
  7.065730622457873471107222627210e+02,   /* 170! */
  7.117147258022900069535217806270e+02,   /* 171! */
  7.168622202791034599958290873830e+02,   /* 172! */
  7.220155118736012389427625877720e+02,   /* 173! */
  7.271745671728157679707583371620e+02,   /* 174! */
  7.323393531467392820250652085720e+02,   /* 175! */
  7.375098371417774338067960806360e+02,   /* 176! */
  7.426859868743512629488073762470e+02,   /* 177! */
  7.478677704246433480965424239090e+02,   /* 178! */
  7.530551562304841030927202283390e+02,   /* 179! */
  7.582481130813743134689459423890e+02,   /* 180! */
  7.634466101126401392157850294280e+02,   /* 181! */
  7.686506167997169345663611017340e+02,   /* 182! */
  7.738601029525583555065077360740e+02,   /* 183! */
  7.790750387101673411255661852710e+02,   /* 184! */
  7.842953945352456659445350402750e+02,   /* 185! */
  7.895211412089588671912766819580e+02,   /* 186! */
  7.947522498258134538155881601540e+02,   /* 187! */
  7.999886917886434030212435750660e+02,   /* 188! */
  8.052304388037030454005346635210e+02,   /* 189! */
  8.104774628758635315445616824070e+02,   /* 190! */
  8.157297363039101614174116323590e+02,   /* 191! */
  8.209872316759379429653102703250e+02,   /* 192! */
  8.262499218648428285171652013780e+02,   /* 193! */
  8.315177800239061566486991551220e+02,   /* 194! */
  8.367907795824699034507486471330e+02,   /* 195! */
  8.420688942417004206797938168630e+02,   /* 196! */
  8.473520979704384091865736141920e+02,   /* 197! */
  8.526403650011329444228432803650e+02,   /* 198! */
  8.579336698258574368182534016570e+02,   /* 199! */
  8.632319871924054734957066166880e+02,   /* 200! */
  8.685352921004645492467719339210e+02,   /* 201! */
  8.738435597978657540070733643100e+02,   /* 202! */
  8.791567657769075413393619890860e+02,   /* 203! */
  8.844748857707517577298412331830e+02,   /* 204! */
  8.897978957498901659083087558900e+02,   /* 205! */
  8.951257719186797469884945903260e+02,   /* 206! */
  9.004584907119451160620918336320e+02,   /* 207! */
  9.057960287916464340358142495590e+02,   /* 208! */
  9.111383630436112450398852205690e+02,   /* 209! */
  9.164854705743287137204038100040e+02,   /* 210! */
  9.218373287078047802161457662700e+02,   /* 211! */
  9.271939149824767926691246996510e+02,   /* 212! */
  9.325552071481862177818493994210e+02,   /* 213! */
  9.379211831632080692645710303590e+02,   /* 214! */
  9.432918211913357320626446322060e+02,   /* 215! */
  9.486670995990198970650820642810e+02,   /* 216! */
  9.540469969525603566161165813490e+02,   /* 217! */
  9.594314920153494456259111299230e+02,   /* 218! */
  9.648205637451659464463984840170e+02,   /* 219! */
  9.702141912915183079838956511710e+02,   /* 220! */
  9.756123539930360608001986732310e+02,   /* 221! */
  9.810150313749083402453792462600e+02,   /* 222! */
  9.864222031463684590040153514700e+02,   /* 223! */
  9.918338491982234988562068648210e+02,   /* 224! */
  9.972499496004279189881988739610e+02,   /* 225! */
  1.002670484599700204866198237580e+03,   /* 226! */
  1.008095434617181607541211619150e+03,   /* 227! */
  1.013524780246136048311450356060e+03,   /* 228! */
  1.018958502249690287959891591670e+03,   /* 229! */
  1.024396581558613483334716335950e+03,   /* 230! */
  1.029838999269135276875278877510e+03,   /* 231! */
  1.035285736640801586830713845910e+03,   /* 232! */
  1.040736775094367287396016070780e+03,   /* 233! */
  1.046192096209724988824277280820e+03,   /* 234! */
  1.051651681723869147785698990820e+03,   /* 235! */
  1.057115513528894757855149505440e+03,   /* 236! */
  1.062583573670029889040717696220e+03,   /* 237! */
  1.068055844343701363735489815700e+03,   /* 238! */
  1.073532307895632874402452979300e+03,   /* 239! */
  1.079012946818974865706117912350e+03,   /* 240! */
  1.084497743752465520701584323670e+03,   /* 241! */
  1.089986681478622207099125442950e+03,   /* 242! */
  1.095479742921962755556101669130e+03,   /* 243! */
  1.100976911147255957423687522480e+03,   /* 244! */
  1.106478169357800684408498987300e+03,   /* 245! */
  1.111983500893733047213178228030e+03,   /* 246! */
  1.117492889230361024409240742900e+03,   /* 247! */
  1.123006317976526006583421603590e+03,   /* 248! */
  1.128523770872990714198292321050e+03,   /* 249! */
  1.134045231790852960631511831170e+03,   /* 250! */
  1.139570684729984744517730416410e+03,   /* 251! */
  1.145100113817496167824460723870e+03,   /* 252! */
  1.150633503306223688059329420280e+03,   /* 253! */
  1.156170837573242224641794093210e+03,   /* 254! */
  1.161712101118400650788039632400e+03,   /* 255! */
  1.167257278562880213263377489370e+03,   /* 256! */
  1.172806354647775433061729283690e+03,   /* 257! */
  1.178359314232697050486014603560e+03,   /* 258! */
  1.183916142294396588235488051970e+03,   /* 259! */
  1.189476823925412115964976762990e+03,   /* 260! */
  1.195041344332734809374950525500e+03,   /* 261! */
  1.200609688836495906228517181630e+03,   /* 262! */
  1.206181842868673670779603107890e+03,   /* 263! */
  1.211757791971819986943311993070e+03,   /* 264! */
  1.217337521797806209152057221550e+03,   /* 265! */
  1.222921018106587908226588833840e+03,   /* 266! */
  1.228508266764988157756301894620e+03,   /* 267! */
  1.234099253745499014434806430860e+03,   /* 268! */
  1.239693965125100853541026384040e+03,   /* 269! */
  1.245292387084099228299230111210e+03,   /* 270! */
  1.250894505904978929199326981280e+03,   /* 271! */
  1.256500307971274926517245444380e+03,   /* 272! */
  1.262109779766459886249799529800e+03,   /* 273! */
  1.267722907872847956474531952050e+03,   /* 274! */
  1.273339678970514527767795414300e+03,   /* 275! */
  1.278960079836231677768831876610e+03,   /* 276! */
  1.284584097342419016268701274160e+03,   /* 277! */
  1.290211718456109653335307291200e+03,   /* 278! */
  1.295842930237931018964026946000e+03,   /* 279! */
  1.301477719841100268571984754440e+03,   /* 280! */
  1.307116074510434014337091392760e+03,   /* 281! */
  1.312757981581372127924724820790e+03,   /* 282! */
  1.318403428479015365549370807940e+03,   /* 283! */
  1.324052402717176571589534726710e+03,   /* 284! */
  1.329704891897445222115539758720e+03,   /* 285! */
  1.335360883708265074705072421860e+03,   /* 286! */
  1.341020365924024695814044537970e+03,   /* 287! */
  1.346683326404160641743921189050e+03,   /* 288! */
  1.352349753092273073904420258290e+03,   /* 289! */
  1.358019634015253593615621521780e+03,   /* 290! */
  1.363692957282425086129133488640e+03,   /* 291! */
  1.369369711084693367877060061740e+03,   /* 292! */
  1.375049883693710435182999902000e+03,   /* 293! */
  1.380733463461049116794023084840e+03,   /* 294! */
  1.386420438817388936619239894550e+03,   /* 295! */
  1.392110798271712996991859686590e+03,   /* 296! */
  1.397804530410515696610107365880e+03,   /* 297! */
  1.403501623897021101060517026270e+03,   /* 298! */
  1.409202067470411787487377266550e+03,   /* 299! */
  1.414905849945067988546808494690e+03,   /* 300! */
  1.420612960209816864275386689950e+03,   /* 301! */
  1.426323387227191733913600158370e+03,   /* 302! */
  1.432037120032701103055879601870e+03,   /* 303! */
  1.437754147734107324753557557790e+03,   /* 304! */
  1.443474459510714736376909706230e+03,   /* 305! */
  1.449198044612667117149366963440e+03,   /* 306! */
  1.454924892360254314302965426010e+03,   /* 307! */
  1.460654992143227888770967186570e+03,   /* 308! */
  1.466388333420125634233131034120e+03,   /* 309! */
  1.472124905717604826163078189900e+03,   /* 310! */
  1.477864698629784060297274185470e+03,   /* 311! */
  1.483607701817593542652974614510e+03,   /* 312! */
  1.489353905008133695869653379820e+03,   /* 313! */
  1.495103297994041949236495057180e+03,   /* 314! */
  1.500855870632867582298991659730e+03,   /* 315! */
  1.506611612846454494411999069510e+03,   /* 316! */
  1.512370514620331775026198831680e+03,   /* 317! */
  1.518132566003111951861155778180e+03,   /* 318! */
  1.523897757105896796432400993790e+03,   /* 319! */
  1.529666078101690568663505145850e+03,   /* 320! */
  1.535437519224820584528204789910e+03,   /* 321! */
  1.541212070770364992833534127600e+03,   /* 322! */
  1.546989723093587649373792689650e+03,   /* 323! */
  1.552770466609379978758208134840e+03,   /* 324! */
  1.558554291791709716243463140950e+03,   /* 325! */
  1.564341189173076423886945305440e+03,   /* 326! */
  1.570131149343973677278717867130e+03,   /* 327! */
  1.575924162952357821010836326870e+03,   /* 328! */
  1.581720220703123192902762630280e+03,   /* 329! */
  1.587519313357583718822237810550e+03,   /* 330! */
  1.593321431732960781723022427390e+03,   /* 331! */
  1.599126566701877270265332363860e+03,   /* 332! */
  1.604934709191857714092490950000e+03,   /* 333! */
  1.610745850184834414509167932900e+03,   /* 334! */
  1.616559980716659480943438764230e+03,   /* 335! */
  1.622377091876622685177608290690e+03,   /* 336! */
  1.628197174806975046902113157250e+03,   /* 337! */
  1.634020220702458065683637364250e+03,   /* 338! */
  1.639846220809838515943614740980e+03,   /* 339! */
  1.645675166427448723017299499180e+03,   /* 340! */
  1.651507048904732239807290607080e+03,   /* 341! */
  1.657341859641794844959507357110e+03,   /* 342! */
  1.663179590088960784874823415340e+03,   /* 343! */
  1.669020231746334183226547954220e+03,   /* 344! */
  1.674863776163365542983350711620e+03,   /* 345! */
  1.680710214938423267239701444130e+03,   /* 346! */
  1.686559539718370126432055120970e+03,   /* 347! */
  1.692411742198144600769468102490e+03,   /* 348! */
  1.698266814120347027932667584010e+03,   /* 349! */
  1.704124747274830487296391687540e+03,   /* 350! */
  1.709985533498296353106630910690e+03,   /* 351! */
  1.715849164673894450197779014880e+03,   /* 352! */
  1.721715632730827746965174734990e+03,   /* 353! */
  1.727584929643961521416603262730e+03,   /* 354! */
  1.733457047433436937212533476590e+03,   /* 355! */
  1.739331978164288967669685756380e+03,   /* 356! */
  1.745209713946068606746435888970e+03,   /* 357! */
  1.751090246932469307052030925530e+03,   /* 358! */
  1.756973569320957585925079018410e+03,   /* 359! */
  1.762859673352407741610721964580e+03,   /* 360! */
  1.768748551310740622530740019450e+03,   /* 361! */
  1.774640195522566393586996338610e+03,   /* 362! */
  1.780534598356831244366515471000e+03,   /* 363! */
  1.786431752224467985026508775430e+03,   /* 364! */
  1.792331649578050476530201643620e+03,   /* 365! */
  1.798234282911451842779765510080e+03,   /* 366! */
  1.804139644759506413051384942020e+03,   /* 367! */
  1.810047727697675343979860623340e+03,   /* 368! */
  1.815958524341715871166517877180e+03,   /* 369! */
  1.821872027347354141294903964310e+03,   /* 370! */
  1.827788229409961576434153786190e+03,   /* 371! */
  1.833707123264234722990312660000e+03,   /* 372! */
  1.839628701683878538531648394600e+03,   /* 373! */
  1.845552957481293070465377104920e+03,   /* 374! */
  1.851479883507263481280574628160e+03,   /* 375! */
  1.857409472650653375795647275190e+03,   /* 376! */
  1.863341717838101386558884034660e+03,   /* 377! */
  1.869276612033720974247592355240e+03,   /* 378! */
  1.875214148238803400595409591820e+03,   /* 379! */
  1.881154319491523832048853842830e+03,   /* 380! */
  1.887097118866650533013296528880e+03,   /* 381! */
  1.893042539475257108195563710950e+03,   /* 382! */
  1.898990574464437754185556765490e+03,   /* 383! */
  1.904941217017025481042872635570e+03,   /* 384! */
  1.910894460351313265266640691230e+03,   /* 385! */
  1.916850297720778096127912854400e+03,   /* 386! */
  1.922808722413807877934176187390e+03,   /* 387! */
  1.928769727753431151375127373250e+03,   /* 388! */
  1.934733307097049597667975993490e+03,   /* 389! */
  1.940699453836173289779442717620e+03,   /* 390! */
  1.946668161396158655550499005070e+03,   /* 391! */
  1.952639423235949118088961406920e+03,   /* 392! */
  1.958613232847818379324506076170e+03,   /* 393! */
  1.964589583757116313140703105620e+03,   /* 394! */
  1.970568469522017435009476810500e+03,   /* 395! */
  1.976549883733271915555163708790e+03,   /* 396! */
  1.982533820013959105968247140820e+03,   /* 397! */
  1.988520272019243543673074494230e+03,   /* 398! */
  1.994509233436133407129584119650e+03,   /* 399! */
  2.000500697983241389116454566800e+03,   /* 400! */
  2.006494659410547958302305245890e+03,   /* 401! */
  2.012491111499166979362787795240e+03,   /* 402! */
  2.018490048061113662344770447010e+03,   /* 403! */
  2.024491462939074812414489109520e+03,   /* 404! */
  2.030495350006181351554670849800e+03,   /* 405! */
  2.036501703165783084196376706690e+03,   /* 406! */
  2.042510516351225679184806745940e+03,   /* 407! */
  2.048521783525629840884703222160e+03,   /* 408! */
  2.054535498681672642630416744370e+03,   /* 409! */
  2.060551655841370996118301499200e+03,   /* 410! */
  2.066570249055867230725011934560e+03,   /* 411! */
  2.072591272405216757114615001120e+03,   /* 412! */
  2.078614719998177789870336404240e+03,   /* 413! */
  2.084640585972003104253350879670e+03,   /* 414! */
  2.090668864492233802551427111220e+03,   /* 415! */
  2.096699549752495065834566759270e+03,   /* 416! */
  2.102732635974293867283150789430e+03,   /* 417! */
  2.108768117406818623596638992560e+03,   /* 418! */
  2.114805988326740761327671926480e+03,   /* 419! */
  2.120846243038018175317607748040e+03,   /* 420! */
  2.126888875871700556735196464270e+03,   /* 421! */
  2.132933881185736568540355652650e+03,   /* 422! */
  2.138981253364782846509967093800e+03,   /* 423! */
  2.145030986820014804272363259300e+03,   /* 424! */
  2.151083075988939221101814312580e+03,   /* 425! */
  2.157137515335208591523956244470e+03,   /* 426! */
  2.163194299348437216077812986320e+03,   /* 427! */
  2.169253422544019012869951849380e+03,   /* 428! */
  2.175314879462947029841462525640e+03,   /* 429! */
  2.181378664671634637948953359610e+03,   /* 430! */
  2.187444772761738385736701026410e+03,   /* 431! */
  2.193513198349982496048555690610e+03,   /* 432! */
  2.199583936077984985895275975180e+03,   /* 433! */
  2.205656980612085390755727724370e+03,   /* 434! */
  2.211732326643174074848907000980e+03,   /* 435! */
  2.217809968886523109168118781670e+03,   /* 436! */
  2.223889902081618699318934561930e+03,   /* 437! */
  2.229972120991995145448839148150e+03,   /* 438! */
  2.236056620405070316798832360280e+03,   /* 439! */
  2.242143395131982623645746759560e+03,   /* 440! */
  2.248232440007429469638747955520e+03,   /* 441! */
  2.254323749889507167764468209700e+03,   /* 442! */
  2.260417319659552303402554216950e+03,   /* 443! */
  2.266513144221984528157152022110e+03,   /* 444! */
  2.272611218504150768370070596980e+03,   /* 445! */
  2.278711537456170832438123934310e+03,   /* 446! */
  2.284814096050784401270511607820e+03,   /* 447! */
  2.290918889283199386432120353290e+03,   /* 448! */
  2.297025912170941640725373875090e+03,   /* 449! */
  2.303135159753706006166783116350e+03,   /* 450! */
  2.309246627093208684514711823300e+03,   /* 451! */
  2.315360309273040915702128419030e+03,   /* 452! */
  2.321476201398523949722319900570e+03,   /* 453! */
  2.327594298596565297706750514270e+03,   /* 454! */
  2.333714596015516248122510113790e+03,   /* 455! */
  2.339837088825030634202166082820e+03,   /* 456! */
  2.345961772215924838901359214200e+03,   /* 457! */
  2.352088641400039023859217681930e+03,   /* 458! */
  2.358217691610099569013652952260e+03,   /* 459! */
  2.364348918099582709697894928660e+03,   /* 460! */
  2.370482316142579358216267653860e+03,   /* 461! */
  2.376617881033661097066247427540e+03,   /* 462! */
  2.382755608087747331140327274720e+03,   /* 463! */
  2.388895492639973586405179475240e+03,   /* 464! */
  2.395037530045560942717104644130e+03,   /* 465! */
  2.401181715679686588591824101130e+03,   /* 466! */
  2.407328044937355485903353632420e+03,   /* 467! */
  2.413476513233273132641032074570e+03,   /* 468! */
  2.419627116001719412005807499310e+03,   /* 469! */
  2.425779848696423516276646441440e+03,   /* 470! */
  2.431934706790439934025466131910e+03,   /* 471! */
  2.438091685776025489404333878650e+03,   /* 472! */
  2.444250781164517422371868664740e+03,   /* 473! */
  2.450411988486212498866854087640e+03,   /* 474! */
  2.456575303290247140076064633740e+03,   /* 475! */
  2.462740721144478560080253985340e+03,   /* 476! */
  2.468908237635366901297188944960e+03,   /* 477! */
  2.475077848367858357273569340670e+03,   /* 478! */
  2.481249548965269272508686018500e+03,   /* 479! */
  2.487423335069171209121768183680e+03,   /* 480! */
  2.493599202339276970302189766790e+03,   /* 481! */
  2.499777146453327570607073410230e+03,   /* 482! */
  2.505957163106980143294380761050e+03,   /* 483! */
  2.512139248013696775001339112450e+03,   /* 484! */
  2.518323396904634258198056593400e+03,   /* 485! */
  2.524509605528534751964450051710e+03,   /* 486! */
  2.530697869651617341755179962540e+03,   /* 487! */
  2.536888185057470488932183048010e+03,   /* 488! */
  2.543080547546945360957643225610e+03,   /* 489! */
  2.549274952938050033251871922550e+03,   /* 490! */
  2.555471397065844553830606112870e+03,   /* 491! */
  2.561669875782336861944702585730e+03,   /* 492! */
  2.567870384956379552052135392380e+03,   /* 493! */
  2.574072920473567474557615139370e+03,   /* 494! */
  2.580277478236136164859068332760e+03,   /* 495! */
  2.586484054162861092342666425570e+03,   /* 496! */
  2.592692644188957721069101232840e+03,   /* 497! */
  2.598903244265982373993389182430e+03,   /* 498! */
  2.605115850361733892658674273560e+03,   /* 499! */
  2.611330458460156084401311015810e+03,   /* 500! */
  2.617547064561240949199966011820e+03,   /* 501! */
  2.623765664680932678395601829180e+03,   /* 502! */
  2.629986254851032417602020590300e+03,   /* 503! */
  2.636208831119103786218168129880e+03,   /* 504! */
  2.642433389548379146043653087480e+03,   /* 505! */
  2.648659926217666611587939016010e+03,   /* 506! */
  2.654888437221257794751441236130e+03,   /* 507! */
  2.661118918668836276643323141180e+03,   /* 508! */
  2.667351366685386799386158005040e+03,   /* 509! */
  2.673585777411105170841820776350e+03,   /* 510! */
  2.679822147001308875276018237950e+03    /* 511! */
};

/* Table of natural logarithms of factorials: the entry at index n holds ln(n!) for n = 0 .. 511.
   Note that the values are logarithms, not the factorials themselves; the trailing comment on each
   line only names the factorial whose logarithm is stored there.
   All constants carry the L suffix, i.e. they are long double constants. Improved precision 3.16.2 */
const long double lnfactorials_lt_512[] = {
  0.000000000000000000000000000000e-01L,   /*   0! */
  0.000000000000000000000000000000e-01L,   /*   1! */
  6.931471805599453094172321214580e-01L,   /*   2! */
  1.791759469228055000812477358380e+00L,   /*   3! */
  3.178053830347945619646941601300e+00L,   /*   4! */
  4.787491742782045994247700934520e+00L,   /*   5! */
  6.579251212010100995060178292900e+00L,   /*   6! */
  8.525161361065414300165531036350e+00L,   /*   7! */
  1.060460290274525022841722740070e+01L,   /*   8! */
  1.280182748008146961120771787460e+01L,   /*   9! */
  1.510441257307551529522570932930e+01L,   /*  10! */
  1.750230784587388583928765290720e+01L,   /*  11! */
  1.998721449566188614951736238710e+01L,   /*  12! */
  2.255216385312342288557084982860e+01L,   /*  13! */
  2.519122118273868150009343469350e+01L,   /*  14! */
  2.789927138384089156608943926370e+01L,   /*  15! */
  3.067186010608067280375836774950e+01L,   /*  16! */
  3.350507345013688888400790236740e+01L,   /*  17! */
  3.639544520803305357621562496270e+01L,   /*  18! */
  3.933988418719949403622465239460e+01L,   /*  19! */
  4.233561646075348502965987597070e+01L,   /*  20! */
  4.538013889847690802616047395110e+01L,   /*  21! */
  4.847118135183522387963964965050e+01L,   /*  22! */
  5.160667556776437357044640248230e+01L,   /*  23! */
  5.478472939811231919009334408360e+01L,   /*  24! */
  5.800360522298051993929486275010e+01L,   /*  25! */
  6.126170176100200198476558231310e+01L,   /*  26! */
  6.455753862700633105895131802390e+01L,   /*  27! */
  6.788974313718153498289113501020e+01L,   /*  28! */
  7.125703896716800901007440704260e+01L,   /*  29! */
  7.465823634883016438548764373420e+01L,   /*  30! */
  7.809222355331531063141680805870e+01L,   /*  31! */
  8.155795945611503717850296866600e+01L,   /*  32! */
  8.505446701758151741396015748090e+01L,   /*  33! */
  8.858082754219767880362692422020e+01L,   /*  34! */
  9.213617560368709248333303629690e+01L,   /*  35! */
  9.571969454214320248495799101370e+01L,   /*  36! */
  9.933061245478742692932608668470e+01L,   /*  37! */
  1.029681986145138126987523462380e+02L,   /*  38! */
  1.066317602606434591262010789170e+02L,   /*  39! */
  1.103206397147573954290535346140e+02L,   /*  40! */
  1.140342117814617032329202979870e+02L,   /*  41! */
  1.177718813997450715388381280890e+02L,   /*  42! */
  1.215330815154386339623109706020e+02L,   /*  43! */
  1.253172711493568951252073784230e+02L,   /*  44! */
  1.291239336391272148825986282300e+02L,   /*  45! */
  1.329525750356163098828226131840e+02L,   /*  46! */
  1.368027226373263684696435638530e+02L,   /*  47! */
  1.406739236482342593987077375760e+02L,   /*  48! */
  1.445657439463448860089184430630e+02L,   /*  49! */
  1.484777669517730320675371938510e+02L,   /*  50! */
  1.524095925844973578391819737060e+02L,   /*  51! */
  1.563608363030787851940699253900e+02L,   /*  52! */
  1.603311282166309070282143945290e+02L,   /*  53! */
  1.643201122631951814118173623610e+02L,   /*  54! */
  1.683274454484276523304800652730e+02L,   /*  55! */
  1.723527971391628015638371143800e+02L,   /*  56! */
  1.763958484069973517152413870490e+02L,   /*  57! */
  1.804562914175437710518418912030e+02L,   /*  58! */
  1.845338288614494905024579415770e+02L,   /*  59! */
  1.886281734236715911872884103900e+02L,   /*  60! */
  1.927390472878449024360397994930e+02L,   /*  61! */
  1.968661816728899939913861959390e+02L,   /*  62! */
  2.010093163992815266792820391570e+02L,   /*  63! */
  2.051681994826411985357854318850e+02L,   /*  64! */
  2.093425867525368356464396786600e+02L,   /*  65! */
  2.135322414945632611913140995960e+02L,   /*  66! */
  2.177369341139542272509841715930e+02L,   /*  67! */
  2.219564418191303339500681704540e+02L,   /*  68! */
  2.261905483237275933322701685220e+02L,   /*  69! */
  2.304390435657769523213935127200e+02L,   /*  70! */
  2.347017234428182677427229672530e+02L,   /*  71! */
  2.389783895618343230537651540910e+02L,   /*  72! */
  2.432688490029827141828572629490e+02L,   /*  73! */
  2.475729140961868839366425907410e+02L,   /*  74! */
  2.518904022097231943772393546440e+02L,   /*  75! */
  2.562211355500095254560828463190e+02L,   /*  76! */
  2.605649409718632093052501426410e+02L,   /*  77! */
  2.649216497985528010421161074410e+02L,   /*  78! */
  2.692910976510198225362890529820e+02L,   /*  79! */
  2.736731242856937041485587408010e+02L,   /*  80! */
  2.780675734403661429141397217490e+02L,   /*  81! */
  2.824742926876303960274237172430e+02L,   /*  82! */
  2.868931332954269939508991894670e+02L,   /*  83! */
  2.913239500942703075662342516900e+02L,   /*  84! */
  2.957666013507606240210845456410e+02L,   /*  85! */
  3.002209486470141317539746202760e+02L,   /*  86! */
  3.046868567656687154725531375450e+02L,   /*  87! */
  3.091641935801469219448667774870e+02L,   /*  88! */
  3.136528299498790617831845930280e+02L,   /*  89! */
  3.181526396202093268499930749570e+02L,   /*  90! */
  3.226634991267261768911519151420e+02L,   /*  91! */
  3.271852877037752172007931322160e+02L,   /*  92! */
  3.317178871969284731381175417780e+02L,   /*  93! */
  3.362611819791984770343557245690e+02L,   /*  94! */
  3.408150588707990178689655113340e+02L,   /*  95! */
  3.453794070622668541074469171780e+02L,   /*  96! */
  3.499541180407702369295636388000e+02L,   /*  97! */
  3.545390855194408088491915764080e+02L,   /*  98! */
  3.591342053695753987760440104600e+02L,   /*  99! */
  3.637393755555634901440799933700e+02L,   /* 100! */
  3.683544960724047495949641916370e+02L,   /* 101! */
  3.729794688856890206760262036130e+02L,   /* 102! */
  3.776141978739186564467948059280e+02L,   /* 103! */
  3.822585887730600291110999897340e+02L,   /* 104! */
  3.869125491232175524822013470470e+02L,   /* 105! */
  3.915759882173296196257630483080e+02L,   /* 106! */
  3.962488170517915257990674471250e+02L,   /* 107! */
  4.009309482789157454920876470790e+02L,   /* 108! */
  4.056222961611448891924649635310e+02L,   /* 109! */
  4.103227765269373054205448985630e+02L,   /* 110! */
  4.150323067282496395563082394710e+02L,   /* 111! */
  4.197508055995447340990825207010e+02L,   /* 112! */
  4.244781934182570746676646521940e+02L,   /* 113! */
  4.292143918666515701284861569850e+02L,   /* 114! */
  4.339593239950148201938936691500e+02L,   /* 115! */
  4.387129141861211848399114054250e+02L,   /* 116! */
  4.434750881209189409587553833400e+02L,   /* 117! */
  4.482457727453846057187886658350e+02L,   /* 118! */
  4.530248962384961351041435531970e+02L,   /* 119! */
  4.578123879812781810983912541310e+02L,   /* 120! */
  4.626081785268749221865151412870e+02L,   /* 121! */
  4.674121995716081787446837625120e+02L,   /* 122! */
  4.722243839269805962399457711220e+02L,   /* 123! */
  4.770446654925856331047093996900e+02L,   /* 124! */
  4.818729792298879342285116776890e+02L,   /* 125! */
  4.867092611368394122258247530280e+02L,   /* 126! */
  4.915534482232980034988721938360e+02L,   /* 127! */
  4.964054784872176206647928186860e+02L,   /* 128! */
  5.012652908915792927796609064360e+02L,   /* 129! */
  5.061328253420348751997323853320e+02L,   /* 130! */
  5.110080226652360267438818093440e+02L,   /* 131! */
  5.158908245878223975981734624010e+02L,   /* 132! */
  5.207811737160441513632878425770e+02L,   /* 133! */
  5.256790135159950627323751466950e+02L,   /* 134! */
  5.305842882944334921811616417390e+02L,   /* 135! */
  5.354969431801695441896628727210e+02L,   /* 136! */
  5.404169241059976691049780628490e+02L,   /* 137! */
  5.453441777911548737965972930390e+02L,   /* 138! */
  5.502786517242855655537860779580e+02L,   /* 139! */
  5.552202941468948698523266542770e+02L,   /* 140! */
  5.601690540372730381305428501840e+02L,   /* 141! */
  5.651248810948742988612895368380e+02L,   /* 142! */
  5.700877257251342061414049678580e+02L,   /* 143! */
  5.750575390247102067618643868170e+02L,   /* 144! */
  5.800342727671307811636484181830e+02L,   /* 145! */
  5.850178793888391176021577591620e+02L,   /* 146! */
  5.900083119756178539037637098860e+02L,   /* 147! */
  5.950055242493819689669662698000e+02L,   /* 148! */
  6.000094705553274281079586980750e+02L,   /* 149! */
  6.050201058494236838579726940990e+02L,   /* 150! */
  6.100373856862386081867689303990e+02L,   /* 151! */
  6.150612662070848845750296541950e+02L,   /* 152! */
  6.200917041284773200380696792870e+02L,   /* 153! */
  6.251286567308909491966542077300e+02L,   /* 154! */
  6.301720818478101958171841313880e+02L,   /* 155! */
  6.352219378550597328634673283090e+02L,   /* 156! */
  6.402781836604080409208917735450e+02L,   /* 157! */
  6.453407786934350077244819512080e+02L,   /* 158! */
  6.504096828956552392500216655840e+02L,   /* 159! */
  6.554848567108890661717085855250e+02L,   /* 160! */
  6.605662610758735291676206911000e+02L,   /* 161! */
  6.656538574111059132426189041690e+02L,   /* 162! */
  6.707476076119126755766838365350e+02L,   /* 163! */
  6.758474740397368739993850641510e+02L,   /* 164! */
  6.809534195136374546094430122990e+02L,   /* 165! */
  6.860654073019939978423357166440e+02L,   /* 166! */
  6.911834011144107529495954674210e+02L,   /* 167! */
  6.963073650938140118743477617660e+02L,   /* 168! */
  7.014372638087370853464547366490e+02L,   /* 169! */
  7.065730622457873471107222627210e+02L,   /* 170! */
  7.117147258022900069535217806270e+02L,   /* 171! */
  7.168622202791034599958290873830e+02L,   /* 172! */
  7.220155118736012389427625877720e+02L,   /* 173! */
  7.271745671728157679707583371620e+02L,   /* 174! */
  7.323393531467392820250652085720e+02L,   /* 175! */
  7.375098371417774338067960806360e+02L,   /* 176! */
  7.426859868743512629488073762470e+02L,   /* 177! */
  7.478677704246433480965424239090e+02L,   /* 178! */
  7.530551562304841030927202283390e+02L,   /* 179! */
  7.582481130813743134689459423890e+02L,   /* 180! */
  7.634466101126401392157850294280e+02L,   /* 181! */
  7.686506167997169345663611017340e+02L,   /* 182! */
  7.738601029525583555065077360740e+02L,   /* 183! */
  7.790750387101673411255661852710e+02L,   /* 184! */
  7.842953945352456659445350402750e+02L,   /* 185! */
  7.895211412089588671912766819580e+02L,   /* 186! */
  7.947522498258134538155881601540e+02L,   /* 187! */
  7.999886917886434030212435750660e+02L,   /* 188! */
  8.052304388037030454005346635210e+02L,   /* 189! */
  8.104774628758635315445616824070e+02L,   /* 190! */
  8.157297363039101614174116323590e+02L,   /* 191! */
  8.209872316759379429653102703250e+02L,   /* 192! */
  8.262499218648428285171652013780e+02L,   /* 193! */
  8.315177800239061566486991551220e+02L,   /* 194! */
  8.367907795824699034507486471330e+02L,   /* 195! */
  8.420688942417004206797938168630e+02L,   /* 196! */
  8.473520979704384091865736141920e+02L,   /* 197! */
  8.526403650011329444228432803650e+02L,   /* 198! */
  8.579336698258574368182534016570e+02L,   /* 199! */
  8.632319871924054734957066166880e+02L,   /* 200! */
  8.685352921004645492467719339210e+02L,   /* 201! */
  8.738435597978657540070733643100e+02L,   /* 202! */
  8.791567657769075413393619890860e+02L,   /* 203! */
  8.844748857707517577298412331830e+02L,   /* 204! */
  8.897978957498901659083087558900e+02L,   /* 205! */
  8.951257719186797469884945903260e+02L,   /* 206! */
  9.004584907119451160620918336320e+02L,   /* 207! */
  9.057960287916464340358142495590e+02L,   /* 208! */
  9.111383630436112450398852205690e+02L,   /* 209! */
  9.164854705743287137204038100040e+02L,   /* 210! */
  9.218373287078047802161457662700e+02L,   /* 211! */
  9.271939149824767926691246996510e+02L,   /* 212! */
  9.325552071481862177818493994210e+02L,   /* 213! */
  9.379211831632080692645710303590e+02L,   /* 214! */
  9.432918211913357320626446322060e+02L,   /* 215! */
  9.486670995990198970650820642810e+02L,   /* 216! */
  9.540469969525603566161165813490e+02L,   /* 217! */
  9.594314920153494456259111299230e+02L,   /* 218! */
  9.648205637451659464463984840170e+02L,   /* 219! */
  9.702141912915183079838956511710e+02L,   /* 220! */
  9.756123539930360608001986732310e+02L,   /* 221! */
  9.810150313749083402453792462600e+02L,   /* 222! */
  9.864222031463684590040153514700e+02L,   /* 223! */
  9.918338491982234988562068648210e+02L,   /* 224! */
  9.972499496004279189881988739610e+02L,   /* 225! */
  1.002670484599700204866198237580e+03L,   /* 226! */
  1.008095434617181607541211619150e+03L,   /* 227! */
  1.013524780246136048311450356060e+03L,   /* 228! */
  1.018958502249690287959891591670e+03L,   /* 229! */
  1.024396581558613483334716335950e+03L,   /* 230! */
  1.029838999269135276875278877510e+03L,   /* 231! */
  1.035285736640801586830713845910e+03L,   /* 232! */
  1.040736775094367287396016070780e+03L,   /* 233! */
  1.046192096209724988824277280820e+03L,   /* 234! */
  1.051651681723869147785698990820e+03L,   /* 235! */
  1.057115513528894757855149505440e+03L,   /* 236! */
  1.062583573670029889040717696220e+03L,   /* 237! */
  1.068055844343701363735489815700e+03L,   /* 238! */
  1.073532307895632874402452979300e+03L,   /* 239! */
  1.079012946818974865706117912350e+03L,   /* 240! */
  1.084497743752465520701584323670e+03L,   /* 241! */
  1.089986681478622207099125442950e+03L,   /* 242! */
  1.095479742921962755556101669130e+03L,   /* 243! */
  1.100976911147255957423687522480e+03L,   /* 244! */
  1.106478169357800684408498987300e+03L,   /* 245! */
  1.111983500893733047213178228030e+03L,   /* 246! */
  1.117492889230361024409240742900e+03L,   /* 247! */
  1.123006317976526006583421603590e+03L,   /* 248! */
  1.128523770872990714198292321050e+03L,   /* 249! */
  1.134045231790852960631511831170e+03L,   /* 250! */
  1.139570684729984744517730416410e+03L,   /* 251! */
  1.145100113817496167824460723870e+03L,   /* 252! */
  1.150633503306223688059329420280e+03L,   /* 253! */
  1.156170837573242224641794093210e+03L,   /* 254! */
  1.161712101118400650788039632400e+03L,   /* 255! */
  1.167257278562880213263377489370e+03L,   /* 256! */
  1.172806354647775433061729283690e+03L,   /* 257! */
  1.178359314232697050486014603560e+03L,   /* 258! */
  1.183916142294396588235488051970e+03L,   /* 259! */
  1.189476823925412115964976762990e+03L,   /* 260! */
  1.195041344332734809374950525500e+03L,   /* 261! */
  1.200609688836495906228517181630e+03L,   /* 262! */
  1.206181842868673670779603107890e+03L,   /* 263! */
  1.211757791971819986943311993070e+03L,   /* 264! */
  1.217337521797806209152057221550e+03L,   /* 265! */
  1.222921018106587908226588833840e+03L,   /* 266! */
  1.228508266764988157756301894620e+03L,   /* 267! */
  1.234099253745499014434806430860e+03L,   /* 268! */
  1.239693965125100853541026384040e+03L,   /* 269! */
  1.245292387084099228299230111210e+03L,   /* 270! */
  1.250894505904978929199326981280e+03L,   /* 271! */
  1.256500307971274926517245444380e+03L,   /* 272! */
  1.262109779766459886249799529800e+03L,   /* 273! */
  1.267722907872847956474531952050e+03L,   /* 274! */
  1.273339678970514527767795414300e+03L,   /* 275! */
  1.278960079836231677768831876610e+03L,   /* 276! */
  1.284584097342419016268701274160e+03L,   /* 277! */
  1.290211718456109653335307291200e+03L,   /* 278! */
  1.295842930237931018964026946000e+03L,   /* 279! */
  1.301477719841100268571984754440e+03L,   /* 280! */
  1.307116074510434014337091392760e+03L,   /* 281! */
  1.312757981581372127924724820790e+03L,   /* 282! */
  1.318403428479015365549370807940e+03L,   /* 283! */
  1.324052402717176571589534726710e+03L,   /* 284! */
  1.329704891897445222115539758720e+03L,   /* 285! */
  1.335360883708265074705072421860e+03L,   /* 286! */
  1.341020365924024695814044537970e+03L,   /* 287! */
  1.346683326404160641743921189050e+03L,   /* 288! */
  1.352349753092273073904420258290e+03L,   /* 289! */
  1.358019634015253593615621521780e+03L,   /* 290! */
  1.363692957282425086129133488640e+03L,   /* 291! */
  1.369369711084693367877060061740e+03L,   /* 292! */
  1.375049883693710435182999902000e+03L,   /* 293! */
  1.380733463461049116794023084840e+03L,   /* 294! */
  1.386420438817388936619239894550e+03L,   /* 295! */
  1.392110798271712996991859686590e+03L,   /* 296! */
  1.397804530410515696610107365880e+03L,   /* 297! */
  1.403501623897021101060517026270e+03L,   /* 298! */
  1.409202067470411787487377266550e+03L,   /* 299! */
  1.414905849945067988546808494690e+03L,   /* 300! */
  1.420612960209816864275386689950e+03L,   /* 301! */
  1.426323387227191733913600158370e+03L,   /* 302! */
  1.432037120032701103055879601870e+03L,   /* 303! */
  1.437754147734107324753557557790e+03L,   /* 304! */
  1.443474459510714736376909706230e+03L,   /* 305! */
  1.449198044612667117149366963440e+03L,   /* 306! */
  1.454924892360254314302965426010e+03L,   /* 307! */
  1.460654992143227888770967186570e+03L,   /* 308! */
  1.466388333420125634233131034120e+03L,   /* 309! */
  1.472124905717604826163078189900e+03L,   /* 310! */
  1.477864698629784060297274185470e+03L,   /* 311! */
  1.483607701817593542652974614510e+03L,   /* 312! */
  1.489353905008133695869653379820e+03L,   /* 313! */
  1.495103297994041949236495057180e+03L,   /* 314! */
  1.500855870632867582298991659730e+03L,   /* 315! */
  1.506611612846454494411999069510e+03L,   /* 316! */
  1.512370514620331775026198831680e+03L,   /* 317! */
  1.518132566003111951861155778180e+03L,   /* 318! */
  1.523897757105896796432400993790e+03L,   /* 319! */
  1.529666078101690568663505145850e+03L,   /* 320! */
  1.535437519224820584528204789910e+03L,   /* 321! */
  1.541212070770364992833534127600e+03L,   /* 322! */
  1.546989723093587649373792689650e+03L,   /* 323! */
  1.552770466609379978758208134840e+03L,   /* 324! */
  1.558554291791709716243463140950e+03L,   /* 325! */
  1.564341189173076423886945305440e+03L,   /* 326! */
  1.570131149343973677278717867130e+03L,   /* 327! */
  1.575924162952357821010836326870e+03L,   /* 328! */
  1.581720220703123192902762630280e+03L,   /* 329! */
  1.587519313357583718822237810550e+03L,   /* 330! */
  1.593321431732960781723022427390e+03L,   /* 331! */
  1.599126566701877270265332363860e+03L,   /* 332! */
  1.604934709191857714092490950000e+03L,   /* 333! */
  1.610745850184834414509167932900e+03L,   /* 334! */
  1.616559980716659480943438764230e+03L,   /* 335! */
  1.622377091876622685177608290690e+03L,   /* 336! */
  1.628197174806975046902113157250e+03L,   /* 337! */
  1.634020220702458065683637364250e+03L,   /* 338! */
  1.639846220809838515943614740980e+03L,   /* 339! */
  1.645675166427448723017299499180e+03L,   /* 340! */
  1.651507048904732239807290607080e+03L,   /* 341! */
  1.657341859641794844959507357110e+03L,   /* 342! */
  1.663179590088960784874823415340e+03L,   /* 343! */
  1.669020231746334183226547954220e+03L,   /* 344! */
  1.674863776163365542983350711620e+03L,   /* 345! */
  1.680710214938423267239701444130e+03L,   /* 346! */
  1.686559539718370126432055120970e+03L,   /* 347! */
  1.692411742198144600769468102490e+03L,   /* 348! */
  1.698266814120347027932667584010e+03L,   /* 349! */
  1.704124747274830487296391687540e+03L,   /* 350! */
  1.709985533498296353106630910690e+03L,   /* 351! */
  1.715849164673894450197779014880e+03L,   /* 352! */
  1.721715632730827746965174734990e+03L,   /* 353! */
  1.727584929643961521416603262730e+03L,   /* 354! */
  1.733457047433436937212533476590e+03L,   /* 355! */
  1.739331978164288967669685756380e+03L,   /* 356! */
  1.745209713946068606746435888970e+03L,   /* 357! */
  1.751090246932469307052030925530e+03L,   /* 358! */
  1.756973569320957585925079018410e+03L,   /* 359! */
  1.762859673352407741610721964580e+03L,   /* 360! */
  1.768748551310740622530740019450e+03L,   /* 361! */
  1.774640195522566393586996338610e+03L,   /* 362! */
  1.780534598356831244366515471000e+03L,   /* 363! */
  1.786431752224467985026508775430e+03L,   /* 364! */
  1.792331649578050476530201643620e+03L,   /* 365! */
  1.798234282911451842779765510080e+03L,   /* 366! */
  1.804139644759506413051384942020e+03L,   /* 367! */
  1.810047727697675343979860623340e+03L,   /* 368! */
  1.815958524341715871166517877180e+03L,   /* 369! */
  1.821872027347354141294903964310e+03L,   /* 370! */
  1.827788229409961576434153786190e+03L,   /* 371! */
  1.833707123264234722990312660000e+03L,   /* 372! */
  1.839628701683878538531648394600e+03L,   /* 373! */
  1.845552957481293070465377104920e+03L,   /* 374! */
  1.851479883507263481280574628160e+03L,   /* 375! */
  1.857409472650653375795647275190e+03L,   /* 376! */
  1.863341717838101386558884034660e+03L,   /* 377! */
  1.869276612033720974247592355240e+03L,   /* 378! */
  1.875214148238803400595409591820e+03L,   /* 379! */
  1.881154319491523832048853842830e+03L,   /* 380! */
  1.887097118866650533013296528880e+03L,   /* 381! */
  1.893042539475257108195563710950e+03L,   /* 382! */
  1.898990574464437754185556765490e+03L,   /* 383! */
  1.904941217017025481042872635570e+03L,   /* 384! */
  1.910894460351313265266640691230e+03L,   /* 385! */
  1.916850297720778096127912854400e+03L,   /* 386! */
  1.922808722413807877934176187390e+03L,   /* 387! */
  1.928769727753431151375127373250e+03L,   /* 388! */
  1.934733307097049597667975993490e+03L,   /* 389! */
  1.940699453836173289779442717620e+03L,   /* 390! */
  1.946668161396158655550499005070e+03L,   /* 391! */
  1.952639423235949118088961406920e+03L,   /* 392! */
  1.958613232847818379324506076170e+03L,   /* 393! */
  1.964589583757116313140703105620e+03L,   /* 394! */
  1.970568469522017435009476810500e+03L,   /* 395! */
  1.976549883733271915555163708790e+03L,   /* 396! */
  1.982533820013959105968247140820e+03L,   /* 397! */
  1.988520272019243543673074494230e+03L,   /* 398! */
  1.994509233436133407129584119650e+03L,   /* 399! */
  2.000500697983241389116454566800e+03L,   /* 400! */
  2.006494659410547958302305245890e+03L,   /* 401! */
  2.012491111499166979362787795240e+03L,   /* 402! */
  2.018490048061113662344770447010e+03L,   /* 403! */
  2.024491462939074812414489109520e+03L,   /* 404! */
  2.030495350006181351554670849800e+03L,   /* 405! */
  2.036501703165783084196376706690e+03L,   /* 406! */
  2.042510516351225679184806745940e+03L,   /* 407! */
  2.048521783525629840884703222160e+03L,   /* 408! */
  2.054535498681672642630416744370e+03L,   /* 409! */
  2.060551655841370996118301499200e+03L,   /* 410! */
  2.066570249055867230725011934560e+03L,   /* 411! */
  2.072591272405216757114615001120e+03L,   /* 412! */
  2.078614719998177789870336404240e+03L,   /* 413! */
  2.084640585972003104253350879670e+03L,   /* 414! */
  2.090668864492233802551427111220e+03L,   /* 415! */
  2.096699549752495065834566759270e+03L,   /* 416! */
  2.102732635974293867283150789430e+03L,   /* 417! */
  2.108768117406818623596638992560e+03L,   /* 418! */
  2.114805988326740761327671926480e+03L,   /* 419! */
  2.120846243038018175317607748040e+03L,   /* 420! */
  2.126888875871700556735196464270e+03L,   /* 421! */
  2.132933881185736568540355652650e+03L,   /* 422! */
  2.138981253364782846509967093800e+03L,   /* 423! */
  2.145030986820014804272363259300e+03L,   /* 424! */
  2.151083075988939221101814312580e+03L,   /* 425! */
  2.157137515335208591523956244470e+03L,   /* 426! */
  2.163194299348437216077812986320e+03L,   /* 427! */
  2.169253422544019012869951849380e+03L,   /* 428! */
  2.175314879462947029841462525640e+03L,   /* 429! */
  2.181378664671634637948953359610e+03L,   /* 430! */
  2.187444772761738385736701026410e+03L,   /* 431! */
  2.193513198349982496048555690610e+03L,   /* 432! */
  2.199583936077984985895275975180e+03L,   /* 433! */
  2.205656980612085390755727724370e+03L,   /* 434! */
  2.211732326643174074848907000980e+03L,   /* 435! */
  2.217809968886523109168118781670e+03L,   /* 436! */
  2.223889902081618699318934561930e+03L,   /* 437! */
  2.229972120991995145448839148150e+03L,   /* 438! */
  2.236056620405070316798832360280e+03L,   /* 439! */
  2.242143395131982623645746759560e+03L,   /* 440! */
  2.248232440007429469638747955520e+03L,   /* 441! */
  2.254323749889507167764468209700e+03L,   /* 442! */
  2.260417319659552303402554216950e+03L,   /* 443! */
  2.266513144221984528157152022110e+03L,   /* 444! */
  2.272611218504150768370070596980e+03L,   /* 445! */
  2.278711537456170832438123934310e+03L,   /* 446! */
  2.284814096050784401270511607820e+03L,   /* 447! */
  2.290918889283199386432120353290e+03L,   /* 448! */
  2.297025912170941640725373875090e+03L,   /* 449! */
  2.303135159753706006166783116350e+03L,   /* 450! */
  2.309246627093208684514711823300e+03L,   /* 451! */
  2.315360309273040915702128419030e+03L,   /* 452! */
  2.321476201398523949722319900570e+03L,   /* 453! */
  2.327594298596565297706750514270e+03L,   /* 454! */
  2.333714596015516248122510113790e+03L,   /* 455! */
  2.339837088825030634202166082820e+03L,   /* 456! */
  2.345961772215924838901359214200e+03L,   /* 457! */
  2.352088641400039023859217681930e+03L,   /* 458! */
  2.358217691610099569013652952260e+03L,   /* 459! */
  2.364348918099582709697894928660e+03L,   /* 460! */
  2.370482316142579358216267653860e+03L,   /* 461! */
  2.376617881033661097066247427540e+03L,   /* 462! */
  2.382755608087747331140327274720e+03L,   /* 463! */
  2.388895492639973586405179475240e+03L,   /* 464! */
  2.395037530045560942717104644130e+03L,   /* 465! */
  2.401181715679686588591824101130e+03L,   /* 466! */
  2.407328044937355485903353632420e+03L,   /* 467! */
  2.413476513233273132641032074570e+03L,   /* 468! */
  2.419627116001719412005807499310e+03L,   /* 469! */
  2.425779848696423516276646441440e+03L,   /* 470! */
  2.431934706790439934025466131910e+03L,   /* 471! */
  2.438091685776025489404333878650e+03L,   /* 472! */
  2.444250781164517422371868664740e+03L,   /* 473! */
  2.450411988486212498866854087640e+03L,   /* 474! */
  2.456575303290247140076064633740e+03L,   /* 475! */
  2.462740721144478560080253985340e+03L,   /* 476! */
  2.468908237635366901297188944960e+03L,   /* 477! */
  2.475077848367858357273569340670e+03L,   /* 478! */
  2.481249548965269272508686018500e+03L,   /* 479! */
  2.487423335069171209121768183680e+03L,   /* 480! */
  2.493599202339276970302189766790e+03L,   /* 481! */
  2.499777146453327570607073410230e+03L,   /* 482! */
  2.505957163106980143294380761050e+03L,   /* 483! */
  2.512139248013696775001339112450e+03L,   /* 484! */
  2.518323396904634258198056593400e+03L,   /* 485! */
  2.524509605528534751964450051710e+03L,   /* 486! */
  2.530697869651617341755179962540e+03L,   /* 487! */
  2.536888185057470488932183048010e+03L,   /* 488! */
  2.543080547546945360957643225610e+03L,   /* 489! */
  2.549274952938050033251871922550e+03L,   /* 490! */
  2.555471397065844553830606112870e+03L,   /* 491! */
  2.561669875782336861944702585730e+03L,   /* 492! */
  2.567870384956379552052135392380e+03L,   /* 493! */
  2.574072920473567474557615139370e+03L,   /* 494! */
  2.580277478236136164859068332760e+03L,   /* 495! */
  2.586484054162861092342666425570e+03L,   /* 496! */
  2.592692644188957721069101232840e+03L,   /* 497! */
  2.598903244265982373993389182430e+03L,   /* 498! */
  2.605115850361733892658674273560e+03L,   /* 499! */
  2.611330458460156084401311015810e+03L,   /* 500! */
  2.617547064561240949199966011820e+03L,   /* 501! */
  2.623765664680932678395601829180e+03L,   /* 502! */
  2.629986254851032417602020590300e+03L,   /* 503! */
  2.636208831119103786218168129880e+03L,   /* 504! */
  2.642433389548379146043653087480e+03L,   /* 505! */
  2.648659926217666611587939016010e+03L,   /* 506! */
  2.654888437221257794751441236130e+03L,   /* 507! */
  2.661118918668836276643323141180e+03L,   /* 508! */
  2.667351366685386799386158005040e+03L,   /* 509! */
  2.673585777411105170841820776350e+03L,   /* 510! */
  2.679822147001308875276018237950e+03L    /* 511! */
};

/* DEFINE AGN_LNFACTSIZE at the top of this file !!! */

/* Returns ln(n!), the natural logarithm of the factorial of n, as a double.

   For n < AGN_LNFACTSIZE the result is read from the table lnfactorials_lt_512_dbl. Larger
   arguments are evaluated as sun_lgamma(n + 1), using the identity ln(n!) = lgamma(n + 1).
   The increment is carried out in double arithmetic: adding 1 to the unsigned argument first
   would wrap around to 0 for n = UINT_MAX.

   For an approximation, which is slower, see https://www.johndcook.com/blog/csharp_log_factorial/
   int x = n + 1;
   return (x - 0.5)*sun_log(x) - x + 0.5*sun_log(2*PI) + 1.0/(12.0*x); */
LUALIB_API double tools_lnfactorial (unsigned int n) {
  if (n < AGN_LNFACTSIZE) return lnfactorials_lt_512_dbl[n];
  return sun_lgamma((double)n + 1.0);
}

/* Long double variant of tools_lnfactorial: returns ln(n!).

   For n < AGN_LNFACTSIZE the result is read from the table lnfactorials_lt_512. Larger arguments
   are evaluated as tools_lgammal_r(n + 1, ...), using the identity ln(n!) = lgamma(n + 1).
   The increment is carried out in long double arithmetic: adding 1 to the unsigned argument
   first would wrap around to 0 for n = UINT_MAX. */
LUALIB_API long double tools_lnfactoriall (unsigned int n) {
  int lgsign;  /* written by tools_lgammal_r through its second argument (presumably the sign of gamma); unused here */
  if (n < AGN_LNFACTSIZE) return lnfactorials_lt_512[n];
  return tools_lgammal_r((long double)n + 1.0L, &lgsign);
}


/*							fac.c
 *
 *	Factorial function
 *
 * SYNOPSIS:
 *
 * double y, fac();
 * int i;
 *
 * y = fac( i );
 *
 * DESCRIPTION:
 *
 * Returns factorial of i  =  1 * 2 * 3 * ... * i.
 * fac(0) = 1.0.
 *
 * Due to machine arithmetic bounds the largest value of
 * i accepted is 33 in DEC arithmetic or 170 in IEEE
 * arithmetic.  In this port, greater values return HUGE_VAL
 * and negative ones return AGN_NAN; no error message is printed.
 *
 * ACCURACY:
 *
 * For i < 34 the values are simply tabulated, and have
 * full machine accuracy.  If i > 55, fac(i) = gamma(i+1);
 * see gamma.c.
 *                      Relative error:
 * arithmetic   domain      peak
 *    IEEE      0, 170    1.4e-15
 *    DEC       0, 33      1.4e-17
 *
 * Cephes Math Library Release 2.8:  June, 2000
 * Copyright 1984, 1987, 2000 by Stephen L. Moshier */

/* Lookup table of n! for n = 0 .. 33 in double precision; the array index is n.
   Every entry is written with its full decimal expansion (trailing zeros of the mantissa omitted). */
static double factbl[] = {
  1.0e+00,                               /*  0! */
  1.0e+00,                               /*  1! */
  2.0e+00,                               /*  2! */
  6.0e+00,                               /*  3! */
  2.4e+01,                               /*  4! */
  1.2e+02,                               /*  5! */
  7.2e+02,                               /*  6! */
  5.04e+03,                              /*  7! */
  4.032e+04,                             /*  8! */
  3.6288e+05,                            /*  9! */
  3.6288e+06,                            /* 10! */
  3.99168e+07,                           /* 11! */
  4.790016e+08,                          /* 12! */
  6.2270208e+09,                         /* 13! */
  8.71782912e+10,                        /* 14! */
  1.307674368e+12,                       /* 15! */
  2.0922789888e+13,                      /* 16! */
  3.55687428096e+14,                     /* 17! */
  6.402373705728e+15,                    /* 18! */
  1.21645100408832e+17,                  /* 19! */
  2.43290200817664e+18,                  /* 20! */
  5.109094217170944e+19,                 /* 21! */
  1.12400072777760768e+21,               /* 22! */
  2.585201673888497664e+22,              /* 23! */
  6.2044840173323943936e+23,             /* 24! */
  1.5511210043330985984e+25,             /* 25! */
  4.03291461126605635584e+26,            /* 26! */
  1.0888869450418352160768e+28,          /* 27! */
  3.04888344611713860501504e+29,         /* 28! */
  8.841761993739701954543616e+30,        /* 29! */
  2.6525285981219105863630848e+32,       /* 30! */
  8.22283865417792281772556288e+33,      /* 31! */
  2.6313083693369353016721801216e+35,    /* 32! */
  8.68331761881188649551819440128e+36    /* 33! */
};

#define MAXFAC 170

/* Computes the factorial i! in double precision.
   Result by argument range:
     i < 0         -> AGN_NAN
     i > MAXFAC    -> HUGE_VAL
     0 <= i < 34   -> table lookup in factbl
     34 <= i <= 55 -> the product 34 * 35 * ... * i, finally multiplied by factbl[33]
     i > 55        -> tools_gamma(i + 1) */
LUALIB_API double cephes_factorial (int i) {
  double prod;
  int k;
  if (i < 0) return AGN_NAN;
  if (i > MAXFAC) return HUGE_VAL;
  if (i < 34) return factbl[i];  /* small arguments are tabulated */
  if (i > 55) return tools_gamma((double)(i + 1));  /* large arguments go through the gamma function */
  /* intermediate arguments: accumulate the factors above 33 first, then scale by 33! */
  prod = 34.0;
  for (k = 35; k <= i; k++) {
    prod *= (double)k;
  }
  prod *= factbl[33];
  return prod;
}


/* Factorials 0! .. 33!, indexed by n, for the long double routines. The literals carry the `L` suffix, so the
   element type must be `long double` as well: with a `double` element type every entry from 23! onwards (the
   first factorial that is not exactly representable in a double) would be rounded to double precision on
   initialisation and the extra digits would be lost. Improved precision 3.16.2 */
static long double factbll[] = {
  1.000000000000000000000000000000e+00L,   /*   0! */
  1.000000000000000000000000000000e+00L,   /*   1! */
  2.000000000000000000000000000000e+00L,   /*   2! */
  6.000000000000000000000000000000e+00L,   /*   3! */
  2.400000000000000000000000000000e+01L,   /*   4! */
  1.200000000000000000000000000000e+02L,   /*   5! */
  7.200000000000000000000000000000e+02L,   /*   6! */
  5.040000000000000000000000000000e+03L,   /*   7! */
  4.032000000000000000000000000000e+04L,   /*   8! */
  3.628800000000000000000000000000e+05L,   /*   9! */
  3.628800000000000000000000000000e+06L,   /*  10! */
  3.991680000000000000000000000000e+07L,   /*  11! */
  4.790016000000000000000000000000e+08L,   /*  12! */
  6.227020800000000000000000000000e+09L,   /*  13! */
  8.717829120000000000000000000000e+10L,   /*  14! */
  1.307674368000000000000000000000e+12L,   /*  15! */
  2.092278988800000000000000000000e+13L,   /*  16! */
  3.556874280960000000000000000000e+14L,   /*  17! */
  6.402373705728000000000000000000e+15L,   /*  18! */
  1.216451004088320000000000000000e+17L,   /*  19! */
  2.432902008176640000000000000000e+18L,   /*  20! */
  5.109094217170944000000000000000e+19L,   /*  21! */
  1.124000727777607680000000000000e+21L,   /*  22! */
  2.585201673888497664000000000000e+22L,   /*  23! */
  6.204484017332394393600000000000e+23L,   /*  24! */
  1.551121004333098598400000000000e+25L,   /*  25! */
  4.032914611266056355840000000000e+26L,   /*  26! */
  1.088886945041835216076800000000e+28L,   /*  27! */
  3.048883446117138605015040000000e+29L,   /*  28! */
  8.841761993739701954543616000000e+30L,   /*  29! */
  2.652528598121910586363084800000e+32L,   /*  30! */
  8.222838654177922817725562880000e+33L,   /*  31! */
  2.631308369336935301672180121600e+35L,   /*  32! */
  8.683317618811886495518194401280e+36L    /*  33! */
};

LUALIB_API long double cephes_factoriall (int i) {
  /* Long double counterpart of cephes_factorial:
   *   i < 0         -> AGN_NAN
   *   i > MAXFAC    -> HUGE_VAL
   *   0 <= i <= 33  -> looked up in factbll[]
   *   34 <= i <= 55 -> 34*35*...*i, then scaled by factbll[33] (= 33!)
   *   i >= 56       -> tools_gammal(i + 1) */
  long double acc;
  int k;
  if (i < 0) return AGN_NAN;
  if (i > MAXFAC) return HUGE_VAL;
  if (i <= 33) return factbll[i];
  if (i >= 56) return tools_gammal((long double)i + 1.0L);
  /* intermediate range: multiply up from 34 first, apply 33! last */
  acc = 34.0L;
  for (k = 35; k <= i; k++) {
    acc *= (long double)k;
  }
  acc *= factbll[33];
  return acc;
}


/* Returns x with its sign bit XORed with the sign bit of y, i.e. x is negated if and only if the sign bit of y
   is set. Assumes double_cast overlays the double member `f` with a 64-bit integer member `i` - TODO confirm
   against its declaration. */
LUALIB_API double tools_mulsign (double x, double y) {
  double_cast a, b;
  a.f = x;
  b.f = y;
  /* the mask is built from an unsigned constant: `1LL << 63` would shift into the sign bit of a signed type,
     which is undefined behaviour in C */
  a.i ^= (b.i & (1ULL << 63));
  return a.f;
}


/*
   BesselJY complex aux functions
   taken from the FORTRAN mcjyna.for file written by Shanjie Zhang and Jianming Jin,
   Computation of Special Functions, Copyright 1996 by John Wiley & Sons, Inc.,
   by kind permission of Jianming Jin
   Modifications: calculation of the derivatives has been removed.
*/

LUALIB_API double tools_envj (double n, double x) {
  /* 0.5*log10(6.28*n) - n*log10(1.36*x/n); used by the msta1/msta2 starting-point searches. 2.11.1 tuning */
  const double lead = 0.5*sun_log10(6.28*n);
  const double tail = n*sun_log10(1.36*x/n);
  return lead - tail;
}


#ifndef __ARMCPU  /* 2.37.1 */
LUALIB_API long double tools_envjl (long double n, long double x) {
  /* long double version of tools_envj: 0.5*log10(6.28*n) - n*log10(1.36*x/n). 2.11.1 tuning */
  const long double lead = 0.5*tools_log10l(6.28*n);
  const long double tail = n*tools_log10l(1.36*x/n);
  return lead - tail;
}
#endif

/*     ===================================================
       Function: msta1
       Purpose:  Determine the starting point for backward
                 recurrence such that the magnitude of
                 Jn(x) at that point is about 10^(-mp)
       Input :   x     --- Argument of Jn(x)
                 mp    --- Value of magnitude
       Output:   msta1 --- Starting point
       =================================================== */

LUALIB_API int tools_msta1 (double x, double mp) {
  /* Secant iteration on g(n) = tools_envj(n, |x|) - mp, started at n = trunc(1.1*|x|) + 1 and that value plus 5.
     At most 20 steps are taken; the search stops early as soon as the truncated iterate no longer changes. */
  double absx, gprev, gcur, gnext;
  int step, nprev, ncur, nnext;
  absx = fabs(x);
  nprev = sun_trunc(1.1*absx) + 1;
  ncur = nprev + 5;
  gprev = tools_envj(nprev, absx) - mp;
  gcur = tools_envj(ncur, absx) - mp;
  step = 1;
  do {
    nnext = ncur - (ncur - nprev)/(1.0 - gprev/gcur);  /* secant step, truncated to int */
    gnext = tools_envj(nnext, absx) - mp;
    if (nnext == ncur) break;  /* iterate is stationary */
    nprev = ncur;
    gprev = gcur;
    ncur = nnext;
    gcur = gnext;
  } while (++step < 21);
  return nnext;
}


#ifndef __ARMCPU  /* 2.37.1 */
/* Long double version of tools_msta1: secant iteration on tools_envjl(n, |x|) - mp, started at
   n0 = trunc(1.1*|x|) + 1 and n1 = n0 + 5, with at most 20 steps. Returns the last iterate. */
LUALIB_API int tools_msta1l (long double x, long double mp) {
  long double a0, f, f0, f1;
  int it, n0, n1, nn;
  a0 = fabsl(x);
  n0 = truncl(1.1*a0) + 1;
  f0 = tools_envjl(n0, a0) - mp;
  n1 = n0 + 5;
  /* both starting values use the long double envelope, so that f0 and f1 are computed in the same precision */
  f1 = tools_envjl(n1, a0) - mp;
  for (it=1; it < 21; it++) {
    nn = n1 - (n1 - n0)/(1.0 - f0/f1);  /* secant step, truncated to int */
    f = tools_envjl(nn, a0) - mp;
    if (fabsl(nn - n1) < 1) break;  /* iterate is stationary */
    n0 = n1;
    f0 = f1;
    n1 = nn;
    f1 = f;
  }
  return nn;
}
#endif


/*     ===================================================
       Function: msta2
       Purpose: Determine the starting point for backward
                recurrence such that all Jn(x) has mp
                significant digits
       Input :  x  --- Argument of Jn(x)
                n  --- Order of Jn(x)
                mp --- Significant digit
       Output:  msta2 --- Starting point
       =================================================== */

LUALIB_API int tools_msta2 (double x, double n, double mp) {
  /* Secant iteration on g(m) = tools_envj(m, |x|) - obj, at most 20 steps; returns the last iterate plus 10.
     obj and the first starting order depend on how tools_envj(n, |x|) compares with mp/2. */
  double a0, hmp, ejn, obj, f, f0, f1;
  int it, n0, n1, nn;
  a0 = fabs(x);
  hmp = 0.5*mp;
  ejn = tools_envj(n, a0);
  if (ejn <= hmp) {
    obj = mp;
    /* + 1 as in tools_msta1: without it n0 is 0 for |x| < 1/1.1, tools_envj(0, a0) divides by zero and yields
       NaN, and the conversion of the resulting NaN iterate to int is undefined */
    n0 = sun_trunc(1.1*a0) + 1;
  } else {
    obj = hmp + ejn;
    n0 = n;
  }
  f0 = tools_envj(n0, a0) - obj;
  n1 = n0 + 5;
  f1 = tools_envj(n1, a0) - obj;
  for (it=1; it < 21; it++) {
    nn = n1 - (n1 - n0)/(1.0 - f0/f1);  /* secant step, truncated to int */
    f = tools_envj(nn, a0) - obj;
    if (fabs(nn - n1) < 1) break;  /* iterate is stationary */
    n0 = n1;
    f0 = f1;
    n1 = nn;
    f1 = f;
  }
  return nn + 10;
}


#ifndef __ARMCPU  /* 2.37.1 */
/* Long double version of tools_msta2: secant iteration on tools_envjl(m, |x|) - obj, at most 20 steps;
   returns the last iterate plus 10. obj and the first starting order depend on how tools_envjl(n, |x|)
   compares with mp/2. */
LUALIB_API int tools_msta2l (long double x, long double n, long double mp) {
  long double a0, hmp, ejn, obj, f, f0, f1;
  int it, n0, n1, nn;
  a0 = fabsl(x);
  hmp = 0.5*mp;
  ejn = tools_envjl(n, a0);
  if (ejn <= hmp) {
    obj = mp;
    /* + 1 as in tools_msta1l: without it n0 is 0 for |x| < 1/1.1, tools_envjl(0, a0) divides by zero and yields
       NaN, and the conversion of the resulting NaN iterate to int is undefined */
    n0 = truncl(1.1*a0) + 1;
  } else {
    obj = hmp + ejn;
    n0 = n;
  }
  f0 = tools_envjl(n0, a0) - obj;
  n1 = n0 + 5;
  f1 = tools_envjl(n1, a0) - obj;
  for (it=1; it < 21; it++) {
    nn = n1 - (n1 - n0)/(1.0 - f0/f1);  /* secant step, truncated to int */
    f = tools_envjl(nn, a0) - obj;
    if (fabsl(nn - n1) < 1) break;  /* iterate is stationary */
    n0 = n1;
    f0 = f1;
    n1 = nn;
    f1 = f;
  }
  return nn + 10;
}
#endif


/* Computes the relative error |b - a|/|a| of b with respect to a, with special handling of NaN and Inf. 2.21.6.

   If at least one argument is NaN or infinite: returns HUGE_VAL when the two arguments disagree (only one is NaN,
   only one is infinite, or both are infinite with opposite signs), and AGN_NAN otherwise.
   If a is zero: returns 0 when b is zero as well, HUGE_VAL otherwise.

   Taken from: http://ab-initio.mit.edu/Faddeeva.cc

   Obviously written by: Steven G. Johnson, Massachusetts Institute of Technology,
   http://math.mit.edu/~stevenj ;; October 2012. MIT licence. */
LUALIB_API double tools_relerr (double a, double b) {
  if (tools_isnanorinf(a) || tools_isnanorinf(b)) {  /* 3.7.4 tweak */
    const int nan_a = isnan(a) != 0, nan_b = isnan(b) != 0;
    const int inf_a = isinf(a) != 0, inf_b = isinf(b) != 0;
    if (nan_a != nan_b || inf_a != inf_b || (inf_a && inf_b && a*b < 0)) return HUGE_VAL;
    return AGN_NAN;
  }
  if (a != 0) return fabs((b - a)/a);
  return (b == 0) ? 0 : HUGE_VAL;
}


/* Returns the same checksum as the UNIX cksum utility for the given string. The return is a
   non-negative integer. The function can be used to validate the integrity of a file but
   may not always detect hacker manipulation. This hash algorithm proved to be the best due to
   high speed and few collisions.

   Taken from: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cksum.html; 2.22.1 */
/* 256-entry lookup table for tools_cksum, which indexes it with (s >> 24) ^ c, i.e. the top byte of the running
   32-bit checksum XORed with the next input byte. Entry 1 is 0x04c11db7 - presumably the generator polynomial
   of the POSIX cksum CRC; verify against the specification referenced above. */
static unsigned long cksum_crctab[] = {
  0x00000000,
  0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b,
  0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6,
  0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
  0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac,
  0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f,
  0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a,
  0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
  0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58,
  0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033,
  0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe,
  0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
  0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4,
  0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0,
  0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5,
  0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
  0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07,
  0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c,
  0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1,
  0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
  0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b,
  0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698,
  0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d,
  0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
  0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f,
  0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34,
  0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80,
  0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
  0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a,
  0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629,
  0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c,
  0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
  0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e,
  0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65,
  0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8,
  0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
  0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2,
  0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71,
  0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74,
  0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
  0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21,
  0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a,
  0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087,
  0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
  0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d,
  0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce,
  0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb,
  0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
  0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09,
  0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662,
  0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf,
  0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
};

LUALIB_API unsigned long tools_cksum (const unsigned char *b, size_t n) {
  /*  b = byte sequence to checksum, n = number of bytes in b.
   *  The checksum is fed with the n bytes of b, then with the value of n itself, least significant byte first
   *  and only as many bytes as are needed to represent n. The bitwise complement of the result is returned. */
  unsigned crc = 0, octet;
  size_t left;
  for (left = n; left != 0; left--) {
    octet = *b++;
    crc = (crc << 8) ^ cksum_crctab[(crc >> 24) ^ octet];
  }
  /* append the length of the sequence */
  for (; n != 0; n >>= 8) {
    octet = n & 0377;
    crc = (crc << 8) ^ cksum_crctab[(crc >> 24) ^ octet];
  }
  return ~crc;  /* invert */
}


/* Approximates the number of partitions of n, the partition numbers, taken r at a time. By default, r = n.
   See: https://oeis.org/A000041; 2.22.1. Exact for 1 <= n <= 10. Not faster when storing the coefficients globally. */
LUALIB_API double tools_numbpartapx (int n) {
  /* Approximation of the partition number of n:
   *   round( exp(Pi*sqrt(2n/3 + c0 + c1/sqrt(n) + c2/n + c3/(n*sqrt(n)) + c4/n^2)) / (4*n*sqrt(3)) ),
   * with fitted correction coefficients c0 .. c4. Returns 0 for n = 0.
   * All arithmetic is done on a double copy of n: the integer products n*n and 4*n overflow for large n
   * (n*n already for n > 46340, where the result is still finite), which is undefined behaviour and, when the
   * product wraps to 0, leads to a division by zero. */
  double c0, c1, c2, c3, c4, nd, sqn, e, r;
  if (n == 0) return 0.0;
  c0 = -0.230420145062453320665537;
  c1 = -0.0178416569128570889793;
  c2 =  0.0051329911273;
  c3 = -0.0011129404;
  c4 =  0.0009573;
  nd = (double)n;
  sqn = sqrt(nd);
  e = PI * sqrt(2.0*nd/3.0 + c0 + c1/sqn + c2/nd + c3/(nd*sqn) + c4/(nd*nd));
  r = 1/(4*nd*1.73205080756887729352744634151)*sun_pow(EXP1, e, 0);  /* 1.7320508... = sqrt(3) */
  return sun_round(r);
}


/* Returns the next multiple towards +`infinity` of an integer to the given integer base. If base is negative, the result is the next
   multiple towards -infinity; 2.22.1, changed 2.22.2
   Taken from: https://stackoverflow.com/questions/2403631/how-do-i-find-the-next-multiple-of-10-of-any-integer */
LUALIB_API double tools_nextmultiple (int32_t n, int32_t b) {  /* 2.22.2 */
  /* Returns AGN_NAN for b = 0. A value that already is a multiple of b is moved on by one full step b. */
  int32_t remainder, exact;
  if (b == 0) return AGN_NAN;
  remainder = n % b;
  exact = (remainder == 0);
  if (n > 0) {  /* b = 10: 39 -> 40, 40 -> _50_, 41 -> 50 */
    n += b - remainder;
    if (b < 0 && !exact) n += -b;  /* `round` towards -infinity */
  } else {  /* b = 10: -39 -> -30, -30 -> -20, -21 -> -20 */
    if (exact) n += b;
    n += ((b - 1 - remainder) + 1) % b;
  }
  return n;
}


LUALIB_API uint32_t tools_adjustmultiple (uint32_t n, uint32_t b) {  /* 2.22.2, for n >= 0 && b <> 0 */
  /* Rounds n up to a multiple of b; a value that already is a multiple - including 0, since the 4.6.7 change -
     is returned unchanged. With b = 10: 0 -> 0, 1 -> 10, .., 39 -> 40, 40 -> _40_, 41 -> 50 */
  const uint32_t excess = n % b;
  return excess ? n + (b - excess) : n;
}


#define tools_isnearint(q,eps) (tools_approx(q, sun_round(q), eps))
LUALIB_API int tools_ismultiple (double x, double y, int inexact, double eps) {  /* 2.32.5 */
  /* Returns -1 if y is +/-0, infinite or NaN. Otherwise returns tools_isint(x/y) if inexact is zero, else the
     result of comparing chop(x)/chop(y) with its rounded value via tools_approx and tolerance eps. */
  int32_t hi, lo, abshi;
  double ratio;
  EXTRACT_WORDS(hi, lo, y);
  abshi = hi & 0x7fffffff;  /* high word without the sign bit */
  if ((abshi | lo) == 0 || abshi >= 0x7ff00000) return -1;  /* +/-0, inf, nan */
  if (inexact) {
    ratio = tools_chop(x)/tools_chop(y);  /* stored first since tools_isnearint evaluates its argument twice */
    return tools_isnearint(ratio, eps);
  }
  return tools_isint(x/y);
}


#ifndef __ARMCPU  /* 2.37.1 */
#define tools_isnearintl(q,eps) (tools_approxl(q, sun_roundl(q), eps))
LUALIB_API int tools_ismultiplel (long double x, long double y, int inexact, long double eps) {  /* 2.34.8 */
  /* Long double version of tools_ismultiple: returns -1 if y is +/-0, infinite or NaN. Otherwise returns
     sun_isintl(x/y) if inexact is zero, else the result of comparing chopl(x)/chopl(y) with its rounded value
     via tools_approxl and tolerance eps. */
  long double ratio;
  switch (tools_fpclassifyl(y)) {
    case FP_ZERO:
    case FP_INFINITE:
    case FP_NAN:
      return -1;  /* +/-0, inf, nan */
    default:
      break;
  }
  if (inexact) {
    ratio = tools_chopl(x)/tools_chopl(y);  /* stored first since tools_isnearintl evaluates its argument twice */
    return tools_isnearintl(ratio, eps);
  }
  return sun_isintl(x/y);
}
#endif


/* Returns the smallest power of b greater than x if orequalx = 0, or greater than or equal x if orequalx = 1.
   For b = 2 and x a positive integer the result is computed with integer bit operations, for b = 2 and any other
   x with frexp/ldexp; all other bases go through tools_logbase and cephes_powi.
   NOTE(review): in the integer path the result is formed in a uint64_t, so values of x of 2^63 and above cannot
   be handled there - confirm that callers stay below that. */
LUALIB_API double tools_nextpower (double x, uint32_t b, int orequalx) {  /* 2.26.3, for x >= 0 && b > 0 */
  if (b == 2) {
    if (tools_isposint(x)) {
      uint64_t y = (uint64_t)x;
#if defined(__GNUC__) && defined(__INTEL)  /* 2.27.0 */
      y -= orequalx;
      /* y is 0 here for x = 1 with orequalx = 1, where the answer is 2^0; it has to be caught explicitly as the
         result of __builtin_clzll(0) is undefined. 2.29.1 fix */
      if (y == 0) return 1;
      /* the `ll` builtin and 1ULL are used since unsigned long has only 32 bits on LLP64 platforms */
      return 1ULL << (64 - __builtin_clzll(y));
#else
      /* Taken and adapted from: https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2 */
      y -= orequalx;
      y |= y >> 1;
      y |= y >> 2;
      y |= y >> 4;
      y |= y >> 8;
      y |= y >> 16;
      y |= y >> 32;
      y++;
      return y;
#endif  /* 2.29.1 fix */
    } else {
      int e;
      if (sun_frexp(x, &e) == 0.5) {  /* mantissa 0.5: x itself is a power of 2; tuned 2.29.1 */
        if (orequalx) return x;
      }
      return sun_ldexp(1.0, e);
    }
  }
  return cephes_powi(b, ceil(tools_chop(tools_logbase(x + 1 - orequalx, b))));  /* tuned 3.16.3 */
}


/* Returns the smallest power of 2 greater than or equal x; protected against underflows: both x = 0 and x = 1
   yield 1. Both versions tested successfully in 2.29.1. */
LUALIB_API double tools_nextpow2 (uint32_t x) {
  uint64_t y;
  y = (uint64_t)x;
#if defined(__GNUC__) && defined(__INTEL)
  /* x < 2 has to be answered up front: it would lead to __builtin_clzll(0), the result of which is undefined */
  if (x < 2) return 1;
  /* y - 1 >= 1 from here on; the `ll` builtin and 1ULL are used since unsigned long has only 32 bits on LLP64
     platforms */
  return 1ULL << (64 - __builtin_clzll(y - 1));
#else
  y -= (x != 0);
  /* copy the highest set bit into all lower positions, then add one */
  y |= y >> 1;
  y |= y >> 2;
  y |= y >> 4;
  y |= y >> 8;
  y |= y >> 16;
  y |= y >> 32;
  y++;
  return y;
#endif
}


/* Returns the largest power of b less than or equal x if orequalx = 1, or less than x if orequalx = 0.
   Returns AGN_NAN for x = 0. */
LUALIB_API double tools_prevpower (double x, uint32_t b, int orequalx) {  /* for x >= 0 && b > 0 */
  if (x == 0) return AGN_NAN;
  if (b != 2) {
    return cephes_powi(b, sun_floor(tools_chop(tools_logbase(x - !orequalx, b))));
  }
  if (tools_isposint(x)) {
    /* Taken and adapted from: https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2 */
    uint64_t bits = (uint64_t)x;
    unsigned int shift;
    bits -= !orequalx;
    /* copy the highest set bit into all lower positions ... */
    for (shift = 1; shift <= 32; shift <<= 1) {
      bits |= bits >> shift;
    }
    /* ... and keep only that highest bit */
    return bits - (bits >> 1);
  }
  {
    int e;
    /* mantissa 0.5: x itself is a power of 2 */
    if (sun_frexp(x, &e) == 0.5 && !orequalx) return x/2.0;
    return sun_ldexp(1.0, e - 1);
  }
}


/* Returns the largest power of 2 less than x; protected against underflows: x = 0 and x = 1 both yield 0. 3.19.3 */
LUALIB_API double tools_prevpow2 (uint32_t x) {
  uint64_t bits;
  unsigned int shift;
  if (x == 0) return 0;
  bits = (uint64_t)x - 1;
  /* copy the highest set bit into all lower positions ... */
  for (shift = 1; shift <= 32; shift <<= 1) {
    bits |= bits >> shift;
  }
  /* ... and keep only that highest bit */
  return bits - (bits >> 1);
}


/* - tools_exp2 -
 * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#undef TTBLBITS
#define  TTBLBITS  8
#define  TTBLSIZE  (1 << TTBLBITS)

/* Taylor series expansion in Maple: 2^x = 1 + x*ln(2) + x^2*(ln(2)^2/2!) + x^3*(ln(2)^3/3! + ...) = taylor(2^x, x=0, 7);
   EXP2Pn below is the coefficient ln(2)^n/n!. The hexadecimal values quoted alongside are the ones noted in the
   original comments; for EXP2P4 and EXP2P5 they were marked as inexact there. */

/* 3*2^43 = 26388279066624 */
static const double EXP2REDUX = 0x1.8p52 / TTBLSIZE;
/* ln(2), 0x1.62e42fefa39efp-1 */
static const double EXP2P1 = 0.69314718055994530941723212145818;
/* ln(2)^2/2!, 0x1.ebfbdff82c575p-3 */
static const double EXP2P2 = 0.24022650695910071233355126316333;
/* ln(2)^3/3!, 0x1.c6b08d704a0a6p-5 */
static const double EXP2P3 = 0.055504108664821579953142263768622;
/* ln(2)^4/4!, 0x1.3b2ab88f70400p-7 is inexact */
static const double EXP2P4 = 0.0096181291076284771619790715736589;
/* ln(2)^5/5!, 0x1.5d88003875c74p-10 is inexact */
static const double EXP2P5 = 0.0013333558146428443423412221987996;

#ifdef XXX
/* Disabled section, only compiled if XXX is defined: higher-order exp2 coefficients and a set of base-10
   coefficients. NOTE(review): as written, EXP2P6 .. EXP2P9 are not part of any declaration and the list is
   terminated after EXP2P7, so this section would need repair before it could be enabled. */
  EXP2P6  = 0.000154035303933816099544370973327424,  /* ln(2)^6/6! = 0.000154035303933816099544370973327424 */
  EXP2P7  = 0.0000152527338040598402800254390120097; /* ln(2)^7/7! = 0.0000152527338040598402800254390120097 */
  EXP2P8  = 0.132154867901443094884037582282884e-5,  /* ln(2)^8/8! */
  EXP2P9  = 0.101780860092396997274900075977447e-6;  /* ln(2)^9/9! */

/* Taylor series expansion 10^x = 1 + x*ln(10) + x^2*(ln(10)^2/2!) + x^3*(ln(10)^3/3! + ...) */
static const double
  EXP10REDUX = 0x1.8p52 / TTBLSIZE,   /* 3*2^43 = 26388279066624 */
  EXP10P1  = 2.30258509299404568401799145468,   /* ln(10) */
  EXP10P2  = 2.65094905523919900528083319429,   /* ln(10)^2/2! */
  EXP10P3  = 2.03467859229347619683099119170,   /* ln(10)^3/3! */
  EXP10P4  = 1.17125514891226696317825761602,   /* ln(10)^4/4! */
  EXP10P5  = 0.539382929195581410199691555704;  /* ln(10)^5/5! */
#endif

/* compute tbl2 with:
   c := 0;
   for i from 2 to 2*256 by 2 do
      a := math.exp2(0.5 + c++/256 + tonumber(tbl[i]))/2;
      printf('  %.18f, %13s,\n', a, tbl[i])
   od */

/* Interleaved lookup table for tools_exp2 with TTBLSIZE pairs: tools_exp2 reads tbl2[i0] (even index) as the
   tabulated exp2 value and tbl2[i0 + 1] (odd index) as the matching offset eps, which it subtracts from the
   reduced argument before evaluating the polynomial. */
static const double tbl2[TTBLSIZE * 2] = {
  /* exp2(z + eps)            eps  */
  0x1.6a09e667f3d5dp-1,   0x1.9880p-44,  /* = math.exp2(0.5 + 0x1.9880p-44)/2 */
  0x1.6b052fa751744p-1,   0x1.8000p-50,
  0x1.6c012750bd9fep-1,  -0x1.8780p-45,
  0x1.6cfdcddd476bfp-1,   0x1.ec00p-46,
  0x1.6dfb23c651a29p-1,  -0x1.8000p-50,
  0x1.6ef9298593ae3p-1,  -0x1.c000p-52,
  0x1.6ff7df9519386p-1,  -0x1.fd80p-45,
  0x1.70f7466f42da3p-1,  -0x1.c880p-45,
  0x1.71f75e8ec5fc3p-1,   0x1.3c00p-46,
  0x1.72f8286eacf05p-1,  -0x1.8300p-44,
  0x1.73f9a48a58152p-1,  -0x1.0c00p-47,
  0x1.74fbd35d7ccfcp-1,   0x1.f880p-45,
  0x1.75feb564267f1p-1,   0x1.3e00p-47,
  0x1.77024b1ab6d48p-1,  -0x1.7d00p-45,
  0x1.780694fde5d38p-1,  -0x1.d000p-50,
  0x1.790b938ac1d00p-1,   0x1.3000p-49,
  0x1.7a11473eb0178p-1,  -0x1.d000p-49,
  0x1.7b17b0976d060p-1,   0x1.0400p-45,
  0x1.7c1ed0130c133p-1,   0x1.0000p-53,
  0x1.7d26a62ff8636p-1,  -0x1.6900p-45,
  0x1.7e2f336cf4e3bp-1,  -0x1.2e00p-47,
  0x1.7f3878491c3e8p-1,  -0x1.4580p-45,
  0x1.80427543e1b4ep-1,   0x1.3000p-44,
  0x1.814d2add1071ap-1,   0x1.f000p-47,
  0x1.82589994ccd7ep-1,  -0x1.1c00p-45,
  0x1.8364c1eb942d0p-1,   0x1.9d00p-45,
  0x1.8471a4623cab5p-1,   0x1.7100p-43,
  0x1.857f4179f5bbcp-1,   0x1.2600p-45,
  0x1.868d99b4491afp-1,  -0x1.2c40p-44,
  0x1.879cad931a395p-1,  -0x1.3000p-45,
  0x1.88ac7d98a65b8p-1,  -0x1.a800p-45,
  0x1.89bd0a4785800p-1,  -0x1.d000p-49,
  0x1.8ace5422aa223p-1,   0x1.3280p-44,
  0x1.8be05bad619fap-1,   0x1.2b40p-43,
  0x1.8cf3216b54383p-1,  -0x1.ed00p-45,
  0x1.8e06a5e08664cp-1,  -0x1.0500p-45,
  0x1.8f1ae99157807p-1,   0x1.8280p-45,
  0x1.902fed0282c0ep-1,  -0x1.cb00p-46,
  0x1.9145b0b91ff96p-1,  -0x1.5e00p-47,
  0x1.925c353aa2ff9p-1,   0x1.5400p-48,
  0x1.93737b0cdc64ap-1,   0x1.7200p-46,
  0x1.948b82b5f98aep-1,  -0x1.9000p-47,
  0x1.95a44cbc852cbp-1,   0x1.5680p-45,
  0x1.96bdd9a766f21p-1,  -0x1.6d00p-44,
  0x1.97d829fde4e2ap-1,  -0x1.1000p-47,
  0x1.98f33e47a23a3p-1,   0x1.d000p-45,
  0x1.9a0f170ca0604p-1,  -0x1.8a40p-44,
  0x1.9b2bb4d53ff89p-1,   0x1.55c0p-44,
  0x1.9c49182a3f15bp-1,   0x1.6b80p-45,
  0x1.9d674194bb8c5p-1,  -0x1.c000p-49,
  0x1.9e86319e3238ep-1,   0x1.7d00p-46,
  0x1.9fa5e8d07f302p-1,   0x1.6400p-46,
  0x1.a0c667b5de54dp-1,  -0x1.5000p-48,
  0x1.a1e7aed8eb8f6p-1,   0x1.9e00p-47,
  0x1.a309bec4a2e27p-1,   0x1.ad80p-45,
  0x1.a42c980460a5dp-1,  -0x1.af00p-46,
  0x1.a5503b23e259bp-1,   0x1.b600p-47,
  0x1.a674a8af46213p-1,   0x1.8880p-44,
  0x1.a799e1330b3a7p-1,   0x1.1200p-46,
  0x1.a8bfe53c12e8dp-1,   0x1.6c00p-47,
  0x1.a9e6b5579fcd2p-1,  -0x1.9b80p-45,
  0x1.ab0e521356fb8p-1,   0x1.b700p-45,
  0x1.ac36bbfd3f381p-1,   0x1.9000p-50,
  0x1.ad5ff3a3c2780p-1,   0x1.4000p-49,
  0x1.ae89f995ad2a3p-1,  -0x1.c900p-45,
  0x1.afb4ce622f367p-1,   0x1.6500p-46,
  0x1.b0e07298db790p-1,   0x1.fd40p-45,
  0x1.b20ce6c9a89a9p-1,   0x1.2700p-46,
  0x1.b33a2b84f1a4bp-1,   0x1.d470p-43,
  0x1.b468415b747e7p-1,  -0x1.8380p-44,
  0x1.b59728de5593ap-1,   0x1.8000p-54,
  0x1.b6c6e29f1c56ap-1,   0x1.ad00p-47,
  0x1.b7f76f2fb5e50p-1,   0x1.e800p-50,
  0x1.b928cf22749b2p-1,  -0x1.4c00p-47,
  0x1.ba5b030a10603p-1,  -0x1.d700p-47,
  0x1.bb8e0b79a6f66p-1,   0x1.d900p-47,
  0x1.bcc1e904bc1ffp-1,   0x1.2a00p-47,
  0x1.bdf69c3f3a16fp-1,  -0x1.f780p-46,
  0x1.bf2c25bd71db8p-1,  -0x1.0a00p-46,
  0x1.c06286141b2e9p-1,  -0x1.1400p-46,
  0x1.c199bdd8552e0p-1,   0x1.be00p-47,
  0x1.c2d1cd9fa64eep-1,  -0x1.9400p-47,
  0x1.c40ab5fffd02fp-1,  -0x1.ed00p-47,
  0x1.c544778fafd15p-1,   0x1.9660p-44,
  0x1.c67f12e57d0cbp-1,  -0x1.a100p-46,
  0x1.c7ba88988c1b6p-1,  -0x1.8458p-42,
  0x1.c8f6d9406e733p-1,  -0x1.a480p-46,
  0x1.ca3405751c4dfp-1,   0x1.b000p-51,
  0x1.cb720dcef9094p-1,   0x1.1400p-47,
  0x1.ccb0f2e6d1689p-1,   0x1.0200p-48,
  0x1.cdf0b555dc412p-1,   0x1.3600p-48,
  0x1.cf3155b5bab3bp-1,  -0x1.6900p-47,
  0x1.d072d4a0789bcp-1,   0x1.9a00p-47,
  0x1.d1b532b08c8fap-1,  -0x1.5e00p-46,
  0x1.d2f87080d8a85p-1,   0x1.d280p-46,
  0x1.d43c8eacaa203p-1,   0x1.1a00p-47,
  0x1.d5818dcfba491p-1,   0x1.f000p-50,
  0x1.d6c76e862e6a1p-1,  -0x1.3a00p-47,
  0x1.d80e316c9834ep-1,  -0x1.cd80p-47,
  0x1.d955d71ff6090p-1,   0x1.4c00p-48,
  0x1.da9e603db32aep-1,   0x1.f900p-48,
  0x1.dbe7cd63a8325p-1,   0x1.9800p-49,
  0x1.dd321f301b445p-1,  -0x1.5200p-48,
  0x1.de7d5641c05bfp-1,  -0x1.d700p-46,
  0x1.dfc97337b9aecp-1,  -0x1.6140p-46,
  0x1.e11676b197d5ep-1,   0x1.b480p-47,
  0x1.e264614f5a3e7p-1,   0x1.0ce0p-43,
  0x1.e3b333b16ee5cp-1,   0x1.c680p-47,
  0x1.e502ee78b3fb4p-1,  -0x1.9300p-47,
  0x1.e653924676d68p-1,  -0x1.5000p-49,
  0x1.e7a51fbc74c44p-1,  -0x1.7f80p-47,
  0x1.e8f7977cdb726p-1,  -0x1.3700p-48,
  0x1.ea4afa2a490e8p-1,   0x1.5d00p-49,
  0x1.eb9f4867ccae4p-1,   0x1.61a0p-46,
  0x1.ecf482d8e680dp-1,   0x1.5500p-48,
  0x1.ee4aaa2188514p-1,   0x1.6400p-51,
  0x1.efa1bee615a13p-1,  -0x1.e800p-49,
  0x1.f0f9c1cb64106p-1,  -0x1.a880p-48,
  0x1.f252b376bb963p-1,  -0x1.c900p-45,
  0x1.f3ac948dd7275p-1,   0x1.a000p-53,
  0x1.f50765b6e4524p-1,  -0x1.4f00p-48,
  0x1.f6632798844fdp-1,   0x1.a800p-51,
  0x1.f7bfdad9cbe38p-1,   0x1.abc0p-48,
  0x1.f91d802243c82p-1,  -0x1.4600p-50,
  0x1.fa7c1819e908ep-1,  -0x1.b0c0p-47,
  0x1.fbdba3692d511p-1,  -0x1.0e00p-51,
  0x1.fd3c22b8f7194p-1,  -0x1.0de8p-46,
  0x1.fe9d96b2a23eep-1,   0x1.e430p-49,
  0x1.0000000000000p+0,   0x0.0000p+0,
  0x1.00b1afa5abcbep+0,  -0x1.3400p-52,
  0x1.0163da9fb3303p+0,  -0x1.2170p-46,
  0x1.02168143b0282p+0,   0x1.a400p-52,
  0x1.02c9a3e77806cp+0,   0x1.f980p-49,
  0x1.037d42e11bbcap+0,  -0x1.7400p-51,
  0x1.04315e86e7f89p+0,   0x1.8300p-50,
  0x1.04e5f72f65467p+0,  -0x1.a3f0p-46,
  0x1.059b0d315855ap+0,  -0x1.2840p-47,
  0x1.0650a0e3c1f95p+0,   0x1.1600p-48,
  0x1.0706b29ddf71ap+0,   0x1.5240p-46,
  0x1.07bd42b72a82dp+0,  -0x1.9a00p-49,
  0x1.0874518759bd0p+0,   0x1.6400p-49,
  0x1.092bdf66607c8p+0,  -0x1.0780p-47,
  0x1.09e3ecac6f383p+0,  -0x1.8000p-54,
  0x1.0a9c79b1f3930p+0,   0x1.fa00p-48,
  0x1.0b5586cf988fcp+0,  -0x1.ac80p-48,
  0x1.0c0f145e46c8ap+0,   0x1.9c00p-50,
  0x1.0cc922b724816p+0,   0x1.5200p-47,
  0x1.0d83b23395dd8p+0,  -0x1.ad00p-48,
  0x1.0e3ec32d3d1f3p+0,   0x1.bac0p-46,
  0x1.0efa55fdfa9a6p+0,  -0x1.4e80p-47,
  0x1.0fb66affed2f0p+0,  -0x1.d300p-47,
  0x1.1073028d7234bp+0,   0x1.1500p-48,
  0x1.11301d0125b5bp+0,   0x1.c000p-49,
  0x1.11edbab5e2af9p+0,   0x1.6bc0p-46,
  0x1.12abdc06c31d5p+0,   0x1.8400p-49,
  0x1.136a814f2047dp+0,  -0x1.ed00p-47,
  0x1.1429aaea92de9p+0,   0x1.8e00p-49,
  0x1.14e95934f3138p+0,   0x1.b400p-49,
  0x1.15a98c8a58e71p+0,   0x1.5300p-47,
  0x1.166a45471c3dfp+0,   0x1.3380p-47,
  0x1.172b83c7d5211p+0,   0x1.8d40p-45,
  0x1.17ed48695bb9fp+0,  -0x1.5d00p-47,
  0x1.18af9388c8d93p+0,  -0x1.c880p-46,
  0x1.1972658375d66p+0,   0x1.1f00p-46,
  0x1.1a35beb6fcba7p+0,   0x1.0480p-46,
  0x1.1af99f81387e3p+0,  -0x1.7390p-43,
  0x1.1bbe084045d54p+0,   0x1.4e40p-45,
  0x1.1c82f95281c43p+0,  -0x1.a200p-47,
  0x1.1d4873168b9b2p+0,   0x1.3800p-49,
  0x1.1e0e75eb44031p+0,   0x1.ac00p-49,
  0x1.1ed5022fcd938p+0,   0x1.1900p-47,
  0x1.1f9c18438cdf7p+0,  -0x1.b780p-46,
  0x1.2063b88628d8fp+0,   0x1.d940p-45,
  0x1.212be3578a81ep+0,   0x1.8000p-50,
  0x1.21f49917ddd41p+0,   0x1.b340p-45,
  0x1.22bdda2791323p+0,   0x1.9f80p-46,
  0x1.2387a6e7561e7p+0,  -0x1.9c80p-46,
  0x1.2451ffb821427p+0,   0x1.2300p-47,
  0x1.251ce4fb2a602p+0,  -0x1.3480p-46,
  0x1.25e85711eceb0p+0,   0x1.2700p-46,
  0x1.26b4565e27d16p+0,   0x1.1d00p-46,
  0x1.2780e341de00fp+0,   0x1.1ee0p-44,
  0x1.284dfe1f5633ep+0,  -0x1.4c00p-46,
  0x1.291ba7591bb30p+0,  -0x1.3d80p-46,
  0x1.29e9df51fdf09p+0,   0x1.8b00p-47,
  0x1.2ab8a66d10e9bp+0,  -0x1.27c0p-45,
  0x1.2b87fd0dada3ap+0,   0x1.a340p-45,
  0x1.2c57e39771af9p+0,  -0x1.0800p-46,
  0x1.2d285a6e402d9p+0,  -0x1.ed00p-47,
  0x1.2df961f641579p+0,  -0x1.4200p-48,
  0x1.2ecafa93e2ecfp+0,  -0x1.4980p-45,
  0x1.2f9d24abd8822p+0,  -0x1.6300p-46,
  0x1.306fe0a31b625p+0,  -0x1.2360p-44,
  0x1.31432edeea50bp+0,  -0x1.0df8p-40,
  0x1.32170fc4cd7b8p+0,  -0x1.2480p-45,
  0x1.32eb83ba8e9a2p+0,  -0x1.5980p-45,
  0x1.33c08b2641766p+0,   0x1.ed00p-46,
  0x1.3496266e3fa27p+0,  -0x1.c000p-50,
  0x1.356c55f929f0fp+0,  -0x1.0d80p-44,
  0x1.36431a2de88b9p+0,   0x1.2c80p-45,
  0x1.371a7373aaa39p+0,   0x1.0600p-45,
  0x1.37f26231e74fep+0,  -0x1.6600p-46,
  0x1.38cae6d05d838p+0,  -0x1.ae00p-47,
  0x1.39a401b713ec3p+0,  -0x1.4720p-43,
  0x1.3a7db34e5a020p+0,   0x1.8200p-47,
  0x1.3b57fbfec6e95p+0,   0x1.e800p-44,
  0x1.3c32dc313a8f2p+0,   0x1.f800p-49,
  0x1.3d0e544ede122p+0,  -0x1.7a00p-46,
  0x1.3dea64c1234bbp+0,   0x1.6300p-45,
  0x1.3ec70df1c4eccp+0,  -0x1.8a60p-43,
  0x1.3fa4504ac7e8cp+0,  -0x1.cdc0p-44,
  0x1.40822c367a0bbp+0,   0x1.5b80p-45,
  0x1.4160a21f72e95p+0,   0x1.ec00p-46,
  0x1.423fb27094646p+0,  -0x1.3600p-46,
  0x1.431f5d950a920p+0,   0x1.3980p-45,
  0x1.43ffa3f84b9ebp+0,   0x1.a000p-48,
  0x1.44e0860618919p+0,  -0x1.6c00p-48,
  0x1.45c2042a7d201p+0,  -0x1.bc00p-47,
  0x1.46a41ed1d0016p+0,  -0x1.2800p-46,
  0x1.4786d668b3326p+0,   0x1.0e00p-44,
  0x1.486a2b5c13c00p+0,  -0x1.d400p-45,
  0x1.494e1e192af04p+0,   0x1.c200p-47,
  0x1.4a32af0d7d372p+0,  -0x1.e500p-46,
  0x1.4b17dea6db801p+0,   0x1.7800p-47,
  0x1.4bfdad53629e1p+0,  -0x1.3800p-46,
  0x1.4ce41b817c132p+0,   0x1.0800p-47,
  0x1.4dcb299fddddbp+0,   0x1.c700p-45,
  0x1.4eb2d81d8ab96p+0,  -0x1.ce00p-46,
  0x1.4f9b2769d2d02p+0,   0x1.9200p-46,
  0x1.508417f4531c1p+0,  -0x1.8c00p-47,
  0x1.516daa2cf662ap+0,  -0x1.a000p-48,
  0x1.5257de83f51eap+0,   0x1.a080p-43,
  0x1.5342b569d4edap+0,  -0x1.6d80p-45,
  0x1.542e2f4f6ac1ap+0,  -0x1.2440p-44,
  0x1.551a4ca5d94dbp+0,   0x1.83c0p-43,
  0x1.56070dde9116bp+0,   0x1.4b00p-45,
  0x1.56f4736b529dep+0,   0x1.15a0p-43,
  0x1.57e27dbe2c40ep+0,  -0x1.9e00p-45,
  0x1.58d12d497c76fp+0,  -0x1.3080p-45,
  0x1.59c0827ff0b4cp+0,   0x1.dec0p-43,
  0x1.5ab07dd485427p+0,  -0x1.4000p-51,
  0x1.5ba11fba87af4p+0,   0x1.0080p-44,
  0x1.5c9268a59460bp+0,  -0x1.6c80p-45,
  0x1.5d84590998e3fp+0,   0x1.69a0p-43,
  0x1.5e76f15ad20e1p+0,  -0x1.b400p-46,
  0x1.5f6a320dcebcap+0,   0x1.7700p-46,
  0x1.605e1b976dcb8p+0,   0x1.6f80p-45,
  0x1.6152ae6cdf715p+0,   0x1.1000p-47,
  0x1.6247eb03a5531p+0,  -0x1.5d00p-46,
  0x1.633dd1d1929b5p+0,  -0x1.2d00p-46,
  0x1.6434634ccc313p+0,  -0x1.a800p-49,
  0x1.652b9febc8efap+0,  -0x1.8600p-45,
  0x1.6623882553397p+0,   0x1.1fe0p-40,
  0x1.671c1c708328ep+0,  -0x1.7200p-44,
  0x1.68155d44ca97ep+0,   0x1.6800p-49,
  0x1.690f4b19e9471p+0,  -0x1.9780p-45,
};


/*
 * exp2(x): compute the base 2 exponential of x
 *
 * Accuracy: Peak error < 0.503 ulp for normalized results.
 *
 * Method: (accurate tables)
 *
 *   Reduce x:
 *     x = k + y, for integer k and |y| <= 1/2.
 *     Thus we have exp2(x) = 2**k * exp2(y).
 *
 *   Reduce y:
 *     y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE.
 *     Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]),
 *     with |z - eps[i]| <= 2**-9 + 2**-39 for the table used.
 *
 *   We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via
 *   a degree-5 minimax polynomial with maximum error under 1.3 * 2**-61.
 *   The values in exp2t[] and eps[] are chosen such that
 *   exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such
 *   that exp2t[i] is accurate to 2**-64.
 *
 *   Note that the range of i is +-TTBLSIZE/2, so we actually index the tables
 *   by i0 = i + TTBLSIZE/2.  For cache efficiency, exp2t[] and eps[] are
 *   virtual tables, interleaved in the real table tbl2[].
 *
 *   This method is due to Gal, with many details due to Gal and Bachelis:
 *
 *  Gal, S. and Bachelis, B.  An Accurate Elementary Mathematical Library
 *  for the IEEE Floating Point Standard.  TOMS 17(1), 26-46 (1991).
 *
 * Taken from file: FreeBSD/.../s_exp2.c */

#ifdef __ARMCPU
/* Only compiled when __ARMCPU is defined: 2^x obtained by scaling x with LN2 (presumably ln 2)
   and handing the product to sun_exp. */
LUALIB_API double sun_exp2 (double x) {
  const double scaled = LN2*x;
  return sun_exp(scaled);
}

/* Only compiled when __ARMCPU is defined: 10^x obtained by scaling x with LN10 (presumably ln 10)
   and handing the product to sun_exp. */
LUALIB_API double sun_exp10 (double x) {
  const double scaled = LN10*x;
  return sun_exp(scaled);
}
#endif

/* tools_exp2: base-2 exponential of x, table-driven (see the method description above).
 *
 * Handled up front:
 *   - NaN and +Inf return x + x, -Inf returns 0.0;
 *   - x >= 1024 returns huge*huge (overflow), x <= -1075 returns twom1000*twom1000 (underflow);
 *   - |x| < 0x1p-54 returns 1.0 + x;
 *   - x > -17 for which tools_isfracwords(hx, lx) is true (presumably: x has a fractional part)
 *     is delegated to sun_exp2, because - per the original author's note - the table path below
 *     gives wrong results for such arguments.
 * Everything else goes through the FreeBSD s_exp2.c reduction, table lookup and polynomial.
 */
LUALIB_API double tools_exp2 (double x) {
  double r, t, z;
  uint32_t hr, hx, lx, ix, i0;
  int k;
  /* For non-integral x above the threshold tested further down, the table-driven implementation is
     utterly incorrect, so we bail out to sun_exp2 there. Otherwise it is twice as fast as GCC's exp2. */
  EXTRACT_WORDS(hx, lx, x);
  /* Filter out exceptional cases. */
  ix = hx & 0x7fffffff;    /* high word of |x| */
  if (ix >= 0x40900000) {  /* |x| >= 1024 */
    if (ix >= 0x7ff00000) {  /* x is inf or NaN */
      if (((ix & 0xfffff) | lx) != 0 || (hx & 0x80000000) == 0)
        return x + x;  /* x is NaN or +Inf */
      else
        return 0.0;  /* x is -Inf */
    }
    if (x >= 0x1.0p10)
      return huge*huge; /* overflow */
    if (x <= -0x1.0ccp10)  /* x <= -1075 */
      return twom1000 * twom1000; /* underflow */
  } else if (ix < 0x3c900000) {   /* |x| < 0x1p-54 = 5.5511151231258e-017 */
     return 1.0 + x;
  }
  if (x > -17 && tools_isfracwords(hx, lx)) {
    return sun_exp2(x);  /* = exp2(x) 35 % tweak, 2.40.0 */
  }
  /* Reduce x, computing z, i0, and k. */
  STRICT_ASSIGN(double, t, x + EXP2REDUX);
  GET_LOW_WORD(i0, t);
  i0 += TTBLSIZE/2;
  k = (i0 >> TTBLBITS) << 20;  /* exponent adjustment, already positioned for the high word */
  i0 = (i0 & (TTBLSIZE - 1)) << 1;  /* tbl2[] interleaves two values per entry, hence the doubling */
  t -= EXP2REDUX;
  z = x - t;
  /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
  t = tbl2[i0];       /* exp2t[i0] */
  z -= tbl2[i0 + 1];  /* eps[i0]   */
  /* https://searchcode.com/file/43703738/libm/src/s_exp2.c/ */
  r = t + t*z*(EXP2P1 + z*(EXP2P2 + z*(EXP2P3 + z*(EXP2P4 + z*EXP2P5))));
  /* Scale by 2**(k>>20). The bound is written as -(1021 << 20): shifting the positive value and
     negating afterwards yields the same number but avoids left-shifting a negative int, which is
     undefined behaviour in C. */
  if (k >= -(1021 << 20)) {
    if (k != 0) {
      GET_HIGH_WORD(hr, r);
      SET_HIGH_WORD(r, hr + k);
    }
    return (r);
  } else {
    /* result would be subnormal: scale up by 2**1000 in the exponent field, then multiply by twom1000 */
    GET_HIGH_WORD(hr, r);
    SET_HIGH_WORD(r, hr + (k + (1000 << 20)));
    return (r * twom1000);
  }
}


/* 10^x computed as tools_exp2 of x scaled by LOG2_10 (presumably log2(10)). */
LUALIB_API double tools_exp10 (double x) {
  const double scaled = LOG2_10*x;
  return tools_exp2(scaled);
}


/* Value added to an integer power of two before it is stored in the exponent field of a double_ieee754. */
#define BIASDOUBLE 1023

/* Rounds f to the nearest int, halves away from zero.
   See: https://stackoverflow.com/questions/9695329/c-how-to-round-a-double-to-an-int
   The argument is fully parenthesised so that compound expressions (e.g. a conditional) expand correctly.
   Note that f is evaluated twice, so do not pass expressions with side effects. */
#define ROUND2INT(f) ((int)((f) >= 0.0 ? ((f) + 0.5) : ((f) - 0.5)))

/* Digits := 33; taylor(2.^x, x, 10); good accuracy with x < -15; tuned by 15 % 3.1.3
   Splits x = n + d with n = ROUND2INT(x), builds the power-of-two factor by writing n + BIASDOUBLE into the
   exponent field of a zeroed double_ieee754, and multiplies it with the degree-9 polynomial in d. The polynomial
   is evaluated as an even part (powers 0, 2, .., 8) plus an odd part (powers 1, 3, .., 9), both in d*d.
   No range check is done on x here. */
LUALIB_API double tools_exp2_9 (register double x) {
  double_ieee754 pow2n = { 0 };
  double dsq, even, odd;
  int n;
  n = ROUND2INT(x);  /* 9 % point speed increase when compared to (int)sun_round(x) */
  pow2n.c.exponent = n + BIASDOUBLE;
  x -= n;            /* x now holds the remainder d */
  dsq = x*x;
  even = 1.0 + (0.24022650695910071233355126316333 +
               (0.96181291076284771619790715736589e-2 +
               (0.15403530393381609954437097332742e-3 +
                0.13215486790144309488403758228288e-5*dsq)*dsq)*dsq)*dsq;
  odd = (0.69314718055994530941723212145818 +
        (0.55504108664821579953142263768622e-1 +
        (0.13333558146428443423412221987996e-2 +
        (0.1525273380405984028002543901201e-4 +
         0.10178086009239699727490007597745e-6*dsq)*dsq)*dsq)*dsq)*x;
  return pow2n.v*(even + odd);
}


/* Digits := 33; taylor(10.^x, x, 10); good accuracy with x < 10; tuned by 15 % 3.1.3
   Scales x by LOG2_10 first and then proceeds exactly like tools_exp2_9: split into n = ROUND2INT(x) and a
   remainder d, put n + BIASDOUBLE into the exponent field of a zeroed double_ieee754, and multiply with the
   degree-9 polynomial in d (even part plus odd part). No range check is done on x here. */
LUALIB_API double tools_exp10_9 (register double x) {
  double_ieee754 pow2n = { 0 };
  double dsq, even, odd;
  int n;
  x = LOG2_10*x;
  n = ROUND2INT(x);  /* 9 % point speed increase when compared to (int)sun_round(x) */
  pow2n.c.exponent = n + BIASDOUBLE;
  x -= n;            /* x now holds the remainder d */
  dsq = x*x;
  even = 1.0 + (0.24022650695910071233355126316333 +
               (0.96181291076284771619790715736589e-2 +
               (0.15403530393381609954437097332742e-3 +
                0.13215486790144309488403758228288e-5*dsq)*dsq)*dsq)*dsq;
  odd = (0.69314718055994530941723212145818 +
        (0.55504108664821579953142263768622e-1 +
        (0.13333558146428443423412221987996e-2 +
        (0.1525273380405984028002543901201e-4 +
         0.10178086009239699727490007597745e-6*dsq)*dsq)*dsq)*dsq)*x;
  return pow2n.v*(even + odd);
}


/* Snaps x to positive zero if it is a zero of either sign or if |x| <= eps; any other x is returned unchanged. */
LUALIB_API double tools_zeroin (double x, double eps) {  /* 2.30.4 */
  if (x == 0.0) return 0.0;        /* also true for -0.0, which is thereby normalised to +0.0 */
  if (fabs(x) <= eps) return 0.0;
  return x;
}


/* Long double variant: snaps x to positive zero if it is a zero of either sign or if |x| <= eps;
   any other x is returned unchanged. */
LUALIB_API long double tools_zeroinl (long double x, long double eps) {  /* 3.16.6 */
  if (x == 0.0L) return 0.0L;        /* also true for -0.0L, which is thereby normalised to +0.0L */
  if (fabsl(x) <= eps) return 0.0L;
  return x;
}


/* 3.18.1, slower but more reliable - though not free of glitches - and far better than the numeric approach, which
   fails at far fewer fractional digits:
> math.ndigits(1.1111111111, -10):
10

> math.ndigits(1.11111111111, -10):
11

> math.ndigits(1.111111111111, -10):
11

> math.ndigits(1.1111111111111, -10):
13 */

/* Estimates the number of fractional digits of x from its "%.25f" rendering: the result is the distance between
   the decimal point and the first '0' character in the rendered string, minus one.
   - x:        the number to inspect.
   - overflow: set to 1 (and 0 is returned) when no sensible count can be derived, i.e. when the rendering does not
               fit the local buffer, contains no '.', or has its first '0' in front of the decimal point.
               It is left untouched otherwise, so the caller has to initialise it.
   NOTE(review): the search for '0' starts at the beginning of the string, so a zero in the integral part
   (e.g. 10.5 or 0.5) ends up in the overflow case; this is the behaviour of the original code and is kept.
   The formatting is bounded with snprintf, as "%.25f" needs more than 300 characters for very large |x|. */
LUALIB_API int tools_ndigplaces (double x, int *overflow) {  /* int, not size_t to prevent overflows with very large numbers */
  char str[128];
  const char *dot, *zero;
  int len, r;
  len = snprintf(str, sizeof str, "%.25f", x);
  if (len < 0 || (size_t)len >= sizeof str) {  /* encoding error or truncated: |x| is too large */
    *overflow = 1;
    return 0;
  }
  dot = strchr(str, '.');
  if (dot == NULL) {  /* e.g. inf/nan: no decimal point to measure from */
    *overflow = 1;
    return 0;
  }
  zero = strchr(str, '0');
  if (zero == NULL) zero = str + len;  /* no zero digit at all: count every digit after the point */
  r = (int)(zero - dot) - 1;
  if (r < 0) {
    r = 0;  /* we might have an overflow with very large x */
    *overflow = 1;
  }
  return r;
}

#ifdef OBSOLETE
/* Former numeric implementations, only compiled when OBSOLETE is defined. */
#define MAXFRACS 17  /* upper bound for the loop counters below */
/* Counts fractional places numerically: takes |x|, strips the integral part, then repeatedly multiplies by 10 and
   strips the integral part again until the remainder is no longer greater than AGN_EPSILON or the counter exceeds
   MAXFRACS. Returns the number of iterations (as a double); 0 for x == 0. */
LUALIB_API double _tools_ndigplaces (double x) {
  int i;
  if (x == 0) return 0;
  if (x < 0) x = -x;
  x -= sun_trunc(x);
  for (i=0; x > AGN_EPSILON && i <= MAXFRACS; i++) {
    x *= 10.0;
    x -= sun_trunc(x);
  }
  return i;
}

/* Counts the number of fractional places in a C double. 2.31.0, improved and changed 2.34.1 to get much better results.
   Works like the variant above, but rounds the fractional part with tools_roundf after each step; the number of
   digits passed to tools_roundf shrinks with the decimal magnitude of the current value (iplaces) and with the
   iteration count. Returns AGN_FORNUMADJBAILOUT instead of the count if more than 12 iterations were needed.
   Wrong results with large |x|:
   math.ndigits(10000000.10101007, -10) -> 9 */
LUALIB_API double __tools_ndigplaces (double x) {
  int i, iplaces;
  double iptr;
  if (x == 0) return 0;
  if (x < 0) x = -x;
  iplaces = 1 + ((x < 1) ? 0 : sun_floor(sun_log(x)/LN10));  /* 1 + floor(log(x)/LN10), or 1 for x < 1 */
  x = tools_roundf(sun_modf(x, &iptr), MAXFRACS - iplaces, AGN_FORNUMRNDDIR);
  for (i=0; x > AGN_EPSILON && i <= MAXFRACS; i++) {  /* AGN_EPSILON instead of zero ! */
    x *= 10.0;
    iplaces = 1 + ((x < 1) ? 0 : sun_floor(sun_log(x)/LN10));
    x = tools_roundf(sun_modf(x, &iptr), MAXFRACS - iplaces - (i + 1), AGN_FORNUMRNDDIR);
  }
  return (i <= 12) ? i : AGN_FORNUMADJBAILOUT;
}
#endif

/* 2.35.0. Linear membership test on an unsorted int array: returns 1 if x equals one of the first n elements
   of a, and 0 otherwise (also for n == 0). With, for example,
     const int E[] = {0x2E, 0x23, 0x40};
   call like this:
     tools_isintenum(item, E, sizeof(E)/sizeof(*E))
   The element count has to be passed in: inside the function `a` is only a pointer to the first element,
   so sizeof cannot recover the size of the caller's array. */
LUALIB_API int tools_isintenum (int x, const int *a, size_t n) {
  while (n--) {
    if (*a++ == x) return 1;
  }
  return 0;
}


/* Binary search for x in the ascending sorted array a of n elements; returns 1 if found, 0 otherwise.
   Call it like this: _tools_isinushortarray(x, a, sizeof(a)/sizeof(*a)).
   The search runs on the half-open index range [lo, hi), so neither an empty array nor an x smaller than a[0]
   can make an unsigned index wrap around. */
LUALIB_API int _tools_isinushortarray (unsigned short int x, const unsigned short int *a, size_t n) {  /* 2.38.2 */
  size_t lo, hi;
  lo = 0; hi = n;
  while (lo < hi) {
    size_t mid = lo + (hi - lo)/2;  /* cannot overflow, always within [lo, hi) */
    unsigned short int v = a[mid];
    if (v < x)
      lo = mid + 1;
    else if (v > x)
      hi = mid;
    else
      return 1;
  }
  return 0;
}


/* Checks whether x is contained in the ascending sorted array a of n elements; returns 1 if so, 0 otherwise.
   The loop narrows [lo, hi] down to the first element that is not smaller than x (or to the last element if
   there is none) and compares just once at the end. An empty array yields 0. */
LUALIB_API int tools_isinushortarray (unsigned short int x, const unsigned short int *a, size_t n) {  /* 2.38.2 */
  size_t lo, hi, mid;
  if (n == 0) return 0;  /* n - 1 would wrap around and index far outside the array */
  lo = 0; hi = n - 1;
  while (lo < hi) {
    mid = lo + (hi - lo)/2;  /* rounds down, so mid < hi and the range always shrinks */
    if (a[mid] < x)
      lo = mid + 1;
    else
      hi = mid;
  }
  return a[hi] == x;  /* hi, not mid */
}


/* Taken from:
   z64.cpp: 2012-04-16 sieve of Eratosthenes for prime numbers, ver.3
   (c) Khashin S.I.
   http://math.ivanovo.ac.ru/dalgebra/Khashin/index.html
   written by:
   Sergey Khashin, Department of Mathematics, Ivanovo State University, Ermaka 39, Ivanovo, Russian Federation */

/* Value of the last entry of skh_smallprimes[]. */
#define LASTSMALLPRIME   8161
/* Table of the primes from 2 up to LASTSMALLPRIME in ascending order (1024 entries); tools_isprime looks
   small arguments up in it by binary search. The trailing comment of each row is the zero-based index
   of the row's first entry. */
static unsigned short int skh_smallprimes[1024] = {
  /* 0     1     2     3     4     5     6     7     8     9    10    11    12    13    14    15   */
     2,    3,    5,    7,   11,   13,   17,   19,   23,   29,   31,   37,   41,   43,   47,   53,  /*   0 */
    59,   61,   67,   71,   73,   79,   83,   89,   97,  101,  103,  107,  109,  113,  127,  131,  /*  16 */
   137,  139,  149,  151,  157,  163,  167,  173,  179,  181,  191,  193,  197,  199,  211,  223,  /*  32 */
   227,  229,  233,  239,  241,  251,  257,  263,  269,  271,  277,  281,  283,  293,  307,  311,  /*  48 */
   313,  317,  331,  337,  347,  349,  353,  359,  367,  373,  379,  383,  389,  397,  401,  409,  /*  64 */
   419,  421,  431,  433,  439,  443,  449,  457,  461,  463,  467,  479,  487,  491,  499,  503,  /*  80 */
   509,  521,  523,  541,  547,  557,  563,  569,  571,  577,  587,  593,  599,  601,  607,  613,  /*  96 */
   617,  619,  631,  641,  643,  647,  653,  659,  661,  673,  677,  683,  691,  701,  709,  719,  /* 112 */
   727,  733,  739,  743,  751,  757,  761,  769,  773,  787,  797,  809,  811,  821,  823,  827,  /* 128 */
   829,  839,  853,  857,  859,  863,  877,  881,  883,  887,  907,  911,  919,  929,  937,  941,  /* 144 */
   947,  953,  967,  971,  977,  983,  991,  997, 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049,  /* 160 */
  1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,  /* 176 */
  1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283,  /* 192 */
  1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423,  /* 208 */
  1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,  /* 224 */
  1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619,  /* 240 */
  1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747,  /* 256 */
  1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877,  /* 272 */
  1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003,  /* 288 */
  2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,  /* 304 */
  2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267,  /* 320 */
  2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377,  /* 336 */
  2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503,  /* 352 */
  2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657,  /* 368 */
  2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,  /* 384 */
  2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861,  /* 400 */
  2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011,  /* 416 */
  3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167,  /* 432 */
  3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301,  /* 448 */
  3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,  /* 464 */
  3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541,  /* 480 */
  3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671,  /* 496 */
  3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797,  /* 512 */
  3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923,  /* 528 */
  3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,  /* 544 */
  4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211,  /* 560 */
  4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337,  /* 576 */
  4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481,  /* 592 */
  4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621,  /* 608 */
  4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,  /* 624 */
  4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909,  /* 640 */
  4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011,  /* 656 */
  5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167,  /* 672 */
  5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309,  /* 688 */
  5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,  /* 704 */
  5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573,  /* 720 */
  5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711,  /* 736 */
  5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849,  /* 752 */
  5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007,  /* 768 */
  6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,  /* 784 */
  6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271,  /* 800 */
  6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379,  /* 816 */
  6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563,  /* 832 */
  6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701,  /* 848 */
  6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,  /* 864 */
  6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971,  /* 880 */
  6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121,  /* 896 */
  7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253,  /* 912 */
  7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457,  /* 928 */
  7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,  /* 944 */
  7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691,  /* 960 */
  7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853,  /* 976 */
  7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009,  /* 992 */
  8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 8101, 8111, 8117, 8123, 8147,        /* 1008 */
  LASTSMALLPRIME
};

/* Primality test; returns 1 if x is prime and 0 otherwise.
   Stages: (1) 0, 1 and multiples of 2 or 3 are settled directly; (2) values up to LASTSMALLPRIME are looked up in
   skh_smallprimes[]; (3) values below 51529 = 227^2 go through an unrolled trial division by the primes 7 .. 223;
   (4) everything else is trial-divided by the numbers 6k - 1 and 6k + 1 up to sqrt(x) + 1.
   Taken from https://stackoverflow.com/questions/50930926/how-to-check-if-a-number-is-prime-in-a-more-efficient-manner,
   posted by user K_Bhanu_Prakash; 2.37.9 */
LUALIB_API int tools_isprime (unsigned long long int x) {
  unsigned long long int i, imax;
  if (x <= 1) return 0;
  if (x == 2 || x == 3) return 1;
  if (x % 2 == 0 || x % 3 == 0) return 0;
  /* From here on x is odd and not a multiple of 3, hence always of the form 6k - 1 or 6k + 1; a separate test for
     that form can never fire and has been dropped. */
  if (x < LASTSMALLPRIME + 1) {
    return tools_isinushortarray((unsigned short int)x, skh_smallprimes, sizeof(skh_smallprimes)/sizeof(*skh_smallprimes));
  }
  if (x < 51529) {  /* fast constexpr check for small primes (removable), 12 % tweak, 2.38.2, taken from:
    https://stackoverflow.com/questions/1538644/c-determine-if-a-number-is-prime by user user13366325 */
    /* 0x208A2882 has exactly the bits 1, 7, 11, 13, 17, 19, 23 and 29 set, i.e. the residues modulo 30 that are
       coprime to 30; each nested level adds the primes needed up to the next squared bound. */
    return (x & 1) & ((x < 6) * 42 + 0x208A2882) >> x % 30 && (x < 49 ||
      (x % 7 && x % 11 && x % 13 && x % 17 && x % 19 && x % 23 && x % 29 && x % 31 && x % 37 && (x < 1369 ||
      (x % 41 && x % 43 && x % 47 && x % 53 && x % 59 && x % 61 && x % 67 && x % 71 && x % 73 && ( x < 6241 ||
      (x % 79 && x % 83 && x % 89 && x % 97 && x % 101 && x % 103 && x % 107 && x % 109 && x % 113 && ( x < 16129 ||
      (x % 127 && x % 131 && x % 137 && x % 139 && x % 149 && x % 151 && x % 157 && x % 163 && x % 167 && ( x < 29929 ||
      (x % 173 && x % 179 && x % 181 && x % 191 && x % 193 && x % 197 && x % 199 && x % 211 && x % 223))))))))));
  }
  imax = (unsigned long long int)sqrt(x) + 1;  /* + 1 guards against sqrt rounding down */
  for (i=5; i <= imax; i += 6) {  /* candidates 6k - 1 (i) and 6k + 1 (i + 2) */
    if (x % i == 0 || x % (i + 2) == 0) return 0;
  }
  return 1;
}


/* Taken from https://stackoverflow.com/questions/4475996/given-prime-number-n-compute-the-next-prime, Implementation 5; 2.37.9
   As highly optimised, ONLY works with tools_nextprime and canNOT be used stand-alone !!!
   Trial division of x by 5, 7, 11, 13, .. (the numbers 6k - 1 and 6k + 1) until the quotient drops below the
   divisor. Divisibility by 2 and 3 is never checked; tools_nextprime only passes candidates of the form 6k +- 1. */
static FORCE_INLINE int tools_nextprime_isprime (unsigned long long int x) {
  unsigned long long int d, step, q;
  d = 5;
  step = 4;
  for (;;) {
    q = x/d;
    if (q < d) return 1;     /* d exceeds sqrt(x): no divisor found */
    if (x == q*d) return 0;  /* d divides x */
    step ^= 6;               /* toggles between 2 and 4 */
    d += step;
  }
}

/* Returns the smallest prime that is strictly greater than x, so also 3 for x = 2.
   For x >= 6 the search starts at the first number of the form 6k + 1 or 6k + 5 that is not below x + 1 and then
   walks through the numbers of that form only.
   NOTE(review): no provision is made for x close to the maximum of unsigned long long, where x + 1 or the
   candidate may wrap around. */
LUALIB_API unsigned long long int tools_nextprime (unsigned long long int x) {  /* 2.37.9 */
  unsigned long long int k, rem, step;
  if (x < 2) return 2;
  if (x == 2) return 3;
  if (x < 5) return 5;
  if (x == 5) return 7;
  x += 1;
  k = x/6;
  rem = x - 6*k;  /* x modulo 6 */
  if (rem < 2) {
    x = 6*k + 1;
    step = 2;     /* toggled to 4 before first use: 6k + 1 -> 6k + 5 */
  } else {
    x = 6*k + 5;
    step = 4;     /* toggled to 2 before first use: 6k + 5 -> 6k + 7 */
  }
  while (!tools_nextprime_isprime(x)) {
    step ^= 6;
    x += step;
  }
  return x;
}


/* Returns the largest prime that is strictly smaller than x, or 0 if there is none (x < 3).
   For x >= 6 the odd numbers below x are tried in descending order, each one by trial division with the odd
   numbers 3, 5, 7, .. . The divisor bound is tested as n <= i/n instead of n*n < i so that the product cannot
   wrap around for candidates close to the maximum of unsigned long long; the outcome is the same because a
   divisor n with n*n == i ends the inner loop through the remainder test anyway. */
LUALIB_API unsigned long long int tools_prevprime (unsigned long long int x) {  /* 2.37.9 */
  unsigned long long int n, i;
  if (x < 3) return 0;
  if (x == 3) return 2;
  if (x < 6) return 3;
  i = (x % 2 == 0) ? x - 1 : x - 2;  /* largest odd number below x */
  for (;; i -= 2) {
    n = 3;
    while (n <= i/n && i % n != 0)
      n += 2;
    if (i % n != 0) break;  /* no divisor up to sqrt(i): i is prime */
  }
  return i;
}


/*
Cephes Math Library Release 2.8:  June, 2000
Copyright 1984, 1995, 2000 by Stephen L. Moshier
Taken from: https://github.com/jeremybarnes/cephes/blob/master/cmath/clog.c
Subtract nearest integer multiple of PI, extended precision value of PI (`reduce Pi`)
Constants
  DP1 = 3.14159265160560607910E0;
  DP2 = 1.98418714791870343106E-9;
  DP3 = 1.14423774522196636802E-17;
come from cephes.h; switching to long doubles does not improve results.
*/

/* Subtracts from x the multiple of Pi that is nearest to it. The multiple is found by dividing by PI, adding
   0.5 times tools_bool2pm(..) (presumably +1 for a non-negative and -1 for a negative quotient) and truncating
   to a long; Pi itself is then subtracted in the three pieces DP1, DP2 and DP3. */
LUALIB_API double tools_redupi (double x) {
  double m;  /* switching to long doubles does not improve results */
  long multiple;
  m = x/PI;
  m += 0.5*tools_bool2pm(m >= 0.0);
  multiple = (long)m;    /* truncation towards zero yields the multiple */
  m = (double)multiple;
  return ((x - m*DP1) - m*DP2) - m*DP3;
}


/* The constants below have been taken from OpenBSD/.../s_ctanl.c.
 *
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* Pi in three long double pieces, subtracted one after the other in tools_redupil. */
#define LDP1 3.14159265358979323829596852490908531763125L
#define LDP2 1.6667485837041756656403424829301998703007e-19L
#define LDP3 1.8830410776607851167459095484560349402753e-39L

/* Long double variant of tools_redupi: subtracts from x the multiple of Pi that is nearest to it. The multiple
   is found by dividing by M_PIld, adding 0.5 times tools_bool2pm(..) (presumably +1 for a non-negative and -1 for
   a negative quotient) and truncating to a long. */
LUALIB_API long double tools_redupil (long double x) {  /* 2.41.2 */
  long double m;
  long multiple;
  m = x/M_PIld;
  m += 0.5L*(long double)tools_bool2pm(m >= 0.0L);
  multiple = (long)m;         /* truncation towards zero yields the multiple */
  m = (long double)multiple;
  return ((x - m*LDP1) - m*LDP2) - m*LDP3;
}


/* Beta function of z and w, evaluated case by case:
   - both arguments non-positive integers: AGN_NAN;
   - both negative non-integers with an integral sum: 0;
   - both positive integers: exp of a sum/difference of tools_lnfactorial values;
   - remaining integer pairs: reflection via a recursive call, or AGN_NAN;
   - z == w: lgamma-based for z > 0, gamma-based otherwise;
   - everything else: lgamma-based if both are positive, gamma-based otherwise.
   The lgamma-based paths end in the common return at the bottom, which yields AGN_NAN if the logarithmic
   result is infinite. */
LUALIB_API double tools_beta (double z, double w) {  /* 3.7.5 */
  int zisint, wisint;
  zisint = tools_isint(z);
  wisint = tools_isint(w);
  if ((zisint && z <= 0) && (wisint && w <= 0)) {  /* 3.7.5/3.7.7 fix */
    return AGN_NAN;
  } else {
    if ((z < 0 && w < 0) && tools_isint(z + w) && !zisint && !wisint)
      return 0;
    else if ((zisint && z > 0) && (wisint && w > 0)) {  /* 3.7.6 40 % boost if z < 512 && w < 512 */
      /* no need to evaluate gammasign as both arguments are nonnegative */
      unsigned int intz, intw;
      intz = (unsigned int)z;
      intw = (unsigned int)w;
      /* with z + w < 172 we could use factorials, but going logarithmic is faster;
         see brilliant.org/wiki/beta-function; 172 threshold to prevent `undefined` being returned */
      return sun_exp(tools_lnfactorial(intz - 1) + tools_lnfactorial(intw - 1) - tools_lnfactorial(intz + intw - 1));
    } else {
      double sign, r;
      if (zisint && wisint) {  /* 3.7.7 fix */
        /* here exactly one argument is a positive integer, the other one is an integer <= 0 */
        if (z + w <= 0) {
          if (z < 0 && w > 0) return sun_pow(-1, w, 1)*tools_beta(1 - z - w, w);
          else if (z > 0 && w < 0) return sun_pow(-1, z, 1)*tools_beta(1 - z - w, z);
          else return AGN_NAN;
        } else if (z*w < 0) {
          return AGN_NAN;
        }
        if (z == 0 || w == 0) return AGN_NAN;
        r = 0; sign = 0;  /* to prevent compiler warnings */
      } else if (z == w) {  /* 3.7.6 tweak */
        sign = tools_square(tools_gammasign(z))*tools_gammasign(2*z);  /* the signs may differ, e.g. gammasign(-3.0) <> gammasign(-3.0*2) */
        if (z > 0)
          r = 2*sun_lgamma(z) - sun_lgamma(2*z);
        else
          return tools_square(tools_gamma(z))/tools_gamma(2*z);
      } else {
        /* with z + w > 171, fact(z - 1)*fact(w - 1) overflows, so we use logarithmic evaluation, see:
           brilliant.org/wiki/beta-function; 172 threshold to prevent `undefined` being returned */
        if (z > 0 && w > 0) {  /* 3.7.7 fix */
          sign = tools_gammasign(z)*tools_gammasign(w)*tools_gammasign(z + w);
          r = sun_lgamma(z) + sun_lgamma(w) - sun_lgamma(z + w);
        } else {
          double s = z + w;
          r = (tools_gamma(z)*tools_gamma(w))/tools_gamma(s);
          if (fabs(r) < 2*DBL_EPSILON) s = 0;  /* 4.3.1 fix */  /* a tiny quotient is forced into the zero result below */
          return tools_isnonposint(s) ? 0.0 : r;  /* 3.7.7, tools_isnonposint: work like in Maple */
        }
      }
      /* r holds the logarithm of the magnitude at this point */
      return (tools_isinf(r)) ? AGN_NAN : sign * sun_exp(r);  /* switching to long double does not improve the precision of the result */
    }
  }
}


/* Logarithm of the Beta function of z and w. The case analysis mirrors tools_beta, with these differences
   visible in the code: both negative non-integers with an integral sum give AGN_NAN instead of 0, the
   tools_lnfactorial and lgamma expressions are returned without exponentiation, and the gamma-based paths
   return sun_log of the quotient. */
LUALIB_API double tools_lnbeta (double z, double w) {  /* 4.3.1 */
  int zisint, wisint;
  zisint = tools_isint(z);
  wisint = tools_isint(w);
  if ((zisint && z <= 0) && (wisint && w <= 0)) {
    return AGN_NAN;
  } else {
    if ((z < 0 && w < 0) && tools_isint(z + w) && !zisint && !wisint)
      return AGN_NAN;
    else if ((zisint && z > 0) && (wisint && w > 0)) {
      /* no need to evaluate gammasign as both arguments are nonnegative */
      unsigned int intz, intw;
      intz = (unsigned int)z;
      intw = (unsigned int)w;
      return tools_lnfactorial(intz - 1) + tools_lnfactorial(intw - 1) - tools_lnfactorial(intz + intw - 1);
    } else {
      double sign, r;
      if (zisint && wisint) {
        /* here exactly one argument is a positive integer, the other one is an integer <= 0 */
        if (z + w <= 0) {
          if (z < 0 && w > 0) return sun_log(sun_pow(-1, w, 1)*tools_beta(1 - z - w, w));
          else if (z > 0 && w < 0) return sun_log(sun_pow(-1, z, 1)*tools_beta(1 - z - w, z));
          else return AGN_NAN;
        } else if (z*w < 0) {
          return AGN_NAN;
        }
        if (z == 0 || w == 0) return AGN_NAN;
        r = 0; sign = 0;  /* to prevent compiler warnings */
      } else if (z == w) {
        sign = tools_square(tools_gammasign(z))*tools_gammasign(2*z);  /* the signs may differ, e.g. gammasign(-3.0) <> gammasign(-3.0*2) */
        if (z > 0)
          r = 2*sun_lgamma(z) - sun_lgamma(2*z);
        else
          return sun_log(tools_square(tools_gamma(z))/tools_gamma(2*z));
      } else {
        if (z > 0 && w > 0) {
          sign = tools_gammasign(z)*tools_gammasign(w)*tools_gammasign(z + w);
          r = sun_lgamma(z) + sun_lgamma(w) - sun_lgamma(z + w);
        } else {
          double s = z + w;
          /* NOTE(review): sign is assigned here but this branch returns without using it */
          sign = tools_gammasign(z)*tools_gammasign(w)*tools_gammasign(z + w);
          r = (tools_gamma(z)*tools_gamma(w))/tools_gamma(s);
          if (fabs(r) < 2*DBL_EPSILON) s = 0;  /* a tiny quotient is forced into the AGN_NAN result below */
          return tools_isnonposint(s) ? AGN_NAN : sun_log(r);  /* work like in Maple */
        }
      }
      /* r already is the logarithmic result here; it is multiplied by sign, not exponentiated */
      return (tools_isinf(r)) ? AGN_NAN : sign * r;  /* switching to long double does not improve the precision of the result */
    }
  }
}


/* Returns the name of the computer as a freshly strdup'ed string that the caller has to free, or NULL if the
   name could not be determined (or on platforms not covered below). Windows queries GetComputerName, UNIX,
   macOS and OS/2 query gethostname. */
LUALIB_API char *tools_computername (void) {  /* 2.39.1, FREE ME ! */
#if defined(_WIN32)
  TCHAR name[257] = { 0 };
  DWORD size = 256;
  if (!GetComputerName(name, &size)) return NULL;
  return strdup(name);
#elif defined(__unix__) || defined(__APPLE__)
  char name[257] = { 0 };  /* 5.1.1 fix */
  if (gethostname(name, 256) != 0) return NULL;
  return strdup(name);
#elif defined(__OS2__)
  char name[1025] = { 0 };
  gethostname(name, 1024);  /* result not checked: an untouched, still empty buffer counts as failure */
  if (tools_streq(name, "")) return NULL;
  return strdup(name);
#else
  return NULL;
#endif
}


/* find base used for computer number representation, 3.10.6
   First doubles `big` until adding 1 to it and subtracting it again no longer gives exactly 1, then doubles
   `step` until adding it to `big` changes the sum; the resulting difference is returned as the base. */
LUALIB_API int tools_doublebase (void) {
  const double one = 1.0;
  double big = 1.0, step = 1.0;
  for (; (big + one) - big == one; big *= 2.0) { /* just grow */ }
  for (; (big + step) == big; step *= 2.0) { /* just grow */ }
  return (int)((big + step) - big);
}


/* ************************** DO NOT USE OR REMOVE THE FOLLOWING FUNCTIONS ************************** */

