### /u/sy/beebe/mpieee.maple, Wed Nov 28 08:11:48 2001 ### Edit by Nelson H. F. Beebe ### ==================================================================== ### Output an IEEE-754-like representation of multiple-precision ### floating-point numbers. ### ### Given a floating-point number, v, we have ### ### v = (-1)^s * 2^p * (1.f) ### ### where the ^ operator means to-the-power-of, p is an integer, f is ### the fractional part, and s == 0 (v is positive) or 1 (v is ### negative). ### ### From this, we see that ### ### log2(|v|) = p + log2(1.f) ### ### (|...| means absolute value), and since ### ### 0 <= log2(1.f) < 1 ### ### we can determine p as ### ### p = floor(log2(|v|)) ### ### Similarly, we can determine f from ### ### 2^(-p)|v| = 1.f ### ### f = 2^(-p)|v| - 1 ### ### where ### ### 0 <= f < 1 ### ### The IEEE-754 style representation of such a normal number is ### ### --------------------------------------- ### | s | biased exponent | fraction ... | ### --------------------------------------- ### ^ ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ### | | |---- N bits ### | |--- q bits ### |--1 bit ### ### For architectural reasons, 1 + q + N is always a multiple of 8 ### bits, and the exponent bias is chosen to make the exponent ### positive, reserving the largest positive biased exponent for ### representation of NaN (nonzero fraction) and Infinity (zero ### fraction). ### ### The IEEE 754 Standard formats are: ### ### -------------------------------------------------------- ### word size q exponent bias N hidden bit? ### -------------------------------------------------------- ### 32-bit 8 127 23 yes ### 64-bit 11 1023 52 yes ### 80-bit 15 16383 64 no ### 128-bit 15 16383 112 yes ### -------------------------------------------------------- ### ### For the variant that lacks a hidden bit, we rewrite the above ### formulas as ### ### v = (-1)^s * 2^p * (1.f) ### = (-1)^s * 2^(p+1) * (0.g) ### ### where ### ### g = (1.f)/2 ### ### and then encode as before, with p replaced by p+1, and f by g. ### ### To implement the IEEE-754 encodings in arbitrary precision (i.e., ### with possibly more than N fractional bits), we construct the ### hexadecimal representation from the integer expressions ### ### when v > 0: (p + exponent bias) * 2^N + (f * 2^N) ### when v < 0: 2^(q + N) + (p + exponent bias) * 2^N + (f * 2^N) ### when v == 0: 0 ### ### with the constraint that ### ### 0 <= (p + exponent bias) < 2^(q - 1) ### ### For negative out-of-range biased exponents, we signal underflow ### (ignoring for now the possibility of representing these as ### IEEE-754 subnormals), and simply output ### ### v > 0: +0 (UNDERFLOW) ### v < 0: -0 (UNDERFLOW) ### ### with the convention that the leading sign distinguishes the result ### from an exact zero, 0. ### ### For positive out-of-range biased exponents, we signal overflow ### by outputting the representation of signed Infinity: ### ### v > 0: (2^q - 1) * 2^N + 0 (+Infinity) ### v < 0: 2^(q + N) + (2^q - 1) * 2^N + 0 (-Infinity) ### ### ==================================================================== Digits := 75: matula_string := proc(Nbits,v) description "Return a string containing the maximal-length representation of v with Nbits bits of precision"; ## From David Matula, ``In-and-Out Conversions'', ## Comm. ACM, 11(1) 47--50, January, 1968): need at ## least (N/(log_b 10) + 1) decimal digits to ## correctly represent an N-digit base-b number in ## base 10. Maple's %.we format prints 1+w decimal ## digits, so we subtract 1: return (sprintf(sprintf("%%.%de", ceil(Nbits/log[2](10))), v)); end proc; tohex := proc(n,wordlength) local s; s := convert(n,hex); ## print ("DEBUG 3:", n, wordlength, length(s), trunc(wordlength/4)); ## print ("DEBUG 4: old s", s); if (evalb(length(s) <> trunc(wordlength/4))) then ## print ("DEBUG 5: padding s", s); s := cat(substring("000000000000000000000000000000000000000000000000000000000000000000000000", 1 .. (trunc(wordlength/4) - length(s))), s); end if; ## print ("DEBUG 6: new s", s); return (s); end proc; ### Define a private kernel function, not intended for global use: flthex := proc(q, exponent_bias, N, hidden_bit, v) local biased_exponent, exponent_part, f, fhex, hidden_prefix, max_finite_biased_exponent, Nbits, p, prefix, s, sign_part, suffix, wordlength; description "Print an IEEE-754-like encoding of v, with a q-bit biased exponent field, and N-bit fraction."; ## print ("DEBUG", q, exponent_bias, N, v); if ((N <= 0) or (q <= 0)) then return end if; wordlength := N + q + 1; if (evalb((wordlength mod 8) > 0)) then wordlength := wordlength + 8 - (wordlength mod 8); # Ensure 8-bit byte multiple end if; Nbits := wordlength - (q + 1); # recompute N if (v = 0) then printf("%3d-bit: 0 (EXACT ZERO)\n", wordlength); else ## printf("DEBUG: nonzero\n"); max_finite_biased_exponent := eval(2^q - 2); p := floor(log[2](evalf(abs(v)))); f := eval(2^(-p)*abs(v) - 1); if (evalb(not hidden_bit)) then p := p + 1; f := (1 + f)/2; end if; biased_exponent := eval(p + exponent_bias); ## print ("DEBUG", biased_exponent, max_finite_biased_exponent, p, f); if (evalb(v < 0)) then sign_part := 2^(q + Nbits); s := -1; else sign_part := 0; s := 1; end if; prefix := " "; if (evalb(biased_exponent > max_finite_biased_exponent)) then ## overflow exponent_part := (max_finite_biased_exponent + 1) * 2^Nbits; if (v < 0) then suffix := sprintf("%s (-Infinity)", matula_string(Nbits,v)); else suffix := sprintf("%s (+Infinity)", matula_string(Nbits,v)); end if; fhex := tohex((sign_part + exponent_part + 0), wordlength); elif (evalb(biased_exponent < 0)) then ## underflow exponent_part := 0; suffix := sprintf("%s (UNDERFLOW)", matula_string(Nbits,v)); if (evalb(v < 0)) then prefix := "-"; else prefix := "+"; end if; fhex := tohex((sign_part + exponent_part + 0), wordlength); else ## normal number exponent_part := biased_exponent * 2^Nbits; suffix := sprintf("%s", matula_string(Nbits,v)); fhex := tohex((sign_part + exponent_part + round(f * 2^Nbits)), wordlength); end if; ## print ("DEBUG 2:", sign_part, exponent_part, round(f*2^Nbits), suffix); hidden_prefix := `if`(hidden_bit, " ", "(Intel x86)"); printf("%3d-bit%s: %s%-47s\t%s\n", wordlength, hidden_prefix, prefix, fhex, suffix); end if; end proc; ### Define public user interface functions flthex_32 := proc(v) flthex( 8, 127, 23, true, v) end proc; flthex_64 := proc(v) flthex(11, 1023, 52, true, v) end proc; flthex_80 := proc(v) flthex(15, 16383, 64, true, v) end proc; flthex_128 := proc(v) flthex(15, 16383, 112, true, v) end proc; flthex_160 := proc(v) flthex(15, 16383, 144, true, v) end proc; flthex_192 := proc(v) flthex(15, 16383, 176, true, v) end proc; flthex_80_intel_x86 := proc(v) flthex(15, 16383, 64, false, v) end proc; F := proc(v) flthex_32(v); flthex_64(v); flthex_80(v); flthex_80_intel_x86(v); flthex_128(v); flthex_160(v); flthex_192(v); end proc; G := proc(v) flthex_80(v); flthex_80_intel_x86(v); end proc;