IEEEFloat == Base.IEEEFloat == Union{Float16, Float32, Float64}
# Number of bits occupied by a value of IEEE float type T:
# sizeof(T) is the size in bytes, so multiply by 8.
@inline bitwidth(::Type{T}) where T<:IEEEFloat = sizeof(T) * 8

# precision, significand_bits and exponent_bits are unchanged:
# each one forwards to the function of the same name reached through Base.Math.
precision(::Type{T}) where T<:IEEEFloat = Base.Math.precision(T)
significand_bits(::Type{T}) where T<:IEEEFloat = Base.Math.significand_bits(T)
exponent_bits(::Type{T}) where T<:IEEEFloat = Base.Math.exponent_bits(T)
# exponent_max (Emax in the standard) is an IEEE754-2008 standard term.
# The standard tabulates its values (see Table 3.2 on page 8).
# Julia had defined it in a nonstandard manner. This is conformant:
exponentmax(Float64) == 1023
exponentmax(Float32) == 127
exponentmax(Float16) == 15

# exponent_min (Emin) is another standard term;
# it is fully determined by exponent_max:
# exponent_min(T) = 1 - exponent_max(T)
exponentmin(Float64) == -1022
exponentmin(Float32) == -126
exponentmin(Float16) == -14

# exponent_bias (bias) is defined equal to exponent_max
exponentbias(Float64) == 1023
exponentbias(Float32) == 127
exponentbias(Float16) == 15

# The value Julia has called exponent_max does not appear
# in the standard (nor do its values). It is a useful quantity
# (Emax + 1) that we rename `exponent_field_max`.
exponentfieldmax(Float64) == 1024
exponentfieldmax(Float32) == 128
exponentfieldmax(Float16) == 16
all values
precision(Float16) = 11
precision(Float32) = 24
precision(Float64) = 53

significand_bits(Float16) = 10
significand_bits(Float32) = 23
significand_bits(Float64) = 52

exponent_bits(Float16) = 5
exponent_bits(Float32) = 8
exponent_bits(Float64) = 11

exponentmax(Float16) = 15
exponentmax(Float32) = 127
exponentmax(Float64) = 1023

exponentmin(Float16) = -14
exponentmin(Float32) = -126
exponentmin(Float64) = -1022

exponentbias(Float16) = 15
exponentbias(Float32) = 127
exponentbias(Float64) = 1023

exponentfieldmax(Float16) = 16
exponentfieldmax(Float32) = 128
exponentfieldmax(Float64) = 1024

# intfloatmax(FloatNN) is the most positive IntNN I where FloatNN(I-1) is representable
# intfloatmin(FloatNN) is the most negative IntNN I where FloatNN(I+1) is representable
intfloatmax(Float16) = Int16(2048)
intfloatmax(Float32) = Int32(16777216)
intfloatmax(Float64) = Int64(9007199254740992)

intfloatmin(Float16) = Int16(-2048)
intfloatmin(Float32) = Int32(-16777216)
intfloatmin(Float64) = Int64(-9007199254740992)
# floatintmax(FloatNN) is FloatNN(intfloat_max(FloatNN))
# floatintmin(FloatNN) is FloatNN(intfloat_min(FloatNN))
floatintmax(Float16) = Float16(2048)
floatintmax(Float32) = Float32(16777216)
floatintmax(Float64) = Float64(9007199254740992)

floatintmin(Float16) = Float16(-2048)
floatintmin(Float32) = Float32(-16777216)
floatintmin(Float64) = Float64(-9007199254740992)