Geant4 Cross Reference |
1 // 2 // ******************************************************************** 3 // * License and Disclaimer * 4 // * * 5 // * The Geant4 software is copyright of the Copyright Holders of * 6 // * the Geant4 Collaboration. It is provided under the terms and * 7 // * conditions of the Geant4 Software License, included in the file * 8 // * LICENSE and available at http://cern.ch/geant4/license . These * 9 // * include a list of copyright holders. * 10 // * * 11 // * Neither the authors of this software system, nor their employing * 12 // * institutes,nor the agencies providing financial support for this * 13 // * work make any representation or warranty, express or implied, * 14 // * regarding this software system or assume any liability for its * 15 // * use. Please see the license in the file LICENSE and URL above * 16 // * for the full disclaimer and the limitation of liability. * 17 // * * 18 // * This code implementation is the result of the scientific and * 19 // * technical work of the GEANT4 collaboration. * 20 // * By using, copying, modifying or distributing the software (or * 21 // * any work based on the software) you agree to acknowledge its * 22 // * use in resulting scientific publications, and indicate your * 23 // * acceptance of all terms of the Geant4 Software license. * 24 // ******************************************************************** 25 // 26 // G4Exp 27 // 28 // Class description: 29 // 30 // The basic idea is to exploit Pade polynomials. 31 // A lot of ideas were inspired by the cephes math library 32 // (by Stephen L. Moshier moshier@na-net.ornl.gov) as well as actual code. 33 // The Cephes library can be found here: http://www.netlib.org/cephes/ 34 // Code and algorithms for G4Exp have been extracted and adapted for Geant4 35 // from the original implementation in the VDT mathematical library 36 // (https://svnweb.cern.ch/trac/vdt), version 0.3.7. 37 38 // Original implementation created on: Jun 23, 2012 39 // Authors: Danilo Piparo, Thomas Hauth, Vincenzo Innocente 40 // 41 // -------------------------------------------------------------------- 42 /* 43 * VDT is free software: you can redistribute it and/or modify 44 * it under the terms of the GNU Lesser Public License as published by 45 * the Free Software Foundation, either version 3 of the License, or 46 * (at your option) any later version. 47 * 48 * This program is distributed in the hope that it will be useful, 49 * but WITHOUT ANY WARRANTY; without even the implied warranty of 50 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 51 * GNU Lesser Public License for more details. 52 * 53 * You should have received a copy of the GNU Lesser Public License 54 * along with this program. If not, see <http://www.gnu.org/licenses/>. 55 */ 56 // -------------------------------------------------------------------- 57 #ifndef G4Exp_hh 58 #define G4Exp_hh 1 59 60 #ifdef WIN32 61 62 # define G4Exp std::exp 63 64 #else 65 66 # include "G4Types.hh" 67 68 # include <cstdint> 69 # include <limits> 70 71 namespace G4ExpConsts 72 { 73 const G4double EXP_LIMIT = 708; 74 75 const G4double PX1exp = 1.26177193074810590878E-4; 76 const G4double PX2exp = 3.02994407707441961300E-2; 77 const G4double PX3exp = 9.99999999999999999910E-1; 78 const G4double QX1exp = 3.00198505138664455042E-6; 79 const G4double QX2exp = 2.52448340349684104192E-3; 80 const G4double QX3exp = 2.27265548208155028766E-1; 81 const G4double QX4exp = 2.00000000000000000009E0; 82 83 const G4double LOG2E = 1.4426950408889634073599; // 1/log(2) 84 85 const G4float MAXLOGF = 88.72283905206835f; 86 const G4float MINLOGF = -88.f; 87 88 const G4float C1F = 0.693359375f; 89 const G4float C2F = -2.12194440e-4f; 90 91 const G4float PX1expf = 1.9875691500E-4f; 92 const G4float PX2expf = 1.3981999507E-3f; 93 const G4float PX3expf = 8.3334519073E-3f; 94 const G4float PX4expf = 4.1665795894E-2f; 95 const G4float PX5expf = 1.6666665459E-1f; 96 const G4float PX6expf = 5.0000001201E-1f; 97 98 const G4float LOG2EF = 1.44269504088896341f; 99 100 //---------------------------------------------------------------------------- 101 // Used to switch between different type of interpretations of the data 102 // (64 bits) 103 // 104 union ieee754 105 { 106 ieee754()= default; 107 ieee754(G4double thed) { d = thed; }; 108 ieee754(uint64_t thell) { ll = thell; }; 109 ieee754(G4float thef) { f[0] = thef; }; 110 ieee754(uint32_t thei) { i[0] = thei; }; 111 G4double d; 112 G4float f[2]; 113 uint32_t i[2]; 114 uint64_t ll; 115 uint16_t s[4]; 116 }; 117 118 //---------------------------------------------------------------------------- 119 // Converts an unsigned long long to a double 120 // 121 inline G4double uint642dp(uint64_t ll) 122 { 123 ieee754 tmp; 124 tmp.ll = ll; 125 return tmp.d; 126 } 127 128 //---------------------------------------------------------------------------- 129 // Converts an int to a float 130 // 131 inline G4float uint322sp(G4int x) 132 { 133 ieee754 tmp; 134 tmp.i[0] = x; 135 return tmp.f[0]; 136 } 137 138 //---------------------------------------------------------------------------- 139 // Converts a float to an int 140 // 141 inline uint32_t sp2uint32(G4float x) 142 { 143 ieee754 tmp; 144 tmp.f[0] = x; 145 return tmp.i[0]; 146 } 147 148 //---------------------------------------------------------------------------- 149 /** 150 * A vectorisable floor implementation, not only triggered by fast-math. 151 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 152 * compliant for argument -0.0 153 **/ 154 inline G4double fpfloor(const G4double x) 155 { 156 // no problem since exp is defined between -708 and 708. Int is enough for 157 // it! 158 int32_t ret = int32_t(x); 159 ret -= (sp2uint32(x) >> 31); 160 return ret; 161 } 162 163 //---------------------------------------------------------------------------- 164 /** 165 * A vectorisable floor implementation, not only triggered by fast-math. 166 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 167 * compliant for argument -0.0 168 **/ 169 inline G4float fpfloor(const G4float x) 170 { 171 int32_t ret = int32_t(x); 172 ret -= (sp2uint32(x) >> 31); 173 return ret; 174 } 175 } // namespace G4ExpConsts 176 177 // Exp double precision -------------------------------------------------------- 178 179 /// Exponential Function double precision 180 inline G4double G4Exp(G4double initial_x) 181 { 182 G4double x = initial_x; 183 G4double px = G4ExpConsts::fpfloor(G4ExpConsts::LOG2E * x + 0.5); 184 185 const int32_t n = int32_t(px); 186 187 x -= px * 6.93145751953125E-1; 188 x -= px * 1.42860682030941723212E-6; 189 190 const G4double xx = x * x; 191 192 // px = x * P(x**2). 193 px = G4ExpConsts::PX1exp; 194 px *= xx; 195 px += G4ExpConsts::PX2exp; 196 px *= xx; 197 px += G4ExpConsts::PX3exp; 198 px *= x; 199 200 // Evaluate Q(x**2). 201 G4double qx = G4ExpConsts::QX1exp; 202 qx *= xx; 203 qx += G4ExpConsts::QX2exp; 204 qx *= xx; 205 qx += G4ExpConsts::QX3exp; 206 qx *= xx; 207 qx += G4ExpConsts::QX4exp; 208 209 // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) ) 210 x = px / (qx - px); 211 x = 1.0 + 2.0 * x; 212 213 // Build 2^n in double. 214 x *= G4ExpConsts::uint642dp((((uint64_t) n) + 1023) << 52); 215 216 if(initial_x > G4ExpConsts::EXP_LIMIT) 217 x = std::numeric_limits<G4double>::infinity(); 218 if(initial_x < -G4ExpConsts::EXP_LIMIT) 219 x = 0.; 220 221 return x; 222 } 223 224 // Exp single precision -------------------------------------------------------- 225 226 /// Exponential Function single precision 227 inline G4float G4Expf(G4float initial_x) 228 { 229 G4float x = initial_x; 230 231 G4float z = 232 G4ExpConsts::fpfloor(G4ExpConsts::LOG2EF * x + 233 0.5f); /* std::floor() truncates toward -infinity. */ 234 235 x -= z * G4ExpConsts::C1F; 236 x -= z * G4ExpConsts::C2F; 237 const int32_t n = int32_t(z); 238 239 const G4float x2 = x * x; 240 241 z = x * G4ExpConsts::PX1expf; 242 z += G4ExpConsts::PX2expf; 243 z *= x; 244 z += G4ExpConsts::PX3expf; 245 z *= x; 246 z += G4ExpConsts::PX4expf; 247 z *= x; 248 z += G4ExpConsts::PX5expf; 249 z *= x; 250 z += G4ExpConsts::PX6expf; 251 z *= x2; 252 z += x + 1.0f; 253 254 /* multiply by power of 2 */ 255 z *= G4ExpConsts::uint322sp((n + 0x7f) << 23); 256 257 if(initial_x > G4ExpConsts::MAXLOGF) 258 z = std::numeric_limits<G4float>::infinity(); 259 if(initial_x < G4ExpConsts::MINLOGF) 260 z = 0.f; 261 262 return z; 263 } 264 265 //------------------------------------------------------------------------------ 266 267 void expv(const uint32_t size, G4double const* __restrict__ iarray, 268 G4double* __restrict__ oarray); 269 void G4Expv(const uint32_t size, G4double const* __restrict__ iarray, 270 G4double* __restrict__ oarray); 271 void expfv(const uint32_t size, G4float const* __restrict__ iarray, 272 G4float* __restrict__ oarray); 273 void G4Expfv(const uint32_t size, G4float const* __restrict__ iarray, 274 G4float* __restrict__ oarray); 275 276 #endif /* WIN32 */ 277 278 #endif 279