Geant4 Cross Reference |
1 // 1 // 2 // ******************************************* 2 // ******************************************************************** 3 // * License and Disclaimer 3 // * License and Disclaimer * 4 // * 4 // * * 5 // * The Geant4 software is copyright of th 5 // * The Geant4 software is copyright of the Copyright Holders of * 6 // * the Geant4 Collaboration. It is provided 6 // * the Geant4 Collaboration. It is provided under the terms and * 7 // * conditions of the Geant4 Software License 7 // * conditions of the Geant4 Software License, included in the file * 8 // * LICENSE and available at http://cern.ch/ 8 // * LICENSE and available at http://cern.ch/geant4/license . These * 9 // * include a list of copyright holders. 9 // * include a list of copyright holders. * 10 // * 10 // * * 11 // * Neither the authors of this software syst 11 // * Neither the authors of this software system, nor their employing * 12 // * institutes,nor the agencies providing fin 12 // * institutes,nor the agencies providing financial support for this * 13 // * work make any representation or warran 13 // * work make any representation or warranty, express or implied, * 14 // * regarding this software system or assum 14 // * regarding this software system or assume any liability for its * 15 // * use. Please see the license in the file 15 // * use. Please see the license in the file LICENSE and URL above * 16 // * for the full disclaimer and the limitatio 16 // * for the full disclaimer and the limitation of liability. * 17 // * 17 // * * 18 // * This code implementation is the result 18 // * This code implementation is the result of the scientific and * 19 // * technical work of the GEANT4 collaboratio 19 // * technical work of the GEANT4 collaboration. * 20 // * By using, copying, modifying or distri 20 // * By using, copying, modifying or distributing the software (or * 21 // * any work based on the software) you ag 21 // * any work based on the software) you agree to acknowledge its * 22 // * use in resulting scientific publicati 22 // * use in resulting scientific publications, and indicate your * 23 // * acceptance of all terms of the Geant4 Sof 23 // * acceptance of all terms of the Geant4 Software license. * 24 // ******************************************* 24 // ******************************************************************** 25 // 25 // 26 // G4Exp << 27 // 26 // 28 // Class description: << 27 // >> 28 // >> 29 // -------------------------------------------------------------------- >> 30 // >> 31 // Class Description: 29 // 32 // 30 // The basic idea is to exploit Pade polynomia 33 // The basic idea is to exploit Pade polynomials. 31 // A lot of ideas were inspired by the cephes 34 // A lot of ideas were inspired by the cephes math library 32 // (by Stephen L. Moshier moshier@na-net.ornl. << 35 // (by Stephen L. Moshier moshier@na-net.ornl.gov) as well as actual code. 33 // The Cephes library can be found here: http 36 // The Cephes library can be found here: http://www.netlib.org/cephes/ 34 // Code and algorithms for G4Exp have been ext 37 // Code and algorithms for G4Exp have been extracted and adapted for Geant4 35 // from the original implementation in the VDT 38 // from the original implementation in the VDT mathematical library 36 // (https://svnweb.cern.ch/trac/vdt), version 39 // (https://svnweb.cern.ch/trac/vdt), version 0.3.7. 37 40 38 // Original implementation created on: Jun 23, 41 // Original implementation created on: Jun 23, 2012 39 // Authors: Danilo Piparo, Thomas Hauth, Vince << 42 // Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente 40 // 43 // 41 // ------------------------------------------- 44 // -------------------------------------------------------------------- 42 /* << 45 /* 43 * VDT is free software: you can redistribute 46 * VDT is free software: you can redistribute it and/or modify 44 * it under the terms of the GNU Lesser Public 47 * it under the terms of the GNU Lesser Public License as published by 45 * the Free Software Foundation, either versio 48 * the Free Software Foundation, either version 3 of the License, or 46 * (at your option) any later version. 49 * (at your option) any later version. 47 * << 50 * 48 * This program is distributed in the hope tha 51 * This program is distributed in the hope that it will be useful, 49 * but WITHOUT ANY WARRANTY; without even the 52 * but WITHOUT ANY WARRANTY; without even the implied warranty of 50 * MERCHANTABILITY or FITNESS FOR A PARTICULAR 53 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 51 * GNU Lesser Public License for more details. 54 * GNU Lesser Public License for more details. 52 * << 55 * 53 * You should have received a copy of the GNU 56 * You should have received a copy of the GNU Lesser Public License 54 * along with this program. If not, see <http 57 * along with this program. If not, see <http://www.gnu.org/licenses/>. 55 */ 58 */ 56 // ------------------------------------------- 59 // -------------------------------------------------------------------- 57 #ifndef G4Exp_hh << 60 #ifndef G4Exp_h 58 #define G4Exp_hh 1 << 61 #define G4Exp_h 1 59 62 60 #ifdef WIN32 63 #ifdef WIN32 61 64 62 # define G4Exp std::exp << 65 #define G4Exp std::exp 63 66 64 #else 67 #else 65 68 66 # include "G4Types.hh" << 69 #include <limits> 67 << 70 #include <stdint.h> 68 # include <cstdint> << 71 #include "G4Types.hh" 69 # include <limits> << 70 72 71 namespace G4ExpConsts 73 namespace G4ExpConsts 72 { << 74 { 73 const G4double EXP_LIMIT = 708; 75 const G4double EXP_LIMIT = 708; 74 76 75 const G4double PX1exp = 1.261771930748105908 77 const G4double PX1exp = 1.26177193074810590878E-4; 76 const G4double PX2exp = 3.029944077074419613 78 const G4double PX2exp = 3.02994407707441961300E-2; 77 const G4double PX3exp = 9.999999999999999999 79 const G4double PX3exp = 9.99999999999999999910E-1; 78 const G4double QX1exp = 3.001985051386644550 80 const G4double QX1exp = 3.00198505138664455042E-6; 79 const G4double QX2exp = 2.524483403496841041 81 const G4double QX2exp = 2.52448340349684104192E-3; 80 const G4double QX3exp = 2.272655482081550287 82 const G4double QX3exp = 2.27265548208155028766E-1; 81 const G4double QX4exp = 2.000000000000000000 83 const G4double QX4exp = 2.00000000000000000009E0; 82 84 83 const G4double LOG2E = 1.4426950408889634073 << 85 const G4double LOG2E = 1.4426950408889634073599; // 1/log(2) 84 86 85 const G4float MAXLOGF = 88.72283905206835f; 87 const G4float MAXLOGF = 88.72283905206835f; 86 const G4float MINLOGF = -88.f; 88 const G4float MINLOGF = -88.f; 87 89 88 const G4float C1F = 0.693359375f; << 90 const G4float C1F = 0.693359375f; 89 const G4float C2F = -2.12194440e-4f; << 91 const G4float C2F = -2.12194440e-4f; 90 92 91 const G4float PX1expf = 1.9875691500E-4f; 93 const G4float PX1expf = 1.9875691500E-4f; 92 const G4float PX2expf = 1.3981999507E-3f; << 94 const G4float PX2expf =1.3981999507E-3f; 93 const G4float PX3expf = 8.3334519073E-3f; << 95 const G4float PX3expf =8.3334519073E-3f; 94 const G4float PX4expf = 4.1665795894E-2f; << 96 const G4float PX4expf =4.1665795894E-2f; 95 const G4float PX5expf = 1.6666665459E-1f; << 97 const G4float PX5expf =1.6666665459E-1f; 96 const G4float PX6expf = 5.0000001201E-1f; << 98 const G4float PX6expf =5.0000001201E-1f; 97 99 98 const G4float LOG2EF = 1.44269504088896341f; 100 const G4float LOG2EF = 1.44269504088896341f; 99 101 100 //------------------------------------------ 102 //---------------------------------------------------------------------------- 101 // Used to switch between different type of 103 // Used to switch between different type of interpretations of the data 102 // (64 bits) 104 // (64 bits) 103 // 105 // 104 union ieee754 106 union ieee754 105 { 107 { 106 ieee754()= default; << 108 ieee754 () {}; 107 ieee754(G4double thed) { d = thed; }; << 109 ieee754 (G4double thed) {d=thed;}; 108 ieee754(uint64_t thell) { ll = thell; }; << 110 ieee754 (uint64_t thell) {ll=thell;}; 109 ieee754(G4float thef) { f[0] = thef; }; << 111 ieee754 (G4float thef) {f[0]=thef;}; 110 ieee754(uint32_t thei) { i[0] = thei; }; << 112 ieee754 (uint32_t thei) {i[0]=thei;}; 111 G4double d; 113 G4double d; 112 G4float f[2]; 114 G4float f[2]; 113 uint32_t i[2]; 115 uint32_t i[2]; 114 uint64_t ll; 116 uint64_t ll; 115 uint16_t s[4]; 117 uint16_t s[4]; 116 }; 118 }; 117 119 118 //------------------------------------------ 120 //---------------------------------------------------------------------------- 119 // Converts an unsigned long long to a doubl 121 // Converts an unsigned long long to a double 120 // 122 // 121 inline G4double uint642dp(uint64_t ll) 123 inline G4double uint642dp(uint64_t ll) 122 { 124 { 123 ieee754 tmp; 125 ieee754 tmp; 124 tmp.ll = ll; << 126 tmp.ll=ll; 125 return tmp.d; 127 return tmp.d; 126 } 128 } 127 129 128 //------------------------------------------ 130 //---------------------------------------------------------------------------- 129 // Converts an int to a float 131 // Converts an int to a float 130 // 132 // 131 inline G4float uint322sp(G4int x) 133 inline G4float uint322sp(G4int x) 132 { 134 { 133 ieee754 tmp; 135 ieee754 tmp; 134 tmp.i[0] = x; << 136 tmp.i[0]=x; 135 return tmp.f[0]; 137 return tmp.f[0]; 136 } 138 } 137 139 138 //------------------------------------------ 140 //---------------------------------------------------------------------------- 139 // Converts a float to an int 141 // Converts a float to an int 140 // 142 // 141 inline uint32_t sp2uint32(G4float x) 143 inline uint32_t sp2uint32(G4float x) 142 { 144 { 143 ieee754 tmp; 145 ieee754 tmp; 144 tmp.f[0] = x; << 146 tmp.f[0]=x; 145 return tmp.i[0]; 147 return tmp.i[0]; 146 } 148 } 147 149 148 //------------------------------------------ 150 //---------------------------------------------------------------------------- 149 /** 151 /** 150 * A vectorisable floor implementation, not 152 * A vectorisable floor implementation, not only triggered by fast-math. 151 * These functions do not distinguish betwee << 153 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 152 * compliant for argument -0.0 154 * compliant for argument -0.0 153 **/ << 155 **/ 154 inline G4double fpfloor(const G4double x) 156 inline G4double fpfloor(const G4double x) 155 { 157 { 156 // no problem since exp is defined between << 158 // no problem since exp is defined between -708 and 708. Int is enough for it! 157 // it! << 159 int32_t ret = int32_t (x); 158 int32_t ret = int32_t(x); << 160 ret-=(sp2uint32(x)>>31); 159 ret -= (sp2uint32(x) >> 31); << 160 return ret; 161 return ret; 161 } 162 } 162 163 163 //------------------------------------------ 164 //---------------------------------------------------------------------------- 164 /** 165 /** 165 * A vectorisable floor implementation, not 166 * A vectorisable floor implementation, not only triggered by fast-math. 166 * These functions do not distinguish betwee << 167 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 167 * compliant for argument -0.0 168 * compliant for argument -0.0 168 **/ << 169 **/ 169 inline G4float fpfloor(const G4float x) 170 inline G4float fpfloor(const G4float x) 170 { 171 { 171 int32_t ret = int32_t(x); << 172 int32_t ret = int32_t (x); 172 ret -= (sp2uint32(x) >> 31); << 173 ret-=(sp2uint32(x)>>31); 173 return ret; 174 return ret; 174 } 175 } 175 } // namespace G4ExpConsts << 176 } 176 177 177 // Exp double precision ---------------------- 178 // Exp double precision -------------------------------------------------------- 178 179 >> 180 179 /// Exponential Function double precision 181 /// Exponential Function double precision 180 inline G4double G4Exp(G4double initial_x) 182 inline G4double G4Exp(G4double initial_x) 181 { 183 { 182 G4double x = initial_x; << 184 G4double x = initial_x; 183 G4double px = G4ExpConsts::fpfloor(G4ExpCons << 185 G4double px=G4ExpConsts::fpfloor(G4ExpConsts::LOG2E * x +0.5); >> 186 >> 187 const int32_t n = int32_t(px); >> 188 >> 189 x -= px * 6.93145751953125E-1; >> 190 x -= px * 1.42860682030941723212E-6; >> 191 >> 192 const G4double xx = x * x; >> 193 >> 194 // px = x * P(x**2). >> 195 px = G4ExpConsts::PX1exp; >> 196 px *= xx; >> 197 px += G4ExpConsts::PX2exp; >> 198 px *= xx; >> 199 px += G4ExpConsts::PX3exp; >> 200 px *= x; >> 201 >> 202 // Evaluate Q(x**2). >> 203 G4double qx = G4ExpConsts::QX1exp; >> 204 qx *= xx; >> 205 qx += G4ExpConsts::QX2exp; >> 206 qx *= xx; >> 207 qx += G4ExpConsts::QX3exp; >> 208 qx *= xx; >> 209 qx += G4ExpConsts::QX4exp; >> 210 >> 211 // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) ) >> 212 x = px / (qx - px); >> 213 x = 1.0 + 2.0 * x; >> 214 >> 215 // Build 2^n in double. >> 216 x *= G4ExpConsts::uint642dp(( ((uint64_t)n) +1023)<<52); >> 217 >> 218 if (initial_x > G4ExpConsts::EXP_LIMIT) >> 219 x = std::numeric_limits<G4double>::infinity(); >> 220 if (initial_x < -G4ExpConsts::EXP_LIMIT) >> 221 x = 0.; 184 222 185 const int32_t n = int32_t(px); << 223 return x; 186 << 187 x -= px * 6.93145751953125E-1; << 188 x -= px * 1.42860682030941723212E-6; << 189 << 190 const G4double xx = x * x; << 191 << 192 // px = x * P(x**2). << 193 px = G4ExpConsts::PX1exp; << 194 px *= xx; << 195 px += G4ExpConsts::PX2exp; << 196 px *= xx; << 197 px += G4ExpConsts::PX3exp; << 198 px *= x; << 199 << 200 // Evaluate Q(x**2). << 201 G4double qx = G4ExpConsts::QX1exp; << 202 qx *= xx; << 203 qx += G4ExpConsts::QX2exp; << 204 qx *= xx; << 205 qx += G4ExpConsts::QX3exp; << 206 qx *= xx; << 207 qx += G4ExpConsts::QX4exp; << 208 << 209 // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) << 210 x = px / (qx - px); << 211 x = 1.0 + 2.0 * x; << 212 << 213 // Build 2^n in double. << 214 x *= G4ExpConsts::uint642dp((((uint64_t) n) << 215 << 216 if(initial_x > G4ExpConsts::EXP_LIMIT) << 217 x = std::numeric_limits<G4double>::infinit << 218 if(initial_x < -G4ExpConsts::EXP_LIMIT) << 219 x = 0.; << 220 << 221 return x; << 222 } 224 } 223 225 224 // Exp single precision ---------------------- 226 // Exp single precision -------------------------------------------------------- 225 227 226 /// Exponential Function single precision 228 /// Exponential Function single precision 227 inline G4float G4Expf(G4float initial_x) 229 inline G4float G4Expf(G4float initial_x) 228 { 230 { 229 G4float x = initial_x; << 231 G4float x = initial_x; >> 232 >> 233 G4float z = G4ExpConsts::fpfloor( G4ExpConsts::LOG2EF * x +0.5f ); /* std::floor() truncates toward -infinity. */ >> 234 >> 235 x -= z * G4ExpConsts::C1F; >> 236 x -= z * G4ExpConsts::C2F; >> 237 const int32_t n = int32_t ( z ); >> 238 >> 239 const G4float x2 = x * x; >> 240 >> 241 z = x*G4ExpConsts::PX1expf; >> 242 z += G4ExpConsts::PX2expf; >> 243 z *= x; >> 244 z += G4ExpConsts::PX3expf; >> 245 z *= x; >> 246 z += G4ExpConsts::PX4expf; >> 247 z *= x; >> 248 z += G4ExpConsts::PX5expf; >> 249 z *= x; >> 250 z += G4ExpConsts::PX6expf; >> 251 z *= x2; >> 252 z += x + 1.0f; >> 253 >> 254 /* multiply by power of 2 */ >> 255 z *= G4ExpConsts::uint322sp((n+0x7f)<<23); 230 256 231 G4float z = << 257 if (initial_x > G4ExpConsts::MAXLOGF) z=std::numeric_limits<G4float>::infinity(); 232 G4ExpConsts::fpfloor(G4ExpConsts::LOG2EF * << 258 if (initial_x < G4ExpConsts::MINLOGF) z=0.f; 233 0.5f); /* std::floor( << 234 << 235 x -= z * G4ExpConsts::C1F; << 236 x -= z * G4ExpConsts::C2F; << 237 const int32_t n = int32_t(z); << 238 << 239 const G4float x2 = x * x; << 240 << 241 z = x * G4ExpConsts::PX1expf; << 242 z += G4ExpConsts::PX2expf; << 243 z *= x; << 244 z += G4ExpConsts::PX3expf; << 245 z *= x; << 246 z += G4ExpConsts::PX4expf; << 247 z *= x; << 248 z += G4ExpConsts::PX5expf; << 249 z *= x; << 250 z += G4ExpConsts::PX6expf; << 251 z *= x2; << 252 z += x + 1.0f; << 253 << 254 /* multiply by power of 2 */ << 255 z *= G4ExpConsts::uint322sp((n + 0x7f) << 23 << 256 << 257 if(initial_x > G4ExpConsts::MAXLOGF) << 258 z = std::numeric_limits<G4float>::infinity << 259 if(initial_x < G4ExpConsts::MINLOGF) << 260 z = 0.f; << 261 259 262 return z; << 260 return z; 263 } 261 } 264 262 265 //-------------------------------------------- 263 //------------------------------------------------------------------------------ 266 264 267 void expv(const uint32_t size, G4double const* << 265 void expv(const uint32_t size, G4double const * __restrict__ iarray, G4double* __restrict__ oarray); 268 G4double* __restrict__ oarray); << 266 void G4Expv(const uint32_t size, G4double const * __restrict__ iarray, G4double* __restrict__ oarray); 269 void G4Expv(const uint32_t size, G4double cons << 267 void expfv(const uint32_t size, G4float const * __restrict__ iarray, G4float* __restrict__ oarray); 270 G4double* __restrict__ oarray); << 268 void G4Expfv(const uint32_t size, G4float const * __restrict__ iarray, G4float* __restrict__ oarray); 271 void expfv(const uint32_t size, G4float const* << 272 G4float* __restrict__ oarray); << 273 void G4Expfv(const uint32_t size, G4float cons << 274 G4float* __restrict__ oarray); << 275 269 276 #endif /* WIN32 */ 270 #endif /* WIN32 */ 277 271 278 #endif 272 #endif 279 273