Geant4 Cross Reference |
1 // 1 // 2 // ******************************************* 2 // ******************************************************************** 3 // * License and Disclaimer 3 // * License and Disclaimer * 4 // * 4 // * * 5 // * The Geant4 software is copyright of th 5 // * The Geant4 software is copyright of the Copyright Holders of * 6 // * the Geant4 Collaboration. It is provided 6 // * the Geant4 Collaboration. It is provided under the terms and * 7 // * conditions of the Geant4 Software License 7 // * conditions of the Geant4 Software License, included in the file * 8 // * LICENSE and available at http://cern.ch/ 8 // * LICENSE and available at http://cern.ch/geant4/license . These * 9 // * include a list of copyright holders. 9 // * include a list of copyright holders. * 10 // * 10 // * * 11 // * Neither the authors of this software syst 11 // * Neither the authors of this software system, nor their employing * 12 // * institutes,nor the agencies providing fin 12 // * institutes,nor the agencies providing financial support for this * 13 // * work make any representation or warran 13 // * work make any representation or warranty, express or implied, * 14 // * regarding this software system or assum 14 // * regarding this software system or assume any liability for its * 15 // * use. Please see the license in the file 15 // * use. Please see the license in the file LICENSE and URL above * 16 // * for the full disclaimer and the limitatio 16 // * for the full disclaimer and the limitation of liability. * 17 // * 17 // * * 18 // * This code implementation is the result 18 // * This code implementation is the result of the scientific and * 19 // * technical work of the GEANT4 collaboratio 19 // * technical work of the GEANT4 collaboration. * 20 // * By using, copying, modifying or distri 20 // * By using, copying, modifying or distributing the software (or * 21 // * any work based on the software) you ag 21 // * any work based on the software) you agree to acknowledge its * 22 // * use in resulting scientific publicati 22 // * use in resulting scientific publications, and indicate your * 23 // * acceptance of all terms of the Geant4 Sof 23 // * acceptance of all terms of the Geant4 Software license. * 24 // ******************************************* 24 // ******************************************************************** 25 // 25 // 26 // G4Exp 26 // G4Exp 27 // 27 // 28 // Class description: 28 // Class description: 29 // 29 // 30 // The basic idea is to exploit Pade polynomia 30 // The basic idea is to exploit Pade polynomials. 31 // A lot of ideas were inspired by the cephes 31 // A lot of ideas were inspired by the cephes math library 32 // (by Stephen L. Moshier moshier@na-net.ornl. 32 // (by Stephen L. Moshier moshier@na-net.ornl.gov) as well as actual code. 33 // The Cephes library can be found here: http 33 // The Cephes library can be found here: http://www.netlib.org/cephes/ 34 // Code and algorithms for G4Exp have been ext 34 // Code and algorithms for G4Exp have been extracted and adapted for Geant4 35 // from the original implementation in the VDT 35 // from the original implementation in the VDT mathematical library 36 // (https://svnweb.cern.ch/trac/vdt), version 36 // (https://svnweb.cern.ch/trac/vdt), version 0.3.7. 37 37 38 // Original implementation created on: Jun 23, 38 // Original implementation created on: Jun 23, 2012 39 // Authors: Danilo Piparo, Thomas Hauth, Vince 39 // Authors: Danilo Piparo, Thomas Hauth, Vincenzo Innocente 40 // 40 // 41 // ------------------------------------------- 41 // -------------------------------------------------------------------- 42 /* 42 /* 43 * VDT is free software: you can redistribute 43 * VDT is free software: you can redistribute it and/or modify 44 * it under the terms of the GNU Lesser Public 44 * it under the terms of the GNU Lesser Public License as published by 45 * the Free Software Foundation, either versio 45 * the Free Software Foundation, either version 3 of the License, or 46 * (at your option) any later version. 46 * (at your option) any later version. 47 * 47 * 48 * This program is distributed in the hope tha 48 * This program is distributed in the hope that it will be useful, 49 * but WITHOUT ANY WARRANTY; without even the 49 * but WITHOUT ANY WARRANTY; without even the implied warranty of 50 * MERCHANTABILITY or FITNESS FOR A PARTICULAR 50 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 51 * GNU Lesser Public License for more details. 51 * GNU Lesser Public License for more details. 52 * 52 * 53 * You should have received a copy of the GNU 53 * You should have received a copy of the GNU Lesser Public License 54 * along with this program. If not, see <http 54 * along with this program. If not, see <http://www.gnu.org/licenses/>. 55 */ 55 */ 56 // ------------------------------------------- 56 // -------------------------------------------------------------------- 57 #ifndef G4Exp_hh 57 #ifndef G4Exp_hh 58 #define G4Exp_hh 1 58 #define G4Exp_hh 1 59 59 60 #ifdef WIN32 60 #ifdef WIN32 61 61 62 # define G4Exp std::exp 62 # define G4Exp std::exp 63 63 64 #else 64 #else 65 65 66 # include "G4Types.hh" 66 # include "G4Types.hh" 67 << 68 # include <cstdint> << 69 # include <limits> 67 # include <limits> >> 68 # include <stdint.h> 70 69 71 namespace G4ExpConsts 70 namespace G4ExpConsts 72 { 71 { 73 const G4double EXP_LIMIT = 708; 72 const G4double EXP_LIMIT = 708; 74 73 75 const G4double PX1exp = 1.261771930748105908 74 const G4double PX1exp = 1.26177193074810590878E-4; 76 const G4double PX2exp = 3.029944077074419613 75 const G4double PX2exp = 3.02994407707441961300E-2; 77 const G4double PX3exp = 9.999999999999999999 76 const G4double PX3exp = 9.99999999999999999910E-1; 78 const G4double QX1exp = 3.001985051386644550 77 const G4double QX1exp = 3.00198505138664455042E-6; 79 const G4double QX2exp = 2.524483403496841041 78 const G4double QX2exp = 2.52448340349684104192E-3; 80 const G4double QX3exp = 2.272655482081550287 79 const G4double QX3exp = 2.27265548208155028766E-1; 81 const G4double QX4exp = 2.000000000000000000 80 const G4double QX4exp = 2.00000000000000000009E0; 82 81 83 const G4double LOG2E = 1.4426950408889634073 82 const G4double LOG2E = 1.4426950408889634073599; // 1/log(2) 84 83 85 const G4float MAXLOGF = 88.72283905206835f; 84 const G4float MAXLOGF = 88.72283905206835f; 86 const G4float MINLOGF = -88.f; 85 const G4float MINLOGF = -88.f; 87 86 88 const G4float C1F = 0.693359375f; 87 const G4float C1F = 0.693359375f; 89 const G4float C2F = -2.12194440e-4f; 88 const G4float C2F = -2.12194440e-4f; 90 89 91 const G4float PX1expf = 1.9875691500E-4f; 90 const G4float PX1expf = 1.9875691500E-4f; 92 const G4float PX2expf = 1.3981999507E-3f; 91 const G4float PX2expf = 1.3981999507E-3f; 93 const G4float PX3expf = 8.3334519073E-3f; 92 const G4float PX3expf = 8.3334519073E-3f; 94 const G4float PX4expf = 4.1665795894E-2f; 93 const G4float PX4expf = 4.1665795894E-2f; 95 const G4float PX5expf = 1.6666665459E-1f; 94 const G4float PX5expf = 1.6666665459E-1f; 96 const G4float PX6expf = 5.0000001201E-1f; 95 const G4float PX6expf = 5.0000001201E-1f; 97 96 98 const G4float LOG2EF = 1.44269504088896341f; 97 const G4float LOG2EF = 1.44269504088896341f; 99 98 100 //------------------------------------------ 99 //---------------------------------------------------------------------------- 101 // Used to switch between different type of 100 // Used to switch between different type of interpretations of the data 102 // (64 bits) 101 // (64 bits) 103 // 102 // 104 union ieee754 103 union ieee754 105 { 104 { 106 ieee754()= default; << 105 ieee754(){}; 107 ieee754(G4double thed) { d = thed; }; 106 ieee754(G4double thed) { d = thed; }; 108 ieee754(uint64_t thell) { ll = thell; }; 107 ieee754(uint64_t thell) { ll = thell; }; 109 ieee754(G4float thef) { f[0] = thef; }; 108 ieee754(G4float thef) { f[0] = thef; }; 110 ieee754(uint32_t thei) { i[0] = thei; }; 109 ieee754(uint32_t thei) { i[0] = thei; }; 111 G4double d; 110 G4double d; 112 G4float f[2]; 111 G4float f[2]; 113 uint32_t i[2]; 112 uint32_t i[2]; 114 uint64_t ll; 113 uint64_t ll; 115 uint16_t s[4]; 114 uint16_t s[4]; 116 }; 115 }; 117 116 118 //------------------------------------------ 117 //---------------------------------------------------------------------------- 119 // Converts an unsigned long long to a doubl 118 // Converts an unsigned long long to a double 120 // 119 // 121 inline G4double uint642dp(uint64_t ll) 120 inline G4double uint642dp(uint64_t ll) 122 { 121 { 123 ieee754 tmp; 122 ieee754 tmp; 124 tmp.ll = ll; 123 tmp.ll = ll; 125 return tmp.d; 124 return tmp.d; 126 } 125 } 127 126 128 //------------------------------------------ 127 //---------------------------------------------------------------------------- 129 // Converts an int to a float 128 // Converts an int to a float 130 // 129 // 131 inline G4float uint322sp(G4int x) 130 inline G4float uint322sp(G4int x) 132 { 131 { 133 ieee754 tmp; 132 ieee754 tmp; 134 tmp.i[0] = x; 133 tmp.i[0] = x; 135 return tmp.f[0]; 134 return tmp.f[0]; 136 } 135 } 137 136 138 //------------------------------------------ 137 //---------------------------------------------------------------------------- 139 // Converts a float to an int 138 // Converts a float to an int 140 // 139 // 141 inline uint32_t sp2uint32(G4float x) 140 inline uint32_t sp2uint32(G4float x) 142 { 141 { 143 ieee754 tmp; 142 ieee754 tmp; 144 tmp.f[0] = x; 143 tmp.f[0] = x; 145 return tmp.i[0]; 144 return tmp.i[0]; 146 } 145 } 147 146 148 //------------------------------------------ 147 //---------------------------------------------------------------------------- 149 /** 148 /** 150 * A vectorisable floor implementation, not 149 * A vectorisable floor implementation, not only triggered by fast-math. 151 * These functions do not distinguish betwee 150 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 152 * compliant for argument -0.0 151 * compliant for argument -0.0 153 **/ 152 **/ 154 inline G4double fpfloor(const G4double x) 153 inline G4double fpfloor(const G4double x) 155 { 154 { 156 // no problem since exp is defined between 155 // no problem since exp is defined between -708 and 708. Int is enough for 157 // it! 156 // it! 158 int32_t ret = int32_t(x); 157 int32_t ret = int32_t(x); 159 ret -= (sp2uint32(x) >> 31); 158 ret -= (sp2uint32(x) >> 31); 160 return ret; 159 return ret; 161 } 160 } 162 161 163 //------------------------------------------ 162 //---------------------------------------------------------------------------- 164 /** 163 /** 165 * A vectorisable floor implementation, not 164 * A vectorisable floor implementation, not only triggered by fast-math. 166 * These functions do not distinguish betwee 165 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 167 * compliant for argument -0.0 166 * compliant for argument -0.0 168 **/ 167 **/ 169 inline G4float fpfloor(const G4float x) 168 inline G4float fpfloor(const G4float x) 170 { 169 { 171 int32_t ret = int32_t(x); 170 int32_t ret = int32_t(x); 172 ret -= (sp2uint32(x) >> 31); 171 ret -= (sp2uint32(x) >> 31); 173 return ret; 172 return ret; 174 } 173 } 175 } // namespace G4ExpConsts 174 } // namespace G4ExpConsts 176 175 177 // Exp double precision ---------------------- 176 // Exp double precision -------------------------------------------------------- 178 177 179 /// Exponential Function double precision 178 /// Exponential Function double precision 180 inline G4double G4Exp(G4double initial_x) 179 inline G4double G4Exp(G4double initial_x) 181 { 180 { 182 G4double x = initial_x; 181 G4double x = initial_x; 183 G4double px = G4ExpConsts::fpfloor(G4ExpCons 182 G4double px = G4ExpConsts::fpfloor(G4ExpConsts::LOG2E * x + 0.5); 184 183 185 const int32_t n = int32_t(px); 184 const int32_t n = int32_t(px); 186 185 187 x -= px * 6.93145751953125E-1; 186 x -= px * 6.93145751953125E-1; 188 x -= px * 1.42860682030941723212E-6; 187 x -= px * 1.42860682030941723212E-6; 189 188 190 const G4double xx = x * x; 189 const G4double xx = x * x; 191 190 192 // px = x * P(x**2). 191 // px = x * P(x**2). 193 px = G4ExpConsts::PX1exp; 192 px = G4ExpConsts::PX1exp; 194 px *= xx; 193 px *= xx; 195 px += G4ExpConsts::PX2exp; 194 px += G4ExpConsts::PX2exp; 196 px *= xx; 195 px *= xx; 197 px += G4ExpConsts::PX3exp; 196 px += G4ExpConsts::PX3exp; 198 px *= x; 197 px *= x; 199 198 200 // Evaluate Q(x**2). 199 // Evaluate Q(x**2). 201 G4double qx = G4ExpConsts::QX1exp; 200 G4double qx = G4ExpConsts::QX1exp; 202 qx *= xx; 201 qx *= xx; 203 qx += G4ExpConsts::QX2exp; 202 qx += G4ExpConsts::QX2exp; 204 qx *= xx; 203 qx *= xx; 205 qx += G4ExpConsts::QX3exp; 204 qx += G4ExpConsts::QX3exp; 206 qx *= xx; 205 qx *= xx; 207 qx += G4ExpConsts::QX4exp; 206 qx += G4ExpConsts::QX4exp; 208 207 209 // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) 208 // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) ) 210 x = px / (qx - px); 209 x = px / (qx - px); 211 x = 1.0 + 2.0 * x; 210 x = 1.0 + 2.0 * x; 212 211 213 // Build 2^n in double. 212 // Build 2^n in double. 214 x *= G4ExpConsts::uint642dp((((uint64_t) n) 213 x *= G4ExpConsts::uint642dp((((uint64_t) n) + 1023) << 52); 215 214 216 if(initial_x > G4ExpConsts::EXP_LIMIT) 215 if(initial_x > G4ExpConsts::EXP_LIMIT) 217 x = std::numeric_limits<G4double>::infinit 216 x = std::numeric_limits<G4double>::infinity(); 218 if(initial_x < -G4ExpConsts::EXP_LIMIT) 217 if(initial_x < -G4ExpConsts::EXP_LIMIT) 219 x = 0.; 218 x = 0.; 220 219 221 return x; 220 return x; 222 } 221 } 223 222 224 // Exp single precision ---------------------- 223 // Exp single precision -------------------------------------------------------- 225 224 226 /// Exponential Function single precision 225 /// Exponential Function single precision 227 inline G4float G4Expf(G4float initial_x) 226 inline G4float G4Expf(G4float initial_x) 228 { 227 { 229 G4float x = initial_x; 228 G4float x = initial_x; 230 229 231 G4float z = 230 G4float z = 232 G4ExpConsts::fpfloor(G4ExpConsts::LOG2EF * 231 G4ExpConsts::fpfloor(G4ExpConsts::LOG2EF * x + 233 0.5f); /* std::floor( 232 0.5f); /* std::floor() truncates toward -infinity. */ 234 233 235 x -= z * G4ExpConsts::C1F; 234 x -= z * G4ExpConsts::C1F; 236 x -= z * G4ExpConsts::C2F; 235 x -= z * G4ExpConsts::C2F; 237 const int32_t n = int32_t(z); 236 const int32_t n = int32_t(z); 238 237 239 const G4float x2 = x * x; 238 const G4float x2 = x * x; 240 239 241 z = x * G4ExpConsts::PX1expf; 240 z = x * G4ExpConsts::PX1expf; 242 z += G4ExpConsts::PX2expf; 241 z += G4ExpConsts::PX2expf; 243 z *= x; 242 z *= x; 244 z += G4ExpConsts::PX3expf; 243 z += G4ExpConsts::PX3expf; 245 z *= x; 244 z *= x; 246 z += G4ExpConsts::PX4expf; 245 z += G4ExpConsts::PX4expf; 247 z *= x; 246 z *= x; 248 z += G4ExpConsts::PX5expf; 247 z += G4ExpConsts::PX5expf; 249 z *= x; 248 z *= x; 250 z += G4ExpConsts::PX6expf; 249 z += G4ExpConsts::PX6expf; 251 z *= x2; 250 z *= x2; 252 z += x + 1.0f; 251 z += x + 1.0f; 253 252 254 /* multiply by power of 2 */ 253 /* multiply by power of 2 */ 255 z *= G4ExpConsts::uint322sp((n + 0x7f) << 23 254 z *= G4ExpConsts::uint322sp((n + 0x7f) << 23); 256 255 257 if(initial_x > G4ExpConsts::MAXLOGF) 256 if(initial_x > G4ExpConsts::MAXLOGF) 258 z = std::numeric_limits<G4float>::infinity 257 z = std::numeric_limits<G4float>::infinity(); 259 if(initial_x < G4ExpConsts::MINLOGF) 258 if(initial_x < G4ExpConsts::MINLOGF) 260 z = 0.f; 259 z = 0.f; 261 260 262 return z; 261 return z; 263 } 262 } 264 263 265 //-------------------------------------------- 264 //------------------------------------------------------------------------------ 266 265 267 void expv(const uint32_t size, G4double const* 266 void expv(const uint32_t size, G4double const* __restrict__ iarray, 268 G4double* __restrict__ oarray); 267 G4double* __restrict__ oarray); 269 void G4Expv(const uint32_t size, G4double cons 268 void G4Expv(const uint32_t size, G4double const* __restrict__ iarray, 270 G4double* __restrict__ oarray); 269 G4double* __restrict__ oarray); 271 void expfv(const uint32_t size, G4float const* 270 void expfv(const uint32_t size, G4float const* __restrict__ iarray, 272 G4float* __restrict__ oarray); 271 G4float* __restrict__ oarray); 273 void G4Expfv(const uint32_t size, G4float cons 272 void G4Expfv(const uint32_t size, G4float const* __restrict__ iarray, 274 G4float* __restrict__ oarray); 273 G4float* __restrict__ oarray); 275 274 276 #endif /* WIN32 */ 275 #endif /* WIN32 */ 277 276 278 #endif 277 #endif 279 278