Geant4 Cross Reference |
1 // 1 // 2 // ******************************************* 2 // ******************************************************************** 3 // * License and Disclaimer 3 // * License and Disclaimer * 4 // * 4 // * * 5 // * The Geant4 software is copyright of th 5 // * The Geant4 software is copyright of the Copyright Holders of * 6 // * the Geant4 Collaboration. It is provided 6 // * the Geant4 Collaboration. It is provided under the terms and * 7 // * conditions of the Geant4 Software License 7 // * conditions of the Geant4 Software License, included in the file * 8 // * LICENSE and available at http://cern.ch/ 8 // * LICENSE and available at http://cern.ch/geant4/license . These * 9 // * include a list of copyright holders. 9 // * include a list of copyright holders. * 10 // * 10 // * * 11 // * Neither the authors of this software syst 11 // * Neither the authors of this software system, nor their employing * 12 // * institutes,nor the agencies providing fin 12 // * institutes,nor the agencies providing financial support for this * 13 // * work make any representation or warran 13 // * work make any representation or warranty, express or implied, * 14 // * regarding this software system or assum 14 // * regarding this software system or assume any liability for its * 15 // * use. Please see the license in the file 15 // * use. Please see the license in the file LICENSE and URL above * 16 // * for the full disclaimer and the limitatio 16 // * for the full disclaimer and the limitation of liability. * 17 // * 17 // * * 18 // * This code implementation is the result 18 // * This code implementation is the result of the scientific and * 19 // * technical work of the GEANT4 collaboratio 19 // * technical work of the GEANT4 collaboration. * 20 // * By using, copying, modifying or distri 20 // * By using, copying, modifying or distributing the software (or * 21 // * any work based on the software) you ag 21 // * any work based on the software) you agree to acknowledge its * 22 // * use in resulting scientific publicati 22 // * use in resulting scientific publications, and indicate your * 23 // * acceptance of all terms of the Geant4 Sof 23 // * acceptance of all terms of the Geant4 Software license. * 24 // ******************************************* 24 // ******************************************************************** 25 // 25 // 26 // G4Exp << 27 // 26 // 28 // Class description: << 27 // $Id:$ >> 28 // >> 29 // >> 30 // -------------------------------------------------------------------- >> 31 // >> 32 // Class Description: 29 // 33 // 30 // The basic idea is to exploit Pade polynomia 34 // The basic idea is to exploit Pade polynomials. 31 // A lot of ideas were inspired by the cephes 35 // A lot of ideas were inspired by the cephes math library 32 // (by Stephen L. Moshier moshier@na-net.ornl. << 36 // (by Stephen L. Moshier moshier@na-net.ornl.gov) as well as actual code. 33 // The Cephes library can be found here: http 37 // The Cephes library can be found here: http://www.netlib.org/cephes/ 34 // Code and algorithms for G4Exp have been ext 38 // Code and algorithms for G4Exp have been extracted and adapted for Geant4 35 // from the original implementation in the VDT 39 // from the original implementation in the VDT mathematical library 36 // (https://svnweb.cern.ch/trac/vdt), version 40 // (https://svnweb.cern.ch/trac/vdt), version 0.3.7. 37 41 38 // Original implementation created on: Jun 23, 42 // Original implementation created on: Jun 23, 2012 39 // Authors: Danilo Piparo, Thomas Hauth, Vince << 43 // Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente 40 // 44 // 41 // ------------------------------------------- 45 // -------------------------------------------------------------------- 42 /* << 46 /* 43 * VDT is free software: you can redistribute 47 * VDT is free software: you can redistribute it and/or modify 44 * it under the terms of the GNU Lesser Public 48 * it under the terms of the GNU Lesser Public License as published by 45 * the Free Software Foundation, either versio 49 * the Free Software Foundation, either version 3 of the License, or 46 * (at your option) any later version. 50 * (at your option) any later version. 47 * << 51 * 48 * This program is distributed in the hope tha 52 * This program is distributed in the hope that it will be useful, 49 * but WITHOUT ANY WARRANTY; without even the 53 * but WITHOUT ANY WARRANTY; without even the implied warranty of 50 * MERCHANTABILITY or FITNESS FOR A PARTICULAR 54 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 51 * GNU Lesser Public License for more details. 55 * GNU Lesser Public License for more details. 52 * << 56 * 53 * You should have received a copy of the GNU 57 * You should have received a copy of the GNU Lesser Public License 54 * along with this program. If not, see <http 58 * along with this program. If not, see <http://www.gnu.org/licenses/>. 55 */ 59 */ 56 // ------------------------------------------- 60 // -------------------------------------------------------------------- 57 #ifndef G4Exp_hh << 61 #ifndef G4Exp_h 58 #define G4Exp_hh 1 << 62 #define G4Exp_h 1 59 63 60 #ifdef WIN32 64 #ifdef WIN32 61 65 62 # define G4Exp std::exp << 66 #define G4Exp std::exp 63 67 64 #else 68 #else 65 69 66 # include "G4Types.hh" << 70 #include <limits> 67 << 71 #include <stdint.h> 68 # include <cstdint> << 72 #include "G4Types.hh" 69 # include <limits> << 70 73 71 namespace G4ExpConsts 74 namespace G4ExpConsts 72 { << 75 { 73 const G4double EXP_LIMIT = 708; 76 const G4double EXP_LIMIT = 708; 74 77 75 const G4double PX1exp = 1.261771930748105908 78 const G4double PX1exp = 1.26177193074810590878E-4; 76 const G4double PX2exp = 3.029944077074419613 79 const G4double PX2exp = 3.02994407707441961300E-2; 77 const G4double PX3exp = 9.999999999999999999 80 const G4double PX3exp = 9.99999999999999999910E-1; 78 const G4double QX1exp = 3.001985051386644550 81 const G4double QX1exp = 3.00198505138664455042E-6; 79 const G4double QX2exp = 2.524483403496841041 82 const G4double QX2exp = 2.52448340349684104192E-3; 80 const G4double QX3exp = 2.272655482081550287 83 const G4double QX3exp = 2.27265548208155028766E-1; 81 const G4double QX4exp = 2.000000000000000000 84 const G4double QX4exp = 2.00000000000000000009E0; 82 85 83 const G4double LOG2E = 1.4426950408889634073 << 86 const G4double LOG2E = 1.4426950408889634073599; // 1/log(2) 84 87 85 const G4float MAXLOGF = 88.72283905206835f; 88 const G4float MAXLOGF = 88.72283905206835f; 86 const G4float MINLOGF = -88.f; 89 const G4float MINLOGF = -88.f; 87 90 88 const G4float C1F = 0.693359375f; << 91 const G4float C1F = 0.693359375f; 89 const G4float C2F = -2.12194440e-4f; << 92 const G4float C2F = -2.12194440e-4f; 90 93 91 const G4float PX1expf = 1.9875691500E-4f; 94 const G4float PX1expf = 1.9875691500E-4f; 92 const G4float PX2expf = 1.3981999507E-3f; << 95 const G4float PX2expf =1.3981999507E-3f; 93 const G4float PX3expf = 8.3334519073E-3f; << 96 const G4float PX3expf =8.3334519073E-3f; 94 const G4float PX4expf = 4.1665795894E-2f; << 97 const G4float PX4expf =4.1665795894E-2f; 95 const G4float PX5expf = 1.6666665459E-1f; << 98 const G4float PX5expf =1.6666665459E-1f; 96 const G4float PX6expf = 5.0000001201E-1f; << 99 const G4float PX6expf =5.0000001201E-1f; 97 100 98 const G4float LOG2EF = 1.44269504088896341f; 101 const G4float LOG2EF = 1.44269504088896341f; 99 102 100 //------------------------------------------ 103 //---------------------------------------------------------------------------- 101 // Used to switch between different type of 104 // Used to switch between different type of interpretations of the data 102 // (64 bits) 105 // (64 bits) 103 // 106 // 104 union ieee754 107 union ieee754 105 { 108 { 106 ieee754()= default; << 109 ieee754 () {}; 107 ieee754(G4double thed) { d = thed; }; << 110 ieee754 (G4double thed) {d=thed;}; 108 ieee754(uint64_t thell) { ll = thell; }; << 111 ieee754 (uint64_t thell) {ll=thell;}; 109 ieee754(G4float thef) { f[0] = thef; }; << 112 ieee754 (G4float thef) {f[0]=thef;}; 110 ieee754(uint32_t thei) { i[0] = thei; }; << 113 ieee754 (uint32_t thei) {i[0]=thei;}; 111 G4double d; 114 G4double d; 112 G4float f[2]; 115 G4float f[2]; 113 uint32_t i[2]; 116 uint32_t i[2]; 114 uint64_t ll; 117 uint64_t ll; 115 uint16_t s[4]; 118 uint16_t s[4]; 116 }; 119 }; 117 120 118 //------------------------------------------ 121 //---------------------------------------------------------------------------- 119 // Converts an unsigned long long to a doubl 122 // Converts an unsigned long long to a double 120 // 123 // 121 inline G4double uint642dp(uint64_t ll) 124 inline G4double uint642dp(uint64_t ll) 122 { 125 { 123 ieee754 tmp; 126 ieee754 tmp; 124 tmp.ll = ll; << 127 tmp.ll=ll; 125 return tmp.d; 128 return tmp.d; 126 } 129 } 127 130 128 //------------------------------------------ 131 //---------------------------------------------------------------------------- 129 // Converts an int to a float 132 // Converts an int to a float 130 // 133 // 131 inline G4float uint322sp(G4int x) 134 inline G4float uint322sp(G4int x) 132 { 135 { 133 ieee754 tmp; 136 ieee754 tmp; 134 tmp.i[0] = x; << 137 tmp.i[0]=x; 135 return tmp.f[0]; 138 return tmp.f[0]; 136 } 139 } 137 140 138 //------------------------------------------ 141 //---------------------------------------------------------------------------- 139 // Converts a float to an int 142 // Converts a float to an int 140 // 143 // 141 inline uint32_t sp2uint32(G4float x) 144 inline uint32_t sp2uint32(G4float x) 142 { 145 { 143 ieee754 tmp; 146 ieee754 tmp; 144 tmp.f[0] = x; << 147 tmp.f[0]=x; 145 return tmp.i[0]; 148 return tmp.i[0]; 146 } 149 } 147 150 148 //------------------------------------------ 151 //---------------------------------------------------------------------------- 149 /** 152 /** 150 * A vectorisable floor implementation, not 153 * A vectorisable floor implementation, not only triggered by fast-math. 151 * These functions do not distinguish betwee << 154 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 152 * compliant for argument -0.0 155 * compliant for argument -0.0 153 **/ << 156 **/ 154 inline G4double fpfloor(const G4double x) 157 inline G4double fpfloor(const G4double x) 155 { 158 { 156 // no problem since exp is defined between << 159 // no problem since exp is defined between -708 and 708. Int is enough for it! 157 // it! << 160 int32_t ret = int32_t (x); 158 int32_t ret = int32_t(x); << 161 ret-=(sp2uint32(x)>>31); 159 ret -= (sp2uint32(x) >> 31); << 160 return ret; 162 return ret; 161 } 163 } 162 164 163 //------------------------------------------ 165 //---------------------------------------------------------------------------- 164 /** 166 /** 165 * A vectorisable floor implementation, not 167 * A vectorisable floor implementation, not only triggered by fast-math. 166 * These functions do not distinguish betwee << 168 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 167 * compliant for argument -0.0 169 * compliant for argument -0.0 168 **/ << 170 **/ 169 inline G4float fpfloor(const G4float x) 171 inline G4float fpfloor(const G4float x) 170 { 172 { 171 int32_t ret = int32_t(x); << 173 int32_t ret = int32_t (x); 172 ret -= (sp2uint32(x) >> 31); << 174 ret-=(sp2uint32(x)>>31); 173 return ret; 175 return ret; 174 } 176 } 175 } // namespace G4ExpConsts << 177 } 176 178 177 // Exp double precision ---------------------- 179 // Exp double precision -------------------------------------------------------- 178 180 >> 181 179 /// Exponential Function double precision 182 /// Exponential Function double precision 180 inline G4double G4Exp(G4double initial_x) 183 inline G4double G4Exp(G4double initial_x) 181 { 184 { 182 G4double x = initial_x; << 185 G4double x = initial_x; 183 G4double px = G4ExpConsts::fpfloor(G4ExpCons << 186 G4double px=G4ExpConsts::fpfloor(G4ExpConsts::LOG2E * x +0.5); >> 187 >> 188 const int32_t n = int32_t(px); >> 189 >> 190 x -= px * 6.93145751953125E-1; >> 191 x -= px * 1.42860682030941723212E-6; >> 192 >> 193 const G4double xx = x * x; >> 194 >> 195 // px = x * P(x**2). >> 196 px = G4ExpConsts::PX1exp; >> 197 px *= xx; >> 198 px += G4ExpConsts::PX2exp; >> 199 px *= xx; >> 200 px += G4ExpConsts::PX3exp; >> 201 px *= x; >> 202 >> 203 // Evaluate Q(x**2). >> 204 G4double qx = G4ExpConsts::QX1exp; >> 205 qx *= xx; >> 206 qx += G4ExpConsts::QX2exp; >> 207 qx *= xx; >> 208 qx += G4ExpConsts::QX3exp; >> 209 qx *= xx; >> 210 qx += G4ExpConsts::QX4exp; >> 211 >> 212 // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) ) >> 213 x = px / (qx - px); >> 214 x = 1.0 + 2.0 * x; >> 215 >> 216 // Build 2^n in double. >> 217 x *= G4ExpConsts::uint642dp(( ((uint64_t)n) +1023)<<52); >> 218 >> 219 if (initial_x > G4ExpConsts::EXP_LIMIT) >> 220 x = std::numeric_limits<G4double>::infinity(); >> 221 if (initial_x < -G4ExpConsts::EXP_LIMIT) >> 222 x = 0.; 184 223 185 const int32_t n = int32_t(px); << 224 return x; 186 << 187 x -= px * 6.93145751953125E-1; << 188 x -= px * 1.42860682030941723212E-6; << 189 << 190 const G4double xx = x * x; << 191 << 192 // px = x * P(x**2). << 193 px = G4ExpConsts::PX1exp; << 194 px *= xx; << 195 px += G4ExpConsts::PX2exp; << 196 px *= xx; << 197 px += G4ExpConsts::PX3exp; << 198 px *= x; << 199 << 200 // Evaluate Q(x**2). << 201 G4double qx = G4ExpConsts::QX1exp; << 202 qx *= xx; << 203 qx += G4ExpConsts::QX2exp; << 204 qx *= xx; << 205 qx += G4ExpConsts::QX3exp; << 206 qx *= xx; << 207 qx += G4ExpConsts::QX4exp; << 208 << 209 // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) << 210 x = px / (qx - px); << 211 x = 1.0 + 2.0 * x; << 212 << 213 // Build 2^n in double. << 214 x *= G4ExpConsts::uint642dp((((uint64_t) n) << 215 << 216 if(initial_x > G4ExpConsts::EXP_LIMIT) << 217 x = std::numeric_limits<G4double>::infinit << 218 if(initial_x < -G4ExpConsts::EXP_LIMIT) << 219 x = 0.; << 220 << 221 return x; << 222 } 225 } 223 226 224 // Exp single precision ---------------------- 227 // Exp single precision -------------------------------------------------------- 225 228 226 /// Exponential Function single precision 229 /// Exponential Function single precision 227 inline G4float G4Expf(G4float initial_x) 230 inline G4float G4Expf(G4float initial_x) 228 { 231 { 229 G4float x = initial_x; << 232 G4float x = initial_x; >> 233 >> 234 G4float z = G4ExpConsts::fpfloor( G4ExpConsts::LOG2EF * x +0.5f ); /* std::floor() truncates toward -infinity. */ >> 235 >> 236 x -= z * G4ExpConsts::C1F; >> 237 x -= z * G4ExpConsts::C2F; >> 238 const int32_t n = int32_t ( z ); >> 239 >> 240 const G4float x2 = x * x; >> 241 >> 242 z = x*G4ExpConsts::PX1expf; >> 243 z += G4ExpConsts::PX2expf; >> 244 z *= x; >> 245 z += G4ExpConsts::PX3expf; >> 246 z *= x; >> 247 z += G4ExpConsts::PX4expf; >> 248 z *= x; >> 249 z += G4ExpConsts::PX5expf; >> 250 z *= x; >> 251 z += G4ExpConsts::PX6expf; >> 252 z *= x2; >> 253 z += x + 1.0f; >> 254 >> 255 /* multiply by power of 2 */ >> 256 z *= G4ExpConsts::uint322sp((n+0x7f)<<23); 230 257 231 G4float z = << 258 if (initial_x > G4ExpConsts::MAXLOGF) z=std::numeric_limits<G4float>::infinity(); 232 G4ExpConsts::fpfloor(G4ExpConsts::LOG2EF * << 259 if (initial_x < G4ExpConsts::MINLOGF) z=0.f; 233 0.5f); /* std::floor( << 234 << 235 x -= z * G4ExpConsts::C1F; << 236 x -= z * G4ExpConsts::C2F; << 237 const int32_t n = int32_t(z); << 238 << 239 const G4float x2 = x * x; << 240 << 241 z = x * G4ExpConsts::PX1expf; << 242 z += G4ExpConsts::PX2expf; << 243 z *= x; << 244 z += G4ExpConsts::PX3expf; << 245 z *= x; << 246 z += G4ExpConsts::PX4expf; << 247 z *= x; << 248 z += G4ExpConsts::PX5expf; << 249 z *= x; << 250 z += G4ExpConsts::PX6expf; << 251 z *= x2; << 252 z += x + 1.0f; << 253 << 254 /* multiply by power of 2 */ << 255 z *= G4ExpConsts::uint322sp((n + 0x7f) << 23 << 256 << 257 if(initial_x > G4ExpConsts::MAXLOGF) << 258 z = std::numeric_limits<G4float>::infinity << 259 if(initial_x < G4ExpConsts::MINLOGF) << 260 z = 0.f; << 261 260 262 return z; << 261 return z; 263 } 262 } 264 263 265 //-------------------------------------------- 264 //------------------------------------------------------------------------------ 266 265 267 void expv(const uint32_t size, G4double const* << 266 void expv(const uint32_t size, G4double const * __restrict__ iarray, G4double* __restrict__ oarray); 268 G4double* __restrict__ oarray); << 267 void G4Expv(const uint32_t size, G4double const * __restrict__ iarray, G4double* __restrict__ oarray); 269 void G4Expv(const uint32_t size, G4double cons << 268 void expfv(const uint32_t size, G4float const * __restrict__ iarray, G4float* __restrict__ oarray); 270 G4double* __restrict__ oarray); << 269 void G4Expfv(const uint32_t size, G4float const * __restrict__ iarray, G4float* __restrict__ oarray); 271 void expfv(const uint32_t size, G4float const* << 272 G4float* __restrict__ oarray); << 273 void G4Expfv(const uint32_t size, G4float cons << 274 G4float* __restrict__ oarray); << 275 270 276 #endif /* WIN32 */ 271 #endif /* WIN32 */ 277 272 278 #endif 273 #endif 279 274