Konstantin Ladutenko hace 10 años
padre
commit
0f2fda9dc8
Se han modificado 3 ficheros con 83 adiciones y 36 borrados
  1. 37 35
      compare.cc
  2. 40 1
      nmie-wrapper.cc
  3. 6 0
      nmie-wrapper.h

+ 37 - 35
compare.cc

@@ -214,41 +214,41 @@ int main(int argc, char *argv[]) {
 
     timespec time1, time2;
 
-    long cpptime_nsec, best_cpp;
-    long ctime_nsec, best_c;
-    long cpptime_sec, ctime_sec;
-    long repeats = 150;
-    //HeapProfilerStart("heapprof");    
-    do {
-      clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
-      for (int i = 0; i<repeats; ++i) {
-	nmie::nMie_wrapper(L, x, m, nt, Theta, &Qextw, &Qscaw,
-			   &Qabsw, &Qbkw, &Qprw, &gw, &Albedow, S1w, S2w);
-      }
-      clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);
-      cpptime_nsec = diff(time1,time2).tv_nsec;
-      cpptime_sec = diff(time1,time2).tv_sec;
-      clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
-      // for (int i = 0; i<repeats; ++i) {      
-      // 	nMie(L, x, m, nt, Theta, &Qext, &Qsca, &Qabs, &Qbk, &Qpr, &g, &Albedo, S1, S2);
-      // }  
-      clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);
-      ctime_nsec = diff(time1,time2).tv_nsec;
-      ctime_sec = diff(time1,time2).tv_sec;
-      long double ratio = static_cast<long double>(ctime_nsec)
-	/static_cast<long double>(cpptime_nsec);
-      printf("-- C++ time consumed %lg sec\n", (cpptime_nsec/1e9));
-      if ( ratio > 0.01 ) {
-	if ( ctime_sec == 0 && cpptime_sec == 0) {
-	  printf("-- C time consumed %lg sec\n", (ctime_nsec/1e9));
-	  printf("-- total repeats: %ld\n", repeats);
-	  printf("-- C/C++ time ratio: %Lg\n", ratio);
-	} else {
-	  printf("==Test is too long!\n");
-	}
-      }
-      repeats *= 10;
-    } while (cpptime_nsec < 1e8 && ctime_nsec < 1e8);
+    // long cpptime_nsec, best_cpp;
+    // long ctime_nsec, best_c;
+    // long cpptime_sec, ctime_sec;
+    // long repeats = 150;
+    // //HeapProfilerStart("heapprof");    
+    // do {
+    //   clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
+    //   for (int i = 0; i<repeats; ++i) {
+    // 	nmie::nMie_wrapper(L, x, m, nt, Theta, &Qextw, &Qscaw,
+    // 			   &Qabsw, &Qbkw, &Qprw, &gw, &Albedow, S1w, S2w);
+    //   }
+    //   clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);
+    //   cpptime_nsec = diff(time1,time2).tv_nsec;
+    //   cpptime_sec = diff(time1,time2).tv_sec;
+    //   clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
+    //   // for (int i = 0; i<repeats; ++i) {      
+    //   // 	nMie(L, x, m, nt, Theta, &Qext, &Qsca, &Qabs, &Qbk, &Qpr, &g, &Albedo, S1, S2);
+    //   // }  
+    //   clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);
+    //   ctime_nsec = diff(time1,time2).tv_nsec;
+    //   ctime_sec = diff(time1,time2).tv_sec;
+    //   long double ratio = static_cast<long double>(ctime_nsec)
+    // 	/static_cast<long double>(cpptime_nsec);
+    //   printf("-- C++ time consumed %lg sec\n", (cpptime_nsec/1e9));
+    //   if ( ratio > 0.01 ) {
+    // 	if ( ctime_sec == 0 && cpptime_sec == 0) {
+    // 	  printf("-- C time consumed %lg sec\n", (ctime_nsec/1e9));
+    // 	  printf("-- total repeats: %ld\n", repeats);
+    // 	  printf("-- C/C++ time ratio: %Lg\n", ratio);
+    // 	} else {
+    // 	  printf("==Test is too long!\n");
+    // 	}
+    //   }
+    //   repeats *= 10;
+    // } while (cpptime_nsec < 1e8 && ctime_nsec < 1e8);
 
     nMie(L, x, m, nt, Theta, &Qext, &Qsca, &Qabs, &Qbk, &Qpr, &g, &Albedo, S1, S2);
     nmie::nMie_wrapper(L, x, m, nt, Theta, &Qextw, &Qscaw, &Qabsw, &Qbkw, &Qprw, &gw, &Albedow, S1w, S2w);
@@ -298,6 +298,7 @@ int main(int argc, char *argv[]) {
     std::vector<std::vector<std::complex<double> > > E(ncoord), H(ncoord);
     for (auto& f:E) f.resize(3);
     for (auto& f:H) f.resize(3);
+
     nmax =  nField( L,  pl,  x,  m, nmax,  ncoord,  Xp,  Yp,  Zp, E, H);
     double sum_e = 0.0, sum_h = 0.0;
     for (auto f:E) 
@@ -305,6 +306,7 @@ int main(int argc, char *argv[]) {
     for (auto f:H) 
       for (auto c:f) sum_h+=std::abs(c);
     printf ("Field total sum (old) \n\tE    =%23.16f\n\tH*377=%23.16f\n", sum_e, sum_h*377.0);
+
     nmie::nField( L,  pl,  x,  m, nmax,  ncoord,  Xp,  Yp,  Zp, E, H);
     sum_e = 0.0, sum_h = 0.0;
     for (auto f:E) 

+ 40 - 1
nmie-wrapper.cc

@@ -904,7 +904,7 @@ c    MM       + 1  and - 1, alternately
     for (int t = 0; t < theta_.size(); t++) {	
       costheta[t] = std::cos(theta_[t]);
     }
-    // Do not join upper and lower for to a single one!  It will slow
+    // Do not join upper and lower 'for' to a single one!  It will slow
     // down the code!!! (For about 0.5-2.0% of runtime, it is probably
     // due to increased cache missing rate originated from the
     // recurrence in calcPiTau...)
@@ -1251,6 +1251,44 @@ c    MM       + 1  and - 1, alternately
     nmax_used_ = nmax_;
     //return nmax;
   }
+  
+
+  // ********************************************************************** //
+  // ********************************************************************** //
+  // ********************************************************************** //
+  void MultiLayerMie::ScattCoeffsLayerdInit() {
+    const int L = index_.size();
+    // we need to fill
+    // std::vector< std::vector<std::complex<double> > > al_n_, bl_n_, cl_n_, dl_n_;
+    //     for n = [0..nmax_) and for l=[L..0)
+    // to decrease cache miss outer loop is with n and inner with reversed l
+    al_n_.resize(nmax_);
+    bl_n_.resize(nmax_);
+    cl_n_.resize(nmax_);
+    dl_n_.resize(nmax_);
+    for (auto& element:al_n_) element.resize(L+1);
+    for (auto& element:bl_n_) element.resize(L+1);
+    for (auto& element:cl_n_) element.resize(L+1);
+    for (auto& element:dl_n_) element.resize(L+1);
+    std::complex<double> c_one(1.0, 0.0);
+    // Yang, paragraph under eq. A3
+    // a^(L+1)_n = a_n, d^(L+1) = 1 ...
+    for (int i = 0; i < nmax_; ++i) {
+      al_n_[i][0] = an_[i];
+      bl_n_[i][0] = bn_[i];
+      cl_n_[i][0] = c_one;
+      dl_n_[i][0] = c_one;
+    }
+
+  }
+  // ********************************************************************** //
+  // ********************************************************************** //
+  // ********************************************************************** //
+  void MultiLayerMie::ScattCoeffsLayerd() {
+    if (!isMieCalculated_)
+      throw std::invalid_argument("(ScattCoeffsLayerd) You should run calculations first!");
+    ScattCoeffsLayerdInit();
+  }
   // ********************************************************************** //
   // ********************************************************************** //
   // ********************************************************************** //
@@ -1358,6 +1396,7 @@ c    MM       + 1  and - 1, alternately
   void MultiLayerMie::RunFieldCalculations() {
     // Calculate scattering coefficients an_ and bn_
     RunMieCalculations();
+    ScattCoeffsLayerd();
 
     std::vector<double> Pi(nmax_), Tau(nmax_);
     long total_points = coords_sp_[0].size();

+ 6 - 0
nmie-wrapper.h

@@ -209,6 +209,9 @@ namespace nmie {
     void calcAllPiTau( std::vector< std::vector<double> >& Pi,
 		    std::vector< std::vector<double> >& Tau);
     void ScattCoeffs(std::vector<std::complex<double> >& an, std::vector<std::complex<double> >& bn); 
+    void ScattCoeffsLayerd();
+    void ScattCoeffsLayerdInit();
+
     void fieldExt(const double Rho, const double Phi, const double Theta, const  std::vector<double>& Pi, const std::vector<double>& Tau, std::vector<std::complex<double> >& E, std::vector<std::complex<double> >& H);
     
     bool isMieCalculated_ = false;
@@ -233,6 +236,9 @@ namespace nmie {
     // Scattering coefficients
     std::vector<std::complex<double> > an_, bn_;
     std::vector< std::vector<double> > coords_sp_;
+    // l index is reversed!! $a^(L+1)_n$ stored in al_n_[n][0],
+    // $a^(L)_n$ in al_n_[n][1] and so on...
+    std::vector< std::vector<std::complex<double> > > al_n_, bl_n_, cl_n_, dl_n_;
     /// Store result
     double Qsca_ = 0.0, Qext_ = 0.0, Qabs_ = 0.0, Qbk_ = 0.0, Qpr_ = 0.0, asymmetry_factor_ = 0.0, albedo_ = 0.0;
     std::vector<std::vector< std::complex<double> > > E_field_, H_field_;  // {X[], Y[], Z[]}