dune-fem  2.8-git
codegen.hh
Go to the documentation of this file.
1 #ifndef DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
2 #define DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
3 
4 #include <cassert>
5 #include <cstdlib>
6 #include <iostream>
7 #include <fstream>
8 #include <sstream>
9 #include <vector>
10 #include <set>
11 #include <map>
12 
13 #include <dune/common/exceptions.hh>
14 #include <dune/fem/io/io.hh>
15 #include <dune/fem/io/parameter.hh>
16 
19 
20 namespace Dune
21 {
22 
23  namespace Fem
24  {
25 
26  namespace Codegen
27  {
28 
29  class CodegenInfoFinished : public Dune :: Exception {};
30 
32  {
33  std::string path_;
34  std::string outFileName_;
35 
36  int nopMax_;
37  int nopMin_;
38 
39  int baseMax_;
40  int baseMin_;
41 
42  typedef std::set< int > DimRangeSetType;
43  typedef std::set< void* > BaseSetPointerType;
44  DimRangeSetType savedimRanges_;
45  mutable DimRangeSetType dimRanges_;
46  BaseSetPointerType savedBaseSets_;
47 
48  typedef void codegenfunc_t (std::ostream& out,
49  const int dim,
50  const int dimRange,
51  const size_t numRows,
52  const size_t numCols );
53 
54  typedef std::pair< std::string, int > EntryType;
55  std::vector< EntryType > filenames_;
56 
57  typedef std::vector< int > KeyType;
58  typedef std::set< KeyType > KeyStorageType;
59 
60  typedef std::pair< int, std::pair< int, int > > EvalPairType ;
61  typedef std::set< EvalPairType > EvalSetType ;
62 
63  std::map< codegenfunc_t* , KeyStorageType > entryMap_;
64  EvalSetType evalSet_;
65 
66  public:
68  : path_( Dune::Fem::Parameter::commonInputPath() + "/autogeneratedcode"),
69  outFileName_( "autogeneratedcode.hh" ),
70  nopMax_(0), nopMin_(0), baseMax_(0), baseMin_(0),
71  dimRanges_(), savedBaseSets_(), filenames_(),
72  entryMap_(),
73  evalSet_()
74  {
75  }
76 
78  {
80  }
81 
83  void clear() {
84  savedBaseSets_.clear();
85  filenames_.clear();
86  dimRanges_.clear();
87  entryMap_.clear();
88  evalSet_.clear();
89  nopMax_ = 0;
90  nopMin_ = 0;
91  baseMax_ = 0;
92  baseMin_ = 0;
93  }
94 
96  void setPath(const std::string& path ) { path_ = path ; }
97 
99  void setFileName(const std::string& filename ) { outFileName_ = filename ; }
100 
101  template <class BaseSet>
102  void addDimRange(const BaseSet* baseSet,
103  const int dimRange)
104  {
105  void* ptr = (void *) baseSet;
106  if( savedBaseSets_.find( ptr ) == savedBaseSets_.end() )
107  {
108  savedBaseSets_.insert( ptr );
109 #ifndef NDEBUG
110  std::cout << "Add dimRange " << dimRange << std::endl;
111 #endif
112  dimRanges_.insert( dimRange ) ;
113  }
114  }
115 
116  void addEntry(const std::string& fileprefix,
117  codegenfunc_t* codegenfunc,
118  const int dim, const int dimRange, const int quadNop, const int numBase )
119  {
120  KeyStorageType& keyMap = entryMap_[ codegenfunc ];
121 
122  typedef KeyStorageType :: iterator iterator;
123  KeyType key( 4 );
124  key[ 0 ] = dim;
125  key[ 1 ] = dimRange;
126  key[ 2 ] = quadNop;
127  key[ 3 ] = numBase;
128 
129  // search for key, if already exists, then do nothing
130  iterator it = keyMap.find( key );
131  if( it != keyMap.end() ) return ;
132 
133  // make sure dimRange was registered
134  assert( dimRanges_.find( dimRange ) != dimRanges_.end() );
135 
136  // create directory to store files
137  createDirectory( path_ );
138 
139  std::stringstream filename;
140  filename << fileprefix << dimRange << "_" << quadNop << "_" << numBase << ".hh";
141 
142  // second check, if file exists, do nothing
143  int pos = exists( filename.str() );
144  if( pos >= 0 ) return;
145 
146  std::string filenameToWrite( path_ + "/" + filename.str() );
147 
148  // write code to string stream and compare with existing file
149  // if file does not exist, then write code to newly generated file
150  std::stringstream code;
151  // call code generation function
152  codegenfunc( code, dim, dimRange, quadNop, numBase );
153 
154  bool writeCode = false ;
155 
156  {
157  std::ifstream infile( filenameToWrite );
158  if( infile )
159  {
160  std::stringstream checkstr;
161  checkstr << infile.rdbuf();
162 
163  // if both string are identical we can stop here
164  // and don't write the header thus avoiding recompilation
165  if( checkstr.str().compare( code.str() ) != 0 )
166  {
167  // if strings differ then write code
168  writeCode = true;
169  }
170  infile.close();
171  }
172  else
173  {
174  // if file does not exist, then write code
175  writeCode = true;
176  }
177  }
178 
179  if( writeCode )
180  {
181  // if file does not exist, then write code to newly generated file
182  std::ofstream file( filenameToWrite, std::ios::out );
183  file << code.str();
184  file.close();
185 #ifndef NDEBUG
186  std::cout << "Generate code " << fileprefix << " for (" << dimRange << ","
187  << quadNop << "," << numBase << ")" << std::endl;
188 #endif
189  }
190 
191  // insert evaluate pair in any case
192  // otherwise the lists with include files is wrong
193  EvalPairType evalPair ( dimRange, std::make_pair(quadNop, numBase) );
194  evalSet_.insert( evalPair );
195 
196  if( baseMin_ == 0 ) baseMin_ = numBase;
197  if( nopMin_ == 0 ) nopMin_ = quadNop;
198 
199  EntryType entry ( filename.str() , dimRange );
200  filenames_.push_back( entry );
201  nopMax_ = std::max( quadNop, nopMax_ );
202  nopMin_ = std::min( quadNop, nopMin_ );
203  baseMax_ = std::max( numBase, baseMax_ );
204  baseMin_ = std::min( numBase, baseMin_ );
205  }
206 
207  void finalize () const
208  {
209  if( checkAbort() )
210  {
211  dumpInfo();
212  std::cerr << "All automated code generated, bye, bye !! " << std::endl;
213  DUNE_THROW(CodegenInfoFinished,"All automated code generated, bye, bye !!");
214  }
215  }
216 
217 
218  bool dumpInfo(const bool writeToCurrentDir = false ) const
219  {
220  if( writeToCurrentDir )
221  {
222  // write file with correct include to current directory
223  // this is usually not needed if include paths are correct
224  std::ofstream file( outFileName_.c_str() );
225  file << "#include \"" <<path_<< "/" << outFileName_ << "\"";
226  }
227 
228  {
229  std::string filename( path_ + "/" + outFileName_ );
230  std::stringstream file;
231 
232  file << "#ifdef CODEGEN_INCLUDEMAXNUMS" << std::endl;
233  file << "#ifndef CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl;
234  file << "#define CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl << std::endl;
235  file << "#define MAX_NUMBER_OF_QUAD_POINTS " << nopMax_ << std::endl;
236  file << "#define MIN_NUMBER_OF_QUAD_POINTS " << nopMin_ << std::endl;
237  file << "#define MAX_NUMBER_OF_BASE_FCT " << baseMax_ << std::endl;
238  file << "#define MIN_NUMBER_OF_BASE_FCT " << baseMin_ << std::endl << std::endl;
239 #if 0
240  file << "/* include all headers with inner loop extern declarations */" << std::endl;
241  file << "#define CODEGEN_COMPILE_INNERLOOPS 1" << std::endl;
242  file << "namespace Dune {" << std::endl;
243  file << "namespace Fem {" << std::endl;
244  file << "namespace Codegen {" << std::endl;
245  for( size_t i = 0; i < filenames_.size(); ++i )
246  {
247  file << "#include \""<< filenames_[ i ].first << "\"" << std::endl;
248  }
249  file << "}}} // end namespaces" << std::endl;
250  file << "#undef CODEGEN_COMPILE_INNERLOOPS" << std::endl << std::endl;
251  file << "#include \"" << filename << ".c\"" <<std::endl;
252 #endif
253 
254  file << "#endif // CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl << std::endl;
255  file << "#elif defined CODEGEN_INCLUDEEVALCALLERS" << std::endl;
256  file << "#ifndef CODEGEN_EVALCALLERS_INCLUDED" << std::endl;
257  file << "#define CODEGEN_EVALCALLERS_INCLUDED" << std::endl << std::endl;
258  file << "namespace Dune {"<< std::endl;
259  file << "namespace Fem {"<< std::endl;
260  file << "namespace Codegen {"<< std::endl;
261  typedef EvalSetType :: iterator iterator ;
262  const iterator endit = evalSet_.end();
263  for( iterator it = evalSet_.begin(); it != endit; ++it )
264  {
265  int dimRange = it->first;
266  int quadNop = it->second.first;
267  int numBase = it->second.second;
268  file << " template <class Traits>" << std::endl;
269  file << " struct EvaluateImplementation< Traits, " << dimRange << " , " << quadNop << " , " << numBase << " >" << std::endl;
270  file << " : public EvaluateRealImplementation< Traits, " << dimRange << " , " << quadNop << " , " << numBase << " >" << std::endl;
271  file << " {" << std::endl;
272  file << " typedef EvaluateRealImplementation< Traits, " << dimRange << " , " << quadNop << " , " << numBase << " > BaseType;" << std::endl;
273  file << " typedef typename BaseType :: RangeVectorType RangeVectorType;" << std::endl;
274  file << " EvaluateImplementation( const RangeVectorType& rv ) : BaseType ( rv ) {}" << std::endl;
275  file << " };"<< std::endl;
276  file << std::endl;
277  }
278  file << "}}} // end namespaces"<< std::endl;
279  file << "#endif // CODEGEN_EVALCALLERS_INCLUDED" << std::endl << std::endl;
280  file << "#else" << std::endl << std::endl ;
281  file << "#ifndef CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl;
282  file << "#define CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl;
283  //file << "#undef CODEGEN_COMPILE_INNERLOOPS" << std::endl;
284  file << "namespace Dune {" << std::endl;
285  file << "namespace Fem {" << std::endl;
286  file << "namespace Codegen {" << std::endl;
287  for( size_t i = 0; i < filenames_.size(); ++i )
288  {
289  file << "#include \""<< filenames_[ i ].first << "\"" << std::endl;
290  }
291  file << "}}} // end namespaces" << std::endl;
292  file << "#endif // CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl << std::endl;
293  file << "#endif // CODEGEN_INCLUDEMAXNUMS" << std::endl;
294 
295  std::ifstream infile( filename );
296  if( infile )
297  {
298  std::stringstream checkstr;
299  checkstr << infile.rdbuf();
300 
301  // if both string are identical we can stop here
302  // and don't write the header thus avoiding recompilation
303  if( checkstr.str().compare( file.str() ) == 0 )
304  return false;
305  }
306 
307  std::ofstream outfile( filename );
308  outfile << file.str();
309  outfile.close();
310 
311 #if 0
312  // write C file with implementation of inner loop functions
313  filename += ".c";
314  std::ofstream Cfile( filename.c_str() );
315 
316  Cfile << "#include <stdlib.h>" << std::endl;
317  Cfile << "/* include all headers with inner loop implementation */" << std::endl;
318  Cfile << "#define CODEGEN_COMPILE_INNERLOOPS 2" << std::endl;
319  for( size_t i = 0; i < filenames_.size(); ++i )
320  {
321  Cfile << "#include \""<< filenames_[ i ].first << "\"" << std::endl;
322  }
323 #endif
324  return true;
325  }
326  }
327  protected:
328  int exists( const std::string& filename ) const
329  {
330  for( size_t i = 0; i < filenames_.size(); ++i )
331  {
332  if( filename == filenames_[ i ].first )
333  {
334  return i;
335  }
336  }
337  return -1;
338  }
339 
340  bool checkAbort() const
341  {
342  DimRangeSetType found ;
343  bool canAbort = true ;
344  for( size_t i = 0; i < filenames_.size(); ++i )
345  {
346  found.insert( filenames_[ i ].second );
347  if ( filenames_[ i ].second > 0 )
348  {
349  canAbort = false ;
350  }
351  }
352  typedef DimRangeSetType :: iterator iterator ;
353  for( iterator it = found.begin(); it != found.end(); ++it )
354  {
355  dimRanges_.erase( *it );
356  }
357 
358  if( canAbort && dimRanges_.size() == 0 )
359  return true ;
360  else
361  return false ;
362  }
363  };
364 
366  template < int simdWidth = 4 >
368  {
369  static const char* restrictKey()
370  {
371  return "__restrict__";
372  }
373 
374  static const char* doubletype()
375  {
376 #ifdef COUNT_FLOPS
377  return "Dune::Fem::Double";
378 #else
379  return "double";
380 #endif
381  }
382 
383  static void writePreCompHeader(std::ostream& out, const int stage )
384  {
385  const char* codegenPreCompVar = "CODEGEN_COMPILE_INNERLOOPS";
386  if( stage == -1 )
387  {
388  out << "#ifndef HEADERCHECK" << std::endl;
389  //out << "#if ! " << codegenPreCompVar << std::endl;
390  }
391  else if( stage == 0 )
392  {
393  out << "#else" << std::endl;
394  out << "#if " << codegenPreCompVar << " == 1" << std::endl;
395  out << "extern \"C\" {" << std::endl
396  << " extern inline" << std::endl;
397  out << "#endif" << std::endl;
398  }
399  else if( stage == 1 )
400  {
401  out << "#if " << codegenPreCompVar << " == 1" << std::endl;
402  out << " ;" << std::endl;
403  out << "}" << std::endl;
404  out << "#else" << std::endl;
405  }
406  else
407  {
408  //out << "#endif" << std::endl;
409  //out << "#endif" << std::endl;
410  out << "#endif" << std::endl;
411  }
412  }
413 
414  // generate inner loop function name
415  static std::string generateFunctionName( const std::string& prefix,
416  const int simdW, const int dimRange,
417  const size_t numRows, const size_t numCols )
418  {
419  std::stringstream funcName;
420  funcName << prefix << "_" << simdWidth << "_" << dimRange << "_" << numRows << "_" << numCols ;
421  return funcName.str();
422  }
423 
424  static void writeInnerLoopEval(std::ostream& out, const int simdW, const int dimRange, const size_t numRows, const size_t numCols )
425  {
426  out << " // Loop over all quadrature points" << std::endl;
427  out << " for(int qp = 0; qp < " << numRows << " ; ++qp )" << std::endl;
428  out << " {" << std::endl;
429  /*
430  if( simdW == 1 )
431  {
432  out << " const " << doubletype() << " phi0 = rangeStorage[ quad.cachingPoint( row ) * " << numCols << " + col ][ 0 ];" << std::endl;
433  }
434  else
435  {
436  for( int i = 0 ; i< simdW ; ++ i )
437  out << " const " << doubletype() << " phi" << i << " = base" << i << "[ row ];" << std::endl;
438  }
439  */
440  for( int i = 0 ; i< simdW ; ++ i )
441  out << " const " << doubletype() << " phi" << i << " = base" << i << "[ qp ];" << std::endl;
442  for(int r = 0; r < dimRange; ++ r )
443  {
444  out << " result" << r << "[ qp ] += phi0 * dof0" << r;
445  for( int i=1; i<simdW; ++i )
446  out << " + phi" << i << " * dof" << i << r;
447  out << " ;" << std::endl;
448  }
449  out << " }" << std::endl;
450  }
451 
452  static void evaluateCodegen(std::ostream& out,
453  const int dim,
454  const int dimRange,
455  const size_t numRows,
456  const size_t numCols )
457  {
458  const std::string funcName =
459  generateFunctionName( "evalRangesLoop", simdWidth, dimRange, numRows, numCols );
460 
461  writePreCompHeader( out, -1 );
462 
463  out << "template <class BaseFunctionSet> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
464  out << "struct EvaluateRanges<BaseFunctionSet, EmptyGeometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
465  out << "{" << std::endl;
466  out << " template< class QuadratureType,"<< std::endl;
467  out << " class RangeVectorType," << std::endl;
468  out << " class RangeFactorType," << std::endl;
469  out << " class LocalDofVectorType>" << std::endl;
470  out << " static void eval( const QuadratureType& quad," << std::endl;
471  out << " const RangeVectorType& rangeStorageTransposed," << std::endl;
472  out << " const LocalDofVectorType& dofs," << std::endl;
473  out << " RangeFactorType &rangeVector)" << std::endl;
474  out << " {" << std::endl;
475  //out << " typedef typename ScalarRangeType :: field_type field_type;" << std::endl;
476  //out << " typedef Field field_type;" << std::endl;
477  //out << " typedef typename RangeVectorType :: value_type value_type;" << std::endl;
478  //out << " typedef RangeType value_type;" << std::endl;
479 
480  out << " typedef " << doubletype() << " Field;" << std::endl;
481  //out << " static Dune::Fem::ThreadSafeValue< std::vector< Field > > memory( " << numRows * dimRange << " );" << std::endl;
482  out << " static thread_local std::vector< Field > memory( " << numRows * dimRange << " );" << std::endl;
483 
484  // make length simd conform
485  //out << " Field* resultTmp = (*memory).data();" << std::endl;
486  out << " Field* resultTmp = memory.data();" << std::endl;
487  out << " for( int i=0; i < " << numRows * dimRange << "; ++ i ) resultTmp[ i ] = 0;" << std::endl <<std::endl;
488 
489  for(int r=0; r<dimRange ; ++r )
490  {
491  out << " Field* result" << r << " = resultTmp + " << r * numRows << " ;" << std::endl;
492  }
493  out << std::endl;
494 
495  out << " const Field* baseData = rangeStorageTransposed.data();" << std::endl;
496  //for( int i=0; i< simdWidth; ++ i )
497  //{
498  // out << " Field base" << i << "[ " << numRows << " ];" << std::endl;
499  //}
500  out << std::endl;
501  const size_t simdCols = simdWidth * ( numCols / simdWidth );
502  if( simdCols > 0 )
503  {
504  out << " for( int col = 0, dof = 0 ; col < "<< simdCols <<" ; col += " << simdWidth << ", dof += " << simdWidth * dimRange<< " )"<<std::endl;
505  out << " {" << std::endl;
506 
507  //out << " const value_type& rangeStorageRow = rangeStorage[ rowMap[ row ] ];" << std::endl;
508  //out << " for( int row = 0 ; row < " << numRows << " ; ++ row )" << std::endl;
509  //out << " {" << std::endl;
510 
511  //out << " int idx = quad.cachingPoint( row ) * " << numCols << " + col;" << std::endl;
512  //for( int i=0; i< simdWidth; ++ i )
513  //{
514  // const char* plusplus = (i == simdWidth-1) ? " " : "++";
515  // out << " base" << i << "[ row ] = rangeStorage[ idx" << plusplus << " ][ 0 ];" << std::endl;
516  //}
517  //out << " }" << std::endl;
518 
519 
520  out << " " << funcName << "(" << std::endl;
521  out << " ";
522  for( int i = 0; i< simdWidth * dimRange; ++i )
523  out << " dofs[ dof + " << i << " ],";
524  out << std::endl;
525  for( int i=0; i< simdWidth; ++i )
526  out << " baseData + ((col + " << i << ") * " << numRows << ")," << std::endl;
527  out << " ";
528  for( int r = 0; r < dimRange-1; ++r)
529  out << "result" << r << ", ";
530  out << "result" << dimRange-1 << " );" << std::endl;
531  out << " }"<< std::endl;
532  out << std::endl;
533  }
534 
535  if( numCols > simdCols )
536  {
537  out << " // remainder iteration" << std::endl;
538  out << " for( int col = " << simdCols << ", dof = " << simdCols * dimRange << " ; col < " << numCols << " ; ++col )" << std::endl;
539  out << " {" << std::endl;
540  for( int r=0; r<dimRange; ++r )
541  out << " const " << doubletype() << " dof0" << r << " = dofs[ dof++ ];" << std::endl;
542  out << " const Field* base0 = baseData + (col * " << numRows << ");" << std::endl;
543 
544  writeInnerLoopEval( out, 1, dimRange, numRows, numCols );
545  out << " }" << std::endl;
546  out << std::endl;
547  }
548 
549  out << " // store result" << std::endl;
550  out << " for(int row = 0; row < " << numRows << " ; ++row )" << std::endl;
551  out << " {" << std::endl;
552  out << " auto& result = rangeVector[ row ];" << std::endl;
553  for( int r = 0 ; r < dimRange; ++ r )
554  {
555  out << " result[ " << r << " ] = result" << r << "[ row ];" << std::endl;
556  }
557  out << " }" << std::endl;
558  out << " }" << std::endl << std::endl;
559  //out << "};" << std::endl;
560 
561  //writePreCompHeader( out, 0 );
562  out << " static void " << funcName << "(" << std::endl;
563  for( int i=0; i<simdWidth; ++i )
564  {
565  out << " ";
566  for( int r=0; r<dimRange; ++ r )
567  {
568  if( r > 0 ) out << " ";
569  out << "const " << doubletype() << " dof"<< i << r << ",";
570  }
571  out << std::endl;
572  }
573  for( int i=0; i<simdWidth; ++ i )
574  out << " const " << doubletype() << "* " << restrictKey() << " base" << i << "," << std::endl;
575  for( int r=0; r<dimRange; ++ r )
576  {
577  out << " " << doubletype() << "* "<< restrictKey() << " result" << r;
578  if( r == dimRange-1 ) out << " )" << std::endl;
579  else out << "," << std::endl;
580  }
581 
582  //writePreCompHeader( out, 1 );
583  out << " {" << std::endl;
584  writeInnerLoopEval( out, simdWidth, dimRange, numRows, numCols );
585  out << " }" << std::endl;
586  out << "};" << std::endl;
587  writePreCompHeader( out, 2 );
588  }
589 
590  static void writeInnerLoop(std::ostream& out, const int simdW, const int dimRange, const size_t numCols )
591  {
592  for( int i=0; i< simdW; ++i )
593  {
594  for( int r=0; r< dimRange; ++r )
595  {
596  out << " const " << doubletype() << " fac" << i << r << " = rangeFactor" << i << "[ " << r << " ];" << std::endl;
597  }
598  }
599  if( simdW == 1 )
600  out << " int rowCol = quad.cachingPoint( row ) * " << numCols <<";" << std::endl;
601  out << " for(int col = 0 ; col < " << numCols << " ; ++ col";
602  if( simdW == 1 )
603  out << ", ++rowCol";
604  out << " )" << std::endl;
605  out << " {" << std::endl;
606  for( int i = 0 ; i< simdW ; ++ i )
607  {
608  if( simdW == 1 )
609  out << " const " << doubletype() << " phi" << i << " = rangeStorage[ rowCol ][ 0 ];" << std::endl;
610  else
611  out << " const " << doubletype() << " phi" << i << " = base" << i << " [ col ];" << std::endl;
612  }
613  for(int r = 0; r < dimRange; ++ r )
614  {
615  out << " dofs" << r << "[ col ] += phi0 * fac0" << r;
616  for( int i=1; i<simdW; ++i )
617  {
618  out << " + phi" << i << " * fac" << i << r ;
619  }
620  out << " ;" << std::endl;
621  }
622  out << " }" << std::endl;
623  }
624 
625  static void axpyCodegen(std::ostream& out,
626  const int dim, const int dimRange, const size_t numRows, const size_t numCols )
627  {
628  const std::string funcName =
629  generateFunctionName( "axpyRangesLoop", simdWidth, dimRange, numRows, numCols );
630 
631  writePreCompHeader( out, -1 );
632 
633  out << "template <class BaseFunctionSet> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
634  out << "struct AxpyRanges<BaseFunctionSet, EmptyGeometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
635  out << "{" << std::endl;
636 
637  out << std::endl;
638  out << " template< class QuadratureType,"<< std::endl;
639  out << " class RangeVectorType," << std::endl;
640  out << " class RangeFactorType," << std::endl;
641  out << " class LocalDofVectorType>" << std::endl;
642  out << " static void axpy( const QuadratureType& quad," << std::endl;
643  out << " const RangeVectorType& rangeStorage," << std::endl;
644  out << " const RangeFactorType& rangeFactors," << std::endl;
645  out << " LocalDofVectorType& dofs)" << std::endl;
646  out << " {" << std::endl;
647 
649  // axpy
651 
652  //out << " typedef RangeType value_type;" << std::endl;
653  //out << " typedef typename ScalarRangeType :: field_type field_type;" << std::endl;
654 
655  out << " typedef " << doubletype() << " Field;" << std::endl;
656  out << " static thread_local std::vector< Field > memory( " << numCols * dimRange << " );" << std::endl;
657  //out << " static Dune::Fem::ThreadSafeValue< std::vector< Field > > memory( " << numCols * dimRange << " );" << std::endl;
658 
659  //out << " " << doubletype() << "* dofResult = (*memory).data();" << std::endl;
660  out << " " << doubletype() << "* dofResult = memory.data();" << std::endl;
661  out << " for( int i=0; i < " << numCols * dimRange << "; ++i ) dofResult[ i ] = 0;" << std::endl << std::endl;
662 
663  out << " " << doubletype() << "* dofs0 = dofResult;" << std::endl;
664  for( int r = 1; r < dimRange; ++ r )
665  out << " " << doubletype() << "* dofs" << r << " = dofResult + " << r * numCols << ";" << std::endl;
666  out << std::endl;
667 
668  const size_t simdRows = simdWidth * (numRows / simdWidth) ;
669 
670  if( simdRows > 0 )
671  {
672  out << " for( int row = 0; row < "<< simdRows << " ; row += " << int(simdWidth) << " )" << std::endl;
673  out << " {" << std::endl;
674  for( int i=0; i<simdWidth; ++ i )
675  out << " const " << doubletype() << "* rangeFactor" << i << " = &rangeFactors[ row + " << i << " ][ 0 ];" << std::endl;
676  out << " " << funcName << "(";
677  for( int i = 0; i < simdWidth; ++i )
678  {
679  if( i>0 )
680  out << " ";
681  out << " &rangeStorage[ quad.cachingPoint( row + " << i << " ) * " << numCols << " ][ 0 ]," << std::endl;
682  }
683  out << " rangeFactor0, ";
684  for( int i=1; i<simdWidth; ++ i )
685  out << "rangeFactor" << i << ",";
686  out << std::endl;
687  out << " ";
688  for( int r = 0; r < dimRange; ++ r )
689  {
690  out << " dofs" << r ;
691  if( r == dimRange-1 )
692  out << " );" << std::endl;
693  else
694  out << ",";
695  }
696  out << std::endl;
697  out << " }" << std::endl;
698  out << std::endl;
699  }
700 
701  if( numRows > simdRows )
702  {
703  out << " // remainder iteration" << std::endl;
704  out << " for( int row = " << simdRows << " ; row < " << numRows << " ; ++row )" << std::endl;
705  out << " {" << std::endl;
706  out << " const " << doubletype() << "* rangeFactor0 = &rangeFactors[ row ][ 0 ];" << std::endl;
707  writeInnerLoop( out, 1, dimRange, numCols );
708  out << " }" << std::endl;
709  out << std::endl;
710  }
711 
712  out << " // sum up results (transform from variable based to point based layout)" << std::endl;
713  out << " for( int col = 0, dof = 0 ; col < "<< numCols << " ; ++col )" << std::endl;
714  out << " {" << std::endl;
715  for( int r = 0 ; r < dimRange; ++ r )
716  out << " dofs[ dof++ ] += dofs" << r << "[ col ];" << std::endl;
717  out << " }" << std::endl;
718 
719  out << " }" << std::endl << std::endl;
720  //out << "};" << std::endl;
721 
723  // inner loop
725  //writePreCompHeader( out, 0 );
726  out << " static void " << funcName << "(" << std::endl;
727  out << " const " << doubletype() << "* " << restrictKey() << " base0," << std::endl;
728  for( int i=1; i<simdWidth; ++ i )
729  out << " const " << doubletype() << "* " << restrictKey() << " base" << i << "," << std::endl;
730  for( int i=0; i<simdWidth; ++ i )
731  out << " const " << doubletype() << "* " << restrictKey() << " rangeFactor" << i << "," << std::endl;
732  for( int r = 0; r < dimRange; ++r )
733  {
734  out << " " << doubletype() << "* " << restrictKey() << " dofs" << r;
735  if( r == dimRange-1 )
736  out << " )" << std::endl;
737  else
738  out << "," << std::endl;
739  }
740  //writePreCompHeader( out, 1 );
741  out << " {" << std::endl;
742  writeInnerLoop( out, simdWidth, dimRange, numCols );
743  out << " }" << std::endl;
744  out << "};" << std::endl;
745  writePreCompHeader( out, 2 );
746  }
747 
748  static void writeInnerJacEvalLoop(std::ostream& out, const int simdW, const int dim,
749  const int dimRange, const size_t numRows, const size_t numCols )
750  {
751  out << " for(int row = 0; row < " << numRows << " ; ++row )" << std::endl;
752  out << " {" << std::endl;
753  if( simdW == 1 )
754  {
755  /*
756  out << " int idx = quad.cachingPoint( row ) * " << numCols << " + col ;" << std::endl;
757  out << " const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
758  for( int i = 0 ; i< simdW ; ++ i )
759  {
760  const char* plusplus = (i == simdW-1) ? " " : "++";
761  out << " gjit.mv( jacStorage[ idx" << plusplus << " ][ 0 ], gradPhi" << i << " );" << std::endl;
762  for( int d = 0 ; d < dim; ++ d )
763  out << " const " << doubletype() << " phi" << i << d << " = gradPhi" << i << "[ " << d << " ];" << std::endl;
764  }
765  */
766  /*
767  for( int i = 0 ; i< simdW ; ++ i )
768  {
769  const char* plusplus = (i == simdW-1) ? " " : "++";
770  out << " gradPhi" << i << " = jacStorage[ idx" << plusplus << " ][ 0 ];" << std::endl;
771  //out << " gjit.mv( jacStorage[ idx" << plusplus << " ][ 0 ], gradPhi" << i << " );" << std::endl;
772  for( int d = 0 ; d < dim; ++ d )
773  out << " const " << doubletype() << " phi" << i << d << " = gradPhi" << i << "[ " << d << " ];" << std::endl;
774  }
775  */
776  // out << " const "<< doubletype() << "* base" << i << " = jacobians + (" << dim * numRows << " * (col + "<< i << "));" << std::endl;
777  for( int i = 0 ; i< simdW ; ++ i )
778  {
779  //out << " gjit.mv( jacStorage[ idx" << plusplus << " ][ 0 ], gradPhi" << i << " );" << std::endl;
780  for( int d = 0 ; d < dim; ++ d )
781  out << " const " << doubletype() << " phi" << i << d << " = base" << i << "[ row * " << dim << " + " << d << " ];" << std::endl;
782  }
783  out << std::endl;
784  }
785  else
786  {
787  out << " const int idx = row * " << dim<< ";" << std::endl;
788  for( int d = 0; d < dim ; ++ d )
789  {
790  for( int i = 0 ; i< simdW ; ++ i )
791  out << " const " << doubletype() << " phi" << i << d << " = base" << i << "[ idx + " << d << " ];" << std::endl;
792  }
793  }
794  for( int d = 0; d < dim ; ++ d )
795  {
796  for(int r = 0; r < dimRange; ++ r )
797  {
798  out << " result" << r << d << "[ row ] += phi0" << d << " * dof0" << r;
799  for( int i=1; i<simdW; ++i )
800  out << " + phi" << i << d << " * dof" << i << r;
801  out << " ;" << std::endl;
802  }
803  }
804  out << " }" << std::endl;
805  }
806 
807  static void evaluateJacobiansCodegen(std::ostream& out,
808  const int dim, const int dimRange, const size_t numRows, const size_t numCols )
809  {
810  const std::string funcName =
811  generateFunctionName( "evalJacobianLoop", simdWidth, dimRange, numRows, numCols );
812 
813  writePreCompHeader( out, -1 );
814 
815  out << "template <class BaseFunctionSet> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
816  out << "struct EvaluateJacobians<BaseFunctionSet, EmptyGeometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
817  out << "{" << std::endl;
818  out << " template< class QuadratureType,"<< std::endl;
819  out << " class JacobianRangeVectorType," << std::endl;
820  out << " class LocalDofVectorType," << std::endl;
821  out << " class JacobianRangeFactorType>" << std::endl;
822  out << " static void eval( const QuadratureType&," << std::endl;
823  out << " const EmptyGeometry&," << std::endl;
824  out << " const JacobianRangeVectorType&," << std::endl;
825  out << " const LocalDofVectorType&," << std::endl;
826  out << " JacobianRangeFactorType &)" << std::endl;
827  out << " {" << std::endl;
828  out << " std::cerr << \"ERROR: wrong code generated for VectorialBaseFunctionSet::axpyJacobians\" << std::endl;" << std::endl;
829  out << " abort();" << std::endl;
830  out << " }" << std::endl;
831  out << "};" << std::endl << std::endl;
832  out << "template <class BaseFunctionSet, class Geometry> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
833  out << "struct EvaluateJacobians<BaseFunctionSet, Geometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
834  out << "{" << std::endl;
835  out << " template< class QuadratureType,"<< std::endl;
836  out << " class JacobianRangeVectorType," << std::endl;
837  out << " class LocalDofVectorType," << std::endl;
838  out << " class JacobianRangeFactorType>" << std::endl;
839  out << " static void eval( const QuadratureType& quad," << std::endl;
840  out << " const Geometry& geometry," << std::endl;
841  out << " const JacobianRangeVectorType& jacStorage," << std::endl;
842  out << " const LocalDofVectorType& dofs," << std::endl;
843  out << " JacobianRangeFactorType& jacFactors)" << std::endl;
844  out << " {" << std::endl;
845  out << " evalJac( quad, geometry, jacStorage, dofs, jacFactors, jacFactors[ 0 ] );" << std::endl;
846  out << " }" << std::endl;
847  out << "private:" << std::endl;
848  out << " template< class QuadratureType,"<< std::endl;
849  out << " class JacobianRangeVectorType," << std::endl;
850  out << " class LocalDofVectorType," << std::endl;
851  out << " class JacobianRangeFactorType," << std::endl;
852  out << " class GlobalJacobianRangeType>" << std::endl;
853  out << " static void evalJac( const QuadratureType& quad," << std::endl;
854  out << " const Geometry& geometry," << std::endl;
855  out << " const JacobianRangeVectorType& jacStorage," << std::endl;
856  out << " const LocalDofVectorType& dofs," << std::endl;
857  out << " JacobianRangeFactorType& jacVector," << std::endl;
858  out << " const GlobalJacobianRangeType&)" << std::endl;
859  out << " {" << std::endl;
860  //out << " typedef typename JacobianRangeVectorType :: value_type value_type;" << std::endl;
861  //out << " typedef JacobianRangeType value_type;" << std::endl;
862  //out << " typedef typename JacobianRangeType :: field_type field_type;" << std::endl;
863  out << " typedef typename Geometry::JacobianInverseTransposed GeometryJacobianType;" << std::endl;
864 
865  const size_t nDofs = numRows * dimRange * dim ;
866  out << " typedef " << doubletype() << " Field;" << std::endl;
867  out << " static thread_local std::vector< Field > memory( " << nDofs << " );" << std::endl;
868  //out << " static Dune::Fem::ThreadSafeValue< std::vector< Field > > memory( " << nDofs << " );" << std::endl;
869 
870  for( int d = 0; d < dim ; ++ d )
871  {
872  // make length simd conform
873  //out << " Field* resultTmp" << d << " = (*memory).data() + " << d * numRows * dimRange << ";" << std::endl;
874  out << " Field* resultTmp" << d << " = memory.data() + " << d * numRows * dimRange << ";" << std::endl;
875  }
876  out << " for( int i=0; i<" << numRows * dimRange << "; ++i ) " << std::endl;
877  out << " {" << std::endl;
878  for( int d = 0; d < dim ; ++ d )
879  out << " resultTmp" << d << "[ i ] = 0;" << std::endl;
880  out << " }" << std::endl << std::endl;
881 
882  for( int d = 0; d < dim ; ++ d )
883  {
884  for(int r=0; r<dimRange ; ++r )
885  {
886  out << " Field* result" << r << d <<" = resultTmp" << d << " + " << r * numRows << " ;" << std::endl;
887  }
888  }
889  out << std::endl;
890 
891  const size_t simdNumCols = simdWidth * ( numCols / simdWidth );
892  out << " typedef typename GlobalJacobianRangeType :: row_type JacobianRangeType;" << std::endl;
893  out << " const " << doubletype() << "* jacobians = jacStorage.data();" << std::endl;
894  if( simdNumCols > 0 )
895  {
896  /*
897  for( int d = 0; d < dim; ++d )
898  {
899  for( int i=0; i< simdWidth; ++ i )
900  {
901  out << " Field* base" << i << d << " = memory.data() + " << nDofs + ((d * simdWidth) + i) * numRows << ";" << std::endl;
902  }
903  }
904  */
905 
906  //for( int i=0; i< simdWidth; ++ i )
907  //out << " JacobianRangeType gradPhi" << i << ";" << std::endl;
908 
909  out << " for( int col = 0, dof = 0 ; col < "<< simdNumCols <<" ; col += " << simdWidth << ", dof += " << simdWidth * dimRange<< " )"<<std::endl;
910  out << " {" << std::endl;
911  /*
912  out << " for( int row = 0; row < " << numRows << " ; ++ row )" << std::endl;
913  out << " {" << std::endl;
914  out << " int idx = quad.cachingPoint( row ) * " << numCols << " + col ;" << std::endl;
915  out << " // use reference to GeometryJacobianType to make code compile with SPGrid Geometry" << std::endl;
916  */
917  //out << " const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
918  /*
919  for( int i=0; i< simdWidth; ++ i )
920  {
921  const char* plusplus = (i == simdWidth-1) ? " " : "++";
922  out << " gjit.mv( jacStorage[ idx" << plusplus << " ][ 0 ], gradPhi" << i << " );" << std::endl;
923  }
924  */
925  /*
926  for( int i=0; i< simdWidth; ++ i )
927  {
928  const char* plusplus = (i == simdWidth-1) ? " " : "++";
929  out << " gradPhi" << i << " = jacStorage[ idx" << plusplus << " ][ 0 ];" << std::endl;
930  }
931  out << std::endl;
932  */
933  /*
934  for( int d = 0; d < dim; ++ d )
935  {
936  for( int i=0; i< simdWidth; ++ i )
937  {
938  out << " base" << i << d << "[ row ] = gradPhi"<< i << "[ " << d << " ];" << std::endl;
939  }
940  }
941  out << " }" << std::endl << std::endl;
942  */
943  for( int i=0; i< simdWidth; ++ i )
944  {
945  out << " const "<< doubletype() << "* base" << i << " = jacobians + (" << dim * numRows << " * (col + "<< i << "));" << std::endl;
946  }
947 
948  out << " " << funcName << "(";
949  for( int i = 0; i< simdWidth * dimRange; ++i )
950  out << " dofs[ dof + " << i << " ],";
951  out << std::endl << " ";
952  for( int i=0; i< simdWidth; ++i )
953  out << "base" << i << ", ";
954  out << std::endl << " ";
955  for( int d = 0; d < dim; ++ d )
956  {
957  for( int r = 0; r < dimRange; ++r)
958  {
959  out << "result" << r << d;
960  if( (r == dimRange - 1) && (d == dim-1 ) ) out << std::endl;
961  else out << ", ";
962  }
963  }
964  out << " );" << std::endl;
965  out << " }"<< std::endl;
966  out << std::endl;
967  }
968 
969  // remainder iteration
970  if( numCols > simdNumCols )
971  {
972  out << " // remainder iteration" << std::endl;
973  out << " for( int col = " << simdNumCols << ", dof = " << simdNumCols * dimRange << " ; col < " << numCols << " ; ++col )" << std::endl;
974  out << " {" << std::endl;
975  out << " const "<< doubletype() << "* base0" << " = jacobians + (" << dim * numRows << " * col);" << std::endl;
976  for( int r=0; r<dimRange; ++r )
977  out << " const " << doubletype() << " dof0" << r << " = dofs[ dof++ ];" << std::endl;
978  writeInnerJacEvalLoop( out, 1, dim, dimRange, numRows, numCols );
979  out << " }" << std::endl;
980  out << std::endl;
981  }
982 
983  out << " // store result" << std::endl;
984  out << " JacobianRangeType tmp;" << std::endl;
985  out << " for(int row = 0; row < " << numRows << " ; ++row )" << std::endl;
986  out << " {" << std::endl;
987  out << " const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
988  out << " GlobalJacobianRangeType& result = jacVector[ row ];" << std::endl;
989  for( int r = 0 ; r < dimRange; ++ r )
990  {
991  for( int d = 0 ; d < dim; ++ d )
992  {
993  out << " tmp[ " << d << " ] = result" << r << d << "[ row ];" << std::endl;
994  /*
995  {
996  out << " result[ " << r << " ][ " << d <<" ] = result" << r << d << "[ row ];" << std::endl;
997  }
998  */
999  }
1000  out << " gjit.mv( tmp, result[ "<< r << " ] );" << std::endl;
1001  }
1002  out << " }" << std::endl;
1003  out << " }" << std::endl << std::endl;
1004  //out << "};" << std::endl;
1005 
1006  //writePreCompHeader( out, 0 );
1007  out << " static void " << funcName << "(" << std::endl;
1008  for( int i=0; i<simdWidth; ++i )
1009  {
1010  out << " ";
1011  for( int r=0; r<dimRange; ++ r )
1012  out << " const " << doubletype() << " dof"<< i << r << ",";
1013  out << std::endl;
1014  }
1015  for( int i=0; i<simdWidth; ++ i )
1016  out << " const " << doubletype() << "* " << restrictKey() << " base" << i << "," << std::endl;
1017  for( int d=0; d<dim; ++ d )
1018  {
1019  for( int r=0; r<dimRange; ++ r )
1020  {
1021  out << " " << doubletype() << "* "<< restrictKey() << " result" << r << d;
1022  if( (r == dimRange - 1) && (d == dim-1 ) ) out << " )" << std::endl;
1023  else out << "," << std::endl;
1024  }
1025  }
1026 
1027  //writePreCompHeader( out, 1 );
1028  out << " {" << std::endl;
1029  writeInnerJacEvalLoop( out, simdWidth, dim, dimRange, numRows, numCols );
1030  out << " }" << std::endl;
1031  out << "};" << std::endl;
1032  writePreCompHeader( out, 2 );
1033  }
1034 
1035  static void writeInnerLoopAxpyJac(std::ostream& out, const int dim, const int dimRange, const size_t numCols )
1036  {
1037  out << " for( int col = 0; col < " << numCols << " ; ++col )" << std::endl;
1038  out << " {" << std::endl;
1039  for( int d =0; d < dim; ++d )
1040  out << " const " << doubletype() << " phi" << d << " = base[ (col * " << dim << ") + " << d << " ];" << std::endl;
1041 
1042  for( int r = 0; r < dimRange; ++r )
1043  {
1044  out << " result" << r << "[ col ] += phi0 * jacFactorInv0" << r;
1045  for( int d=1; d < dim; ++d )
1046  out << " + phi" << d << " * jacFactorInv" << d << r;
1047  out << ";" << std::endl;
1048  }
1049  out << " }" << std::endl;
1050  }
1051 
1052  static void axpyJacobianCodegen(std::ostream& out,
1053  const int dim, const int dimRange, const size_t numRows, const size_t numCols )
1054  {
1055  const std::string funcName =
1056  generateFunctionName( "axpyJacobianLoop", simdWidth, dimRange, numRows, numCols );
1057 
1058  writePreCompHeader( out, -1 );
1059 
1060  out << "template <class BaseFunctionSet> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
1061  out << "struct AxpyJacobians<BaseFunctionSet, EmptyGeometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
1062  out << "{" << std::endl;
1063  out << " template< class QuadratureType,"<< std::endl;
1064  out << " class JacobianRangeVectorType," << std::endl;
1065  out << " class JacobianRangeFactorType," << std::endl;
1066  out << " class LocalDofVectorType>" << std::endl;
1067  out << " static void axpy( const QuadratureType&," << std::endl;
1068  out << " const EmptyGeometry&," << std::endl;
1069  out << " const JacobianRangeVectorType&," << std::endl;
1070  out << " const JacobianRangeFactorType&," << std::endl;
1071  out << " LocalDofVectorType&)" << std::endl;
1072  out << " {" << std::endl;
1073  out << " std::cerr << \"ERROR: wrong code generated for VectorialBaseFunctionSet::axpyJacobians\" << std::endl;" << std::endl;
1074  out << " abort();" << std::endl;
1075  out << " }" << std::endl;
1076  out << "};" << std::endl << std::endl;
1077  out << "template <class BaseFunctionSet, class Geometry>" << std::endl;
1078  out << "struct AxpyJacobians<BaseFunctionSet, Geometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
1079  out << "{" << std::endl;
1080 
1081  out << " template< class QuadratureType,"<< std::endl;
1082  out << " class JacobianRangeVectorType," << std::endl;
1083  out << " class JacobianRangeFactorType," << std::endl;
1084  out << " class LocalDofVectorType>" << std::endl;
1085  out << " static void axpy( const QuadratureType& quad," << std::endl;
1086  out << " const Geometry& geometry," << std::endl;
1087  out << " const JacobianRangeVectorType& jacStorage," << std::endl;
1088  out << " const JacobianRangeFactorType& jacFactors," << std::endl;
1089  out << " LocalDofVectorType& dofs)" << std::endl;
1090  out << " {" << std::endl;
1091  out << " typedef typename JacobianRangeFactorType :: value_type JacobianRangeType;" << std::endl << std::endl;
1092 
1093  const size_t dofs = dimRange * numCols ;
1094  out << " typedef " << doubletype() << " Field;" << std::endl;
1095  out << " static thread_local std::vector< Field > memory( " << dofs << " );" << std::endl;
1096  //out << " static Dune::Fem::ThreadSafeValue< std::vector< Field > > memory( " << dofs << " );" << std::endl;
1097 
1098  //out << " Field* result = (*memory).data();" << std::endl;
1099  out << " Field* result = memory.data();" << std::endl;
1100  out << " for( int i = 0 ; i < " << dofs << "; ++i ) result[ i ] = 0;" << std::endl << std::endl;
1101 
1102  for( int r=0; r<dimRange; ++r )
1103  out << " Field* result" << r << " = result + " << r * numCols << ";" << std::endl;
1104  out << std::endl;
1105 
1106  //for( int d =0; d < dim; ++d )
1107  //{
1108  // out << " Field* base"<< d << " = result + " << dofs + d * numCols << ";" << std::endl;
1109  //}
1110 
1111  out << " const Field* base = jacStorage.data();" << std::endl << std::endl;
1112  out << " JacobianRangeType jacFactorTmp;" << std::endl;
1113  out << " for( int row = 0; row < " << numRows << " ; ++ row )" << std::endl;
1114  out << " {" << std::endl;
1115  out << " typedef typename Geometry::JacobianInverseTransposed GeometryJacobianType;" << std::endl;
1116  out << " // use reference to GeometryJacobianType to make code compile with SPGrid Geometry" << std::endl;
1117  out << " const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
1118  out << " for( int r = 0; r < " << dimRange << " ; ++r )" << std::endl;
1119  out << " {"<<std::endl;
1120  out << " gjit.mtv( jacFactors[ row ][ r ], jacFactorTmp[ r ] );" << std::endl;
1121  out << " }" << std::endl << std::endl;
1122 
1123  /*
1124  out << " for( int col = 0, rowCol = quad.cachingPoint( row ) * " << numCols << "; col < " << numCols << " ; ++ col, ++rowCol )" << std::endl;
1125  out << " {" << std::endl;
1126  for( int d =0; d < dim; ++d )
1127  out << " base"<< d << "[ col ] = jacStorage[ rowCol ][ 0 ][ " << d << " ];" << std::endl;
1128 
1129  out << " }" << std::endl;
1130  */
1131 
1132  out << " // calculate updates" << std::endl;
1133  out << " " << funcName << "(";
1134  //for( int d =0; d < dim; ++d ) out << "base" << d << ", ";
1135  out << " base + ( quad.localCachingPoint( row ) * " << numCols * dim << " )," << std::endl;
1136  for( int i =0; i < dim; ++i )
1137  {
1138  out << " ";
1139  for( int r = 0; r < dimRange; ++ r )
1140  out << " jacFactorTmp[ " << r << " ][ " << i << " ], ";
1141  out << std::endl;
1142  }
1143  out << " ";
1144  for( int r = 0; r < dimRange; ++ r )
1145  {
1146  out << " result" << r;
1147  if( r == dimRange -1 )
1148  out << " );" << std::endl ;
1149  else
1150  out << ", ";
1151  }
1152  out << " }" << std::endl << std::endl;
1153 
1154  out << " // sum up results (transform from variable based to point based layout)" << std::endl;
1155  out << " for( int col = 0, dof = 0 ; col < "<< numCols << " ; ++col )" << std::endl;
1156  out << " {" << std::endl;
1157  for( int r = 0 ; r < dimRange; ++ r )
1158  out << " dofs[ dof++ ] += result" << r << "[ col ];" << std::endl;
1159  out << " }" << std::endl;
1160 
1161  out << " }" << std::endl << std::endl;
1162  //out << "};" << std::endl;
1163 
1164 
1166  // inner loop
1168  //writePreCompHeader( out, 0 );
1169 
1170  out << " static void " << funcName << "(" << std::endl;
1171  out << " const " << doubletype() << "* " << restrictKey() << " base," << std::endl;
1172  //for( int i=1; i<dim; ++ i )
1173  // out << " const " << doubletype() << "* " << restrictKey() << " base" << i << "," << std::endl;
1174  for( int i=0; i<dim; ++i )
1175  {
1176  out << " ";
1177  for( int r=0; r<dimRange; ++ r )
1178  out << "const " << doubletype() << " jacFactorInv"<< i << r << ", ";
1179  out << std::endl;
1180  }
1181  for( int r = 0; r < dimRange; ++r )
1182  {
1183  out << " " << doubletype() << "* " << restrictKey() << " result" << r;
1184  if( r == dimRange-1 )
1185  out << " )" << std::endl;
1186  else
1187  out << "," << std::endl;
1188  }
1189  //writePreCompHeader( out, 1 );
1190  out << " {" << std::endl;
1191  writeInnerLoopAxpyJac( out, dim, dimRange, numCols );
1192  out << " }" << std::endl;
1193  out << "};" << std::endl;
1194  writePreCompHeader( out, 2 );
1195  }
1196  };
1197 
1198  // if this pre processor variable is defined then
1199  // we assume that CODEGENERATOR_REPLACEMENT is CodeGenerator of choice
1200 #ifndef FEM_CODEGENERATOR_REPLACEMENT
1202 #else
1203  typedef FEM_CODEGENERATOR_REPLACEMENT CodeGeneratorType;
1204 #endif
1205 
1206  inline std::string autoFilename(const int dim, const int polynomialOrder )
1207  {
1208  std::stringstream name;
1209  name << "autogeneratedcode_" << dim << "d_k" << polynomialOrder << ".hh";
1210  return name.str();
1211  }
1212 
1213  } // namespace Codegen
1214 
1215 
1216  template <class DiscreteFunctionSpace, class Vector>
1217  inline void generateCode (const DiscreteFunctionSpace& space,
1218  const Vector& elemQuadOrders,
1219  const Vector& faceQuadOrders,
1220  const std::string& outpath = "./",
1221  const std::string& filename = "autogeneratedcode.hh" )
1222  {
1223  using namespace Codegen;
1224 
1225  const int dimRange = DiscreteFunctionSpace :: dimRange;
1226  const int dimDomain = DiscreteFunctionSpace :: dimDomain;
1227  const int dimGrad = dimRange*dimDomain;
1228 
1229  typedef typename DiscreteFunctionSpace :: GridPartType GridPartType;
1230 
1231  typedef CachingQuadrature< GridPartType, 0 > ElementQuadratureType;
1232  typedef CachingQuadrature< GridPartType, 1 > FaceQuadratureType;
1233 
1234 
1235  std::set< int > sizes;
1236  std::set< int > quadNops;
1237  for( const auto& entity : space )
1238  {
1239  // only use size of scalar basis function set, i.e. dimRange = 1
1240  const int scalarSize = space.basisFunctionSet( entity ).size() / dimRange ;
1241  sizes.insert( scalarSize );
1242 
1243  const auto iend = space.gridPart().iend( entity );
1244  for( auto it = space.gridPart().ibegin( entity ); it != iend; ++it )
1245  {
1246  for( const auto& quadOrd : faceQuadOrders )
1247  {
1248  FaceQuadratureType quad( space.gridPart(), *it, quadOrd, FaceQuadratureType::INSIDE );
1249  quadNops.insert( quad.nop() );
1250  }
1251  }
1252  }
1253 
1254  for( const auto& type : space.indexSet().types(0))
1255  {
1256  for( const auto& quadOrd : elemQuadOrders )
1257  {
1258  ElementQuadratureType quad( type, quadOrd );
1259  quadNops.insert( quad.nop() );
1260  }
1261  }
1262 
1263  std::string path ( outpath );
1264  path += "/autogeneratedcode";
1265 
1266  // set output path
1267  CodegenInfo& info = CodegenInfo::instance();
1268  info.setPath( path );
1269 
1270  // add my dimrange
1271  info.addDimRange( &space, dimRange );
1272  int gradSpace;
1273  info.addDimRange( &gradSpace, dimGrad );
1274 
1275  for( const auto& size : sizes )
1276  {
1277  for( const auto& quadNop : quadNops )
1278  {
1279  info.addEntry( "evalranges",
1280  CodeGeneratorType :: evaluateCodegen, dimDomain, dimRange, quadNop, size );
1281  info.addEntry( "evaljacobians",
1282  CodeGeneratorType :: evaluateJacobiansCodegen, dimDomain, dimRange, quadNop, size );
1283  info.addEntry( "axpyranges",
1284  CodeGeneratorType :: axpyCodegen, dimDomain, dimRange, quadNop, size );
1285  info.addEntry( "axpyjacobians",
1286  CodeGeneratorType :: axpyJacobianCodegen, dimDomain, dimRange, quadNop, size );
1287 
1288  info.addEntry( "evalranges",
1289  CodeGeneratorType :: evaluateCodegen, dimDomain, dimGrad, quadNop, size );
1290  info.addEntry( "evaljacobians",
1291  CodeGeneratorType :: evaluateJacobiansCodegen, dimDomain, dimGrad, quadNop, size );
1292  info.addEntry( "axpyranges",
1293  CodeGeneratorType :: axpyCodegen, dimDomain, dimGrad, quadNop, size );
1294  info.addEntry( "axpyjacobians",
1295  CodeGeneratorType :: axpyJacobianCodegen, dimDomain, dimRange, quadNop, size );
1296  }
1297  }
1298 
1299  //std::cerr << "Code for k="<< MAX_POLORD << " generated!! " << std::endl;
1300  //std::remove( autoFilename( CODEDIM, MAX_POLORD ).c_str() );
1301 
1302  info.setFileName( filename );
1303  bool written = info.dumpInfo();
1304 
1305  if( written )
1306  {
1307 #ifndef NDEBUG
1308  std::cout << "Written code to " << filename << std::endl;
1309 #endif
1311  // write include header
1313  std::ofstream file( outpath + "/" + filename );
1314 
1315  if( file )
1316  {
1317  std::string header( filename );
1318  size_t size = header.size();
1319  // replace all . with _
1320  for( size_t i=0; i<size; ++i )
1321  {
1322  if( header[ i ] == '.' )
1323  header[ i ] = '_';
1324  }
1325 
1326  file << "#ifndef " << header << "_INCLUDED" << std::endl;
1327  file << "#define " << header << "_INCLUDED" << std::endl;
1328  file << "#ifndef USE_BASEFUNCTIONSET_CODEGEN" << std::endl;
1329  file << "#define USE_BASEFUNCTIONSET_CODEGEN" << std::endl;
1330  file << "#endif" << std::endl;
1331  file << "// this is the file containing the necessary includes for the specialized codes" << std::endl;
1332  file << "#define DUNE_FEM_INCLUDE_AUTOGENERATEDCODE_FILENAME_SPEC \"" << path << "/" << filename << "\"" << std::endl;
1333  file << "#endif" << std::endl;
1334  }
1335  }
1336  else
1337  {
1338 #ifndef NDEBUG
1339  std::cout << "No changes written to " << filename << std::endl << std::endl;
1340 #endif
1341  }
1342  }
1343 
1344  } // namespace Fem
1345 
1346 } // namespace Dune
1347 
1348 #endif // #ifndef DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
std::string path
Definition: readioparams.cc:156
Definition: bindguard.hh:11
bool createDirectory(const std::string &inName)
create a directory
Definition: io.cc:19
IteratorRange< typename DF::DofIteratorType > dofs(DF &df)
Iterates over all DOFs.
Definition: rangegenerators.hh:76
void generateCode(const DiscreteFunctionSpace &space, const Vector &elemQuadOrders, const Vector &faceQuadOrders, const std::string &outpath="./", const std::string &filename="autogeneratedcode.hh")
Definition: codegen.hh:1217
static constexpr T max(T a)
Definition: utility.hh:77
static constexpr T min(T a)
Definition: utility.hh:93
std::string autoFilename(const int dim, const int polynomialOrder)
Definition: codegen.hh:1206
DefaultCodeGenerator< 4 > CodeGeneratorType
Definition: codegen.hh:1201
Container for User Specified Parameters.
Definition: io/parameter.hh:191
Definition: grcommon.hh:31
Definition: codegen.hh:32
static CodegenInfo & instance()
Definition: codegen.hh:77
void setPath(const std::string &path)
overwrite path
Definition: codegen.hh:96
CodegenInfo()
Definition: codegen.hh:67
void finalize() const
Definition: codegen.hh:207
void setFileName(const std::string &filename)
overwrite filename
Definition: codegen.hh:99
void addEntry(const std::string &fileprefix, codegenfunc_t *codegenfunc, const int dim, const int dimRange, const int quadNop, const int numBase)
Definition: codegen.hh:116
void addDimRange(const BaseSet *baseSet, const int dimRange)
Definition: codegen.hh:102
void clear()
clear all registered entries
Definition: codegen.hh:83
bool dumpInfo(const bool writeToCurrentDir=false) const
Definition: codegen.hh:218
int exists(const std::string &filename) const
Definition: codegen.hh:328
bool checkAbort() const
Definition: codegen.hh:340
default code generator methods
Definition: codegen.hh:368
static std::string generateFunctionName(const std::string &prefix, const int simdW, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:415
static void writeInnerLoopEval(std::ostream &out, const int simdW, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:424
static void writeInnerLoop(std::ostream &out, const int simdW, const int dimRange, const size_t numCols)
Definition: codegen.hh:590
static const char * doubletype()
Definition: codegen.hh:374
static void evaluateJacobiansCodegen(std::ostream &out, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:807
static void writePreCompHeader(std::ostream &out, const int stage)
Definition: codegen.hh:383
static const char * restrictKey()
Definition: codegen.hh:369
static void axpyCodegen(std::ostream &out, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:625
static void writeInnerJacEvalLoop(std::ostream &out, const int simdW, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:748
static void evaluateCodegen(std::ostream &out, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:452
static void axpyJacobianCodegen(std::ostream &out, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:1052
static void writeInnerLoopAxpyJac(std::ostream &out, const int dim, const int dimRange, const size_t numCols)
Definition: codegen.hh:1035
static Object & instance(Args &&... args)
return singleton instance of given Object type.
Definition: singleton.hh:101
discrete function space