Eigen 3.2.92
AssignEvaluator.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H

namespace Eigen {

// This implementation is based on Assign.h

namespace internal {

/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling       *
***************************************************************************/

// copy_using_evaluator_traits is based on assign_traits

template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
  typedef typename DstEvaluator::XprType Dst;
  typedef typename Dst::Scalar DstScalar;
  // TODO distinguish between linear traversal and inner-traversals
  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType;

  enum {
    DstFlags = DstEvaluator::Flags,
    SrcFlags = SrcEvaluator::Flags,
    RequiredAlignment = unpacket_traits<PacketType>::alignment
  };

public:
  enum {
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
    DstHasDirectAccess = DstFlags & DirectAccessBit,
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };

private:
  enum {
    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
              : int(Dst::RowsAtCompileTime),
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
                 : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
                 : int(Dst::MaxRowsAtCompileTime),
    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
    PacketSize = unpacket_traits<PacketType>::size
  };

  enum {
    DstIsRowMajor = DstFlags&RowMajorBit,
    SrcIsRowMajor = SrcFlags&RowMajorBit,
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
    MightVectorize = StorageOrdersAgree
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && (functor_traits<AssignFunc>::PacketAccess),
    MayInnerVectorize = MightVectorize
                     && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
                     && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0
                     && int(JointAlignment)>=int(RequiredAlignment),
    MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
                      && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize = MightVectorize && DstHasDirectAccess
                     && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix */
  };

public:
  enum {
    Traversal = int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
              : int(MayLinearize)       ? int(LinearTraversal)
                                        : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

private:
  enum {
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
                       && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
    MayUnrollInner = int(InnerSize) != Dynamic
                  && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
  };

public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
              ? (
                  int(MayUnrollCompletely) ? int(CompleteUnrolling)
                : int(MayUnrollInner)      ? int(InnerUnrolling)
                                           : int(NoUnrolling)
                )
              : int(Traversal) == int(LinearVectorizedTraversal)
              ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
                                                                                           : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
              ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
                                            : int(NoUnrolling) )
              : int(NoUnrolling)
  };

#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(RequiredAlignment)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};
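
// Illustrative sketch (not part of the original file): how these traits can be
// queried for a concrete assignment. The typedef names below are hypothetical;
// with EIGEN_DEBUG_ASSIGN defined, Traits::debug() above prints the same decisions.
//
//   typedef Eigen::internal::evaluator<Eigen::Matrix4f> Eval4f;   // dst and src evaluators
//   typedef Eigen::internal::assign_op<float>           AssignOp; // plain '=' functor
//   typedef Eigen::internal::copy_using_evaluator_traits<Eval4f,Eval4f,AssignOp> Traits;
//   // On a typical SSE build one would expect Traits::Traversal to be
//   // InnerVectorizedTraversal and Traits::Unrolling to be CompleteUnrolling,
//   // since four floats fill exactly one packet and the matrix is small.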

/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/

/************************
*** Default traversal ***
************************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.assignCoeffByOuterInner(outer, inner);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
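
// For example, for a 2x2 column-major destination (InnerSizeAtCompileTime == 2),
// instantiating copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel,0,4>
// expands at compile time into four straight-line calls:
//   kernel.assignCoeffByOuterInner(0, 0);   // Index 0
//   kernel.assignCoeffByOuterInner(0, 1);   // Index 1
//   kernel.assignCoeffByOuterInner(1, 0);   // Index 2
//   kernel.assignCoeffByOuterInner(1, 1);   // Index 3
// and the Index==Stop specialization above terminates the recursion.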

template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.assignCoeffByOuterInner(outer, Index_);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
};

/***********************
*** Linear traversal ***
***********************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
  {
    kernel.assignCoeff(Index);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

/**************************
*** Inner vectorization ***
**************************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::PacketType PacketType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,
    JointAlignment = Kernel::AssignmentTraits::JointAlignment
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
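
// Unlike the coefficient-wise unrollers, this one advances Index by the packet
// size instead of 1. For example (illustrative numbers), with Stop == 16 and a
// 4-element PacketType, run() expands into four packet stores at indices
// 0, 4, 8 and 12 before the Index==Stop terminator above is reached.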

template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

// dense_assignment_loop is based on assign_impl

template<typename Kernel,
         int Traversal = Kernel::AssignmentTraits::Traversal,
         int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;

/************************
*** Default traversal ***
************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
        kernel.assignCoeffByOuterInner(outer, inner);
      }
    }
  }
};
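
// For a column-major destination this double loop visits the matrix column by
// column (outer == column index, inner == row index); for a row-major one the
// roles are swapped (see rowIndexByOuterInner/colIndexByOuterInner in Part 4),
// so memory is always walked in storage order.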

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;

    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
};

/***************************
*** Linear vectorization ***
***************************/


// The only goal of unaligned_dense_assignment_loop is to factor out the handling
// of the non-vectorizable beginning and ending parts

template <bool IsAligned = false>
struct unaligned_dense_assignment_loop
{
  // if IsAligned is true, then there is nothing to do
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
};

template <>
struct unaligned_dense_assignment_loop<false>
{
  // MSVC must not inline this function. If it does, it fails to optimize the
  // packet access path.
  // FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
  template <typename Kernel>
  static EIGEN_DONT_INLINE void run(Kernel &kernel,
                                    Index start,
                                    Index end)
#else
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
                                                        Index start,
                                                        Index end)
#endif
  {
    for (Index index = start; index < end; ++index)
      kernel.assignCoeff(index);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
      packetSize = unpacket_traits<PacketType>::size,
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
                                                            : int(Kernel::AssignmentTraits::DstAlignment),
      srcAlignment = Kernel::AssignmentTraits::JointAlignment
    };
    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);

    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);

    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
  }
};
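
// Worked example (illustrative numbers): size == 11, packetSize == 4 and a
// destination whose first aligned element is at index 1. Then alignedStart == 1
// and alignedEnd == 1 + ((11-1)/4)*4 == 9, so indices [0,1) and [9,11) are
// assigned coefficient-wise while [1,9) is covered by two aligned packet stores.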

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;

    enum { size = DstXprType::SizeAtCompileTime,
           packetSize = packet_traits<typename Kernel::Scalar>::size,
           alignedSize = (size/packetSize)*packetSize };

    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
  }
};

/**************************
*** Inner vectorization ***
**************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index packetSize = unpacket_traits<PacketType>::size;
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
        kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
};

/***********************
*** Linear traversal ***
***********************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    for(Index i = 0; i < size; ++i)
      kernel.assignCoeff(i);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

/***************************
*** Slice vectorization ***
***************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
  {
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      packetSize = unpacket_traits<PacketType>::size,
      requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = alignable ? int(requestedAlignment)
                               : int(Kernel::AssignmentTraits::DstAlignment)
    };
    const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
    if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligned on a scalar boundary, so alignment is not possible
      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);

      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};
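
// Worked example (illustrative numbers): innerSize == 7, packetSize == 4 and
// outerStride == 7. Then alignedStep == (4 - 7%4) & 3 == 1, so if column 0
// starts packet-aligned (alignedStart == 0), column 1 needs one scalar head
// element before its first aligned packet (alignedStart == 1), and so on:
// each column's aligned window shifts by one slot modulo the packet size.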

/***************************************************************************
* Part 4 : Generic dense assignment kernel
***************************************************************************/

// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators, and the actual assignment functor.
// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, bypassing the functor.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class generic_dense_assignment_kernel
{
protected:
  typedef typename DstEvaluatorTypeT::XprType DstXprType;
  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:

  typedef DstEvaluatorTypeT DstEvaluatorType;
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
  typedef typename DstEvaluatorType::Scalar Scalar;
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;


  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
  {
    #ifdef EIGEN_DEBUG_ASSIGN
    AssignmentTraits::debug();
    #endif
  }

  EIGEN_DEVICE_FUNC Index size() const        { return m_dstExpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const   { return m_dstExpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const   { return m_dstExpr.outerSize(); }
  EIGEN_DEVICE_FUNC Index rows() const        { return m_dstExpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }

  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
  {
    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
  {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignCoeff(row, col);
  }


  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
  }

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1 ? 0
         : int(Traits::ColsAtCompileTime) == 1 ? inner
         : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
         : inner;
  }

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1 ? 0
         : int(Traits::RowsAtCompileTime) == 1 ? inner
         : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
         : outer;
  }

protected:
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor &m_functor;
  // TODO find a way to avoid the need for the original expression
  DstXprType& m_dstExpr;
};
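
// Illustrative sketch (not part of the original file): the same kernel drives
// compound assignments simply by swapping the functor. In this Eigen version
// assign_op takes a single Scalar parameter, and add_assign_op is assumed to
// follow the same form:
//
//   typedef Eigen::internal::add_assign_op<float> AddOp;  // functor for '+='
//   // generic_dense_assignment_kernel<DstEval, SrcEval, AddOp> then turns every
//   // assignCoeff()/assignPacket() into a read-modify-write 'dst += src'.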

/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/

template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
{
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());

  typedef evaluator<DstXprType> DstEvaluatorType;
  typedef evaluator<SrcXprType> SrcEvaluatorType;

  DstEvaluatorType dstEvaluator(dst);
  SrcEvaluatorType srcEvaluator(src);

  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

  dense_assignment_loop<Kernel>::run(kernel);
}
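
// Illustrative sketch (not part of the original file): what this entry point
// amounts to for a concrete pair of expressions.
//
//   Eigen::MatrixXf a(3,3), b(3,3);
//   // 'a = b + b;' eventually reaches (through call_assignment in Part 6):
//   //   internal::call_dense_assignment_loop(a, b + b);
//   // which builds both evaluators, wraps them in a kernel together with
//   // assign_op<float>, and lets dense_assignment_loop pick the traversal.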

template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
}

/***************************************************************************
* Part 6 : Generic assignment
***************************************************************************/

// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
template<typename DstShape, typename SrcShape> struct AssignmentKind;

// Assignment kinds defined in this file:
struct Dense2Dense {};
struct EigenBase2EigenBase {};

template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };

// This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
          typename Scalar = typename DstXprType::Scalar>
struct Assignment;


// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not have to bother about these annoying details.

template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}

// Deal with "assume-aliasing"
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  typename plain_matrix_type<Src>::type tmp(src);
  call_assignment_no_alias(dst, tmp, func);
}

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  call_assignment_no_alias(dst, src, func);
}

// bypass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
  call_assignment_no_alias(dst.expression(), src, func);
}
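
// For example, with dense matrices a, b and c:
//   c = a * b;            // assume-aliasing: the product is evaluated into a
//                         // temporary first (the enable_if overload above)
//   c.noalias() = a * b;  // this NoAlias overload skips the temporary and
//                         // evaluates the product directly into c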


template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
  enum {
    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
                        || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
                      ) && int(Dst::SizeAtCompileTime) != 1
  };

  Index dstRows = NeedToTranspose ? src.cols() : src.rows();
  Index dstCols = NeedToTranspose ? src.rows() : src.cols();
  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
    dst.resize(dstRows, dstCols);

  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
  ActualDstType actualDst(dst);

  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);

  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
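
// The NeedToTranspose logic above is what makes vector assignments transpose
// automatically. For example:
//   Eigen::Vector3f    col;
//   Eigen::RowVector3f row(1,2,3);
//   col = row;   // Dst is 3x1, Src is 1x3: NeedToTranspose is true, so the
//                // assignment actually runs on Transpose<Dst> and fills col
//                // with (1,2,3) instead of failing the size check.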
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
{
  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
}

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
  Index dstRows = src.rows();
  Index dstCols = src.cols();
  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
    dst.resize(dstRows, dstCols);

  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)

  Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
}

// forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);

// Generic Dense to Dense assignment
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
{
  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());

#ifndef EIGEN_NO_DEBUG
    internal::check_for_aliasing(dst, src);
#endif

    call_dense_assignment_loop(dst, src, func);
  }
};

// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to the new evaluator mechanism.
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
{
  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.evalTo(dst);
  }
};

} // namespace internal

} // end namespace Eigen

#endif // EIGEN_ASSIGN_EVALUATOR_H
Definition: Constants.h:124