#ifndef BT_MATRIX3x3_H
#define BT_MATRIX3x3_H

#define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))

#if defined(BT_USE_SSE)
#define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
#define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
#define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
#elif defined(BT_USE_NEON)
    // ... (NEON definitions of v1000, v0100 and v0010)
#endif

#ifdef BT_USE_DOUBLE_PRECISION
#define btMatrix3x3Data btMatrix3x3DoubleData
#else
#define btMatrix3x3Data btMatrix3x3FloatData
#endif  //BT_USE_DOUBLE_PRECISION

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
    /**@brief Copy constructor: copies the raw 128-bit rows */
    SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& rhs)
    {
        m_el[0].mVec128 = rhs.m_el[0].mVec128;
        m_el[1].mVec128 = rhs.m_el[1].mVec128;
        m_el[2].mVec128 = rhs.m_el[2].mVec128;
    }

    /**@brief Assignment operator */
    SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
    {
        m_el[0].mVec128 = m.m_el[0].mVec128;
        m_el[1].mVec128 = m.m_el[1].mVec128;
        m_el[2].mVec128 = m.m_el[2].mVec128;
        return *this;
    }
#else
    /**@brief Copy constructor */
    SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& other)
    {
        m_el[0] = other.m_el[0];
        m_el[1] = other.m_el[1];
        m_el[2] = other.m_el[2];
    }

    /**@brief Assignment operator */
    SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
    {
        m_el[0] = other.m_el[0];
        m_el[1] = other.m_el[1];
        m_el[2] = other.m_el[2];
        return *this;
    }
#endif
    /**@brief Get a column of the matrix as a vector
     *  @param i Column number, 0 indexed */
    SIMD_FORCE_INLINE btVector3 getColumn(int i) const
    {
        return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
    }
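    // Usage sketch (illustrative): rows come back via getRow()/operator[],
    // columns via getColumn(). For a 90-degree rotation about Z:
    //
    //   btMatrix3x3 m;
    //   m.setEulerZYX(0, 0, SIMD_HALF_PI);
    //   btVector3 r0 = m.getRow(0);     // (0, -1, 0)
    //   btVector3 c0 = m.getColumn(0);  // (0,  1, 0)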
    /**@brief Set the matrix from a quaternion
     *  @param q The Quaternion to match */
    void setRotation(const btQuaternion& q)
    {
        btScalar d = q.length2();
        btFullAssert(d != btScalar(0.0));
        btScalar s = btScalar(2.0) / d;

#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        __m128 vs, Q = q.get128();
        __m128i Qi = btCastfTo128i(Q);
        __m128 Y, Z;
        __m128 V1, V2, V3;
        __m128 V11, V21, V31;
        __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);  // negate all four components
        __m128i NQi = btCastfTo128i(NQ);

        V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3)));  //  Y  X  Z  W
        V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3));                 // -X -X  Y  W
        V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3)));  //  Z  Y  X  W
        V1 = _mm_xor_ps(V1, vMPPP);                                         // flip the sign of the first element

        V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3)));  //  Y  Y  X  W
        V21 = _mm_unpackhi_ps(Q, Q);                                         //  Z  Z  W  W
        V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3));                 //  X  Z -X -W
        // ... (pairwise products of the permuted vectors)
        V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3));  // -Z -W  Y  W
        // ...
        V21 = _mm_xor_ps(V21, vMPPP);                         // -Z  Z  W  W
        V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3));  //  W  W -Y -W
        V31 = _mm_xor_ps(V31, vMPPP);                         // -W  W -Y -W
        Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3)));  // -W -Z -X -W
        Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3)));   //  Y  X  Y  W

        vs = _mm_load_ss(&s);
        // ... (accumulate the products into V1, V2, V3)
        vs = bt_splat3_ps(vs, 0);
        // ... (scale by s, add the identity rows v1000/v0100/v0010, store into m_el)
#else
        btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
        btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
        btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
        btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
        setValue(
            btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
            xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
            xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
#endif
    }
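    // Usage sketch (illustrative): the scalar branch above is the standard
    // quaternion-to-rotation-matrix expansion with s = 2 / |q|^2, so a unit
    // quaternion reproduces a pure rotation:
    //
    //   btQuaternion q(btVector3(0, 0, 1), SIMD_HALF_PI);  // 90 deg about Z
    //   btMatrix3x3 m;
    //   m.setRotation(q);
    //   btVector3 v = m * btVector3(1, 0, 0);  // approximately (0, 1, 0)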
    /**@brief Set the matrix from euler angles using YPR around YXZ respectively
     *  @param yaw Yaw about Y axis
     *  @param pitch Pitch about X axis
     *  @param roll Roll about Z axis */
    void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
    {
        setEulerZYX(roll, pitch, yaw);
    }
    /**@brief Set the matrix from euler angles YPR around ZYX axes
     *  @param eulerX Roll about X axis
     *  @param eulerY Pitch around Y axis
     *  @param eulerZ Yaw about Z axis */
    void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
    {
        btScalar ci(btCos(eulerX)), cj(btCos(eulerY)), ch(btCos(eulerZ));
        btScalar si(btSin(eulerX)), sj(btSin(eulerY)), sh(btSin(eulerZ));
        btScalar cc = ci * ch, cs = ci * sh, sc = si * ch, ss = si * sh;

        setValue(cj * ch, sj * sc - cs, sj * cc + ss,
                 cj * sh, sj * ss + cc, sj * cs - sc,
                 -sj, cj * si, cj * ci);
    }
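    // Usage sketch (illustrative): setEulerZYX takes the X, Y, Z angles in
    // that argument order but composes them as Rz(eulerZ) * Ry(eulerY) * Rx(eulerX):
    //
    //   btMatrix3x3 m;
    //   m.setEulerZYX(0, 0, SIMD_HALF_PI);  // yaw only: 90 deg about Z
    //   // m * btVector3(1, 0, 0) is approximately (0, 1, 0)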
    /**@brief Set the matrix to the identity */
    void setIdentity()
    {
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
        m_el[0] = v1000;
        m_el[1] = v0100;
        m_el[2] = v0010;
#else
        // ... (scalar identity via setValue)
#endif
    }

    /**@brief Return a const reference to the identity matrix */
    static const btMatrix3x3& getIdentity()
    {
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
        static const btMatrix3x3
            identityMatrix(v1000, v0100, v0010);
#else
        static const btMatrix3x3
            identityMatrix(
                btScalar(1.0), btScalar(0.0), btScalar(0.0),
                btScalar(0.0), btScalar(1.0), btScalar(0.0),
                btScalar(0.0), btScalar(0.0), btScalar(1.0));
#endif
        return identityMatrix;
    }
    /**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
     *  @param m The array to be filled */
    void getOpenGLSubMatrix(btScalar* m) const
    {
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        __m128 v0 = m_el[0].mVec128;
        __m128 v1 = m_el[1].mVec128;
        __m128 v2 = m_el[2].mVec128;  // x2 y2 z2 w2
        __m128* vm = (__m128*)m;
        __m128 vT;

        v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

        vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
        v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

        v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
        v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
        v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

        vm[0] = v0;
        vm[1] = v1;
        vm[2] = v2;
#elif defined(BT_USE_NEON)
        // note: zeros the w channel
        static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
        float32x4_t* vm = (float32x4_t*)m;
        float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
        float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0}, {y2 0}
        float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
        float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
        float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
        float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2 0

        vm[0] = v0;
        vm[1] = v1;
        vm[2] = v2;
#else
        // ... (scalar path writes the 9 elements column-major, zeroing m[3], m[7], m[11])
#endif
    }
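    // Usage sketch (illustrative, exact alignment macro usage is an
    // assumption): the rotation lands in the first 12 floats of a
    // column-major OpenGL matrix, one column per 4-float group, with the
    // w slots zeroed. SIMD builds expect a 16-byte-aligned destination.
    //
    //   ATTRIBUTE_ALIGNED16(btScalar gl[12]);
    //   m.getOpenGLSubMatrix(gl);
    //   // gl[0..2] = first column, gl[3] == 0, gl[4..6] = second column, ...
    //   btMatrix3x3 m2;
    //   m2.setFromOpenGLSubMatrix(gl);  // round-trips the rotation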
    /**@brief Get the matrix represented as a quaternion
     *  @param q The quaternion which will be set */
    void getRotation(btQuaternion& q) const
    {
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
        btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
        // ... (declarations of x, s and of the vec/f[4] union temp elided)

        if (trace > btScalar(0.0))
        {
            x = trace + btScalar(1.0);

            temp.f[0] = m_el[2].y() - m_el[1].z();
            temp.f[1] = m_el[0].z() - m_el[2].x();
            temp.f[2] = m_el[1].x() - m_el[0].y();
            temp.f[3] = x;
        }
        else
        {
            int i, j, k;
            if (m_el[0].x() < m_el[1].y())
            {
                if (m_el[1].y() < m_el[2].z())
                { i = 2; j = 0; k = 1; }
                else
                { i = 1; j = 2; k = 0; }
            }
            else
            {
                if (m_el[0].x() < m_el[2].z())
                { i = 2; j = 0; k = 1; }
                else
                { i = 0; j = 1; k = 2; }
            }

            x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);

            temp.f[3] = (m_el[k][j] - m_el[j][k]);
            temp.f[j] = (m_el[j][i] + m_el[i][j]);
            temp.f[k] = (m_el[k][i] + m_el[i][k]);
            temp.f[i] = x;
        }
        // ... (scale temp by 0.5 / btSqrt(x) and store it into q)
#else
        btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();

        btScalar temp[4];

        if (trace > btScalar(0.0))
        {
            btScalar s = btSqrt(trace + btScalar(1.0));
            temp[3] = (s * btScalar(0.5));
            s = btScalar(0.5) / s;

            temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
            temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
            temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
        }
        else
        {
            int i = m_el[0].x() < m_el[1].y() ? (m_el[1].y() < m_el[2].z() ? 2 : 1) : (m_el[0].x() < m_el[2].z() ? 2 : 0);
            int j = (i + 1) % 3;
            int k = (i + 2) % 3;

            btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
            temp[i] = s * btScalar(0.5);
            s = btScalar(0.5) / s;

            temp[3] = (m_el[k][j] - m_el[j][k]) * s;
            temp[j] = (m_el[j][i] + m_el[i][j]) * s;
            temp[k] = (m_el[k][i] + m_el[i][k]) * s;
        }
        q.setValue(temp[0], temp[1], temp[2], temp[3]);
#endif
    }
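    // Usage sketch (illustrative): matrix -> quaternion -> matrix round trip.
    // The recovered quaternion may differ from the original by sign, which
    // represents the same rotation:
    //
    //   btQuaternion q1(btVector3(1, 0, 0), btScalar(0.3));
    //   btMatrix3x3 m(q1);  // constructor from quaternion
    //   btQuaternion q2;
    //   m.getRotation(q2);  // q2 ~= q1 (or -q1)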
    /**@brief Get the matrix represented as euler angles around ZYX
     *  @param yaw Yaw around Z axis
     *  @param pitch Pitch around Y axis
     *  @param roll Roll around X axis
     *  @param solution_number Which of the two possible solutions (1 or 2) to return */
    void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
    {
        struct Euler
        {
            btScalar yaw;
            btScalar pitch;
            btScalar roll;
        };

        Euler euler_out;
        Euler euler_out2;  // second solution

        // Check that pitch is not at a singularity
        if (btFabs(m_el[2].x()) >= 1)
        {
            euler_out.yaw = 0;
            euler_out2.yaw = 0;

            // From difference of angles formula
            btScalar delta = btAtan2(m_el[0].x(), m_el[0].z());
            if (m_el[2].x() > 0)  // gimbal locked up
            {
                euler_out.pitch = SIMD_PI / btScalar(2.0);
                euler_out2.pitch = SIMD_PI / btScalar(2.0);
                euler_out.roll = euler_out.pitch + delta;
                euler_out2.roll = euler_out.pitch + delta;
            }
            else  // gimbal locked down
            {
                euler_out.pitch = -SIMD_PI / btScalar(2.0);
                euler_out2.pitch = -SIMD_PI / btScalar(2.0);
                euler_out.roll = -euler_out.pitch + delta;
                euler_out2.roll = -euler_out.pitch + delta;
            }
        }
        else
        {
            euler_out.pitch = -btAsin(m_el[2].x());
            euler_out2.pitch = SIMD_PI - euler_out.pitch;

            euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
                                     m_el[2].z() / btCos(euler_out.pitch));
            euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
                                      m_el[2].z() / btCos(euler_out2.pitch));

            euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
                                    m_el[0].x() / btCos(euler_out.pitch));
            euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
                                     m_el[0].x() / btCos(euler_out2.pitch));
        }

        if (solution_number == 1)
        {
            yaw = euler_out.yaw;
            pitch = euler_out.pitch;
            roll = euler_out.roll;
        }
        else
        {
            yaw = euler_out2.yaw;
            pitch = euler_out2.pitch;
            roll = euler_out2.roll;
        }
    }
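    // Usage sketch (illustrative): away from the pitch singularity the matrix
    // decodes to two (yaw, pitch, roll) triples that produce the same rotation:
    //
    //   btScalar y1, p1, r1, y2, p2, r2;
    //   m.getEulerZYX(y1, p1, r1, 1);
    //   m.getEulerZYX(y2, p2, r2, 2);
    //   // setEulerZYX(r1, p1, y1) and setEulerZYX(r2, p2, y2) both rebuild m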
    /**@brief Create a scaled copy of the matrix
     *  @param s Scaling vector; the elements of the vector will scale each column */
    btMatrix3x3 scaled(const btVector3& s) const
    {
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
        return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
#else
        return btMatrix3x3(
            m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
            m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
            m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
#endif
    }
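    // Note (illustrative): each component of s scales the corresponding
    // column, i.e. scaled(s) equals (*this) * D where D is the diagonal
    // matrix built from s:
    //
    //   btMatrix3x3 m2 = m.scaled(btVector3(2, 1, 1));
    //   // m2.getColumn(0) == 2 * m.getColumn(0)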
    btScalar tdotx(const btVector3& v) const
    {
        return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
    }
    btScalar tdoty(const btVector3& v) const
    {
        return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
    }
    btScalar tdotz(const btVector3& v) const
    {
        return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
    }
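    // Note (illustrative): tdotx/tdoty/tdotz dot v against the matrix
    // *columns*, so each computes one component of transpose() * v:
    //
    //   btVector3 a = m.transpose() * v;
    //   // a == btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v))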
    ///extractRotation is from "A robust method to extract the rotational part of deformations"
    SIMD_FORCE_INLINE void extractRotation(btQuaternion& q, btScalar tolerance = 1.0e-9, int maxIter = 100)
    {
        int iter;
        for (iter = 0; iter < maxIter; iter++)
        {
            // ... (one iteration: rotate by q, compute the correction axis,
            // stop once its norm falls below tolerance)
        }
    }
    ///diagonalizes this matrix
    void diagonalize(btMatrix3x3& rot, btScalar tolerance = 1.0e-9, int maxIter = 100)
    {
        btQuaternion r;
        // ...
        extractRotation(r, tolerance, maxIter);
        rot.setRotation(r);
        btMatrix3x3 rotInv = btMatrix3x3(r.inverse());
        btMatrix3x3 old = *this;
        setValue(old.tdotx(rotInv[0]), old.tdoty(rotInv[0]), old.tdotz(rotInv[0]),
                 old.tdotx(rotInv[1]), old.tdoty(rotInv[1]), old.tdotz(rotInv[1]),
                 old.tdotx(rotInv[2]), old.tdoty(rotInv[2]), old.tdotz(rotInv[2]));
    }
    /**@brief Calculate the matrix cofactor
     *  @param r1 The first row to use for calculating the cofactor
     *  @param c1 The first column to use for calculating the cofactor
     *  @param r2 The second row to use for calculating the cofactor
     *  @param c2 The second column to use for calculating the cofactor */
    btScalar cofac(int r1, int c1, int r2, int c2) const
    {
        return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
    }
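    // Worked example (illustrative): cofac(1, 1, 2, 2) is the 2x2 determinant
    // left after deleting row 0 and column 0, so expanding along the first
    // row gives the full determinant:
    //
    //   btScalar det = m[0].x() * m.cofac(1, 1, 2, 2)
    //                + m[0].y() * m.cofac(1, 2, 2, 0)
    //                + m[0].z() * m.cofac(1, 0, 2, 1);
    //   // det == m.determinant()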
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator*=(const btMatrix3x3& m)
{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
    __m128 rv00, rv01, rv02;
    __m128 rv10, rv11, rv12;
    __m128 rv20, rv21, rv22;
    __m128 mv0, mv1, mv2;

    rv02 = m_el[0].mVec128;
    rv12 = m_el[1].mVec128;
    rv22 = m_el[2].mVec128;

    mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
    mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
    mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);

    // row 0
    rv00 = bt_splat_ps(rv02, 0);
    rv01 = bt_splat_ps(rv02, 1);
    rv02 = bt_splat_ps(rv02, 2);

    rv00 = _mm_mul_ps(rv00, mv0);
    rv01 = _mm_mul_ps(rv01, mv1);
    rv02 = _mm_mul_ps(rv02, mv2);

    // row 1
    rv10 = bt_splat_ps(rv12, 0);
    rv11 = bt_splat_ps(rv12, 1);
    rv12 = bt_splat_ps(rv12, 2);

    rv10 = _mm_mul_ps(rv10, mv0);
    rv11 = _mm_mul_ps(rv11, mv1);
    rv12 = _mm_mul_ps(rv12, mv2);

    // row 2
    rv20 = bt_splat_ps(rv22, 0);
    rv21 = bt_splat_ps(rv22, 1);
    rv22 = bt_splat_ps(rv22, 2);

    rv20 = _mm_mul_ps(rv20, mv0);
    rv21 = _mm_mul_ps(rv21, mv1);
    rv22 = _mm_mul_ps(rv22, mv2);

    rv00 = _mm_add_ps(rv00, rv01);
    rv10 = _mm_add_ps(rv10, rv11);
    rv20 = _mm_add_ps(rv20, rv21);

    m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
    m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
    m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
#elif defined(BT_USE_NEON)
    float32x4_t rv0, rv1, rv2;
    float32x4_t v0, v1, v2;
    float32x4_t mv0, mv1, mv2;

    v0 = m_el[0].mVec128;
    v1 = m_el[1].mVec128;
    v2 = m_el[2].mVec128;

    mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
    mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
    mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

    rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
    rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
    rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

    rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
    rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
    rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

    rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
    rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
    rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

    m_el[0].mVec128 = rv0;
    m_el[1].mVec128 = rv1;
    m_el[2].mVec128 = rv2;
#else
    setValue(
        m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
        m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
        m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
#endif
    return *this;
}
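// Usage note (illustrative): operator*= composes with the argument on the
// right, consistent with the free operator* defined further below:
//
//   btMatrix3x3 c = a;  // for any matrices a, b
//   c *= b;             // c == a * b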
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator+=(const btMatrix3x3& m)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
    // ... (adds the three mVec128 rows of m to this matrix)
#else
    // ... (scalar element-wise addition via setValue)
#endif
    return *this;
}

SIMD_FORCE_INLINE btMatrix3x3
operator*(const btMatrix3x3& m, const btScalar& k)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
    __m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
    return btMatrix3x3(
        _mm_mul_ps(m[0].mVec128, vk),
        _mm_mul_ps(m[1].mVec128, vk),
        _mm_mul_ps(m[2].mVec128, vk));
#elif defined(BT_USE_NEON)
    return btMatrix3x3(
        vmulq_n_f32(m[0].mVec128, k),
        vmulq_n_f32(m[1].mVec128, k),
        vmulq_n_f32(m[2].mVec128, k));
#else
    return btMatrix3x3(
        m[0].x() * k, m[0].y() * k, m[0].z() * k,
        m[1].x() * k, m[1].y() * k, m[1].z() * k,
        m[2].x() * k, m[2].y() * k, m[2].z() * k);
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
    return btMatrix3x3(
        m1[0].mVec128 + m2[0].mVec128,
        m1[1].mVec128 + m2[1].mVec128,
        m1[2].mVec128 + m2[2].mVec128);
#else
    // ... (scalar element-wise addition)
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
    return btMatrix3x3(
        m1[0].mVec128 - m2[0].mVec128,
        m1[1].mVec128 - m2[1].mVec128,
        m1[2].mVec128 - m2[2].mVec128);
#else
    // ... (scalar element-wise subtraction)
#endif
}
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator-=(const btMatrix3x3& m)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
    // ... (subtracts the three mVec128 rows of m from this matrix)
#else
    // ... (scalar element-wise subtraction via setValue)
#endif
    return *this;
}

SIMD_FORCE_INLINE btScalar
btMatrix3x3::determinant() const
{
    return btTriple((*this)[0], (*this)[1], (*this)[2]);
}
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::absolute() const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
    return btMatrix3x3(
        _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
        _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
        _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
    return btMatrix3x3(
        (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
        (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
        (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
#else
    return btMatrix3x3(
        btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
        btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
        btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::transpose() const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
    __m128 v0 = m_el[0].mVec128;
    __m128 v1 = m_el[1].mVec128;
    __m128 v2 = m_el[2].mVec128;  // x2 y2 z2 w2
    __m128 vT;

    v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

    vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
    v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

    v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
    v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
    v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

    return btMatrix3x3(v0, v1, v2);
#elif defined(BT_USE_NEON)
    // note: zeros the w channel
    static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
    float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
    float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0}, {y2 0}
    float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
    float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
    float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
    float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2 0
    return btMatrix3x3(v0, v1, v2);
#else
    return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
                       m_el[0].y(), m_el[1].y(), m_el[2].y(),
                       m_el[0].z(), m_el[1].z(), m_el[2].z());
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::adjoint() const
{
    return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
                       cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
                       cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
}
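// Note (illustrative): for an invertible matrix the adjoint relates to the
// inverse by a scalar factor:
//
//   btMatrix3x3 inv = m.adjoint() * (btScalar(1.0) / m.determinant());
//   // inv == m.inverse() up to floating-point rounding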
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::inverse() const
{
    btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
    btScalar det = (*this)[0].dot(co);
    btAssert(det != btScalar(0.0));
    btScalar s = btScalar(1.0) / det;
    return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
                       co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
                       co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
}
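// Usage sketch (illustrative): solving the 3x3 system A * x = b.
//
//   btMatrix3x3 A;  // assumed non-singular
//   A.setEulerZYX(0.1, 0.2, 0.3);
//   btVector3 b(1, 2, 3);
//   btVector3 x = A.inverse() * b;  // A * x ~= b
//
// solve33(b) is the dedicated helper for the same task.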
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
    // zeros w
    __m128 row = m_el[0].mVec128;
    __m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
    __m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
    __m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
    __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
    __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
    __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
    row = m_el[1].mVec128;
    r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
    r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
    r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
    row = m_el[2].mVec128;
    r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
    r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
    r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
    return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
    // zeros w
    static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
    float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
    float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
    float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
    float32x4_t row = m_el[0].mVec128;
    float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
    float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
    float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
    row = m_el[1].mVec128;
    r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
    r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
    r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
    row = m_el[2].mVec128;
    r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
    r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
    r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
    return btMatrix3x3(r0, r1, r2);
#else
    return btMatrix3x3(
        m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
        m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
        m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
        m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
        m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
        m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
        m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
        m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
        m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
#endif
}
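// Usage sketch (illustrative): transposeTimes(m) computes this->transpose() * m
// without materializing the transpose. For an orthonormal rotation R,
// R^T * R is the identity:
//
//   btMatrix3x3 R(btQuaternion(btVector3(0, 1, 0), btScalar(0.5)));
//   btMatrix3x3 g = R.transposeTimes(R);  // g ~= btMatrix3x3::getIdentity()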
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
    __m128 a0 = m_el[0].mVec128;
    __m128 a1 = m_el[1].mVec128;
    __m128 a2 = m_el[2].mVec128;

    btMatrix3x3 mT = m.transpose();  // transpose() already zeros the w channel

    __m128 mx = mT[0].mVec128;
    __m128 my = mT[1].mVec128;
    __m128 mz = mT[2].mVec128;

    __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
    __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
    __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
    r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
    r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
    r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
    r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
    r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
    r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
    return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
    float32x4_t a0 = m_el[0].mVec128;
    float32x4_t a1 = m_el[1].mVec128;
    float32x4_t a2 = m_el[2].mVec128;

    btMatrix3x3 mT = m.transpose();  // transpose() already zeros the w channel

    float32x4_t mx = mT[0].mVec128;
    float32x4_t my = mT[1].mVec128;
    float32x4_t mz = mT[2].mVec128;

    float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
    float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
    float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
    r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
    r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
    r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
    r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
    r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
    r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
    return btMatrix3x3(r0, r1, r2);
#else
    return btMatrix3x3(
        m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
        m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
        m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
#endif
}
SIMD_FORCE_INLINE btVector3
operator*(const btMatrix3x3& m, const btVector3& v)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
    return v.dot3(m[0], m[1], m[2]);
#else
    return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
#endif
}
SIMD_FORCE_INLINE btVector3
operator*(const btVector3& v, const btMatrix3x3& m)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
    const __m128 vv = v.mVec128;

    __m128 c0 = bt_splat_ps(vv, 0);
    __m128 c1 = bt_splat_ps(vv, 1);
    __m128 c2 = bt_splat_ps(vv, 2);

    c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
    c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
    c0 = _mm_add_ps(c0, c1);
    c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));

    return btVector3(_mm_add_ps(c0, c2));
#elif defined(BT_USE_NEON)
    const float32x4_t vv = v.mVec128;
    const float32x2_t vlo = vget_low_f32(vv);
    const float32x2_t vhi = vget_high_f32(vv);

    float32x4_t c0, c1, c2;

    c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
    c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
    c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

    c0 = vmulq_lane_f32(c0, vlo, 0);
    c1 = vmulq_lane_f32(c1, vlo, 1);
    c2 = vmulq_lane_f32(c2, vhi, 0);
    c0 = vaddq_f32(c0, c1);
    c0 = vaddq_f32(c0, c2);

    return btVector3(c0);
#else
    return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
#endif
}
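// Note (illustrative): v * m treats v as a row vector, multiplying against
// the columns of m, so it equals m.transpose() * v:
//
//   btVector3 a = v * m;
//   btVector3 b = m.transpose() * v;  // a == b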
SIMD_FORCE_INLINE btMatrix3x3
operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
    __m128 m10 = m1[0].mVec128;
    __m128 m11 = m1[1].mVec128;
    __m128 m12 = m1[2].mVec128;

    __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);

    __m128 c0 = bt_splat_ps(m10, 0);
    __m128 c1 = bt_splat_ps(m11, 0);
    __m128 c2 = bt_splat_ps(m12, 0);

    c0 = _mm_mul_ps(c0, m2v);
    c1 = _mm_mul_ps(c1, m2v);
    c2 = _mm_mul_ps(c2, m2v);

    m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);

    __m128 c0_1 = bt_splat_ps(m10, 1);
    __m128 c1_1 = bt_splat_ps(m11, 1);
    __m128 c2_1 = bt_splat_ps(m12, 1);

    c0_1 = _mm_mul_ps(c0_1, m2v);
    c1_1 = _mm_mul_ps(c1_1, m2v);
    c2_1 = _mm_mul_ps(c2_1, m2v);

    m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);

    c0 = _mm_add_ps(c0, c0_1);
    c1 = _mm_add_ps(c1, c1_1);
    c2 = _mm_add_ps(c2, c2_1);

    m10 = bt_splat_ps(m10, 2);
    m11 = bt_splat_ps(m11, 2);
    m12 = bt_splat_ps(m12, 2);

    m10 = _mm_mul_ps(m10, m2v);
    m11 = _mm_mul_ps(m11, m2v);
    m12 = _mm_mul_ps(m12, m2v);

    c0 = _mm_add_ps(c0, m10);
    c1 = _mm_add_ps(c1, m11);
    c2 = _mm_add_ps(c2, m12);

    return btMatrix3x3(c0, c1, c2);
#elif defined(BT_USE_NEON)
    float32x4_t rv0, rv1, rv2;
    float32x4_t v0, v1, v2;
    float32x4_t mv0, mv1, mv2;

    v0 = m1[0].mVec128;
    v1 = m1[1].mVec128;
    v2 = m1[2].mVec128;

    mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
    mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
    mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);

    rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
    rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
    rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

    rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
    rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
    rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

    rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
    rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
    rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

    return btMatrix3x3(rv0, rv1, rv2);
#else
    return btMatrix3x3(
        m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
        m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]),
        m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2]));
#endif
}
/**@brief Equality operator between two matrices.
 * It will test all elements are equal. */
SIMD_FORCE_INLINE bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
    __m128 c0, c1, c2;

    c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
    c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
    c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);

    c0 = _mm_and_ps(c0, c1);
    c0 = _mm_and_ps(c0, c2);

    int m = _mm_movemask_ps((__m128)c0);
    return (0x7 == (m & 0x7));
#else
    return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
            m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
            m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
#endif
}
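// Note (illustrative): this is exact floating-point equality; the SSE path
// tests the movemask against 0x7 so only the x/y/z lanes participate and the
// unused w lane is ignored. Prefer an epsilon-based comparison when the
// operands come from different computation paths:
//
//   bool exactlyEqual = (m1 == m2);  // true only for bitwise-equal elements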
///for serialization
struct btMatrix3x3FloatData
{
    btVector3FloatData m_el[3];
};

///for serialization
struct btMatrix3x3DoubleData
{
    btVector3DoubleData m_el[3];
};

SIMD_FORCE_INLINE void btMatrix3x3::serialize(struct btMatrix3x3Data& dataOut) const
{
    for (int i = 0; i < 3; i++)
        m_el[i].serialize(dataOut.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::serializeFloat(struct btMatrix3x3FloatData& dataOut) const
{
    for (int i = 0; i < 3; i++)
        m_el[i].serializeFloat(dataOut.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerialize(const struct btMatrix3x3Data& dataIn)
{
    for (int i = 0; i < 3; i++)
        m_el[i].deSerialize(dataIn.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerializeFloat(const struct btMatrix3x3FloatData& dataIn)
{
    for (int i = 0; i < 3; i++)
        m_el[i].deSerializeFloat(dataIn.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn)
{
    for (int i = 0; i < 3; i++)
        m_el[i].deSerializeDouble(dataIn.m_el[i]);
}

#endif  //BT_MATRIX3x3_H