00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051 __forceinline
00052 Vector3D_t Vector3D ()
00053 {
00054 register Vector3D_t V0;
00055 #ifdef __SSE__ // SSE enabled
00056
00057 V0._ = _mm_setzero_ps();
00058 #else // SSE disabled
00059 V0.v.x = 0.0f;
00060 V0.v.y = 0.0f;
00061 V0.v.z = 0.0f;
00062 #endif
00063 return V0;
00064 }
00065
00066
00067 __forceinline
00068 Vector3D_t Vector3D (register float S0)
00069 {
00070 register Vector3D_t V0;
00071 #ifdef __SSE__ // SSE enabled
00072
00073 V0._ = _mm_set_ps1 (S0);
00074 #else // SSE disabled
00075 V0.v.x = S0;
00076 V0.v.y = S0;
00077 V0.v.z = S0;
00078 #endif
00079 return V0;
00080 }
00081
00082
00083 __forceinline
00084 Vector3D_t Vector3D (register float x, register float y, register float z)
00085 {
00086 register Vector3D_t V0;
00087 #ifdef __SSE__ // SSE enabled
00088
00089 V0._ = _mm_set_ps (x, y, z, 0.0f);
00090 #else // SSE disabled
00091 V0.v.x = x;
00092 V0.v.y = y;
00093 V0.v.z = z;
00094 #endif
00095 return V0;
00096 }
00097
00098
00099 __forceinline
00100 Vector3D_t add (register Vector3D_t L0, register Vector3D_t R0)
00101 {
00102 #ifdef __SSE__ // SSE enabled
00103
00104 L0._ = _mm_add_ps (L0._, R0._);
00105 #else // SSE disabled
00106 L0.v.x += R0.v.x;
00107 L0.v.y += R0.v.y;
00108 L0.v.z += R0.v.z;
00109 #endif
00110 return L0;
00111 }
00112
00113
00114 __forceinline
00115 void add_ (register Vector3D_t &L0, register Vector3D_t R0)
00116 {
00117 #ifdef __SSE__ // SSE enabled
00118
00119 L0._ = _mm_add_ps (L0._, R0._);
00120 #else // SSE disabled
00121 L0.v.x += R0.v.x;
00122 L0.v.y += R0.v.y;
00123 L0.v.z += R0.v.z;
00124 #endif
00125 }
00126
00127
00128 __forceinline
00129 Vector3D_t sub (register Vector3D_t L0, register Vector3D_t R0)
00130 {
00131 #ifdef __SSE__ // SSE enabled
00132
00133 L0._ = _mm_sub_ps (L0._, R0._);
00134 #else // SSE disabled
00135 L0.v.x -= R0.v.x;
00136 L0.v.y -= R0.v.y;
00137 L0.v.z -= R0.v.z;
00138 #endif
00139 return L0;
00140 }
00141
00142
00143 __forceinline
00144 void sub_ (register Vector3D_t &L0, register Vector3D_t R0)
00145 {
00146 #ifdef __SSE__ // SSE enabled
00147
00148 L0._ = _mm_sub_ps (L0._, R0._);
00149 #else // SSE disabled
00150 L0.v.x -= R0.v.x;
00151 L0.v.y -= R0.v.y;
00152 L0.v.z -= R0.v.z;
00153 #endif
00154 }
00155
00156
00157 __forceinline
00158 Vector3D_t mul (register Vector3D_t L0, register float R0)
00159 {
00160 #ifdef __SSE__ // SSE enabled
00161
00162 register Vector3D_t R1;
00163
00164 R1._ = _mm_set_ps1 (R0);
00165
00166 L0._ = _mm_mul_ps (L0._, R1._);
00167 #else // SSE disabled
00168 L0.v.x *= R0;
00169 L0.v.y *= R0;
00170 L0.v.z *= R0;
00171 #endif
00172 return L0;
00173 }
00174
00175
00176 __forceinline
00177 void mul_ (register Vector3D_t &L0, register float R0)
00178 {
00179 #ifdef __SSE__ // SSE enabled
00180
00181 register Vector3D_t R1;
00182
00183 R1._ = _mm_set_ps1 (R0);
00184
00185 L0._ = _mm_mul_ps (L0._, R1._);
00186 #else // SSE disabled
00187 L0.v.x *= R0;
00188 L0.v.y *= R0;
00189 L0.v.z *= R0;
00190 #endif
00191 }
00192
00193
00194 __forceinline
00195 Vector3D_t div (register Vector3D_t L0, register float R0)
00196 {
00197 #ifdef __SSE__ // SSE enabled
00198
00199 register Vector3D_t R1;
00200
00201 R1._ = _mm_set_ps1 (R0);
00202
00203 L0._ = _mm_div_ps (L0._, R1._);
00204 #else // SSE disabled
00205 L0.v.x /= R0;
00206 L0.v.y /= R0;
00207 L0.v.z /= R0;
00208 #endif
00209 return L0;
00210 }
00211
00212
00213 __forceinline
00214 void div_ (register Vector3D_t &L0, register float R0)
00215 {
00216 #ifdef __SSE__ // SSE enabled
00217
00218 register Vector3D_t R1;
00219
00220 R1._ = _mm_set_ps1 (R0);
00221
00222 L0._ = _mm_div_ps (L0._, R1._);
00223 #else // SSE disabled
00224 L0.v.x /= R0;
00225 L0.v.y /= R0;
00226 L0.v.z /= R0;
00227 #endif
00228 }
00229
00230
00231 __forceinline
00232 float dot (register Vector3D_t L0, register Vector3D_t R0)
00233 {
00234
00235 register float S0;
00236 #ifdef __SSE__ // SSE enabled
00237 #if defined(__SSE4_1__) || defined(__SSE4_2__) // calculate dot product with SSE4
00238 L0._ = _mm_dp_ps (L0._, R0._, VECTOR3D_DOTP_MASK);
00239 #else // calculate dot product without SSE4
00240
00241 L0._ = _mm_mul_ps (L0._, R0._);
00242 #ifdef __SSE3__ // SSE3 enabled
00243
00244
00245 L0._ = _mm_hadd_ps (L0._, L0._);
00246 L0._ = _mm_hadd_ps (L0._, L0._);
00247 #else // SSE3 disabled
00248
00249 register Vector3D_t V0;
00250
00251 R0._ = _mm_shuffle_ps (L0._, R0._, VECTOR3D_ROT1_MASK);
00252 V0._ = _mm_shuffle_ps (L0._, V0._, VECTOR3D_ROT2_MASK);
00253
00254 V0._ = _mm_add_ss (V0._, R0._);
00255 L0._ = _mm_add_ss (L0._, V0._);
00256 #endif
00257 #endif
00258
00259 _mm_store_ss (&S0, L0._);
00260 #else // SSE disabled
00261 S0 = L0.v.x * R0.v.x;
00262 S0 += L0.v.y * R0.v.y;
00263 S0 += L0.v.z * R0.v.z;
00264 #endif
00265
00266 return S0;
00267 }
00268
00269
00270 __forceinline
00271 Vector3D_t cross (register Vector3D_t L0, register Vector3D_t R0)
00272 {
00273 #ifdef __SSE__ // SSE enabled
00274
00275 register Vector3D_t L1;
00276 register Vector3D_t R1;
00277 L1._ = L0._;
00278 R1._ = R0._;
00279
00280 L0._ = _mm_shuffle_ps (L0._, L0._, VECTOR3D_ROT1_MASK);
00281 R1._ = _mm_shuffle_ps (R1._, R1._, VECTOR3D_ROT1_MASK);
00282 R0._ = _mm_shuffle_ps (R0._, R0._, VECTOR3D_ROT2_MASK);
00283 L1._ = _mm_shuffle_ps (L1._, L1._, VECTOR3D_ROT2_MASK);
00284
00285 L0._ = _mm_mul_ps (L0._, R0._ );
00286 L1._ = _mm_mul_ps (L1._, R1._);
00287
00288 L0._ = _mm_sub_ps (L0._, L1._);
00289
00290 return L0;
00291 #else // SSE disabled
00292
00293 register Vector3D_t V0;
00294
00295 V0.v.x = L0.v.y * R0.v.z;
00296 V0.v.y = L0.v.z * R0.v.x;
00297 V0.v.z = L0.v.x * R0.v.y;
00298 V0.v.x -= R0.v.y * L0.v.z;
00299 V0.v.y -= R0.v.z * L0.v.x;
00300 V0.v.z -= R0.v.x * L0.v.y;
00301
00302 return V0;
00303 #endif
00304 }
00305
00306
00307 __forceinline
00308 void cross_ (register Vector3D_t &L0, register Vector3D_t R0)
00309 {
00310 #ifdef __SSE__ // SSE enabled
00311
00312 register Vector3D_t L1;
00313 register Vector3D_t R1;
00314 L1._ = L0._;
00315 R1._ = R0._;
00316
00317 L0._ = _mm_shuffle_ps (L0._, L0._, VECTOR3D_ROT1_MASK);
00318 R1._ = _mm_shuffle_ps (R1._, R1._, VECTOR3D_ROT1_MASK);
00319 R0._ = _mm_shuffle_ps (R0._, R0._, VECTOR3D_ROT2_MASK);
00320 L1._ = _mm_shuffle_ps (L1._, L1._, VECTOR3D_ROT2_MASK);
00321
00322 L0._ = _mm_mul_ps (L0._, R0._ );
00323 L1._ = _mm_mul_ps (L1._, R1._);
00324
00325 L0._ = _mm_sub_ps (L0._, L1._);
00326 #else // SSE disabled
00327
00328 register Vector3D_t V0;
00329
00330 V0.v.x = L0.v.y * R0.v.z;
00331 V0.v.y = L0.v.z * R0.v.x;
00332 V0.v.z = L0.v.x * R0.v.y;
00333 V0.v.x -= R0.v.y * L0.v.z;
00334 V0.v.y -= R0.v.z * L0.v.x;
00335 V0.v.z -= R0.v.x * L0.v.y;
00336 L0.v.x = V0.v.x;
00337 L0.v.y = V0.v.y;
00338 L0.v.z = V0.v.z;
00339
00340 #endif
00341 }
00342
00343
00344 __forceinline
00345 float triple (register Vector3D_t L0, register Vector3D_t M0, register Vector3D_t R0)
00346 {
00347 register float S0;
00348 #ifdef __SSE__ // SSE enabled
00349
00350 register Vector3D_t M1;
00351 register Vector3D_t R1;
00352 M1._ = M0._;
00353 R1._ = R0._;
00354
00355 M0._ = _mm_shuffle_ps (M0._, M0._ , VECTOR3D_ROT1_MASK);
00356 R1._ = _mm_shuffle_ps (R1._, R1._, VECTOR3D_ROT1_MASK);
00357 R0._ = _mm_shuffle_ps (R0._, R0._ , VECTOR3D_ROT2_MASK);
00358 M1._ = _mm_shuffle_ps (M1._, M1._, VECTOR3D_ROT2_MASK);
00359
00360 M0._ = _mm_mul_ps (M0._, R0._ );
00361 M1._ = _mm_mul_ps (M1._, R1._);
00362
00363 M0._ = _mm_sub_ps (M0._, M1._);
00364 #if defined(__SSE4_1__) || defined(__SSE4_2__) // calculate dot product with SSE4
00365 L0._ = _mm_dp_ps (L0._, M0._, VECTOR3D_DOTP_MASK);
00366 #else // calculate dot product without SSE4
00367
00368 L0._ = _mm_mul_ps (L0._, M0._);
00369 #ifdef __SSE3__ // SSE3 enabled
00370
00371
00372 L0._ = _mm_hadd_ps (L0._, L0._);
00373 L0._ = _mm_hadd_ps (L0._, L0._);
00374 #else // SSE3 disabled
00375
00376 M0._ = _mm_shuffle_ps (L0._, M0._, VECTOR3D_ROT1_MASK);
00377 R0._ = _mm_shuffle_ps (L0._, R0._, VECTOR3D_ROT2_MASK);
00378
00379 R0._ = _mm_add_ss (R0._, M0._);
00380 L0._ = _mm_add_ss (L0._, R0._);
00381 #endif
00382 #endif
00383
00384 _mm_store_ss (&S0, L0._);
00385 #else // SSE disabled
00386
00387 register Vector3D_t V0;
00388
00389 V0.v.x = M0.v.y * R0.v.z;
00390 V0.v.y = M0.v.z * R0.v.x;
00391 V0.v.z = M0.v.x * R0.v.y;
00392 V0.v.x -= R0.v.y * M0.v.z;
00393 V0.v.y -= R0.v.z * M0.v.x;
00394 V0.v.z -= R0.v.x * M0.v.y;
00395
00396 S0 = L0.v.x * V0.v.x;
00397 S0 += L0.v.y * V0.v.y;
00398 S0 += L0.v.z * V0.v.z;
00399 #endif
00400 return S0;
00401 }
00402
00403
00404 __forceinline
00405 float abs (register Vector3D_t V0)
00406 {
00407
00408 register float S0;
00409 #ifdef __SSE__ // SSE enabled
00410 #if defined(__SSE4_1__) || defined(__SSE4_2__) // calculate dot product with SSE4
00411 V0._ = _mm_dp_ps (V0._, V0._, VECTOR3D_DOTP_MASK);
00412 #else // calculate dot product without SSE4
00413
00414 V0._ = _mm_mul_ps (V0._, V0._);
00415 #ifdef __SSE3__ // SSE3 enabled
00416
00417
00418 V0._ = _mm_hadd_ps (V0._, V0._);
00419 V0._ = _mm_hadd_ps (V0._, V0._);
00420 #else // SSE3 disabled
00421
00422 register Vector3D_t V1;
00423 register Vector3D_t V2;
00424
00425 V1._ = _mm_shuffle_ps (V0._, V1._, VECTOR3D_ROT1_MASK);
00426 V2._ = _mm_shuffle_ps (V0._, V2._, VECTOR3D_ROT2_MASK);
00427
00428 V2._ = _mm_add_ss (V2._, V1._);
00429 V0._ = _mm_add_ss (V0._, V2._);
00430 #endif
00431 #endif
00432
00433 V0._ = _mm_sqrt_ss (V0._);
00434
00435 _mm_store_ss (&S0, V0._);
00436 #else // SSE disabled
00437 S0 = V0.v.x * V0.v.x;
00438 S0 += V0.v.y * V0.v.y;
00439 S0 += V0.v.z * V0.v.z;
00440 S0 = sqrtf (S0);
00441 #endif
00442
00443 return S0;
00444 }
00445
00446
00447 __forceinline
00448 Vector3D_t unit (register Vector3D_t V0)
00449 {
00450 return div ( V0, abs(V0) );
00451 }
00452
00453
00454 __forceinline
00455 void unit_ (register Vector3D_t &V0)
00456 {
00457 div_ ( V0, abs(V0) );
00458 }
00459