1 #include "vector.h"
  2 #include "simulation_parameters.h"
  3 
  4 
  5 VEC_FUNC void vector3_add( Vector3* vec01, Vector3* vec02, Vector3* out )
  6 {
  7 #ifdef SIMD_VECTOR
  8 
  9 	__m128 r_vec01 = _mm_load_ps( vec01->data );
 10 	__m128 r_vec02 = _mm_load_ps( vec02->data );
 11 	__m128 r_out   = _mm_add_ps ( r_vec01, r_vec02 ); 
 12 
 13 	_mm_store_ps( out->data, r_out );
 14 
 15 #else	
 16 	out->x_com = vec01->x_com + vec02->x_com;
 17 	out->y_com = vec01->y_com + vec02->y_com;
 18 	out->z_com = vec01->z_com + vec02->z_com;
 19 #endif
 20 }
 21 
 22 VEC_FUNC void vector3_sub( Vector3* vec01, Vector3* vec02, Vector3* out )
 23 {
 24 #ifdef SIMD_VECTOR
 25 
 26 	__m128 r_vec01 = _mm_load_ps( vec01->data );
 27 	__m128 r_vec02 = _mm_load_ps( vec02->data );
 28 	__m128 r_out   = _mm_sub_ps ( r_vec01, r_vec02 ); 
 29 	_mm_store_ps( out->data, r_out );
 30 #else	
 31 	out->x_com = vec01->x_com - vec02->x_com;
 32 	out->y_com = vec01->y_com - vec02->y_com;
 33 	out->z_com = vec01->z_com - vec02->z_com;
 34 #endif
 35 }
 36 
 37 void vector3_crs( Vector3* vec01, Vector3* vec02, Vector3* out )
 38 {
 39 #ifdef SIMD_VECTOR	
 40 	__m128 r_vec01  = _mm_load_ps( vec01->data );	
 41 	__m128 r_vec02  = _mm_load_ps( vec02->data );	
 42 	//r_exp1a = [ 0, y2, z1, y1 ]
 43 	__m128 r_exp1a  = _mm_shuffle_ps( r_vec01, r_vec02, _MM_SHUFFLE(0,1,2,1) );
 44 	//r_exp1b = [ 0, x1, x2, z2 ]
 45 	__m128 r_exp1b  = _mm_shuffle_ps( r_vec02, r_vec01, _MM_SHUFFLE(0,0,0,2) );
 46 	//r_exp1 = [ 0, y2*x1, z1*x2, x1*z2 ]
 47 	__m128 r_exp1   = _mm_mul_ps( r_exp1a, r_exp1b );
 48 	//r_exp2a = [ 0, x2, x1, z1 ]
 49 	__m128 r_exp2a  = _mm_shuffle_ps( r_vec01, r_vec02, _MM_SHUFFLE(0,0,0,2) );
 50 	//r_exp2b = [ 0, y1, x2, z2 ]
 51 	__m128 r_exp2b  = _mm_shuffle_ps( r_vec02, r_vec01, _MM_SHUFFLE(0,1,2,1) );
 52 	//r_exp1 = [ 0, y2*x1, z1*x2, x1*z2 ]
 53 	__m128 r_exp2   = _mm_mul_ps( r_exp2a, r_exp2b );
 54 	__m128 r_out    = _mm_sub_ps( r_exp1, r_exp2 );
 55 	
 56 	_mm_store_ps( out->data, r_out );
 57 
 58 #else
 59 	out->x_com = vec01->y_com * vec02->z_com - vec01->z_com * vec02->y_com;
 60 	out->y_com = vec01->z_com * vec02->x_com - vec01->x_com * vec02->z_com;
 61 	out->z_com = vec01->x_com * vec02->y_com - vec01->y_com * vec02->x_com;
 62 #endif
 63 }
 64 
 65 VEC_FUNC float vector3_dot( Vector3* vec01, Vector3* vec02 )
 66 {
 67 #ifdef SIMD_VECTOR
 68 	
 69 	__m128 r_out;
 70 	SSE2_DOT( vec01, vec02, r_out )
 71 	return *((float *)&r_out);
 72 	//__m128 r_vec01  = _mm_load_ps( vec01->data );	
 73 	//__m128 r_vec02  = _mm_load_ps( vec02->data );	
 74 	//__m128 r_mul   = _mm_mul_ps( r_vec01, r_vec02 ); 
 75 	////r_mul = [ x4, x3, x2, x1 ]
 76 	////xi = r_vec[i] * r_vec[i]
 77 	//__m128 r_mul_s = _mm_shuffle_ps( r_mul, r_mul, _MM_SHUFFLE( 2, 2, 2, 2 ) );
 78 	////r_mul_s = [ x4, x4, x4, x3 ]
 79 	//	   r_mul_s = _mm_add_ps( r_mul, r_mul_s );
 80 	////r_mul_s = [ x4 + x4, x4 + x3, x4 + x2, x3 + x1 ]
 81 	//	   r_mul   = _mm_shuffle_ps( r_mul, r_mul, _MM_SHUFFLE( 1, 1, 1, 1 ) );
 82 	////r_mul = [ x4, x4, x4, x2 ]
 83 	//	   r_mul   = _mm_add_ss( r_mul, r_mul_s );
 84 	////r_mul = [ x4 + x4, x4 + x3, x4 + x2, x3 + x2 + x1 ]
 85 
 86 	//return *((float *)&r_mul);
 87 
 88 #else
 89 	return vec01->x_com * vec02->x_com +
 90 		   vec01->y_com * vec02->y_com +
 91 		   vec01->z_com * vec02->z_com;
 92 #endif
 93 }
 94 
 95 VEC_FUNC void vector3_mul_vfv( Vector3* vec, float constant, Vector3* out )
 96 {
 97 #ifdef SIMD_VECTOR
 98 
 99 	Vector3 constant_vec = VECTOR_INIT( constant, constant, constant );
100 
101 	__m128 r_vec01 = _mm_load_ps( vec->data );
102 	__m128 r_vec02 = _mm_load_ps( constant_vec.data );
103 	__m128 r_out   = _mm_mul_ps ( r_vec01, r_vec02 ); 
104 	
105 	_mm_store_ps( out->data, r_out );
106 
107 #else	
108 	out->x_com = vec->x_com * constant;
109 	out->y_com = vec->y_com * constant;
110 	out->z_com = vec->z_com * constant;
111 #endif
112 }
113 
114 VEC_FUNC void vector3_mul( Vector3* vec_inout, float constant )
115 {
116 #ifdef SIMD_VECTOR
117 
118 	Vector3 constant_vec = VECTOR_INIT( constant, constant, constant );
119 
120 	__m128 r_vec01 = _mm_load_ps( vec_inout->data );
121 	__m128 r_vec02 = _mm_load_ps( constant_vec.data );
122 	__m128 r_out   = _mm_mul_ps ( r_vec01, r_vec02 ); 
123 	
124 	_mm_store_ps( vec_inout->data, r_out );
125 
126 #else
127 	vec_inout->x_com *= constant;
128 	vec_inout->y_com *= constant;
129 	vec_inout->z_com *= constant;
130 #endif
131 }
132 
133 
134 VEC_FUNC float vector3_length( Vector3* vec )
135 {
136 #ifdef SIMD_VECTOR
137 
138 	__m128 r_out;
139 	SSE2_DOT( vec, vec, r_out )
140 	r_out = _mm_sqrt_ss( r_out ); 
141 	return *((float *)&r_out);
142 #else
143 	return sqrt( vec->x_com * vec->x_com +
144 				 vec->y_com * vec->y_com +
145 				 vec->z_com * vec->z_com );
146 #endif
147 }
148 
149 VEC_FUNC float vector3_length_sqr( Vector3* vec )
150 {
151 #ifdef SIMD_VECTOR
152 	__m128 r_out;
153 	SSE2_DOT( vec, vec, r_out )
154 	return *((float *)&r_out);
155 #else
156 	return vec->x_com * vec->x_com +
157 		   vec->y_com * vec->y_com +
158 		   vec->z_com * vec->z_com;
159 #endif
160 }
161 
162 VEC_FUNC void vector3_normalize( Vector3* vec_inout )
163 {
164 #ifdef SIMD_VECTOR
165 	__m128 r_vec   = _mm_load_ps( vec_inout->data );	
166 	__m128 r_mul   = _mm_mul_ps( r_vec, r_vec ); 
167 	//r_mul = [ x4, x3, x2, x1 ]
168 	//xi = r_vec[i] * r_vec[i]
169 	__m128 r_mul_s = _mm_shuffle_ps( r_mul, r_mul, _MM_SHUFFLE( 0, 0, 0, 2 ) );
170 	//r_mul_s = [ x4, x4, x4, x3 ]
171 		   r_mul_s = _mm_add_ps( r_mul, r_mul_s );
172 	//r_mul_s = [ x4 + x4, x4 + x3, x4 + x2, x3 + x1 ]
173 		   r_mul   = _mm_shuffle_ps( r_mul, r_mul, _MM_SHUFFLE( 0, 0, 0, 1 ) );
174 	//r_mul = [ x4, x4, x4, x2 ]
175 		   r_mul   = _mm_add_ss( r_mul, r_mul_s );
176 	//r_mul = [ x4 + x4, x4 + x3, x4 + x2, x3 + x2 + x1 ]
177 		   r_mul   = _mm_rsqrt_ss( r_mul );
178 		   r_mul   = _mm_shuffle_ps( r_mul, r_mul, _MM_SHUFFLE( 0, 0, 0, 0 ) );
179 	__m128 r_out   = _mm_mul_ps( r_mul, r_vec ); 	
180 	
181 	_mm_store_ps( vec_inout->data, r_out );
182 #else
183 	float length_inv = 	1 / sqrt( 
184 	   vec_inout->x_com * vec_inout->x_com +
185 	   vec_inout->y_com * vec_inout->y_com +
186 	   vec_inout->z_com * vec_inout->z_com );
187 
188 	vec_inout->x_com *= length_inv;
189 	vec_inout->y_com *= length_inv;
190 	vec_inout->z_com *= length_inv;
191 #endif
192 }
193 
194 VEC_FUNC void  vector3_normalize_vv( Vector3* vec, Vector3* out )
195 {
196 #ifdef SIMD_VECTOR
197 	__m128 r_vec   = _mm_load_ps( vec->data );	
198 	__m128 r_mul   = _mm_mul_ps( r_vec, r_vec ); 
199 	//r_mul = [ x4, x3, x2, x1 ]
200 	//xi = r_vec[i] * r_vec[i]
201 	__m128 r_mul_s = _mm_shuffle_ps( r_mul, r_mul, _MM_SHUFFLE( 0, 0, 0, 2 ) );
202 	//r_mul_s = [ x4, x4, x4, x3 ]
203 		   r_mul_s = _mm_add_ps( r_mul, r_mul_s );
204 	//r_mul_s = [ x4 + x4, x4 + x3, x4 + x2, x3 + x1 ]
205 		   r_mul   = _mm_shuffle_ps( r_mul, r_mul, _MM_SHUFFLE( 0, 0, 0, 1 ) );
206 	//r_mul = [ x4, x4, x4, x2 ]
207 		   r_mul   = _mm_add_ss( r_mul, r_mul_s );
208 	//r_mul = [ x4 + x4, x4 + x3, x4 + x2, x3 + x2 + x1 ]
209 		   r_mul   = _mm_rsqrt_ss( r_mul );
210 		   r_mul   = _mm_shuffle_ps( r_mul, r_mul, _MM_SHUFFLE( 0, 0, 0, 0 ) );
211 	__m128 r_out   = _mm_mul_ps( r_mul, r_vec ); 	
212 	
213 	_mm_store_ps( out->data, r_out );
214 #else
215 
216 	float length_inv = 	1 / sqrt( 
217 	   vec->x_com * vec->x_com +
218 	   vec->y_com * vec->y_com +
219 	   vec->z_com * vec->z_com );
220 	
221 	out->x_com = vec->x_com * length_inv;
222 	out->y_com = vec->y_com * length_inv;
223 	out->z_com = vec->z_com * length_inv;
224 #endif
225 }
226 
227 void vector3_print( FILE* file, Vector3* vec, const char* name )
228 {
229 	fprintf( file,
230 		"%s:[%f,%f,%f]",
231 		name,
232 		vec->x_com,
233 		vec->y_com,
234 		vec->z_com );
235 }
236 
237 void vector3_test_add( FILE* file )
238 {
239 	fprintf( file, "Addition test...\n" );
240 	
241 	Vector3 vec01 = {{ 1.0f, 1.0f, 1.0f }};
242 	vector3_print( file, &vec01, "vec01" );
243 	fprintf( file, "\n" );
244 
245 	Vector3 vec02 = {{ 2.0f, 2.0f, 2.0f }};
246 	vector3_print( file, &vec02, "vec02" );
247 	fprintf( file, "\n" );
248 
249 	Vector3 vec03 = {{ 0.0f, 0.0f, 0.0f }};
250 	vector3_print( file, &vec03, "vec03" );
251 	fprintf( file, "\n" );
252 	
253 	fprintf( file, "vec03 = vec01 + vec02\n" );	
254 	vector3_add( &vec01, &vec02, &vec03 );
255 	vector3_print( file, &vec03, "vec03" );
256 	fprintf( file, "\n" );
257 
258 	fprintf( file, "...end of addition test\n" );
259 }
260 
261 void vector3_test_sub( FILE* file )
262 {
263 	fprintf( file, "\n\nSubtraction test...\n" );
264 	
265 	Vector3 vec01 = {{ 1.0f, 1.0f, 1.0f }};
266 	vector3_print( file, &vec01, "vec01" );
267 	fprintf( file, "\n" );
268 
269 	Vector3 vec02 = {{ 2.0f, 2.0f, 2.0f }};
270 	vector3_print( file, &vec02, "vec02" );
271 	fprintf( file, "\n" );
272 
273 	Vector3 vec03 = {{ 0.0f, 0.0f, 0.0f }};
274 	vector3_print( file, &vec03, "vec03" );
275 	fprintf( file, "\n" );
276 	
277 	fprintf( file, "vec03 = vec01 - vec02\n" );	
278 	vector3_sub( &vec01, &vec02, &vec03 );
279 	vector3_print( file, &vec03, "vec03" );
280 	fprintf( file, "\n" );
281 
282 	fprintf( file, "...end of subtraction test\n" );
283 }
284 
285 void vector3_test_crs( FILE* file )
286 {
287 	fprintf( file, "\n\nCross product test...\n" );
288 	
289 	Vector3 vec01 = {{ 3.0f, 3.0f, 3.0f }};
290 	vector3_print( file, &vec01, "vec01" );
291 	fprintf( file, "\n" );
292 
293 	Vector3 vec02 = {{ 2.0f, 2.0f, 2.0f }};
294 	vector3_print( file, &vec02, "vec02" );
295 	fprintf( file, "\n" );
296 
297 	Vector3 vec03 = {{ 0.0f, 0.0f, 0.0f }};
298 	vector3_print( file, &vec03, "vec03" );
299 	fprintf( file, "\n" );
300 	
301 	fprintf( file, "vec03 = vec01 x vec02\n" );	
302 	vector3_crs( &vec01, &vec02, &vec03 );
303 	vector3_print( file, &vec03, "vec03" );
304 	fprintf( file, "\n" );
305 
306 	fprintf( file, "...end of cross product test\n" );
307 }
308 
309 void vector3_test_dot( FILE* file )
310 {
311 	fprintf( file, "\n\nDot product test...\n" );
312 	
313 	Vector3 vec01 = {{ 3.0f, 3.0f, 3.0f }};
314 	vector3_print( file, &vec01, "vec01" );
315 	fprintf( file, "\n" );
316 
317 	Vector3 vec02 = {{ 2.0f, 2.0f, 2.0f }};
318 	vector3_print( file, &vec02, "vec02" );
319 	fprintf( file, "\n" );
320 	
321 	fprintf( file, "dot = vec01 . vec02\n" );	
322 	
323 	fprintf( file, "dot = %f\n", 
324 		vector3_dot( &vec01, &vec02 ) );
325 	fprintf( file, "\n" );
326 
327 	fprintf( file, "...end of dot product test\n" );
328 }
329 
330 void vector3_test_mul( FILE* file )
331 {
332 	fprintf( file, "\n\nMultiply by constant test...\n" );
333 	
334 	Vector3 vec01 = {{ 3.0f, 3.0f, 3.0f }};
335 	vector3_print( file, &vec01, "vec01" );
336 	fprintf( file, "\n" );
337 
338 	Vector3 vec02 = {{ 0.0f, 0.0f, 0.0f }};
339 	vector3_print( file, &vec02, "vec02" );
340 	fprintf( file, "\n" );
341 
342 	float constant = 2.0f;	
343 	fprintf( file, "constant = %f\n", constant );	
344 	
345 	fprintf( file, "vec02 = vec01 x constant\n" );	
346 	vector3_mul_vfv( &vec01, constant, &vec02 );
347 	vector3_print( file, &vec02, "vec02" );
348 	fprintf( file, "\n\n" );	
349 	
350 	Vector3 vec03 = {{ 3.0f, 3.0f, 3.0f }};
351 	vector3_print( file, &vec01, "vec03" );
352 	fprintf( file, "\n" );
353 
354 	float constant02 = 4.0f;	
355 	fprintf( file, "constant02 = %f\n", constant02 );	
356 	
357 	fprintf( file, "vec03 = vec03 x constant02\n" );	
358 	vector3_mul( &vec03, constant02 );
359 	
360 	vector3_print( file, &vec03, "vec03" );
361 	fprintf( file, "\n" );
362 
363 	fprintf( file, "...end of mutliply by constant test\n" );
364 }
365 
366 void vector3_test_length( FILE* file )
367 {
368 	fprintf( file, "\n\nLength test...\n" );
369 	
370 	Vector3 vec01 = {{ 3.0f, 3.0f, 3.0f }};
371 	vector3_print( file, &vec01, "vec01" );
372 	fprintf( file, "\n" );	
373 	
374 	fprintf( file, "length = length( vec01 )\n" );
375 
376 	fprintf( file, "length = %f\n\n", vector3_length( &vec01 ) );
377 
378 	Vector3 vec02 = {{ 3.0f, 3.0f, 3.0f }};
379 	vector3_print( file, &vec02, "vec02" );
380 	fprintf( file, "\n" );	
381 	
382 	fprintf( file, "length_sqr = length_sqr( vec02 )\n" );
383 
384 	fprintf( file, "length = %f\n", vector3_length_sqr( &vec02 ) );
385 
386 	fprintf( file, "...end of length test\n" );
387 }
388 
389 void vector3_test_normalize( FILE* file )
390 {
391 	fprintf( file, "\n\nNormalize test...\n" );
392 	
393 	Vector3 vec01 = {{ 3.0f, 3.0f, 3.0f }};
394 	vector3_print( file, &vec01, "vec01" );
395 	fprintf( file, "\n" );	
396 	
397 	fprintf( file, "vec01 = normalize( vec01 )\n" );
398 
399 	vector3_normalize( &vec01 );
400 	vector3_print( file, &vec01, "vec01" );
401 	fprintf( file, "\n\n" );	
402 	
403 	Vector3 vec02 = {{ 3.0f, 3.0f, 3.0f }};
404 	vector3_print( file, &vec02, "vec02" );
405 	fprintf( file, "\n" );	
406 	
407 	Vector3 vec03 = {{ 0.0f, 0.0f, 0.0f }};
408 	vector3_print( file, &vec03, "vec03" );
409 	fprintf( file, "\n" );	
410 
411 	fprintf( file, "vec03 = normalize( vec02 )\n" );
412 	vector3_normalize_vv( &vec02, &vec03 );
413 
414 	vector3_print( file, &vec03, "vec03" );
415 	fprintf( file, "\n" );	
416 
417 	fprintf( file, "...end of normalize test\n" );
418 }
419 
420 
421 void vector3_test()
422 {
423 
424 	FILE* file;
425 	if( ( file = fopen( "output.txt","w" ) ) == NULL ) 
426 	{ 
427 		return;
428 	} 
429 	
430 	vector3_test_add( file );
431 	vector3_test_sub( file );
432 	vector3_test_crs( file );
433 	vector3_test_dot( file );
434 	vector3_test_mul( file );
435 
436 	vector3_test_length( file );
437 	vector3_test_normalize( file );
438 
439 	fclose( file );
440 }


syntax highlighted by Code2HTML, v. 0.9.1