ardour
sse_functions_xmm.cc
Go to the documentation of this file.
1 /*
2  Copyright (C) 2007 Paul Davis
3  Written by Sampo Savolainen
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; either version 2 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program; if not, write to the Free Software
17  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 
19 */
20 
21 #include <xmmintrin.h>
22 #include "ardour/types.h"
23 
24 void
25 x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::pframes_t nframes, float *min, float *max)
26 {
27  __m128 current_max, current_min, work;
28 
29  // Load max and min values into all four slots of the XMM registers
30  current_min = _mm_set1_ps(*min);
31  current_max = _mm_set1_ps(*max);
32 
33  // Work input until "buf" reaches 16 byte alignment
34  while ( ((intptr_t)buf) % 16 != 0 && nframes > 0) {
35 
36  // Load the next float into the work buffer
37  work = _mm_set1_ps(*buf);
38 
39  current_min = _mm_min_ps(current_min, work);
40  current_max = _mm_max_ps(current_max, work);
41 
42  buf++;
43  nframes--;
44  }
45 
46  // use 64 byte prefetch for quadruple quads
47  while (nframes >= 16) {
48 #ifdef COMPILER_MSVC
49  _mm_prefetch(((char*)buf+64), 0); // A total guess! Assumed to be eqivalent to
50 #else // the line below but waiting to be tested !!
51  __builtin_prefetch(buf+64,0,0);
52 #endif
53  work = _mm_load_ps(buf);
54  current_min = _mm_min_ps(current_min, work);
55  current_max = _mm_max_ps(current_max, work);
56  buf+=4;
57  work = _mm_load_ps(buf);
58  current_min = _mm_min_ps(current_min, work);
59  current_max = _mm_max_ps(current_max, work);
60  buf+=4;
61  work = _mm_load_ps(buf);
62  current_min = _mm_min_ps(current_min, work);
63  current_max = _mm_max_ps(current_max, work);
64  buf+=4;
65  work = _mm_load_ps(buf);
66  current_min = _mm_min_ps(current_min, work);
67  current_max = _mm_max_ps(current_max, work);
68  buf+=4;
69  nframes-=16;
70  }
71 
72  // work through aligned buffers
73  while (nframes >= 4) {
74 
75  work = _mm_load_ps(buf);
76 
77  current_min = _mm_min_ps(current_min, work);
78  current_max = _mm_max_ps(current_max, work);
79 
80  buf+=4;
81  nframes-=4;
82  }
83 
84  // work through the rest < 4 samples
85  while ( nframes > 0) {
86 
87  // Load the next float into the work buffer
88  work = _mm_set1_ps(*buf);
89 
90  current_min = _mm_min_ps(current_min, work);
91  current_max = _mm_max_ps(current_max, work);
92 
93  buf++;
94  nframes--;
95  }
96 
97  // Find min & max value in current_max through shuffle tricks
98 
99  work = current_min;
100  work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(2, 3, 0, 1));
101  work = _mm_min_ps (work, current_min);
102  current_min = work;
103  work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
104  work = _mm_min_ps (work, current_min);
105 
106  _mm_store_ss(min, work);
107 
108  work = current_max;
109  work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(2, 3, 0, 1));
110  work = _mm_max_ps (work, current_max);
111  current_max = work;
112  work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
113  work = _mm_max_ps (work, current_max);
114 
115  _mm_store_ss(max, work);
116 }
117 
118 
119 
uint32_t pframes_t
Definition: types.h:61
float Sample
Definition: types.h:54
void x86_sse_find_peaks(const ARDOUR::Sample *buf, ARDOUR::pframes_t nframes, float *min, float *max)
int intptr_t
Definition: types.h:46