Main Page   Compound List   File List   Compound Members   File Members  

filters_sse.h

Go to the documentation of this file.
00001 /* Copyright (C) 2002 Jean-Marc Valin 
00002    File: filters_sse.h
00003    Various analysis/synthesis filters (SSE version)
00004 
00005    Redistribution and use in source and binary forms, with or without
00006    modification, are permitted provided that the following conditions
00007    are met:
00008    
00009    - Redistributions of source code must retain the above copyright
00010    notice, this list of conditions and the following disclaimer.
00011    
00012    - Redistributions in binary form must reproduce the above copyright
00013    notice, this list of conditions and the following disclaimer in the
00014    documentation and/or other materials provided with the distribution.
00015    
00016    - Neither the name of the Xiph.org Foundation nor the names of its
00017    contributors may be used to endorse or promote products derived from
00018    this software without specific prior written permission.
00019    
00020    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00021    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00022    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00023    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
00024    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00025    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00026    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00027    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00028    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00029    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00030    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 */
00032 
/* filter_mem2: pole-zero filter with external state, SSE-accelerated.
 *
 * For every sample i in [0, N):
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] + num[j+1]*x[i] - den[j+1]*y[i]    (j = 0..8)
 *    mem[9] =            num[10]*x[i]  - den[10]*y[i]
 *
 * x     input samples (N floats)
 * _num  numerator coefficients, ord+1 floats; _num[0] is copied but never
 *       used by the recursion
 * _den  denominator coefficients, ord+1 floats; _den[0] is likewise unused
 * y     output samples (N floats)
 * N     number of samples; any non-negative value (a multiple of 4 keeps
 *       everything on the unrolled SSE path)
 * ord   filter order; the kernel is hard-wired for 10 taps, so ord must be
 *       <= 10 (missing coefficients/state are zero-padded)
 * _mem  filter state, ord floats, read on entry and updated on exit
 */
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
/* Assembly text for one sample.  `off` is the byte offset of the sample
   inside the current group of four, given as a string literal.
   Operands: %0 = mem, %1 = x+i, %2 = y+i, %3 = num, %4 = den.
   xmm0 holds x[i] and xmm1 holds y[i], each broadcast to all four lanes. */
#define FILTER_MEM2_SSE_STEP(off) \
       "\tmovss " off "(%1), %%xmm0\n" \
       "\tmovss (%0), %%xmm1\n" \
       "\taddss %%xmm0, %%xmm1\n" \
       "\tmovss %%xmm1, " off "(%2)\n" \
       "\tshufps $0x00, %%xmm0, %%xmm0\n" \
       "\tshufps $0x00, %%xmm1, %%xmm1\n" \
       \
       "\tmovaps 4(%3),  %%xmm2\n" \
       "\tmovaps 4(%4),  %%xmm3\n" \
       "\tmulps  %%xmm0, %%xmm2\n" \
       "\tmulps  %%xmm1, %%xmm3\n" \
       "\tmovaps 20(%3), %%xmm4\n" \
       "\tmulps  %%xmm0, %%xmm4\n" \
       "\taddps  4(%0),  %%xmm2\n" \
       "\tmovaps 20(%4), %%xmm5\n" \
       "\tmulps  %%xmm1, %%xmm5\n" \
       "\taddps  20(%0), %%xmm4\n" \
       "\tsubps  %%xmm3, %%xmm2\n" \
       "\tmovups %%xmm2, (%0)\n" \
       "\tsubps  %%xmm5, %%xmm4\n" \
       "\tmovups %%xmm4, 16(%0)\n" \
       \
       "\tmovss  36(%3), %%xmm2\n" \
       "\tmulss  %%xmm0, %%xmm2\n" \
       "\tmovss  36(%4), %%xmm3\n" \
       "\tmulss  %%xmm1, %%xmm3\n" \
       "\taddss  36(%0), %%xmm2\n" \
       "\tmovss  40(%3), %%xmm4\n" \
       "\tmulss  %%xmm0, %%xmm4\n" \
       "\tmovss  40(%4), %%xmm5\n" \
       "\tmulss  %%xmm1, %%xmm5\n" \
       "\tsubss  %%xmm3, %%xmm2\n" \
       "\tmovss  %%xmm2, 32(%0)       \n" \
       "\tsubss  %%xmm5, %%xmm4\n" \
       "\tmovss  %%xmm4, 36(%0)\n"
#endif

void filter_mem2(float *x, float *_num, float *_den, float *y, int N, int ord, float *_mem)
{
   float num_buf[20], den_buf[20], mem_buf[20];
   float *num, *den, *mem;
   int i, j;

   /* Place each working array so that element [1] (byte offset 4) sits on a
      16-byte boundary, which is what the movaps/addps operands at offsets 4
      and 20 require.  Only the low four address bits are inspected, so the
      pointer never round-trips through a narrower integer type. */
   num = num_buf + 3 - (((unsigned long)num_buf & 15) >> 2);
   den = den_buf + 3 - (((unsigned long)den_buf & 15) >> 2);
   mem = mem_buf + 3 - (((unsigned long)mem_buf & 15) >> 2);

   /* Zero-pad up to the fixed kernel size of 10 taps. */
   for (i=0;i<=10;i++)
      num[i]=den[i]=0;
   for (i=0;i<10;i++)
      mem[i]=0;

   for (i=0;i<ord+1;i++)
   {
      num[i]=_num[i];
      den[i]=_den[i];
   }
   for (i=0;i<ord;i++)
      mem[i]=_mem[i];

   i = 0;
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
   /* Unrolled SSE path: four samples per iteration, full groups only. */
   for (;i+3<N;i+=4)
   {
      __asm__ __volatile__
      (
       FILTER_MEM2_SSE_STEP("0")
       FILTER_MEM2_SSE_STEP("4")
       FILTER_MEM2_SSE_STEP("8")
       FILTER_MEM2_SSE_STEP("12")
       : : "r" (mem), "r" (x+i), "r" (y+i), "r" (num), "r" (den)
       : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" );
   }
#endif
   /* Scalar path: finishes the last N%4 samples after the SSE loop, and
      handles every sample when the assembly above is not compiled in.
      It mirrors the assembly's operation order. */
   for (;i<N;i++)
   {
      float xi = x[i];
      float yi = xi + mem[0];
      y[i] = yi;
      for (j=0;j<9;j++)
         mem[j] = (num[j+1]*xi + mem[j+1]) - den[j+1]*yi;
      mem[9] = num[10]*xi - den[10]*yi;
   }

   for (i=0;i<ord;i++)
      _mem[i]=mem[i];
}
#ifdef FILTER_MEM2_SSE_STEP
#undef FILTER_MEM2_SSE_STEP
#endif
00217 
00218 
/* iir_mem2: all-pole (IIR) filter with external state, SSE-accelerated.
 *
 * For every sample i in [0, N):
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] - den[j+1]*y[i]    (j = 0..8)
 *    mem[9] =          - den[10]*y[i]
 *
 * x     input samples (N floats)
 * _den  denominator coefficients, ord+1 floats; _den[0] is copied but never
 *       used by the recursion
 * y     output samples (N floats)
 * N     number of samples
 * ord   filter order; the kernel is hard-wired for 10 taps, so ord must be
 *       <= 10 (missing coefficients/state are zero-padded)
 * _mem  filter state, ord floats, read on entry and updated on exit
 */
void iir_mem2(float *x, float *_den, float *y, int N, int ord, float *_mem)
{
   float den_buf[20], mem_buf[20];
   float *den, *mem;
   int i;
#if !(defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
   int j;
#endif

   /* Place each working array so that element [1] (byte offset 4) sits on a
      16-byte boundary, as required by the movaps loads at offsets 4 and 20.
      Only the low four address bits are inspected, so the pointer never
      round-trips through a narrower integer type. */
   den = den_buf + 3 - (((unsigned long)den_buf & 15) >> 2);
   mem = mem_buf + 3 - (((unsigned long)mem_buf & 15) >> 2);

   /* Zero-pad up to the fixed kernel size of 10 taps. */
   for (i=0;i<=10;i++)
      den[i]=0;
   for (i=0;i<10;i++)
      mem[i]=0;
   for (i=0;i<ord+1;i++)
      den[i]=_den[i];
   for (i=0;i<ord;i++)
      mem[i]=_mem[i];

   for (i=0;i<N;i++)
   {
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
      /* Operands: %0 = mem, %1 = x+i, %2 = y+i, %3 = den.
         xmm1 holds y[i] broadcast to all four lanes. */
      __asm__ __volatile__
      (
       "\tmovss (%1), %%xmm0\n"
       "\tmovss (%0), %%xmm1\n"
       "\taddss %%xmm0, %%xmm1\n"
       "\tmovss %%xmm1, (%2)\n"
       "\tshufps $0x00, %%xmm0, %%xmm0\n"
       "\tshufps $0x00, %%xmm1, %%xmm1\n"

       "\tmovaps 4(%3),  %%xmm2\n"
       "\tmovaps 20(%3), %%xmm3\n"
       "\tmulps  %%xmm1, %%xmm2\n"
       "\tmulps  %%xmm1, %%xmm3\n"
       "\tmovss  36(%3), %%xmm4\n"
       "\tmovss  40(%3), %%xmm5\n"
       "\tmulss  %%xmm1, %%xmm4\n"
       "\tmulss  %%xmm1, %%xmm5\n"
       "\tmovaps 4(%0),  %%xmm6\n"
       "\tsubps  %%xmm2, %%xmm6\n"
       "\tmovups %%xmm6, (%0)\n"
       "\tmovaps 20(%0), %%xmm7\n"
       "\tsubps  %%xmm3, %%xmm7\n"
       "\tmovups %%xmm7, 16(%0)\n"

       "\tmovss  36(%0), %%xmm7\n"
       "\tsubss  %%xmm4, %%xmm7\n"
       "\tmovss  %%xmm7, 32(%0)       \n"
       "\txorps  %%xmm2, %%xmm2\n"
       "\tsubss  %%xmm5, %%xmm2\n"
       "\tmovss  %%xmm2, 36(%0)\n"

       : : "r" (mem), "r" (x+i), "r" (y+i), "r" (den)
       : "memory", "xmm0", "xmm1", "xmm2", "xmm3",
         "xmm4", "xmm5", "xmm6", "xmm7" );
#else
      /* Portable equivalent of the assembly above (fixed 10 taps). */
      float yi = x[i] + mem[0];
      y[i] = yi;
      for (j=0;j<9;j++)
         mem[j] = mem[j+1] - den[j+1]*yi;
      mem[9] = 0.0f - den[10]*yi;
#endif
   }

   for (i=0;i<ord;i++)
      _mem[i]=mem[i];
}
00289 

Generated on Wed Feb 11 13:48:36 2004 for speex by doxygen1.2.15