Ptex
PtexSeparableKernel.cpp
Go to the documentation of this file.
1 /*
2 PTEX SOFTWARE
3 Copyright 2009 Disney Enterprises, Inc. All rights reserved
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in
14  the documentation and/or other materials provided with the
15  distribution.
16 
17  * The names "Disney", "Walt Disney Pictures", "Walt Disney Animation
18  Studios" or the names of its contributors may NOT be used to
19  endorse or promote products derived from this software without
20  specific prior written permission from Walt Disney Pictures.
21 
22 Disclaimer: THIS SOFTWARE IS PROVIDED BY WALT DISNEY PICTURES AND
23 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
24 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
25 FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE ARE DISCLAIMED.
26 IN NO EVENT SHALL WALT DISNEY PICTURES, THE COPYRIGHT HOLDER OR
27 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND BASED ON ANY
31 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
34 */
35 #include "PtexPlatform.h"
36 #include "PtexUtils.h"
37 #include "PtexHalf.h"
38 #include "PtexSeparableKernel.h"
39 
40 namespace {
41  // apply to 1..4 channels (unrolled channel loop) of packed data (nTxChan==nChan)
42  template<class T, int nChan>
43  void Apply(PtexSeparableKernel& k, float* result, void* data, int /*nChan*/, int /*nTxChan*/)
44  {
45  float* rowResult = (float*) alloca(nChan*sizeof(float));
46  int rowlen = k.res.u() * nChan;
47  int datalen = k.uw * nChan;
48  int rowskip = rowlen - datalen;
49  float* kvp = k.kv;
50  T* p = (T*)data + (k.v * k.res.u() + k.u) * nChan;
51  T* pEnd = p + k.vw * rowlen;
52  while (p != pEnd)
53  {
54  float* kup = k.ku;
55  T* pRowEnd = p + datalen;
56  // just mult and copy first element
57  PtexUtils::VecMult<T,nChan>()(rowResult, p, *kup++);
58  p += nChan;
59  // accumulate remaining elements
60  while (p != pRowEnd) {
61  // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
62  PtexUtils::VecAccum<T,nChan>()(rowResult, p, *kup++);
63  p += nChan;
64  }
65  // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
66  PtexUtils::VecAccum<float,nChan>()(result, rowResult, *kvp++);
67  p += rowskip;
68  }
69  }
70 
71  // apply to 1..4 channels (unrolled channel loop) w/ pixel stride
72  template<class T, int nChan>
73  void ApplyS(PtexSeparableKernel& k, float* result, void* data, int /*nChan*/, int nTxChan)
74  {
75  float* rowResult = (float*) alloca(nChan*sizeof(float));
76  int rowlen = k.res.u() * nTxChan;
77  int datalen = k.uw * nTxChan;
78  int rowskip = rowlen - datalen;
79  float* kvp = k.kv;
80  T* p = (T*)data + (k.v * k.res.u() + k.u) * nTxChan;
81  T* pEnd = p + k.vw * rowlen;
82  while (p != pEnd)
83  {
84  float* kup = k.ku;
85  T* pRowEnd = p + datalen;
86  // just mult and copy first element
87  PtexUtils::VecMult<T,nChan>()(rowResult, p, *kup++);
88  p += nTxChan;
89  // accumulate remaining elements
90  while (p != pRowEnd) {
91  // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
92  PtexUtils::VecAccum<T,nChan>()(rowResult, p, *kup++);
93  p += nTxChan;
94  }
95  // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
96  PtexUtils::VecAccum<float,nChan>()(result, rowResult, *kvp++);
97  p += rowskip;
98  }
99  }
100 
101  // apply to N channels (general case)
102  template<class T>
103  void ApplyN(PtexSeparableKernel& k, float* result, void* data, int nChan, int nTxChan)
104  {
105  float* rowResult = (float*) alloca(nChan*sizeof(float));
106  int rowlen = k.res.u() * nTxChan;
107  int datalen = k.uw * nTxChan;
108  int rowskip = rowlen - datalen;
109  float* kvp = k.kv;
110  T* p = (T*)data + (k.v * k.res.u() + k.u) * nTxChan;
111  T* pEnd = p + k.vw * rowlen;
112  while (p != pEnd)
113  {
114  float* kup = k.ku;
115  T* pRowEnd = p + datalen;
116  // just mult and copy first element
117  PtexUtils::VecMultN<T>()(rowResult, p, nChan, *kup++);
118  p += nTxChan;
119  // accumulate remaining elements
120  while (p != pRowEnd) {
121  // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
122  PtexUtils::VecAccumN<T>()(rowResult, p, nChan, *kup++);
123  p += nTxChan;
124  }
125  // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
126  PtexUtils::VecAccumN<float>()(result, rowResult, nChan, *kvp++);
127  p += rowskip;
128  }
129  }
130 }
131 
132 
133 
136  // nChan == nTxChan
137  ApplyN<uint8_t>, ApplyN<uint16_t>, ApplyN<PtexHalf>, ApplyN<float>,
138  Apply<uint8_t,1>, Apply<uint16_t,1>, Apply<PtexHalf,1>, Apply<float,1>,
139  Apply<uint8_t,2>, Apply<uint16_t,2>, Apply<PtexHalf,2>, Apply<float,2>,
140  Apply<uint8_t,3>, Apply<uint16_t,3>, Apply<PtexHalf,3>, Apply<float,3>,
141  Apply<uint8_t,4>, Apply<uint16_t,4>, Apply<PtexHalf,4>, Apply<float,4>,
142 
143  // nChan != nTxChan (need pixel stride)
144  ApplyN<uint8_t>, ApplyN<uint16_t>, ApplyN<PtexHalf>, ApplyN<float>,
145  ApplyS<uint8_t,1>, ApplyS<uint16_t,1>, ApplyS<PtexHalf,1>, ApplyS<float,1>,
146  ApplyS<uint8_t,2>, ApplyS<uint16_t,2>, ApplyS<PtexHalf,2>, ApplyS<float,2>,
147  ApplyS<uint8_t,3>, ApplyS<uint16_t,3>, ApplyS<PtexHalf,3>, ApplyS<float,3>,
148  ApplyS<uint8_t,4>, ApplyS<uint16_t,4>, ApplyS<PtexHalf,4>, ApplyS<float,4>,
149 };
Platform-specific classes, functions, and includes.
static ApplyFn applyFunctions[40]
int u() const
U resolution in texels.
Definition: Ptexture.h:178
void(* ApplyFn)(PtexSeparableKernel &k, float *dst, void *data, int nChan, int nTxChan)
Half-precision floating-point type.