Skip to content

Commit ecab9e2

Browse files
aras-p authored and lgritz committed
HDR: speed up reading by around 4x (AcademySoftwareFoundation#3590)
Most of the cost of reading an HDR file was the conversion from RGBE to floats inside ldexpf(), which, at least on Windows/VS2022, spends most of its time inside _ctrlfp(). But there are only 256 possible exponents in total, so just use a precomputed table. On my PC (Windows 10, VS2022, Ryzen 5950X) this change gets the file read time for an 8x resolution .HDR image from 1.10s down to 0.27s.
1 parent 9d8aa26 commit ecab9e2

File tree

1 file changed

+72
-4
lines changed

1 file changed

+72
-4
lines changed

src/hdr.imageio/hdrinput.cpp

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ OIIO_PLUGIN_NAMESPACE_BEGIN
1717
// .hdr / .rgbe files - HDR files from Radiance
1818
//
1919
// General info on the hdr/rgbe format can be found at:
20-
// http://local.wasp.uwa.edu.au/~pbourke/dataformats/pic/
20+
// http://paulbourke.net/dataformats/pic/
2121
//
2222
// Also see Greg Ward's "Real Pixels" chapter in Graphics Gems II for an
2323
// explanation of the encoding that's used in Radiance rgba files.
@@ -115,15 +115,83 @@ OIIO_EXPORT const char* hdr_input_extensions[] = { "hdr", "rgbe", nullptr };
115115
OIIO_PLUGIN_EXPORTS_END
116116

117117

118+
// Table of ldexpf(1.0f, i - (int)(128 + 8)) values for i = 0..255, indexed by the RGBE exponent byte.
119+
// Note: Ward uses ldexp(col+0.5,exp-(128+8)). However, we wanted pixels
120+
// in the range [0,1] to map back into the range [0,1], so the +0.5 is omitted.
121+
static const float exponent_table[256] = {
122+
1.1479437e-41f, 2.2958874e-41f, 4.5917748e-41f, 9.1835496e-41f,
123+
1.8367099e-40f, 3.6734198e-40f, 7.3468397e-40f, 1.4693679e-39f,
124+
2.9387359e-39f, 5.8774718e-39f, 1.1754944e-38f, 2.3509887e-38f,
125+
4.7019774e-38f, 9.4039548e-38f, 1.880791e-37f, 3.7615819e-37f,
126+
7.5231638e-37f, 1.5046328e-36f, 3.0092655e-36f, 6.0185311e-36f,
127+
1.2037062e-35f, 2.4074124e-35f, 4.8148249e-35f, 9.6296497e-35f,
128+
1.9259299e-34f, 3.8518599e-34f, 7.7037198e-34f, 1.540744e-33f,
129+
3.0814879e-33f, 6.1629758e-33f, 1.2325952e-32f, 2.4651903e-32f,
130+
4.9303807e-32f, 9.8607613e-32f, 1.9721523e-31f, 3.9443045e-31f,
131+
7.8886091e-31f, 1.5777218e-30f, 3.1554436e-30f, 6.3108872e-30f,
132+
1.2621774e-29f, 2.5243549e-29f, 5.0487098e-29f, 1.009742e-28f,
133+
2.0194839e-28f, 4.0389678e-28f, 8.0779357e-28f, 1.6155871e-27f,
134+
3.2311743e-27f, 6.4623485e-27f, 1.2924697e-26f, 2.5849394e-26f,
135+
5.1698788e-26f, 1.0339758e-25f, 2.0679515e-25f, 4.1359031e-25f,
136+
8.2718061e-25f, 1.6543612e-24f, 3.3087225e-24f, 6.6174449e-24f,
137+
1.323489e-23f, 2.646978e-23f, 5.2939559e-23f, 1.0587912e-22f,
138+
2.1175824e-22f, 4.2351647e-22f, 8.4703295e-22f, 1.6940659e-21f,
139+
3.3881318e-21f, 6.7762636e-21f, 1.3552527e-20f, 2.7105054e-20f,
140+
5.4210109e-20f, 1.0842022e-19f, 2.1684043e-19f, 4.3368087e-19f,
141+
8.6736174e-19f, 1.7347235e-18f, 3.469447e-18f, 6.9388939e-18f,
142+
1.3877788e-17f, 2.7755576e-17f, 5.5511151e-17f, 1.110223e-16f,
143+
2.220446e-16f, 4.4408921e-16f, 8.8817842e-16f, 1.7763568e-15f,
144+
3.5527137e-15f, 7.1054274e-15f, 1.4210855e-14f, 2.8421709e-14f,
145+
5.6843419e-14f, 1.1368684e-13f, 2.2737368e-13f, 4.5474735e-13f,
146+
9.094947e-13f, 1.8189894e-12f, 3.6379788e-12f, 7.2759576e-12f,
147+
1.4551915e-11f, 2.910383e-11f, 5.8207661e-11f, 1.1641532e-10f,
148+
2.3283064e-10f, 4.6566129e-10f, 9.3132257e-10f, 1.8626451e-09f,
149+
3.7252903e-09f, 7.4505806e-09f, 1.4901161e-08f, 2.9802322e-08f,
150+
5.9604645e-08f, 1.1920929e-07f, 2.3841858e-07f, 4.7683716e-07f,
151+
9.5367432e-07f, 1.9073486e-06f, 3.8146973e-06f, 7.6293945e-06f,
152+
1.5258789e-05f, 3.0517578e-05f, 6.1035156e-05f, 0.00012207031f,
153+
0.00024414062f, 0.00048828125f, 0.0009765625f, 0.001953125f,
154+
0.00390625f, 0.0078125f, 0.015625f, 0.03125f,
155+
0.0625f, 0.125f, 0.25f, 0.5f,
156+
1.0f, 2.0f, 4.0f, 8.0f,
157+
16.0f, 32.0f, 64.0f, 128.0f,
158+
256.0f, 512.0f, 1024.0f, 2048.0f,
159+
4096.0f, 8192.0f, 16384.0f, 32768.0f,
160+
65536.0f, 131072.0f, 262144.0f, 524288.0f,
161+
1048576.0f, 2097152.0f, 4194304.0f, 8388608.0f,
162+
16777216.0f, 33554432.0f, 67108864.0f, 1.3421773e+08f,
163+
2.6843546e+08f, 5.3687091e+08f, 1.0737418e+09f, 2.1474836e+09f,
164+
4.2949673e+09f, 8.5899346e+09f, 1.7179869e+10f, 3.4359738e+10f,
165+
6.8719477e+10f, 1.3743895e+11f, 2.7487791e+11f, 5.4975581e+11f,
166+
1.0995116e+12f, 2.1990233e+12f, 4.3980465e+12f, 8.796093e+12f,
167+
1.7592186e+13f, 3.5184372e+13f, 7.0368744e+13f, 1.4073749e+14f,
168+
2.8147498e+14f, 5.6294995e+14f, 1.1258999e+15f, 2.2517998e+15f,
169+
4.5035996e+15f, 9.0071993e+15f, 1.8014399e+16f, 3.6028797e+16f,
170+
7.2057594e+16f, 1.4411519e+17f, 2.8823038e+17f, 5.7646075e+17f,
171+
1.1529215e+18f, 2.305843e+18f, 4.611686e+18f, 9.223372e+18f,
172+
1.8446744e+19f, 3.6893488e+19f, 7.3786976e+19f, 1.4757395e+20f,
173+
2.9514791e+20f, 5.9029581e+20f, 1.1805916e+21f, 2.3611832e+21f,
174+
4.7223665e+21f, 9.444733e+21f, 1.8889466e+22f, 3.7778932e+22f,
175+
7.5557864e+22f, 1.5111573e+23f, 3.0223145e+23f, 6.0446291e+23f,
176+
1.2089258e+24f, 2.4178516e+24f, 4.8357033e+24f, 9.6714066e+24f,
177+
1.9342813e+25f, 3.8685626e+25f, 7.7371252e+25f, 1.547425e+26f,
178+
3.0948501e+26f, 6.1897002e+26f, 1.23794e+27f, 2.4758801e+27f,
179+
4.9517602e+27f, 9.9035203e+27f, 1.9807041e+28f, 3.9614081e+28f,
180+
7.9228163e+28f, 1.5845633e+29f, 3.1691265e+29f, 6.338253e+29f,
181+
1.2676506e+30f, 2.5353012e+30f, 5.0706024e+30f, 1.0141205e+31f,
182+
2.028241e+31f, 4.0564819e+31f, 8.1129638e+31f, 1.6225928e+32f,
183+
3.2451855e+32f, 6.4903711e+32f, 1.2980742e+33f, 2.5961484e+33f,
184+
5.1922969e+33f, 1.0384594e+34f, 2.0769187e+34f, 4.1538375e+34f,
185+
8.307675e+34f, 1.661535e+35f, 3.32307e+35f, 6.64614e+35f,
186+
};
187+
118188

119189
/* standard conversion from rgbe to float pixels */
120-
/* note: Ward uses ldexp(col+0.5,exp-(128+8)). However we wanted pixels */
121-
/* in the range [0,1] to map back into the range [0,1]. */
122190
inline void
123191
rgbe2float(float& red, float& green, float& blue, unsigned char rgbe[4])
124192
{
125193
if (rgbe[3]) { /*nonzero pixel*/
126-
float f = ldexpf(1.0f, rgbe[3] - (int)(128 + 8));
194+
float f = exponent_table[rgbe[3]];
127195
red = rgbe[0] * f;
128196
green = rgbe[1] * f;
129197
blue = rgbe[2] * f;

0 commit comments

Comments
 (0)