rgb to yuv420 algorithm efficiency

Sambatyon picture Sambatyon · Feb 27, 2012 · Viewed 14.7k times · Source

I wrote an algorithm to convert a RGB image to a YUV420. I spend a long time trying to make it faster but I haven't find any other way to boost its efficiency, so now I turn to you so you can tell me if this is as good as I get, or if there's another more efficient way to do it (the algorithm is in C++ but C and assembler are also options)

namespace {
// lookup tables
int lookup_m_94[] = {
      0,    -94,   -188,   -282,   -376,   -470,   -564,   -658,
    -752,   -846,   -940,  -1034,  -1128,  -1222,  -1316,  -1410,
  -1504,  -1598,  -1692,  -1786,  -1880,  -1974,  -2068,  -2162,
  -2256,  -2350,  -2444,  -2538,  -2632,  -2726,  -2820,  -2914,
  -3008,  -3102,  -3196,  -3290,  -3384,  -3478,  -3572,  -3666,
  -3760,  -3854,  -3948,  -4042,  -4136,  -4230,  -4324,  -4418,
  -4512,  -4606,  -4700,  -4794,  -4888,  -4982,  -5076,  -5170,
  -5264,  -5358,  -5452,  -5546,  -5640,  -5734,  -5828,  -5922,
  -6016,  -6110,  -6204,  -6298,  -6392,  -6486,  -6580,  -6674,
  -6768,  -6862,  -6956,  -7050,  -7144,  -7238,  -7332,  -7426,
  -7520,  -7614,  -7708,  -7802,  -7896,  -7990,  -8084,  -8178,
  -8272,  -8366,  -8460,  -8554,  -8648,  -8742,  -8836,  -8930,
  -9024,  -9118,  -9212,  -9306,  -9400,  -9494,  -9588,  -9682,
  -9776,  -9870,  -9964, -10058, -10152, -10246, -10340, -10434,
  -10528, -10622, -10716, -10810, -10904, -10998, -11092, -11186,
  -11280, -11374, -11468, -11562, -11656, -11750, -11844, -11938,
  -12032, -12126, -12220, -12314, -12408, -12502, -12596, -12690,
  -12784, -12878, -12972, -13066, -13160, -13254, -13348, -13442,
  -13536, -13630, -13724, -13818, -13912, -14006, -14100, -14194,
  -14288, -14382, -14476, -14570, -14664, -14758, -14852, -14946,
  -15040, -15134, -15228, -15322, -15416, -15510, -15604, -15698,
  -15792, -15886, -15980, -16074, -16168, -16262, -16356, -16450,
  -16544, -16638, -16732, -16826, -16920, -17014, -17108, -17202,
  -17296, -17390, -17484, -17578, -17672, -17766, -17860, -17954,
  -18048, -18142, -18236, -18330, -18424, -18518, -18612, -18706,
  -18800, -18894, -18988, -19082, -19176, -19270, -19364, -19458,
  -19552, -19646, -19740, -19834, -19928, -20022, -20116, -20210,
  -20304, -20398, -20492, -20586, -20680, -20774, -20868, -20962,
  -21056, -21150, -21244, -21338, -21432, -21526, -21620, -21714,
  -21808, -21902, -21996, -22090, -22184, -22278, -22372, -22466,
  -22560, -22654, -22748, -22842, -22936, -23030, -23124, -23218,
  -23312, -23406, -23500, -23594, -23688, -23782, -23876, -23970
};

int lookup_m_74[] = {
      0,    -74,   -148,   -222,   -296,   -370,   -444,   -518,
    -592,   -666,   -740,   -814,   -888,   -962,  -1036,  -1110,
  -1184,  -1258,  -1332,  -1406,  -1480,  -1554,  -1628,  -1702,
  -1776,  -1850,  -1924,  -1998,  -2072,  -2146,  -2220,  -2294,
  -2368,  -2442,  -2516,  -2590,  -2664,  -2738,  -2812,  -2886,
  -2960,  -3034,  -3108,  -3182,  -3256,  -3330,  -3404,  -3478,
  -3552,  -3626,  -3700,  -3774,  -3848,  -3922,  -3996,  -4070,
  -4144,  -4218,  -4292,  -4366,  -4440,  -4514,  -4588,  -4662,
  -4736,  -4810,  -4884,  -4958,  -5032,  -5106,  -5180,  -5254,
  -5328,  -5402,  -5476,  -5550,  -5624,  -5698,  -5772,  -5846,
  -5920,  -5994,  -6068,  -6142,  -6216,  -6290,  -6364,  -6438,
  -6512,  -6586,  -6660,  -6734,  -6808,  -6882,  -6956,  -7030,
  -7104,  -7178,  -7252,  -7326,  -7400,  -7474,  -7548,  -7622,
  -7696,  -7770,  -7844,  -7918,  -7992,  -8066,  -8140,  -8214,
  -8288,  -8362,  -8436,  -8510,  -8584,  -8658,  -8732,  -8806,
  -8880,  -8954,  -9028,  -9102,  -9176,  -9250,  -9324,  -9398,
  -9472,  -9546,  -9620,  -9694,  -9768,  -9842,  -9916,  -9990,
  -10064, -10138, -10212, -10286, -10360, -10434, -10508, -10582,
  -10656, -10730, -10804, -10878, -10952, -11026, -11100, -11174,
  -11248, -11322, -11396, -11470, -11544, -11618, -11692, -11766,
  -11840, -11914, -11988, -12062, -12136, -12210, -12284, -12358,
  -12432, -12506, -12580, -12654, -12728, -12802, -12876, -12950,
  -13024, -13098, -13172, -13246, -13320, -13394, -13468, -13542,
  -13616, -13690, -13764, -13838, -13912, -13986, -14060, -14134,
  -14208, -14282, -14356, -14430, -14504, -14578, -14652, -14726,
  -14800, -14874, -14948, -15022, -15096, -15170, -15244, -15318,
  -15392, -15466, -15540, -15614, -15688, -15762, -15836, -15910,
  -15984, -16058, -16132, -16206, -16280, -16354, -16428, -16502,
  -16576, -16650, -16724, -16798, -16872, -16946, -17020, -17094,
  -17168, -17242, -17316, -17390, -17464, -17538, -17612, -17686,
  -17760, -17834, -17908, -17982, -18056, -18130, -18204, -18278,
  -18352, -18426, -18500, -18574, -18648, -18722, -18796, -18870
};

int lookup_m_38[] = {
      0,    -38,    -76,   -114,   -152,   -190,   -228,   -266,
    -304,   -342,   -380,   -418,   -456,   -494,   -532,   -570,
    -608,   -646,   -684,   -722,   -760,   -798,   -836,   -874,
    -912,   -950,   -988,  -1026,  -1064,  -1102,  -1140,  -1178,
  -1216,  -1254,  -1292,  -1330,  -1368,  -1406,  -1444,  -1482,
  -1520,  -1558,  -1596,  -1634,  -1672,  -1710,  -1748,  -1786,
  -1824,  -1862,  -1900,  -1938,  -1976,  -2014,  -2052,  -2090,
  -2128,  -2166,  -2204,  -2242,  -2280,  -2318,  -2356,  -2394,
  -2432,  -2470,  -2508,  -2546,  -2584,  -2622,  -2660,  -2698,
  -2736,  -2774,  -2812,  -2850,  -2888,  -2926,  -2964,  -3002,
  -3040,  -3078,  -3116,  -3154,  -3192,  -3230,  -3268,  -3306,
  -3344,  -3382,  -3420,  -3458,  -3496,  -3534,  -3572,  -3610,
  -3648,  -3686,  -3724,  -3762,  -3800,  -3838,  -3876,  -3914,
  -3952,  -3990,  -4028,  -4066,  -4104,  -4142,  -4180,  -4218,
  -4256,  -4294,  -4332,  -4370,  -4408,  -4446,  -4484,  -4522,
  -4560,  -4598,  -4636,  -4674,  -4712,  -4750,  -4788,  -4826,
  -4864,  -4902,  -4940,  -4978,  -5016,  -5054,  -5092,  -5130,
  -5168,  -5206,  -5244,  -5282,  -5320,  -5358,  -5396,  -5434,
  -5472,  -5510,  -5548,  -5586,  -5624,  -5662,  -5700,  -5738,
  -5776,  -5814,  -5852,  -5890,  -5928,  -5966,  -6004,  -6042,
  -6080,  -6118,  -6156,  -6194,  -6232,  -6270,  -6308,  -6346,
  -6384,  -6422,  -6460,  -6498,  -6536,  -6574,  -6612,  -6650,
  -6688,  -6726,  -6764,  -6802,  -6840,  -6878,  -6916,  -6954,
  -6992,  -7030,  -7068,  -7106,  -7144,  -7182,  -7220,  -7258,
  -7296,  -7334,  -7372,  -7410,  -7448,  -7486,  -7524,  -7562,
  -7600,  -7638,  -7676,  -7714,  -7752,  -7790,  -7828,  -7866,
  -7904,  -7942,  -7980,  -8018,  -8056,  -8094,  -8132,  -8170,
  -8208,  -8246,  -8284,  -8322,  -8360,  -8398,  -8436,  -8474,
  -8512,  -8550,  -8588,  -8626,  -8664,  -8702,  -8740,  -8778,
  -8816,  -8854,  -8892,  -8930,  -8968,  -9006,  -9044,  -9082,
  -9120,  -9158,  -9196,  -9234,  -9272,  -9310,  -9348,  -9386,
  -9424,  -9462,  -9500,  -9538,  -9576,  -9614,  -9652,  -9690
};

int lookup_m_18[] = {
      0,    -18,    -36,    -54,    -72,    -90,   -108,   -126,
    -144,   -162,   -180,   -198,   -216,   -234,   -252,   -270,
    -288,   -306,   -324,   -342,   -360,   -378,   -396,   -414,
    -432,   -450,   -468,   -486,   -504,   -522,   -540,   -558,
    -576,   -594,   -612,   -630,   -648,   -666,   -684,   -702,
    -720,   -738,   -756,   -774,   -792,   -810,   -828,   -846,
    -864,   -882,   -900,   -918,   -936,   -954,   -972,   -990,
  -1008,  -1026,  -1044,  -1062,  -1080,  -1098,  -1116,  -1134,
  -1152,  -1170,  -1188,  -1206,  -1224,  -1242,  -1260,  -1278,
  -1296,  -1314,  -1332,  -1350,  -1368,  -1386,  -1404,  -1422,
  -1440,  -1458,  -1476,  -1494,  -1512,  -1530,  -1548,  -1566,
  -1584,  -1602,  -1620,  -1638,  -1656,  -1674,  -1692,  -1710,
  -1728,  -1746,  -1764,  -1782,  -1800,  -1818,  -1836,  -1854,
  -1872,  -1890,  -1908,  -1926,  -1944,  -1962,  -1980,  -1998,
  -2016,  -2034,  -2052,  -2070,  -2088,  -2106,  -2124,  -2142,
  -2160,  -2178,  -2196,  -2214,  -2232,  -2250,  -2268,  -2286,
  -2304,  -2322,  -2340,  -2358,  -2376,  -2394,  -2412,  -2430,
  -2448,  -2466,  -2484,  -2502,  -2520,  -2538,  -2556,  -2574,
  -2592,  -2610,  -2628,  -2646,  -2664,  -2682,  -2700,  -2718,
  -2736,  -2754,  -2772,  -2790,  -2808,  -2826,  -2844,  -2862,
  -2880,  -2898,  -2916,  -2934,  -2952,  -2970,  -2988,  -3006,
  -3024,  -3042,  -3060,  -3078,  -3096,  -3114,  -3132,  -3150,
  -3168,  -3186,  -3204,  -3222,  -3240,  -3258,  -3276,  -3294,
  -3312,  -3330,  -3348,  -3366,  -3384,  -3402,  -3420,  -3438,
  -3456,  -3474,  -3492,  -3510,  -3528,  -3546,  -3564,  -3582,
  -3600,  -3618,  -3636,  -3654,  -3672,  -3690,  -3708,  -3726,
  -3744,  -3762,  -3780,  -3798,  -3816,  -3834,  -3852,  -3870,
  -3888,  -3906,  -3924,  -3942,  -3960,  -3978,  -3996,  -4014,
  -4032,  -4050,  -4068,  -4086,  -4104,  -4122,  -4140,  -4158,
  -4176,  -4194,  -4212,  -4230,  -4248,  -4266,  -4284,  -4302,
  -4320,  -4338,  -4356,  -4374,  -4392,  -4410,  -4428,  -4446,
  -4464,  -4482,  -4500,  -4518,  -4536,  -4554,  -4572,  -4590
};

int lookup25[] = {
      0,     25,     50,     75,    100,    125,    150,    175,
    200,    225,    250,    275,    300,    325,    350,    375,
    400,    425,    450,    475,    500,    525,    550,    575,
    600,    625,    650,    675,    700,    725,    750,    775,
    800,    825,    850,    875,    900,    925,    950,    975,
    1000,   1025,   1050,   1075,   1100,   1125,   1150,   1175,
    1200,   1225,   1250,   1275,   1300,   1325,   1350,   1375,
    1400,   1425,   1450,   1475,   1500,   1525,   1550,   1575,
    1600,   1625,   1650,   1675,   1700,   1725,   1750,   1775,
    1800,   1825,   1850,   1875,   1900,   1925,   1950,   1975,
    2000,   2025,   2050,   2075,   2100,   2125,   2150,   2175,
    2200,   2225,   2250,   2275,   2300,   2325,   2350,   2375,
    2400,   2425,   2450,   2475,   2500,   2525,   2550,   2575,
    2600,   2625,   2650,   2675,   2700,   2725,   2750,   2775,
    2800,   2825,   2850,   2875,   2900,   2925,   2950,   2975,
    3000,   3025,   3050,   3075,   3100,   3125,   3150,   3175,
    3200,   3225,   3250,   3275,   3300,   3325,   3350,   3375,
    3400,   3425,   3450,   3475,   3500,   3525,   3550,   3575,
    3600,   3625,   3650,   3675,   3700,   3725,   3750,   3775,
    3800,   3825,   3850,   3875,   3900,   3925,   3950,   3975,
    4000,   4025,   4050,   4075,   4100,   4125,   4150,   4175,
    4200,   4225,   4250,   4275,   4300,   4325,   4350,   4375,
    4400,   4425,   4450,   4475,   4500,   4525,   4550,   4575,
    4600,   4625,   4650,   4675,   4700,   4725,   4750,   4775,
    4800,   4825,   4850,   4875,   4900,   4925,   4950,   4975,
    5000,   5025,   5050,   5075,   5100,   5125,   5150,   5175,
    5200,   5225,   5250,   5275,   5300,   5325,   5350,   5375,
    5400,   5425,   5450,   5475,   5500,   5525,   5550,   5575,
    5600,   5625,   5650,   5675,   5700,   5725,   5750,   5775,
    5800,   5825,   5850,   5875,   5900,   5925,   5950,   5975,
    6000,   6025,   6050,   6075,   6100,   6125,   6150,   6175,
    6200,   6225,   6250,   6275,   6300,   6325,   6350,   6375
};

int lookup66[] = {
      0,     66,    132,    198,    264,    330,    396,    462,
    528,    594,    660,    726,    792,    858,    924,    990,
    1056,   1122,   1188,   1254,   1320,   1386,   1452,   1518,
    1584,   1650,   1716,   1782,   1848,   1914,   1980,   2046,
    2112,   2178,   2244,   2310,   2376,   2442,   2508,   2574,
    2640,   2706,   2772,   2838,   2904,   2970,   3036,   3102,
    3168,   3234,   3300,   3366,   3432,   3498,   3564,   3630,
    3696,   3762,   3828,   3894,   3960,   4026,   4092,   4158,
    4224,   4290,   4356,   4422,   4488,   4554,   4620,   4686,
    4752,   4818,   4884,   4950,   5016,   5082,   5148,   5214,
    5280,   5346,   5412,   5478,   5544,   5610,   5676,   5742,
    5808,   5874,   5940,   6006,   6072,   6138,   6204,   6270,
    6336,   6402,   6468,   6534,   6600,   6666,   6732,   6798,
    6864,   6930,   6996,   7062,   7128,   7194,   7260,   7326,
    7392,   7458,   7524,   7590,   7656,   7722,   7788,   7854,
    7920,   7986,   8052,   8118,   8184,   8250,   8316,   8382,
    8448,   8514,   8580,   8646,   8712,   8778,   8844,   8910,
    8976,   9042,   9108,   9174,   9240,   9306,   9372,   9438,
    9504,   9570,   9636,   9702,   9768,   9834,   9900,   9966,
  10032,  10098,  10164,  10230,  10296,  10362,  10428,  10494,
  10560,  10626,  10692,  10758,  10824,  10890,  10956,  11022,
  11088,  11154,  11220,  11286,  11352,  11418,  11484,  11550,
  11616,  11682,  11748,  11814,  11880,  11946,  12012,  12078,
  12144,  12210,  12276,  12342,  12408,  12474,  12540,  12606,
  12672,  12738,  12804,  12870,  12936,  13002,  13068,  13134,
  13200,  13266,  13332,  13398,  13464,  13530,  13596,  13662,
  13728,  13794,  13860,  13926,  13992,  14058,  14124,  14190,
  14256,  14322,  14388,  14454,  14520,  14586,  14652,  14718,
  14784,  14850,  14916,  14982,  15048,  15114,  15180,  15246,
  15312,  15378,  15444,  15510,  15576,  15642,  15708,  15774,
  15840,  15906,  15972,  16038,  16104,  16170,  16236,  16302,
  16368,  16434,  16500,  16566,  16632,  16698,  16764,  16830
};

int lookup112[] = {
      0,    112,    224,    336,    448,    560,    672,    784,
    896,   1008,   1120,   1232,   1344,   1456,   1568,   1680,
    1792,   1904,   2016,   2128,   2240,   2352,   2464,   2576,
    2688,   2800,   2912,   3024,   3136,   3248,   3360,   3472,
    3584,   3696,   3808,   3920,   4032,   4144,   4256,   4368,
    4480,   4592,   4704,   4816,   4928,   5040,   5152,   5264,
    5376,   5488,   5600,   5712,   5824,   5936,   6048,   6160,
    6272,   6384,   6496,   6608,   6720,   6832,   6944,   7056,
    7168,   7280,   7392,   7504,   7616,   7728,   7840,   7952,
    8064,   8176,   8288,   8400,   8512,   8624,   8736,   8848,
    8960,   9072,   9184,   9296,   9408,   9520,   9632,   9744,
    9856,   9968,  10080,  10192,  10304,  10416,  10528,  10640,
  10752,  10864,  10976,  11088,  11200,  11312,  11424,  11536,
  11648,  11760,  11872,  11984,  12096,  12208,  12320,  12432,
  12544,  12656,  12768,  12880,  12992,  13104,  13216,  13328,
  13440,  13552,  13664,  13776,  13888,  14000,  14112,  14224,
  14336,  14448,  14560,  14672,  14784,  14896,  15008,  15120,
  15232,  15344,  15456,  15568,  15680,  15792,  15904,  16016,
  16128,  16240,  16352,  16464,  16576,  16688,  16800,  16912,
  17024,  17136,  17248,  17360,  17472,  17584,  17696,  17808,
  17920,  18032,  18144,  18256,  18368,  18480,  18592,  18704,
  18816,  18928,  19040,  19152,  19264,  19376,  19488,  19600,
  19712,  19824,  19936,  20048,  20160,  20272,  20384,  20496,
  20608,  20720,  20832,  20944,  21056,  21168,  21280,  21392,
  21504,  21616,  21728,  21840,  21952,  22064,  22176,  22288,
  22400,  22512,  22624,  22736,  22848,  22960,  23072,  23184,
  23296,  23408,  23520,  23632,  23744,  23856,  23968,  24080,
  24192,  24304,  24416,  24528,  24640,  24752,  24864,  24976,
  25088,  25200,  25312,  25424,  25536,  25648,  25760,  25872,
  25984,  26096,  26208,  26320,  26432,  26544,  26656,  26768,
  26880,  26992,  27104,  27216,  27328,  27440,  27552,  27664,
  27776,  27888,  28000,  28112,  28224,  28336,  28448,  28560
};

int lookup129[] = {
      0,    129,    258,    387,    516,    645,    774,    903,
    1032,   1161,   1290,   1419,   1548,   1677,   1806,   1935,
    2064,   2193,   2322,   2451,   2580,   2709,   2838,   2967,
    3096,   3225,   3354,   3483,   3612,   3741,   3870,   3999,
    4128,   4257,   4386,   4515,   4644,   4773,   4902,   5031,
    5160,   5289,   5418,   5547,   5676,   5805,   5934,   6063,
    6192,   6321,   6450,   6579,   6708,   6837,   6966,   7095,
    7224,   7353,   7482,   7611,   7740,   7869,   7998,   8127,
    8256,   8385,   8514,   8643,   8772,   8901,   9030,   9159,
    9288,   9417,   9546,   9675,   9804,   9933,  10062,  10191,
  10320,  10449,  10578,  10707,  10836,  10965,  11094,  11223,
  11352,  11481,  11610,  11739,  11868,  11997,  12126,  12255,
  12384,  12513,  12642,  12771,  12900,  13029,  13158,  13287,
  13416,  13545,  13674,  13803,  13932,  14061,  14190,  14319,
  14448,  14577,  14706,  14835,  14964,  15093,  15222,  15351,
  15480,  15609,  15738,  15867,  15996,  16125,  16254,  16383,
  16512,  16641,  16770,  16899,  17028,  17157,  17286,  17415,
  17544,  17673,  17802,  17931,  18060,  18189,  18318,  18447,
  18576,  18705,  18834,  18963,  19092,  19221,  19350,  19479,
  19608,  19737,  19866,  19995,  20124,  20253,  20382,  20511,
  20640,  20769,  20898,  21027,  21156,  21285,  21414,  21543,
  21672,  21801,  21930,  22059,  22188,  22317,  22446,  22575,
  22704,  22833,  22962,  23091,  23220,  23349,  23478,  23607,
  23736,  23865,  23994,  24123,  24252,  24381,  24510,  24639,
  24768,  24897,  25026,  25155,  25284,  25413,  25542,  25671,
  25800,  25929,  26058,  26187,  26316,  26445,  26574,  26703,
  26832,  26961,  27090,  27219,  27348,  27477,  27606,  27735,
  27864,  27993,  28122,  28251,  28380,  28509,  28638,  28767,
  28896,  29025,  29154,  29283,  29412,  29541,  29670,  29799,
  29928,  30057,  30186,  30315,  30444,  30573,  30702,  30831,
  30960,  31089,  31218,  31347,  31476,  31605,  31734,  31863,
  31992,  32121,  32250,  32379,  32508,  32637,  32766,  32895
};
}

void Bitmap2Yuv420p(boost::uint8_t *destination, boost::uint8_t *rgb,
                    const int &width, const int &height) {
  boost::uint8_t *y;
  boost::uint8_t *u;
  boost::uint8_t *v;

  boost::uint8_t *r;
  boost::uint8_t *g;
  boost::uint8_t *b;

  std::size_t image_size = width * height;
  std::size_t upos = image_size;
  std::size_t vpos = upos + upos / 4;
  for (std::size_t i = 0; i < image_size; ++i) {
    r = rgb + 3 * i;
    g = rgb + 3 * i + 1;
    b = rgb + 3 * i + 2;
    y = destination + i;
    *y = ((lookup66[*r] + lookup129[*g] + lookup25[*b]) >> 8) + 16;
    if (!((i / width) % 2) && !(i % 2)) {
      u = destination + upos++;
      v = destination + vpos++;
      *u = ((lookup_m_38[*r] + lookup_m_74[*g] + lookup112[*b]) >> 8) + 128;
      *v = ((lookup112[*r] + lookup_m_94[*g] + lookup_m_18[*b]) >> 8) + 128;
    }
  }
}

Answer

Timbo picture Timbo · Feb 27, 2012

Unroll your loop, and get rid of the if in the inner loop. But do not run over the image data 3 times, and it is even faster!

void Bitmap2Yuv420p_calc2(uint8_t *destination, uint8_t *rgb, size_t width, size_t height)
{
    size_t image_size = width * height;
    size_t upos = image_size;
    size_t vpos = upos + upos / 4;
    size_t i = 0;

    for( size_t line = 0; line < height; ++line )
    {
        if( !(line % 2) )
        {
            for( size_t x = 0; x < width; x += 2 )
            {
                uint8_t r = rgb[3 * i];
                uint8_t g = rgb[3 * i + 1];
                uint8_t b = rgb[3 * i + 2];

                destination[i++] = ((66*r + 129*g + 25*b) >> 8) + 16;

                destination[upos++] = ((-38*r + -74*g + 112*b) >> 8) + 128;
                destination[vpos++] = ((112*r + -94*g + -18*b) >> 8) + 128;

                r = rgb[3 * i];
                g = rgb[3 * i + 1];
                b = rgb[3 * i + 2];

                destination[i++] = ((66*r + 129*g + 25*b) >> 8) + 16;
            }
        }
        else
        {
            for( size_t x = 0; x < width; x += 1 )
            {
                uint8_t r = rgb[3 * i];
                uint8_t g = rgb[3 * i + 1];
                uint8_t b = rgb[3 * i + 2];

                destination[i++] = ((66*r + 129*g + 25*b) >> 8) + 16;
            }
        }
    }
}

In my tests, this was about 25% faster than your accepted answer (VS 2010, depending on whether x86 or x64 is enabled.)