How to capture live camera frames in RGB with DirectShow

Simplex picture Simplex · Dec 1, 2009 · Viewed 8.9k times · Source

I'm implementing live video capture through DirectShow for live processing and display (for an augmented reality app).

I can access the pixels easily enough, but it seems I can't get the SampleGrabber to provide RGB data. The device (an iSight, accessed from VC++ Express running in VMware) only reports MEDIASUBTYPE_YUY2.

After extensive Googling, I still can't figure out whether DirectShow is supposed to provide built-in color space conversion for this sort of thing. Some sites report that there is no YUV<->RGB conversion built in, others report that you just have to call SetMediaType on your ISampleGrabber with an RGB subtype.

Any advice is greatly appreciated, I'm going nuts on this one. Code provided below. Please note that

  • The code works, except that it doesn't provide RGB data
  • I'm aware that I can implement my own conversion filter, but this is not feasible because I'd have to anticipate every possible device format, and this is a relatively small project

    // Interfaces used to build and control the capture graph.
    IGraphBuilder *pGraphBuilder = NULL;
    ICaptureGraphBuilder2 *pCaptureGraphBuilder2 = NULL;
    IMediaControl *pMediaControl = NULL;
    IBaseFilter *pDeviceFilter = NULL;
    IAMStreamConfig *pStreamConfig = NULL;
    BYTE *videoCaps = NULL;
    AM_MEDIA_TYPE **mediaTypeArray = NULL;

    // Interfaces used to enumerate the video input devices.
    ICreateDevEnum *pCreateDevEnum = NULL;
    IEnumMoniker *pEnumMoniker = NULL;
    IMoniker *pMoniker = NULL;
    ULONG nFetched = 0;

    // The result of CoInitializeEx is intentionally not acted upon here: if COM
    // is unusable, the CoCreateInstance call below fails and is handled.
    HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);

    // Create the system device enumerator.
    hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER, IID_ICreateDevEnum, (PVOID *)&pCreateDevEnum);
    if (FAILED(hr)) goto ReleaseDataAndFail;

    // Create an enumerator over the video input device category.
    // CreateClassEnumerator reports an empty or missing category with S_FALSE
    // and a NULL enumerator, which FAILED() does not catch, so test for S_OK
    // explicitly before touching pEnumMoniker.
    hr = pCreateDevEnum->CreateClassEnumerator(CLSID_VideoInputDeviceCategory, &pEnumMoniker, 0);
    if (hr != S_OK || pEnumMoniker == NULL) goto ReleaseDataAndFail;

    pEnumMoniker->Reset();
    
    // Walk the enumerated devices and bind the one whose "DevicePath" property
    // equals deviceId to an IBaseFilter (pDeviceFilter). Every per-device COM
    // reference taken inside the loop is released before the next iteration or
    // before leaving the loop, and pMoniker is reset to NULL after each Release
    // so it never dangles.
    while (pEnumMoniker->Next(1, &pMoniker, &nFetched) == S_OK)
    {
      IPropertyBag *pPropertyBag = NULL;

      // Bind to the device's property bag; skip devices that do not expose one.
      hr = pMoniker->BindToStorage(0, 0, IID_IPropertyBag, (void **)&pPropertyBag);
      if (FAILED(hr))
      {
        pMoniker->Release();
        pMoniker = NULL;
        continue;
      }

      VARIANT varName;
      VariantInit(&varName);
      hr = pPropertyBag->Read(L"DevicePath", &varName, 0);

      bool isDesiredDevice = false;

      if (SUCCEEDED(hr) && varName.vt == VT_BSTR && varName.bstrVal != NULL)
      {
        // Zero-initialised buffer; converting at most STRING_LENGTH_MAX - 1
        // bytes guarantees the result stays NUL-terminated.
        char devicePath[DeviceInfo::STRING_LENGTH_MAX] = "";

        if (wcstombs(devicePath, varName.bstrVal, DeviceInfo::STRING_LENGTH_MAX - 1) != (size_t)-1)
        {
          isDesiredDevice = (strcmp(devicePath, deviceId) == 0);
        }
      }

      // Release the BSTR held by the variant and the property bag on every path.
      VariantClear(&varName);
      pPropertyBag->Release();

      if (isDesiredDevice)
      {
        // Bind the moniker to the capture filter; on failure pDeviceFilter is
        // forced to NULL so the check after the loop fails cleanly.
        hr = pMoniker->BindToObject(0, 0, IID_IBaseFilter, (void**)&pDeviceFilter);
        if (FAILED(hr)) pDeviceFilter = NULL;
      }

      // The moniker is not used again after this point.
      pMoniker->Release();
      pMoniker = NULL;

      if (isDesiredDevice) break;
    }

    if (pDeviceFilter == NULL) goto ReleaseDataAndFail;
    
    // Instantiate the sample grabber filter and obtain its ISampleGrabber
    // interface (stored in pGrabber).
    IBaseFilter *pGrabberF = NULL;
    if (FAILED(hr = CoCreateInstance(CLSID_SampleGrabber,
                                     NULL,
                                     CLSCTX_INPROC_SERVER,
                                     IID_IBaseFilter,
                                     (void**)&pGrabberF)))
      goto ReleaseDataAndFail;

    if (FAILED(hr = pGrabberF->QueryInterface(IID_ISampleGrabber, (void**)&pGrabber)))
      goto ReleaseDataAndFail;

    // Instantiate the filter graph manager.
    if (FAILED(hr = CoCreateInstance(CLSID_FilterGraph, NULL, CLSCTX_INPROC, IID_IGraphBuilder, (LPVOID *)&pGraphBuilder)))
      goto ReleaseDataAndFail;

    // Instantiate the capture graph builder and attach the filter graph to it.
    if (FAILED(hr = CoCreateInstance(CLSID_CaptureGraphBuilder2,
                                     NULL,
                                     CLSCTX_INPROC,
                                     IID_ICaptureGraphBuilder2,
                                     (LPVOID *)&pCaptureGraphBuilder2)))
      goto ReleaseDataAndFail;

    if (FAILED(hr = pCaptureGraphBuilder2->SetFiltergraph(pGraphBuilder)))
      goto ReleaseDataAndFail;

    // Media control interface, used further down to run the graph.
    if (FAILED(hr = pGraphBuilder->QueryInterface(IID_IMediaControl, (LPVOID *)&pMediaControl)))
      goto ReleaseDataAndFail;

    // Insert the capture device and the sample grabber into the graph.
    if (FAILED(hr = pGraphBuilder->AddFilter(pDeviceFilter, L"Device Filter")))
      goto ReleaseDataAndFail;

    if (FAILED(hr = pGraphBuilder->AddFilter(pGrabberF, L"Sample Grabber")))
      goto ReleaseDataAndFail;

    // Sample grabber options: request 32-bit RGB video, keep grabbing after
    // the first sample, and buffer the samples.
    AM_MEDIA_TYPE mt;
    ZeroMemory(&mt, sizeof(mt));
    mt.majortype = MEDIATYPE_Video;
    mt.subtype = MEDIASUBTYPE_RGB32;

    if (FAILED(hr = pGrabber->SetMediaType(&mt)))
      goto ReleaseDataAndFail;

    if (FAILED(hr = pGrabber->SetOneShot(FALSE)))
      goto ReleaseDataAndFail;

    if (FAILED(hr = pGrabber->SetBufferSamples(TRUE)))
      goto ReleaseDataAndFail;
    
    // Get stream config interface
    hr = pCaptureGraphBuilder2->FindInterface(NULL, &MEDIATYPE_Video, pDeviceFilter, IID_IAMStreamConfig, (void **)&pStreamConfig);
    if (FAILED(hr)) goto ReleaseDataAndFail;
    
    int streamCapsCount = 0, capsSize, bestFit = -1, bestFitPixelDiff = 1000000000, desiredPixelCount = _width * _height,
    bestFitWidth = 0, bestFitHeight = 0;
    
    float desiredAspectRatio = (float)_width / (float)_height;
    
    hr = pStreamConfig->GetNumberOfCapabilities(&streamCapsCount, &capsSize);
    if (FAILED(hr)) goto ReleaseDataAndFail;
    
    videoCaps = (BYTE *)malloc(capsSize * streamCapsCount);
    mediaTypeArray = (AM_MEDIA_TYPE **)malloc(sizeof(AM_MEDIA_TYPE *) * streamCapsCount);
    
    for (int i = 0; i < streamCapsCount; i++)
    {
      hr = pStreamConfig->GetStreamCaps(i, &mediaTypeArray[i], videoCaps + capsSize * i);
      if (FAILED(hr)) continue;
    
      VIDEO_STREAM_CONFIG_CAPS *currentVideoCaps = (VIDEO_STREAM_CONFIG_CAPS *)(videoCaps + capsSize * i);
    
      int closestWidth = MAX(currentVideoCaps->MinOutputSize.cx, MIN(currentVideoCaps->MaxOutputSize.cx, width));
      int closestHeight = MAX(currentVideoCaps->MinOutputSize.cy, MIN(currentVideoCaps->MaxOutputSize.cy, height));
    
      int pixelDiff = ABS(desiredPixelCount - closestWidth * closestHeight);
    
      if (pixelDiff < bestFitPixelDiff && ABS(desiredAspectRatio - (float)closestWidth / (float)closestHeight) < 0.1f)
      {
        bestFit = i;
        bestFitPixelDiff = pixelDiff;
        bestFitWidth = closestWidth;
        bestFitHeight = closestHeight;
      }
    }
    
    if (bestFit == -1) goto ReleaseDataAndFail;
    
    AM_MEDIA_TYPE *mediaType;
    hr = pStreamConfig->GetFormat(&mediaType);
    if (FAILED(hr)) goto ReleaseDataAndFail;
    
    VIDEOINFOHEADER *videoInfoHeader = (VIDEOINFOHEADER *)mediaType->pbFormat;
    videoInfoHeader->bmiHeader.biWidth = bestFitWidth;
    videoInfoHeader->bmiHeader.biHeight = bestFitHeight;
    //mediaType->subtype = MEDIASUBTYPE_RGB32;
    hr = pStreamConfig->SetFormat(mediaType);
    if (FAILED(hr)) goto ReleaseDataAndFail;
    
    pStreamConfig->Release();
    pStreamConfig = NULL;
    
    free(videoCaps);
    videoCaps = NULL;
    free(mediaTypeArray);
    mediaTypeArray = NULL;
    
    // Connect the device's first output pin to the grabber's first input pin,
    // start the graph, record the chosen dimensions and allocate the pixel
    // buffer.
    // NOTE(review): pDeviceOut and pGrabberIn are not released here; if FindPin
    // hands back AddRef'd pins (the usual COM out-parameter convention) they
    // should be released once Connect has returned - confirm against FindPin.
    IPin *pDeviceOut = NULL, *pGrabberIn = NULL;

    if (FindPin(pDeviceFilter, PINDIR_OUTPUT, 0, &pDeviceOut) && FindPin(pGrabberF, PINDIR_INPUT, 0, &pGrabberIn))
    {
      hr = pGraphBuilder->Connect(pDeviceOut, pGrabberIn);
      if (FAILED(hr)) goto ReleaseDataAndFail;
    }
    else
    {
      goto ReleaseDataAndFail;
    }

    // start playing
    hr = pMediaControl->Run();
    if (FAILED(hr)) goto ReleaseDataAndFail;

    // Query the media type the grabber was actually connected with. On success
    // the call allocates a format block (and may set pUnk) inside mt that the
    // caller owns; release them so they do not leak. The remaining fields
    // (majortype, subtype, ...) stay valid for inspection.
    hr = pGrabber->GetConnectedMediaType(&mt);
    if (SUCCEEDED(hr))
    {
      if (mt.cbFormat != 0)
      {
        CoTaskMemFree(mt.pbFormat);
        mt.pbFormat = NULL;
        mt.cbFormat = 0;
      }
      if (mt.pUnk != NULL)
      {
        mt.pUnk->Release();
        mt.pUnk = NULL;
      }
    }

    // Set dimensions
    width = bestFitWidth;
    height = bestFitHeight;
    _width = bestFitWidth;
    _height = bestFitHeight;

    // Allocate pixel buffer: 4 bytes per pixel, matching the RGB32 subtype
    // requested from the grabber. The size is computed in size_t to avoid int
    // overflow for large frames.
    pPixelBuffer = (unsigned *)malloc((size_t)width * (size_t)height * 4);
    if (pPixelBuffer == NULL)
    {
      // Do not leave the graph running when initialisation fails.
      pMediaControl->Stop();
      goto ReleaseDataAndFail;
    }

    // Release objects that are no longer needed. pMediaControl is not released
    // here.
    pGraphBuilder->Release();
    pGraphBuilder = NULL;
    pEnumMoniker->Release();
    pEnumMoniker = NULL;
    pCreateDevEnum->Release();
    pCreateDevEnum = NULL;

    return true;
    

Answer

Geraint Davies picture Geraint Davies · Dec 2, 2009

The stock colour space converter does not support YUY2 to RGB conversion. However, there are a number of apps and devices that install a converter of some sort, and if this is properly registered, dshow will use it automatically. That's why some people report that it just works. (Of course, some devices offer RGB directly, so no conversion is needed in those cases.)

You can download a freely-available YUV conversion filter, "yuvxfm" from YUV Transform (at the bottom of the page). Register this on your system and it should allow capture in any reasonable RGB or YUV format.

G