DLIB Integration

The DLIB integration API consists of the DlibImage class, which automatically converts the image type used by the VisionAppster platform to the corresponding DLIB image types. It makes it possible to call DLIB functions from your custom tool implementations without much boilerplate code.

Please see the tool C API for general instructions on how to set up your environment and build tool plugins. Since DLIB is implemented in C++, it is important to understand compiler requirements.

The VisionAppster SDK comes with the source code of the built-in DLIB plugin. To get started, have a look at sdk/examples/dlib under your installation. Modify the supplied Makefile to fit your setup. If you installed everything to default locations, building your first DLIB plugin on Linux can be as simple as this:

cd ~/VisionAppster/sdk/examples/dlib
make install

Windows (MinGW-w64), assuming you copied the SDK to your desktop folder:

cd %USERPROFILE%\Desktop\sdk\examples\dlib
mingw32-make
:: This may require admin rights.
mingw32-make install

Windows (MinGW-w64 + MSYS2), assuming you copied the SDK to your desktop folder:

cd /c/Users/$USERNAME/Desktop/sdk/examples/dlib
make
# This may require admin rights.
make install

If you use DLIB to implement your own tools, you are strongly encouraged to use the version that comes with the platform SDK. While it would be technically possible to use multiple different versions of shared libraries in a single process, it is not currently supported. If you absolutely need a different version, you may link the required parts of DLIB statically to your tool plugin.

Example

The following example shows a complete implementation of a plugin and a tool that wraps the dlib::frontal_face_detector function. This is how the built-in Detect Faces tool is actually implemented. The input to the tool is any image and the output is a set of frame and size objects which define the bounding boxes containing the faces.

The SDK that comes with the VisionAppster platform contains this and many other DLIB functions as examples. Please see the sdk/examples/dlib directory in your installation.

/* Plugin declaration. NOTE(review): the two arguments look like the plugin
   identifier and its version number -- confirm against the tool C API docs. */
KUVIO_IMPLEMENT_PLUGIN(KuvioDlib, 1.0.0)

/* Tool registration. A single tool, FaceDetection, is described here. Its
   inputs and outputs are accessed in FaceDetection_process through
   args->in.image, args->in.minFaceSize, args->out.frame and args->out.size. */
KUVIO_REGISTER_TOOLS(
( IDENTIFIER     (FaceDetection),
  NAME           ("Face Detection"),
  TAG            ("feature detection"),
  /* Function that is run to process the inputs; defined in this file. */
  PROCESS        (FaceDetection_process),
  /* The image to search for faces. */
  REQUIRED_INPUT (kuvio_image, image),
  /* Faces smaller than this many world units will not be recognized. */
  OPTIONAL_INPUT (double, minFaceSize, 100.0),
  /* One 4x4 coordinate frame (four matrix rows) per detected face. */
  OUTPUT         (kuvio_dmatrix, frame),
  /* One row (width, height) per detected face. */
  OUTPUT         (kuvio_dmatrix, size),
  /* Static metadata of the outputs. NOTE(review): the blockSize values match
     the number of matrix rows the process function writes per detection
     (4 for frame, 1 for size); the exact meaning of the other keys is
     defined by the platform -- confirm against the tool C API docs. */
  STATIC_META    (kuvio_string, out_frame_typeName, "Matrix<double>/frame"),
  STATIC_META    (kuvio_string, out_size_typeName, "Matrix<double>/size"),
  STATIC_META    (kuvio_string, out_size_linkedTo, "frame/region"),
  STATIC_META    (int32_t, out_frame_blockSize, 4),
  STATIC_META    (int32_t, out_size_blockSize, 1)
))

// Adapter types of the DLIB integration API used by the functor below.
using Kuvio::DlibRgbType;
using Kuvio::DlibImage;
// Memory layout of one 4-by-4 matrix of doubles, used for writing a
// coordinate frame into four consecutive rows of a matrix.
using FourByFour = double[4][4];

// Functor which detects faces which are roughly in upright position (+- 30 degs).
// The width of each face in world coordinates is assumed to be at least
// minFaceSize world units.
template <class T>
struct DlibDetectFaces
{
  int operator()(const kuvio_image* imgIn, double minFaceSize, kuvio_dmatrix* frame, kuvio_dmatrix* size) const
  {
    dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();

    double dScaling = kuvio_to_world_scaling_at(
      &imgIn->camera_frame, &imgIn->calibration_data, imgIn->calibration_data.cx, imgIn->calibration_data.cx);

    const int iFaceTargetSizePxl = 100;
    int iMinFaceSizePxl = std::max(int(minFaceSize / dScaling + 0.5), 10);

    kuvio_image* imgPyramid = 0;
    // Zoom the image up to ensure that each face is at least
    // iFaceTargetSizePxl pixels wide. This is needed because DLIB
    // detector is trained like that.
    if (iMinFaceSizePxl < iFaceTargetSizePxl)
      {
        imgPyramid = kuvio_image_alloc_copy(imgIn);
        while (iMinFaceSizePxl < iFaceTargetSizePxl && std::max(imgPyramid->width, imgPyramid->height) < 2048)
          {
            dlib::pyramid_up(DlibImage<T>(imgIn).constRef(),
                             DlibImage<T>(imgPyramid).selfRef(), // May reallocate
                             dlib::pyramid_down<2>());
            iMinFaceSizePxl *= 2;
            imgPyramid->calibration_data.fx *= 2;
            imgPyramid->calibration_data.fy *= 2;
            imgPyramid->calibration_data.cx *= 2;
            imgPyramid->calibration_data.cy *= 2;
            imgIn = imgPyramid;
          }
      }

    std::vector<dlib::rectangle> vecRect = detector(DlibImage<T>(imgIn).constRef());
    int iNumFaces = int(vecRect.size());
    for (int i = 0; i < iNumFaces; ++i)
      {
        FourByFour* pFrame = reinterpret_cast<FourByFour*>(kuvio_dmatrix_row(frame, 4 * i));
        double* pSize = kuvio_dmatrix_row(size, i);
        const dlib::rectangle& rect = vecRect.at(i);

        // Convert from pixel to world coordinates
        double aPxlToWorld[8] = {double(rect.left()),
                                 double(rect.top()),
                                 double(rect.right()),
                                 double(rect.top()),
                                 double(rect.right()),
                                 double(rect.bottom()),
                                 double(rect.left()),
                                 double(rect.bottom())};
        kuvio_pixel_to_world(&imgIn->camera_frame, &imgIn->calibration_data, aPxlToWorld, 4, 2, aPxlToWorld);

        memset(pFrame, 0, sizeof(FourByFour));
        (*pFrame)[0][0] = (*pFrame)[1][1] = (*pFrame)[2][2] = (*pFrame)[3][3] = 1;
        (*pFrame)[0][3] = aPxlToWorld[0];
        (*pFrame)[1][3] = aPxlToWorld[1];
        (*pFrame)[2][3] = 0;

        pSize[0] = std::hypot(aPxlToWorld[2] - aPxlToWorld[0], aPxlToWorld[3] - aPxlToWorld[1]);
        pSize[1] = std::hypot(aPxlToWorld[6] - aPxlToWorld[0], aPxlToWorld[7] - aPxlToWorld[1]);
      }
    frame->rows = 4 * iNumFaces;
    size->rows = iNumFaces;
    if (imgPyramid)
      kuvio_image_free(imgPyramid);
    return KUVIO_SUCCESS;
  }
};

// Dispatches a DLIB functor template on the pixel format of an image.
// Functor is instantiated with the DLIB pixel type that corresponds to
// imageType and invoked with the remaining arguments, perfectly forwarded.
// Both gray level and color images are supported. Returns whatever the
// functor returns, or KUVIO_ERR_INVALID_PARAMETER if the image type is not
// one of the handled ones.
template <template <class> class Functor, class... Params>
static int dlibProcessImage(kuvio_image_type imageType, Params&&... params)
{
  if (imageType == kuvio_gray8_image_type)
    return Functor<uchar>()(std::forward<Params>(params)...);
  if (imageType == kuvio_gray16_image_type)
    return Functor<short>()(std::forward<Params>(params)...);
  if (imageType == kuvio_gray32_image_type)
    return Functor<int>()(std::forward<Params>(params)...);
  if (imageType == kuvio_rgb32_image_type)
    return Functor<DlibRgbType>()(std::forward<Params>(params)...);

  // Unsupported pixel format.
  return KUVIO_ERR_INVALID_PARAMETER;
}

// First level processor: picks the DlibDetectFaces instantiation that
// matches the pixel format of imgIn and runs it. A std::bad_alloc thrown
// during processing is translated to KUVIO_ERR_OUT_OF_MEMORY; otherwise the
// status code of the detection is returned as such. Note that other
// exception types are not caught here.
int dlib_detect_faces(const kuvio_image* imgIn, double minFaceSize, kuvio_dmatrix* frame, kuvio_dmatrix* size)
{
  int iStatus = KUVIO_SUCCESS;
  try
    {
      iStatus = dlibProcessImage<DlibDetectFaces>(imgIn->type, imgIn, minFaceSize, frame, size);
    }
  catch (const std::bad_alloc&)
    {
      iStatus = KUVIO_ERR_OUT_OF_MEMORY;
    }
  return iStatus;
}

int FaceDetection_process(void* instance, void* arguments)
{
  struct FaceDetection_args* args = (struct FaceDetection_args*)arguments;
  (void)instance; // unused
  const int iMaxDetections = 99;
  args->out.frame = kuvio_dmatrix_calloc(4 * iMaxDetections, 4);
  args->out.size = kuvio_dmatrix_calloc(iMaxDetections, 2);

  return dlib_detect_faces(args->in.image, args->in.minFaceSize, args->out.frame, args->out.size);
}