Skip to content

Commit a780439

Browse files
committed
Alignment improvements for #50.
Thanks @hbredin for the great feedback! 1. Make it clear dlib expects RGB images. 2. Keep the mean landmarks in the alignment source code rather than loading them from a txt file. 3. Normalize the mean landmarks between 0 and 1 for the transformation.
1 parent b5d7734 commit a780439

File tree

1 file changed: +58 −55 lines changed

openface/alignment/naive_dlib.py

+58-55
Original file line number | Diff line number | Diff line change
@@ -25,85 +25,88 @@
2525
from .. import helper
2626
from .. import data
2727

28+
TEMPLATE = np.float32([
29+
(0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
30+
(0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
31+
(0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
32+
(0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
33+
(0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
34+
(0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
35+
(0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
36+
(0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
37+
(0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
38+
(0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
39+
(0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
40+
(0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
41+
(0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
42+
(0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
43+
(0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
44+
(0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
45+
(0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
46+
(0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
47+
(0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
48+
(0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
49+
(0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
50+
(0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
51+
(0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
52+
(0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
53+
(0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
54+
(0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
55+
(0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
56+
(0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
57+
(0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
58+
(0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
59+
(0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
60+
(0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
61+
(0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
62+
(0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])
63+
64+
TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0)
65+
MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)
2866

2967
class NaiveDlib:
68+
OUTER_EYES_AND_BOTTOM_LIP = np.array([39, 42, 57])
69+
INNER_EYES_AND_NOSE = np.array([36, 45, 33])
3070

31-
def __init__(self, faceMean, facePredictor):
71+
def __init__(self, facePredictor):
3272
"""Initialize the dlib-based alignment."""
3373
self.detector = dlib.get_frontal_face_detector()
34-
self.normMeanLandmarks = loadMeanPoints(faceMean)
3574
self.predictor = dlib.shape_predictor(facePredictor)
3675

37-
def getAllFaceBoundingBoxes(self, img):
38-
return self.detector(img, 1)
76+
def getAllFaceBoundingBoxes(self, rgbImg):
77+
try:
78+
return self.detector(rgbImg, 1)
79+
except Exception as e:
80+
print("Warning: {}".format(e))
81+
# In rare cases, exceptions are thrown.
82+
return []
3983

40-
def getLargestFaceBoundingBox(self, img):
41-
faces = self.detector(img, 1)
84+
def getLargestFaceBoundingBox(self, rgbImg):
85+
faces = self.getAllFaceBoundingBoxes(rgbImg)
4286
if len(faces) > 0:
4387
return max(faces, key=lambda rect: rect.width() * rect.height())
4488

45-
def align(self, img, bb):
46-
points = self.predictor(img, bb)
89+
def align(self, rgbImg, bb):
90+
points = self.predictor(rgbImg, bb)
4791
return list(map(lambda p: (p.x, p.y), points.parts()))
4892

49-
EYES_AND_NOSE = np.array([36, 45, 33])
50-
def alignImg(self, method, size, img, bb=None,
51-
landmarks=None, landmarkIndices=EYES_AND_NOSE):
93+
def alignImg(self, method, size, rgbImg, bb=None,
94+
landmarks=None, landmarkIndices=INNER_EYES_AND_NOSE):
5295
if bb is None:
53-
try:
54-
bb = self.getLargestFaceBoundingBox(img)
55-
except Exception as e:
56-
print("Warning: {}".format(e))
57-
# In rare cases, exceptions are thrown.
58-
return
96+
bb = self.getLargestFaceBoundingBox(rgbImg)
5997
if bb is None:
60-
# Most failed detection attempts return here.
6198
return
6299

63100
if landmarks is None:
64-
landmarks = self.align(img, bb)
101+
landmarks = self.align(rgbImg, bb)
65102

66103
npLandmarks = np.float32(landmarks)
67-
npNormMeanLandmarks = np.float32(self.normMeanLandmarks)
68104

69105
if method == 'affine':
70106
H = cv2.getAffineTransform(npLandmarks[landmarkIndices],
71-
size*npNormMeanLandmarks[landmarkIndices])
72-
thumbnail = cv2.warpAffine(img, H, (size, size))
107+
size*MINMAX_TEMPLATE[landmarkIndices])
108+
thumbnail = cv2.warpAffine(rgbImg, H, (size, size))
73109
else:
74110
raise Exception('Unrecognized method: {}'.format(method))
75111

76112
return thumbnail
77-
78-
def transformPoints(points, bb, toImgCoords):
79-
if toImgCoords:
80-
def scale(p):
81-
(x, y) = p
82-
return (int((x * bb.width()) + bb.left()),
83-
int((y * bb.height()) + bb.top()))
84-
else:
85-
def scale(p):
86-
(x, y) = p
87-
return (float(x - bb.left()) / bb.width(),
88-
float(y - bb.top()) / bb.height())
89-
return list(map(scale, points))
90-
91-
92-
def loadMeanPoints(modelFname):
93-
def parse(line):
94-
(x, y) = line.strip().split(",")
95-
return (float(x), float(y))
96-
with open(modelFname, 'r') as f:
97-
return [parse(line) for line in f]
98-
99-
100-
def annotate(img, box, points=None, meanPoints=None):
101-
a = np.copy(img)
102-
bl = (box.left(), box.bottom())
103-
tr = (box.right(), box.top())
104-
cv2.rectangle(a, bl, tr, color=(153, 255, 204), thickness=3)
105-
for p in points:
106-
cv2.circle(a, center=p, radius=3, color=(102, 204, 255), thickness=-1)
107-
for p in meanPoints:
108-
cv2.circle(a, center=p, radius=3, color=(0, 0, 0), thickness=-1)
109-
return a

0 commit comments

Comments (0)