Issue with scale() function

Hi! I’ve been working on a Processing sketch that uses OpenCV to automatically align, scale and rotate a set of pictures of frontal faces so that the eyes always end up in a fixed, specified position in the result. It works, but I haven’t been able to get the right level of zoom using the scale() function. My current workaround: after correctly translating and rotating the image, I grab the output into a PImage, resize it to the adequate size, render that image again in the correct position, and save the frame as the result. However, when the zoom is too extreme this causes memory issues, so I would like to get it working with plain transformations instead.
Basically, I have an image the size of the window, a pair of coordinates float xm and float ym giving the midpoint between the eyes in the original image, an angle float theta corresponding to the tilt of the face, and a scale factor float prop by which I wish to zoom in. In the result, I would like the point (xm, ym) to end up at the center of the rendered output.
Here is the code that works, but bugs out occasionally:

pushMatrix();
  translate(xm,ym);
  rotate(-theta);                          // rotate about the eyes' midpoint
  translate(-xm,-ym);
  translate(0.5*width-xm,0.5*height-ym);   // bring the eyes' midpoint towards the center
  image(face,0,0);
popMatrix();
face=get();                                          // grab the rendered frame
face.resize((int)(width*prop),(int)(height*prop));   // zoom by resizing the grabbed image
background(0);
imageMode(CENTER);
image(face,0.5*width,0.5*height);                    // redraw it centered
saveFrame("example.jpg");

Here is the code that I would like to get working instead:

pushMatrix();//geometric transformations part
  translate(xm,ym);
  rotate(-theta);//we want the face to rotate about the eyes' midpoint
  scale(prop);//???
  translate(-xm,-ym);//???
  translate(0.5*width-xm,0.5*height-ym);//then we center the canvas on the eyes' midpoint ???
  image(face,0,0);
popMatrix();

This yields incorrect results, and I’ve been twisting myself in knots trying to make it work.

For completeness’ sake, here is the entire sketch (the data folder just needs to contain some pictures, and nothing but pictures); it gives incorrect results:

import gab.opencv.*;
import java.awt.Rectangle;
java.io.File dossier;
String[] fichiers;
String savepath="test/";
int c=0;

OpenCV opencv;
Rectangle[] eyes,noses,mouths,faces;
Rectangle candidate1,candidate2;
boolean alignnose=true;
boolean alignmouth=true;
PVector eye1,eye2;
PImage face;
int idx1,idx2;
float x1,x2,y1,y2,xm,ym;
float xi,xj,yi,yj,xmt,ymt;
float xbridge,ybridge;
float theta,prop,l;
float wratio=0.3;
boolean detectable;

void setup(){
  dossier = new java.io.File(dataPath(""));
  fichiers = dossier.list(); //list all pictures
  size(384,480);
  imageMode(CENTER);
}

void draw(){
  imageMode(CORNER);
  background(0);
  face=loadImage(fichiers[c]);
  face.resize(width,height);
  opencv=new OpenCV(this,face);
  
  opencv.loadCascade(OpenCV.CASCADE_EYE);
  eyes=opencv.detect();//detect the eyes
  
  opencv.loadCascade(OpenCV.CASCADE_NOSE);
  noses=opencv.detect();//detect the nose
  
  opencv.loadCascade(OpenCV.CASCADE_FRONTALFACE);
  faces=opencv.detect();//detect the face
  
  detectable=true;

  //image(face,0,0);
 if(eyes.length<2){detectable=false;}  
 
 if(eyes.length>2){//little routine to select the pair of eyes that minimise the L1-distance in a space of variable parameters, depending on the features we have detected and use as references (nose, face, etc..)
    float dmin=99999999;
    float d,dy,dwidth,dfacecenter,dnose;
    
      //in a situation when there is ambiguity about the center of the face, just assume the face is centered in the image, and the eyes are close to the horizontal midline;
      for(int i=0;i<eyes.length;i++){
        for(int j=i+1;j<eyes.length;j++){
          dnose=0;
          xi=eyes[i].x+0.5*eyes[i].width;
          xj=eyes[j].x+0.5*eyes[j].width;
          yi=eyes[i].y+0.5*eyes[i].height;
          yj=eyes[j].y+0.5*eyes[j].height;
          xmt=0.5*(xi+xj);
          ymt=0.5*(yi+yj);
          
          if(noses.length==1){//if we have an unambiguous nose, use this as a metric (eyes midpoint are close to the bridge of the nose, whose position we estimate...
            xbridge=noses[0].x+0.5*noses[0].width;//...here...
            ybridge=noses[0].y-noses[0].height;//...and here)
            dnose=abs(xmt-xbridge)+abs(ymt-ybridge);
          }
          
          if(faces.length==1){//if we have an unambiguous face, use the center of the face as a metric (eyes midpoint are close to the center of the face)
            dfacecenter=abs(xmt-(faces[0].x+0.5*faces[0].width))+abs(ymt-(faces[0].y+0.5*faces[0].height));
          }
          else{//otherwise assume the image is centered and use the center of the image as the center of the face
            dfacecenter=abs(xmt-0.5*width)+abs(ymt-0.5*height);
          }
          dy=abs(yi-yj);//eyes are more or less aligned horizontally
          dwidth=abs(eyes[i].width-eyes[j].width);//eyes have approximately the same size
          
          //next, rules to determine which metric we use according to which features we detected
          
          if(noses.length!=1){d=dy+dwidth+dfacecenter;}//face case is already handled above.
          else{d=dy+dwidth+dfacecenter+dnose;}
          
          
          
          if(d<dmin){//minimize according to the chosen metric
            dmin=d;
            idx1=i;
            idx2=j;
          }
        }
      }
      
      //if(d>width){detectable=false;}//if we have absurd cases, assume the detection malfunctioned, eg if the distance is too extreme, or if
      
      
  candidate1=new Rectangle(eyes[idx1].x,eyes[idx1].y,eyes[idx1].width,eyes[idx1].height);
  candidate2=new Rectangle(eyes[idx2].x,eyes[idx2].y,eyes[idx2].width,eyes[idx2].height);
  eyes=new Rectangle[2];
  eyes[0]=new Rectangle(candidate1.x,candidate1.y,candidate1.width,candidate1.height);
  eyes[1]=new Rectangle(candidate2.x,candidate2.y,candidate2.width,candidate2.height);
  }
  
  
 if(detectable){ 
  //now we have eye coordinates
  x1=eyes[0].x+0.5*eyes[0].width;
  x2=eyes[1].x+0.5*eyes[1].width;
  y1=eyes[0].y+0.5*eyes[0].height;
  y2=eyes[1].y+0.5*eyes[1].height;
  theta=atan((y2-y1)/(x2-x1));
  xm=(x1+x2)/2;
  ym=(y1+y2)/2;
  
  if(abs(theta)>0.5){detectable=false;}//if the skew is too extreme, assume we have a failure case and ignore it. 
  
  resetMatrix();
  if(detectable){//if we have a good idea that the detection worked, then do the actual geometry to unskew the picture
    l=sqrt((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2));//distance between the eyes
    prop=(width*wratio)/l;//we want the eye-to-eye line to occupy a fixed portion of the screen, namely the centered wratio% of the horizontal midline

    pushMatrix();//geometric transformations part
      translate(xm,ym);
      rotate(-theta);//we want the face to rotate about the eyes' midpoint
      scale(prop);//???
      translate(-xm,-ym);//???
      translate(0.5*width-xm,0.5*height-ym);//then we center the canvas on the eyes' midpoint ???
      image(face,0,0);
    popMatrix();
    saveFrame(savepath+str(c)+".jpg");//and save it
    c++;
  }
  else{c++;}//if detection failed, just move on to the next case


  println(str(c)+"/"+str(fichiers.length));
  if(c==fichiers.length){noLoop();}
 }
 else{c++;}
}

Thanks a lot!


Hi, what about the order of the transformations? In order to get the desired effect you have to scale first, then rotate, and finally translate.
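
For instance, here is a minimal sketch (made-up numbers) just to show that later calls operate in the already-transformed coordinate system:

pushMatrix();
scale(2);          // scale first...
translate(10, 0);  // ...then translate: the translation is scaled too,
point(0, 0);       // so this point lands at screen position (20, 0)
popMatrix();

pushMatrix();
translate(10, 0);  // translate first...
scale(2);          // ...then scale: the translation is unaffected,
point(0, 0);       // so this point lands at screen position (10, 0)
popMatrix();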

Hi Bryan, thanks for your answer.

Well, my issue is that everything is scaled with respect to the origin, so in order not to lose track of the point (xm, ym), I need to translate so that the transformations are centered about that point. Essentially, I’m just trying to replicate

pushMatrix();
  translate(xm,ym);
  rotate(-theta);
  translate(-xm,-ym);
  translate(0.5*width-xm,0.5*height-ym);
  image(face,0,0);
  popMatrix();
  face=get();
  face.resize((int)(width*prop),(int)(height*prop));
  background(0);
  imageMode(CENTER);
  image(face,0.5*width,0.5*height);
  saveFrame("example.jpg");

but using the scale() function instead. Would you mind posting a version of the code you had in mind?

Thanks

Ok,

So the following code seems to give an adequate answer:

pushMatrix();
  translate(xm,ym);
  scale(prop);
  rotate(-theta);
  translate(-xm,-ym);
  translate((0.5*width-xm)/prop,(0.5*height-ym)/prop);
  image(face,0,0);
popMatrix();

I don’t know if it only looks right or if I’ve actually stumbled upon the solution.
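
One way to check where a given image point actually lands, rather than judging by eye, is to build the same transform stack with a PMatrix2D and apply it to the eyes’ midpoint. A quick sketch, assuming the same xm, ym, theta and prop as above:

PMatrix2D m = new PMatrix2D();
m.translate(xm, ym);
m.scale(prop);
m.rotate(-theta);
m.translate(-xm, -ym);
m.translate((0.5*width-xm)/prop, (0.5*height-ym)/prop);
// where does the eyes' midpoint end up on screen?
PVector out = m.mult(new PVector(xm, ym), new PVector());
println(out.x + ", " + out.y);   // compare with (0.5*width, 0.5*height)

The last translate is divided by prop because, after scale(prop), any later translation happens in the scaled coordinate system, so it gets multiplied by prop on screen.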

Thanks Bryan, it did help to scale before anything else.


Hi noe, glad you made it work! Unfortunately I wasn’t able to run your code and see for myself that you achieved your goal. :)

Hi there,

I have a question regarding your original sketch for this post. I can see (your code quoted below) that you loaded multiple cascades for the same OpenCV object; I just wanted to confirm whether it really works this way. I was under the impression that a cascade only worked for one OpenCV object…
I am writing a sketch that detects whether a person has their eyes open or not (if there are eyes, do something; if there are no eyes, do something else), and I’m trying to do it directly with the eye cascade. Based on my research so far, though, I think it is better to find a face first and then the eyes, using both cascades, one for the face and one for the eyes.
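
Something like this is what I have in mind — a rough, untested sketch based on the gab.opencv calls from your code (the portrait.jpg file name is just a placeholder): crop each detected face with get() and run the eye cascade only on that crop.

import gab.opencv.*;
import java.awt.Rectangle;

void setup(){
  size(384,480);
  PImage img = loadImage("portrait.jpg"); // placeholder file in the data folder
  img.resize(width,height);

  OpenCV faceDetector = new OpenCV(this, img);
  faceDetector.loadCascade(OpenCV.CASCADE_FRONTALFACE);
  Rectangle[] faces = faceDetector.detect();

  image(img, 0, 0);
  noFill();

  for (Rectangle f : faces) {
    stroke(0, 0, 255);
    rect(f.x, f.y, f.width, f.height);

    // run the eye cascade only inside the detected face
    PImage crop = img.get(f.x, f.y, f.width, f.height);
    OpenCV eyeDetector = new OpenCV(this, crop);
    eyeDetector.loadCascade(OpenCV.CASCADE_EYE);
    Rectangle[] eyes = eyeDetector.detect();

    stroke(255, 0, 0);
    for (Rectangle e : eyes) {
      rect(f.x + e.x, f.y + e.y, e.width, e.height); // offset back to full-image coordinates
    }

    if (eyes.length > 0) {
      println("eyes detected in this face");    // "eyes open" branch
    } else {
      println("no eyes detected in this face"); // "eyes closed" branch
    }
  }
}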

Many thanks,


Hi Gus,

It seems to work fine for me, as long as you store the result of the detection in a separate variable.
Here is the little test I ran which convinced me.

Good luck,

Noé

import gab.opencv.*;
import java.awt.Rectangle;
java.io.File dossier;
String[] fichiers;
String savepath="test/";
int c=0;
PImage face;

OpenCV opencv;
Rectangle[] eyes,noses,mouths,faces;

void setup(){
  dossier = new java.io.File(dataPath(""));
  fichiers = dossier.list(); //list all pictures
  size(384,480);
  noFill();
  strokeWeight(1);
}

void draw(){
  imageMode(CORNER);
  background(0);
  face=loadImage(fichiers[c]);
  face.resize(width,height);
  image(face,0,0);
  opencv=new OpenCV(this,face);
  
  opencv.loadCascade(OpenCV.CASCADE_EYE);
  eyes=opencv.detect();//detect the eyes
  
  opencv.loadCascade(OpenCV.CASCADE_NOSE);
  noses=opencv.detect();//detect the nose
  
  opencv.loadCascade(OpenCV.CASCADE_FRONTALFACE);
  faces=opencv.detect();//detect the face
   stroke(0,0,255);
 if(faces.length>0){
   for(int i=0;i<faces.length;i++){
     rect(faces[i].x,faces[i].y,faces[i].width,faces[i].height);
   } 
 }
 stroke(0,255,0);
  if(noses.length>0){
   for(int i=0;i<noses.length;i++){
     rect(noses[i].x,noses[i].y,noses[i].width,noses[i].height);
   } 
 }
  stroke(255,0,0);
  if(eyes.length>0){
   for(int i=0;i<eyes.length;i++){
     rect(eyes[i].x,eyes[i].y,eyes[i].width,eyes[i].height);
   } 
 }


  saveFrame(str(c)+".jpg");
  println(str(c)+"/"+str(fichiers.length));
  c++;
  if(c==fichiers.length){noLoop();}
}