Segmentation Success!

Posted: October 5, 2011 at 3:01 pm

I’ve finally managed to get the segmentation data into a useful form. Below are a reconstruction of the original image from the extracted patches, and a corresponding image that shows the segments filled in with random colours.

The white/transparent areas in the top image are the regions that were not extracted (because their bounding boxes were not more than 20 pixels in both width and height). In this example there are 231 patches, where each patch is stored in a class instance that will represent a single perceptual unit available to the system, and which will include features, location in space, time, etc. The above segmentation takes approximately 9 seconds for this single HD-resolution image, so this method is far from real-time. The current state of the code is as follows:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"

#include <iostream>
#include <stdio.h>
#include <time.h>
#include <vector> // For dynamic arrays.

using namespace cv;
using namespace std;

// Class to hold the perceptual chunks.
class percepUnit {

    // externally accessible class members?
    public:
        // constructor method
        percepUnit(Mat ROI, Mat alpha, int ix, int iy, int iw, int ih) {
            ROI.copyTo(image); // make copies since args are refs!!
            alpha.copyTo(mask);
            x1 = ix;
            y1 = iy;
            w = iw;
            h = ih;
            x2 = x1 + iw;
            y2 = y1 + ih;
        }

        void dump() {
            printf("members: x:%d, y:%d, w:%d, h:%d\n", x1, y1, w, h, x2, y2);
        }

        Mat image; // percept itself
        Mat mask; // alpha channel
        int x1; // x location of percept in frame
        int y1; // y location
        int w; // width of patch (not sure these will be needed)
        int h; // height of patch
        int x2; // coord of far X edge
        int y2; // coord of far Y edge

        // Destructor
        ~percepUnit() {
            // Free image and mask
            image.release();
            mask.release();
        }
};

// pixel by pixel offset copy of perceptUnit into larger image.
int copyPercept(percepUnit &unit, Mat &dest) {

    // Loop through pixels in percept image
    for( int y = 0; y < unit.image.rows; y++ ) {
        for( int x = 0; x < unit.image.cols; x++ ) {
            // Make sure this pixel is in the mask.
            if (unit.mask.at<char>(y,x) != 0) {
                // get pixels from src image:
                Vec3b pixel = unit.image.at<Vec3b>(y,x); // Vec3b is a 3 element vector of unsigned chars.

                // set pixels in dest image (offset for this percept)
                dest.at<Vec3b>(y+unit.y1,x+unit.x1) = pixel;
            }
        }
    }

    return(0);
}

int main(int argc, char** argv)
{
    Mat orig, image, meanshift, mask, flood, reconstruction;
    vector<percepUnit> percepUnits; // dynamic vector to store instances of percepUnit.

    namedWindow( "flood", CV_WINDOW_NORMAL );
    namedWindow( "reconstruction", CV_WINDOW_NORMAL );
    image = imread( argv[1] );
    CV_Assert( !image.empty() ); // bail if there is no loaded image.

    // log the current time
    time_t t1 = time(NULL);

    // Copy the original image for in-place processing.
    image.copyTo(orig);

    // morphology (supports in place operation)
    Mat element = getStructuringElement(MORPH_ELLIPSE, Size(5,5), Point(2, 2) );
    morphologyEx(image, image, MORPH_CLOSE, element);
    morphologyEx(image, image, MORPH_OPEN, element);

    // Mean shift filtering
    pyrMeanShiftFiltering(image, meanshift, 10, 30, 3); // (10, 30--35, 3) good results for 640x360, (10, 30, 3) for 1920x1080

    RNG rng = theRNG();
    // place to store ffill masks
    mask = Mat( meanshift.rows+2, meanshift.cols+2, CV_8UC1, Scalar::all(0) ); // Make black single-channel image.
    meanshift.copyTo(flood); // copy image
    int numRegions = 0;
    int numROIs = 0;
    int area;
    char filenameImage[50], filenameMask[50]; // 50 chars enough?
    Rect *boundingRect = new Rect(); // Stored bounding box for each flooded area.

    // Loop through all the pixels and flood fill.
    for( int y = 0; y < meanshift.rows; y++ )
    {
        for( int x = 0; x < meanshift.cols; x++ )
        {
            if( mask.at<uchar>(y+1, x+1) == 0 ) // mask is offset from original image.
            {
                numRegions++;
                Scalar newVal( rng(256), rng(256), rng(256) );
                //Flags: connectivity=8, fill value = 255, boundingRect is the size of the filled region. additional flags?
                area = floodFill( flood, mask, Point(x,y), newVal, boundingRect, Scalar::all(1), Scalar::all(1), 8|255<<8);
                //Extract a subimage for each flood, if the flood is large enough.
                if (boundingRect->width >20 && boundingRect->height >20) {
                   
                    Mat ROI = orig(*boundingRect); // Make a cropped reference (not copy) of the image

                    // crop translated mask to register with original image.
                    boundingRect->y++;
                    boundingRect->height++;
                    boundingRect->x++;
                    boundingRect->width++;
                    Mat alpha = mask(*boundingRect);

                    // Append an instance to the vector.
                    percepUnits.push_back(percepUnit(ROI, alpha, boundingRect->x-1, boundingRect->y-1, boundingRect->width-1, boundingRect->height-1));

                    /* For now regions are written to Disk.
                    sprintf(filenameImage, "region/%d_image.jpg", numROIs);
                    sprintf(filenameMask, "region/%d_mask.jpg", numROIs);
                    imwrite(filenameImage, ROI);
                    imwrite(filenameMask, alpha);*/


                    numROIs++;
                }
               
            }
        }
    }

    time_t t2 = time(NULL);
    printf("Processing Time (segmentation): %f\n", difftime(t2, t1));

    // New Image for reconstruction
    reconstruction = Mat(1080,1920, CV_8UC3, Scalar(0,0,255)); // red background

    // loop through instances and print
    for(int i = 0; i <percepUnits.size(); i++) {

        // Copy percept into reconstruction.
        copyPercept(percepUnits[i], reconstruction);
        //printf("unit: %d\n", i);
    }

    //copyPercept(percepUnits[0], reconstruction);
   
    time_t t3 = time(NULL);
    printf("Processing Time (reconstruction): %f\n", difftime(t3, t2));
    printf("Total Regions: %d\n", numRegions);
    printf("ROI Regions: %d\n", numROIs);
    imshow("flood", flood );
    imshow("reconstruction", reconstruction);

    cvWaitKey();
    imwrite("reconstruction.png", reconstruction);

    // cleanup
    percepUnits.clear();
    return(0);

}