Segmentation Update

Posted: December 9, 2011 at 5:21 pm

I wanted to post my progress on segmentation before I start working on clustering and video sequences. The images below show the current state of segmentation (now down to under 2 s per frame) on a more realistic Vancouver scene captured on video. In order, they are: the reconstruction, the mean-shift filtered image, and the original image.

Current state of the code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/gpu/gpu.hpp" // For GPU processing

#include <iostream>
#include <fstream> // for writing to files.
#include <stdio.h>
#include <sys/time.h>
#include <vector> // For dynamic arrays.

#define MAXSTRING 50 // 50 chars enough?

using namespace cv;
using namespace std;

// Class to hold the perceptual chunks.
class percepUnit {

    // externally accessible class members?
    public:
        Mat image; // percept itself
        Mat mask; // alpha channel
        Mat Lhist; // hist Luminance
        Mat Uhist; // hist Blue - Yellow
        Mat Vhist; // hist Green - Red
        int x1, y1; // close edges
        int w, h; // width / height of patch
        int id; // unique ID for each instance
        int x2, y2; // coord of far edges
        int cx, cy; // location of percept in frame


        // constructor method
        percepUnit(Mat ROI, Mat alpha, int ix, int iy, int iw, int ih) {
            ROI.copyTo(image); // make copies since args are refs!!
            alpha.copyTo(mask);
            x1 = ix;
            y1 = iy;
            w = iw;
            h = ih;
            x2 = x1 + iw;
            y2 = y1 + ih;
            cx = x1 + (w / 2);
            cy = y1 + (h / 2);
        }

        // Dumps data
        void dump(int id) {
            ofstream outputFile;
            char filenameData[MAXSTRING];
            sprintf(filenameData, "data/%d_data.csv", id); // TODO is this the proper C++ way?
            outputFile.open(filenameData, ios::out);
            outputFile << "instance,cx,cy,lhist,uhist,vhist\n";
            for (int i=0; i<256; i++) { // TODO change 5 to 256!
                outputFile << id << "," << cx << "," << cy << "," <<
                Lhist.at<float>(i) << "," << Uhist.at<float>(i) << "," << Vhist.at<float>(i) << endl;
            }
            outputFile.close();
        }

        // Write image and mask to disk
        void writeImages(int id) {
            char filenameImage[MAXSTRING], filenameMask[MAXSTRING];
            // Write regions to Disk. TODO make sure "region" folder exists, create it if not
            sprintf(filenameImage, "regions/%d_image.jpg", id); // TODO is this the proper C++ way?
            sprintf(filenameMask, "regions/%d_mask.jpg", id);
            imwrite(filenameImage, image);
            imwrite(filenameMask, mask);
        }

        void calcFeatures() {
            Mat luvImage;
            vector<Mat> imagePlanes;
            int bins = 256; // 256 bins/channel

            cvtColor(image, luvImage, CV_BGR2Luv); // convert from BGR to LUV
            split(luvImage, imagePlanes); // Scale and Split channels
           
            // Histograms TODO should these be normalized?
            calcHist(&imagePlanes[0], 1, 0, mask, Lhist, 1, &bins, 0); // L
            calcHist(&imagePlanes[1], 1, 0, mask, Uhist, 1, &bins, 0); // U
            calcHist(&imagePlanes[2], 1, 0, mask, Vhist, 1, &bins, 0); // V
        }

        // Destructor
        ~percepUnit() {
            // Free memory
            image.release();
            mask.release();
            Lhist.release();
            Uhist.release();
            Vhist.release();
        }
};

// pixel by pixel offset copy of perceptUnit into larger image.
// TODO try and dump the dimentions of the image and mask files, and also dump the bounding box sizes to check if they match.
int copyPercept(percepUnit &unit, Mat &dest) {

    // Loop through pixels in percept image
    for( int y = 0; y < unit.image.rows; y++ ) {
        for( int x = 0; x < unit.image.cols; x++ ) {
            // Make sure this pixel is in the mask.
            if (unit.mask.at<char>(y,x) != 0) {
                // get pixels from src image:
                Vec3b pixel = unit.image.at<Vec3b>(y,x); // Vec3b is a 3 element vector of unsigned chars.

                // set pixels in dest image (offset for this percept)
                dest.at<Vec3b>(y+unit.y1,x+unit.x1) = pixel;
            }
        }
    }

    return(0);
}

// Wall-clock time in seconds, with the microsecond part as the fraction.
// Used only for rough profiling: subtract two readings to get an elapsed time.
double getTime() {
    timeval now;
    gettimeofday(&now, NULL);

    const double wholeSeconds = static_cast<double>(now.tv_sec);
    const double fraction = static_cast<double>(now.tv_usec) / 1000000.0;
    return wholeSeconds + fraction;
}

// Process one frame, append items to an existing vector of percepUnits.
int segmentImage(Mat &image, vector<percepUnit> &percepUnits) {

    Mat orig, meanshift,meanshift4C, mask, flood, image4C;
    gpu::GpuMat gpuImage, temp1, temp2, temp3;
    double t1, t2;
    int numRegions = 0;
    int numROIs = 0;
    int area, rectArea;
    Rect *boundingRect = new Rect(); // Stored bounding box for each flooded area.

    t1 = getTime();

    // Try and do hard image processing on the GPU: (only works on micro!)

    // Copy the original image for in-place processing.
    image.copyTo(orig); // Main memory version.

    // convert image to 4 channels and upload to GPU for GPU operations.
    // TODO try and use Luv rather than BGR for these operations? Don't forget Luv images are 0-1 not 0-255!
    cvtColor(image, image4C, CV_BGR2BGRA,4); // 4 channels for gpu operation
    gpuImage.upload(image4C);

    // morphology (supports in place operation)
    Mat element = getStructuringElement(MORPH_ELLIPSE, Size(5,5), Point(2, 2) );
    gpu::morphologyEx(gpuImage, temp1, MORPH_CLOSE, element);
    gpu::morphologyEx(temp1, temp2, MORPH_OPEN, element);

    // Mean shift filtering
    TermCriteria iterations = TermCriteria(CV_TERMCRIT_ITER, 2, 0);
    gpu::meanShiftFiltering(temp2, temp3, 10, 40, iterations); // 10, 40, 2 iterations

    // Download and convert to 3 channels the processed image from GPU to main memory: TODO where to free gpuMat?
    temp3.download(meanshift4C);
    cvtColor(meanshift4C,meanshift,CV_BGRA2BGR, 3); // convert to three channels

    // place to store ffill masks
    mask = Mat( meanshift.rows+2, meanshift.cols+2, CV_8UC1, Scalar::all(0) ); // Make black single-channel image.
    meanshift.copyTo(flood); // copy image

    // Loop through all the pixels and flood fill.
    for( int y = 0; y < flood.rows; y++ )
    {
        for( int x = 0; x < flood.cols; x++ )
        {
            if( mask.at<uchar>(y+1, x+1) == 0 ) // mask is offset from original image.
            {
                numRegions++;
                //Flags: connectivity=8, fill value = 255, boundingRect is the size of the filled region. additional flags?
                area = floodFill( flood, mask, Point(x,y), NULL, boundingRect, Scalar::all(1), Scalar::all(1), 8|255<<8);
                //Extract a subimage for each flood, if the flood is large enough.
                rectArea = boundingRect->width*boundingRect->height;
                if (rectArea > 400 && rectArea < 2073600) { // greater than 20x20 and smaller than 1920x1080
                   
                    Mat ROI = orig(*boundingRect); // Make a cropped reference (not copy) of the image

                    // crop translated mask to register with original image.
                    boundingRect->y++;
                    boundingRect->height++;
                    boundingRect->x++;
                    boundingRect->width++;
                    Mat tmp = mask(*boundingRect);
                    Mat alpha = tmp(Range(0,tmp.rows-1),Range(0,tmp.cols-1)); // crop mask to match image

                    // Append an instance to the vector.
                    percepUnits.push_back(percepUnit(ROI, alpha, boundingRect->x-1, boundingRect->y-1, boundingRect->width-1, boundingRect->height-1));
                    numROIs++;
                }
            }
        }
    }

    t2 = getTime();
   
    // TODO add a debug mode.
    cout << "Processing Time (segmentation): " << t2-t1 << endl;
    cout << "Total Regions: " << numRegions << endl;
    cout << "ROI Regions: " << numROIs << endl;
   
    return(0);
}

// Print the command-line usage for `programName` to stdout and terminate the
// process with exit status 1. This function never returns to its caller.
void help(string programName) {
    const string usage = "Usage: " + programName + " [image to read] --frame --sequence --reconstruction --dump";
    cout << usage << endl;
    exit(1);
}

int main(int argc, char* argv[])
{
    Mat image, reconstruction;
    bool writeReconstruction = false;
    bool dumpData = false;
    bool doSegmentation = false;
    bool imageLoaded = false;
    string imageFilename;
    double t1, t2;
    vector<percepUnit> percepUnits; // dynamic vector to store instances of percepUnit.

    // Test in case there are no arguments.
    if (argc <= 2) {
        help(argv[0]);
        exit(1);
    }

    imageFilename = argv[1];

    // Loop through remaining arguments
    for (int i=2; i<argc; i++) {
        string arg = argv[i]; // why does argv[i] == "-f" not work?

        // only check args that start with --
        if (arg.find("--") == 0) {
            if (!arg.compare("--frame")) {
                doSegmentation = true;
            } else if (!arg.compare("--reconstruction")) {
                writeReconstruction = true;
            } else if (!arg.compare("--dump")) {
                dumpData = true;
            } else {
                help(argv[0]);
            }
        } else {
            help(argv[0]);
        }
    }

    if (doSegmentation) {
       
        image = imread( imageFilename ); // load input image from disk to main memory.
        CV_Assert( !image.empty() ); // bail if image load fails.

        segmentImage(image, percepUnits);

        imageLoaded = true;
    }

    if (writeReconstruction) {
        if (imageLoaded) {

            reconstruction = Mat(1080,1920, CV_8UC3, Scalar(255,255,255)); // white background

            //copyPercept(percepUnits[2], reconstruction);
            //imwrite("reconstruction.png",reconstruction);
            for(int i = 0; i <percepUnits.size(); i++) {
                copyPercept(percepUnits[i], reconstruction);
            }

            imwrite("reconstruction.png",reconstruction);
        } else {
            cout << "Reconstruction requires a loaded image." << "You must use --frame or --sequence" << endl;
            help(argv[0]);
        }
    }

    if (dumpData) {

        // Calculate features for all percepUnits
        t1 = getTime();
        for(int i = 0; i <percepUnits.size(); i++) {
            percepUnits[i].calcFeatures();
            percepUnits[i].dump(i);
            percepUnits[i].writeImages(i);
        }
        t2 = getTime();

        cout << "Processing Time (calcFeatures): " << t2-t1 << endl;
    }

    percepUnits.clear(); // cleaup
    return(0);
}