I developed the below code within the environment:
/*
* Hierchical K-mean algorithm for GPS spatial clustering
*
* Developed by Pil Ho Kim, 2011.
*
* History:
* 2011-02-11 First revision
*
*/
#include <iostream>
#include <opencv.hpp>
#include <sstream>
#include <fstream>
#include <vector>
#include <string>
#include <unistd.h>
using namespace std;
char m_sSourceFile[1024];
char m_sTargetFile[1024];
char m_sRegionFile[1024];
#define ID_MAX 0
#define ID_MIN 1
#define ID_LAT 0
#define ID_LOG 1
int m_iRegionID = 0;
#define CLUSTER_MIN_RADIUS 0.01 // Minimum cluster radius in Kilometer
#define CLUSTER_MIN_POINTS 2 // Minimum number of GPS points contained in the cluster
int getSampleCount(char *sFile) {
FILE *fp;
char fstr[200];
int k;
if (!(fp = fopen(sFile, "r"))) return 0;
k = 0;
while(fgets(fstr,200,fp)){
k++;
}
fclose(fp);
return k;
}
#define d2r (M_PI / 180.0)
// /calculate haversine distance for linear distance
// http://stackoverflow.com/questions/365826/calculate-distance-between-2-gps-coordinates
double getDistance(CvPoint2D32f pt_max, CvPoint2D32f pt_min) {
double lat1 = pt_max.x;
double long1 = pt_max.y;
double lat2 = pt_min.x;
double long2 = pt_min.y;
double dlong = (long2 - long1) * d2r;
double dlat = (lat2 - lat1) * d2r;
double a = pow(sin(dlat/2.0), 2) + cos(lat1*d2r) * cos(lat2*d2r) * pow(sin(dlong/2.0), 2);
double c = 2 * atan2(sqrt(a), sqrt(1-a));
double d = 6367 * c;
return d;
}
int classifyRegion(CvMat* points, CvMat* clusters, int iRegionID) {
FILE *fp_point_cluster;
FILE *fp_point_region;
int m_iRegionCount[2];
CvPoint2D32f pt_maxmin[2][2];
CvPoint2D32f pt, pt_center[2];
CvPoint2D32f* pt_target;
double lat, log, region_radius;
int cluster_idx;
int i, j, k, iResult;
int points_count = cvGetDimSize(points, 0);
int m_iSubRegionID[2];
// i is a cluster id
for (i = 0; i < 2; ++i) {
pt_maxmin[ID_MAX][i].x = -1000;
pt_maxmin[ID_MIN][i].x = 1000;
pt_maxmin[ID_MAX][i].y = -1000;
pt_maxmin[ID_MIN][i].y = 1000;
m_iRegionCount[i] = 0;
m_iSubRegionID[i] = ++m_iRegionID;
pt_center[i].x = pt_center[i].y = 0;
}
iResult = cvKMeans2( points, 2, clusters,
cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0));
// Write down the cluster
if (!(fp_point_cluster = fopen(m_sTargetFile, "a"))) return 0;
for( i = 0; i < points_count; i++ )
{
pt = ((CvPoint2D32f*)points->data.fl)[i];
lat = pt.x;
log = pt.y;
cluster_idx = clusters->data.i[i];
// Compute statistics
// Weighted average to calculate the region center
pt_center[cluster_idx].x += pt.x;
pt_center[cluster_idx].y += pt.y;
if (lat > pt_maxmin[ID_MAX][cluster_idx].x)
pt_maxmin[ID_MAX][cluster_idx].x = lat;
if (lat < pt_maxmin[ID_MIN][cluster_idx].x)
pt_maxmin[ID_MIN][cluster_idx].x = lat;
if (log > pt_maxmin[ID_MAX][cluster_idx].y)
pt_maxmin[ID_MAX][cluster_idx].y= log;
if (log < pt_maxmin[ID_MIN][cluster_idx].y)
pt_maxmin[ID_MIN][cluster_idx].y = log;
m_iRegionCount[cluster_idx] = m_iRegionCount[cluster_idx] + 1;
fprintf(fp_point_cluster, "%d\t%d\t%f\t%f\n",
iRegionID,
m_iSubRegionID[cluster_idx],
pt.x,
pt.y
);
}
fclose(fp_point_cluster);
printf("Region\t%d:\tClustered into\t%d\tand\t%d.\n", iRegionID, m_iRegionCount[0], m_iRegionCount[1]);
// Check the distribution
for (i = 0; i < 2; ++i) {
// If the radius is bigger than 10 meter, then split.
region_radius = getDistance(pt_maxmin[ID_MAX][i], pt_maxmin[ID_MIN][i]);
// Log the region information
if (m_iRegionCount[i] > 0) {
if (!(fp_point_region = fopen(m_sRegionFile, "a"))) return 0;
fprintf(fp_point_region, "%d\t%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n",
m_iSubRegionID[i],
m_iRegionCount[i],
region_radius,
pt_center[i].x / m_iRegionCount[i],
pt_center[i].y / m_iRegionCount[i],
pt_maxmin[ID_MAX][i].x,
pt_maxmin[ID_MAX][i].y,
pt_maxmin[ID_MIN][i].x,
pt_maxmin[ID_MIN][i].y
);
fclose(fp_point_region);
}
if (region_radius > CLUSTER_MIN_RADIUS && m_iRegionCount[i] > CLUSTER_MIN_POINTS) {
// Recursive iteration
CvMat* subpoints = cvCreateMat( m_iRegionCount[i], 1, CV_32FC2 );
CvMat* subclusters = cvCreateMat( m_iRegionCount[i], 1, CV_32SC1 );
// Copy points
k = 0;
for (j = 0; j < points_count; ++j) {
if (clusters->data.i[j] == i) {
pt = ((CvPoint2D32f*)points->data.fl)[j];
pt_target = (CvPoint2D32f*)subpoints->data.fl + k;
pt_target->x = pt.x;
pt_target->y = pt.y;
++k;
}
}
classifyRegion(subpoints, subclusters, m_iSubRegionID[i]);
cvReleaseMat( &subpoints );
cvReleaseMat( &subclusters );
}
}
return 1;
}
int main (int argc, char * const argv[]) {
// insert code here...
int i;
int iResult;
CvPoint2D32f* pt;
char fstr[200];
FILE *fp;
FILE *fp_point_cluster;
sprintf(m_sSourceFile, "iphone_gps.txt");
sprintf(m_sTargetFile, "iphone_gps_clusters.txt");
sprintf(m_sRegionFile, "iphone_gps_regions.txt");
int sample_count = getSampleCount(m_sSourceFile);
CvMat* points = cvCreateMat( sample_count, 1, CV_32FC2 );
CvMat* clusters = cvCreateMat( sample_count, 1, CV_32SC1 );
i = 0;
if (!(fp = fopen(m_sSourceFile, "r"))) return 0;
while(fgets(fstr,200,fp)){
pt = (CvPoint2D32f*)points->data.fl +i;
sscanf(fstr,"%f %f", &(pt->x), &(pt->y));
++i;
}
fclose(fp);
// Initialize region ID
m_iRegionID = 0;
if (!(fp_point_cluster = fopen(m_sTargetFile, "w"))) return 0;
fclose(fp_point_cluster);
if (!(fp_point_cluster = fopen(m_sRegionFile, "w"))) return 0;
fclose(fp_point_cluster);
iResult = classifyRegion(points, clusters, m_iRegionID);
cvReleaseMat( &points );
cvReleaseMat( &clusters );
return 0;
}