Commit 257cf6cf authored by Mathias Goldau's avatar Mathias Goldau
Browse files

[ADD #445] The MST is now implemented for GP clustering, the UnionFind approach will follow.

parent ba8bd4f0
......@@ -23,23 +23,3 @@
//---------------------------------------------------------------------------
#include "WDendrogram.h"
WDendrogram::WDendrogram( unsigned int numElements )
: m_tree( numElements, Node() )
{
}
WDendrogram::~WDendrogram()
{
}
WDendrogram::Node::Node()
{
parentTreeIdx = 0;
minTreeIdx = 0;
maxTreeIdx = 0;
dataIdx = 0;
height = 0.0;
}
......@@ -27,90 +27,83 @@
#include <vector>
#include <boost/shared_ptr.hpp>
/**
* Hirachical binary tree datastructure with spatial layout information. Since
* join points in a dendrogram do normally not intersect a special index is
* needed. Additionally a parameter for each node and leaf, its height, is
* available.
* Hirachical binary tree datastructure with spatial layout information called dendrogram.
*
* The following description is taken from: http://en.wikipedia.org/wiki/Dendrogram A dendrogram (from Greek
* dendron "tree", -gramma "drawing") is a tree diagram frequently used to illustrate the arrangement of the
* clusters produced by hierarchical clustering.
*
* Each leaf and inner node will obtain a new index called TreeIndex, so the
* hirachy will not overlap or intersect. As a leaf already has an index we
* call this index the DataIndex. So a leaf with DataIndex 5 is the fifth data
* element in the dataset of which this dendrogram is used for. Where as a leaf
* with TreeIndex 5 is the fifth leaf from left.
* This implementation is based on three arrays (\ref m_objectOrder, \ref m_branching, \ref m_levelHeight) and
* requires implicitly object lables from <dfn>0..n-1</dfn>. The idea is very similar to the idea described in
* the paper: <em>F.J. Rohlf, Algorithm 76: Hierarchical clustering using the minimum spanning tree. Comput.
* J. 16 (1973), pp. 93–95.</em>
*/
class WDendrogram
{
friend class WDendrogramTest;
public:
typedef boost::shared_ptr< std::vector< size_t > > LabelArray;
typedef LabelArray LevelArray;
typedef boost::shared_ptr< std::vector< double > > HeightArray;
// WDendrogram( LabelArray objectOrder, LevelArray branches, HeightArray heights );
protected:
private:
/**
* Creates a new Dendrogram with unjoined elements aka leafs.
* Since the dendrogram has nonintersecting edges as this:
*
* \param numElements The number of unjoined or initial elements
*
* TODO(math): Unsigned int is used to save memory here, but can't we use
* size_t (8bytes) => ask christian..
*/
explicit WDendrogram( unsigned int numElements );
\verbatim
|
,------'--. --- 4th level
| |
|```````| | --- 3rd level
| | |
| | ...'... --- 2nd level
| | | |
|''''''''| | | | --- 1st level
| | | | |
| | | | |
/**
* Destructs a Dendrogram.
*/
virtual ~WDendrogram();
2 0 3 1 4
protected:
/**
* Representing a node inside of the Dendrogram.
\endverbatim
*
* \note Be aware of the different kinds of indices used in here. See
* Description of the WDendrogram class for more details on TreeIndices and
* DataIndices.
* we need to arrage the objects from left to right so merging will not produce intersections. For this
* ordering this array is used and contains the object labels from left to right and provide thus the
* special ordering.
*/
struct Node
{
/**
* Default constructor for a new node. So all members are an valid
* initial value: zero.
*/
Node();
LabelArray m_objectOrder;
/**
* The TreeIndex of the parent it belongs to.
*/
unsigned int parentTreeIdx;
/**
* This array stores when the nodes will join or branch. Just imaging we rotate the dendrogram as follows:
*
\verbatim
/**
* All leafs grouped by this node have an bigger or equal TreeIndex
* then this \e minTreeIdx. In other words: The leftmost leaf of the
* subtree with this node as root has this TreeIndex.
*/
unsigned int minTreeIdx;
2 ----.
|
0 ----'-----------.
|
3 ----------------'-----.
1 ---------. |
| |
4 ---------'------------'-----
/**
* All leafs grouped by this node have an lower or equal TreeIndex then
* this \e maxTreeIdx. In other words: The rightmost leaf of the
* subtree with this node as root has this TreeIndex.
*/
unsigned int maxTreeIdx;
----+----+------+-----+--------> levels
1st 2nd 3rd 4th
/**
* This is used to have a reference to the data element this node
* represents. For inner nodes clearly none exists, but for leafs this
* is the number inside of the dataset.
*/
unsigned int dataIdx;
\endverbatim
* so the array for the example above would be: <dfn>[ 1, 3, 4, 2, - ]</dfn>
*/
LevelArray m_branching;
/**
* The height of each node. Leafs have an default height of zero.
*/
double height;
};
private:
/**
* Save the whole dendrogram and keep track of the link from leafs to the
* dataset, as well as information of parents and leafs for each node.
* Stores for each join level its height which may be used for spatial layouting.
*/
std::vector< Node > m_tree;
std::vector< double > m_levelHeight;
};
#endif // WDENDROGRAM_H
......@@ -40,15 +40,6 @@ public:
*/
void testNewNodesHaveAlwaysZerosAssignedInitially( void )
{
WDendrogram d( 5 );
for( int i = 0; i < 5; ++i )
{
TS_ASSERT_EQUALS( d.m_tree[i].parentTreeIdx, 0 );
TS_ASSERT_EQUALS( d.m_tree[i].minTreeIdx, 0 );
TS_ASSERT_EQUALS( d.m_tree[i].maxTreeIdx, 0 );
TS_ASSERT_EQUALS( d.m_tree[i].dataIdx, 0 );
TS_ASSERT_EQUALS( d.m_tree[i].height, 0.0 );
}
}
};
......
//---------------------------------------------------------------------------
//
// Project: OpenWalnut ( http://www.openwalnut.org )
//
// Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
// For more information see http://www.openwalnut.org/copying
//
// This file is part of OpenWalnut.
//
// OpenWalnut is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// OpenWalnut is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
//
//---------------------------------------------------------------------------
#include <vector>
#include <boost/shared_ptr.hpp>
#include "../common/math/WPosition.h"
#include "WTractAdapter.h"
WTractAdapter::WTractAdapter( boost::shared_ptr< const std::vector< float > > pointComponents, size_t startIndex, size_t numPoints )
: m_pointComponents( pointComponents ),
m_numPoints( numPoints ),
m_startIndex( startIndex )
{
}
wmath::WPosition WTractAdapter::operator[]( size_t index ) const
{
#ifdef DEBUG
assert( m_pointComponents && "Invalid point component array inside of WTractAdapter." );
return wmath::WPosition( m_pointComponents->at( m_startIndex + index * 3 ),
m_pointComponents->at( m_startIndex + index * 3 + 1 ),
m_pointComponents->at( m_startIndex + index * 3 + 2 ) );
#else
return wmath::WPosition( ( *m_pointComponents )[ m_startIndex + index * 3],
( *m_pointComponents )[ m_startIndex + index * 3 + 1],
( *m_pointComponents )[ m_startIndex + index * 3 + 2] );
#endif
}
//---------------------------------------------------------------------------
//
// Project: OpenWalnut ( http://www.openwalnut.org )
//
// Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
// For more information see http://www.openwalnut.org/copying
//
// This file is part of OpenWalnut.
//
// OpenWalnut is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// OpenWalnut is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
//
//---------------------------------------------------------------------------
#ifndef WTRACTADAPTER_H
#define WTRACTADAPTER_H
#include <vector>
#include <boost/shared_ptr.hpp>
namespace wmath
{
class WVector3D;
typedef WVector3D WPosition;
}
/**
* Adapter
*/
class WTractAdapter
{
public:
/**
* Constructs a new WTract which has \ref numPoints points and a startIndex inside of the
* given components array.
*
* \param pointComponents Array where the components of the tract points are inside of
* \param startIndex The position inside of the components array of the first x coordinate.
* \param numPoints How many points this tract has
*/
WTractAdapter( boost::shared_ptr< const std::vector< float > > pointComponents, size_t startIndex, size_t numPoints );
/**
* How many positions this tract incorporates.
*/
size_t numPoints() const;
/**
* Constructs and returns a \ref wmath::WPosition out of the i'th position of this tract.
*
* \param index The index of the position of this tract. It may start at \c 0 and is always
* smaller than \ref numPoints().
*
* \return The i'th position of this tract as \ref wmath::WPosition.
*/
wmath::WPosition operator[]( size_t index ) const;
// void reset( boost::shared_ptr< const WTractData > tracts, size_t startIndex, size_t numPoints )
protected:
private:
/**
* The array where the components of this tracts live. But you will need the starting position
* and the length of the tract to access them.
*/
boost::shared_ptr< const std::vector< float > > m_pointComponents;
/**
* How many \e points aka WPositions this tract consists of.
*/
size_t m_numPoints;
/**
* The index of the x-component of the first point of this tract inside the \ref m_pointComponents array.
*/
size_t m_startIndex;
};
inline size_t WTractAdapter::numPoints() const
{
return m_numPoints;
}
#endif // WTRACTADAPTER_H
//---------------------------------------------------------------------------
//
// Project: OpenWalnut ( http://www.openwalnut.org )
//
// Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
// For more information see http://www.openwalnut.org/copying
//
// This file is part of OpenWalnut.
//
// OpenWalnut is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// OpenWalnut is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
//
//---------------------------------------------------------------------------
#include <vector>
#include <boost/shared_ptr.hpp>
#include "WTractData.h"
WTractData::WTractData( boost::shared_ptr< std::vector< float > > pointComponents, boost::shared_ptr< std::vector< size_t > > startIndices )
: m_pointComponents( pointComponents ),
m_startIndices( startIndices )
{
}
//---------------------------------------------------------------------------
//
// Project: OpenWalnut ( http://www.openwalnut.org )
//
// Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
// For more information see http://www.openwalnut.org/copying
//
// This file is part of OpenWalnut.
//
// OpenWalnut is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// OpenWalnut is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
//
//---------------------------------------------------------------------------
#ifndef WTRACTDATA_H
#define WTRACTDATA_H
#include <vector>
#include <boost/shared_ptr.hpp>
/**
* Stores the data of deterministic fiber tractograms. Derived or optional data as tangents, FA,
* etc. are not saved in here, and never will be! Just the polylines.
*/
class WTractData
{
public:
/**
* Constructs a new WTractData.
*/
WTractData( boost::shared_ptr< std::vector< float > > pointComponents, boost::shared_ptr< std::vector< size_t > > startIndices );
size_t numTracts() const;
protected:
private:
/**
* Stores the all components of all vertices of all tracts. First x, y and finally z component
* are arranged in this manner: \f$[x_0, y_0, z_0, ..., x_{m_0}, y_{m_0}, z_{m_0}, ... , ..., x_{m_k},
* y_{m_k}, z_{m_k}]\f$ where there are \f$k\f$ many tracts where the i'th tract has \f$m_i\f$
* vertices, but \f$3m_i\f$ compontents. In other words: m_points.size() / 3 == number of
* vertices.
*
* \note the reason for beeing restricted to float is, that graphic boards and also the tracking
* algorithms which produce those tracks are just using floats.
*/
boost::shared_ptr< std::vector< float > > m_pointComponents;
/**
* Stores for every tract the index number where it starts in the \ref m_pointComponents array.
* This means the index of each tracts first component \f$x_0\f$.
*
* \note the reason for using \c size_t instead of \c unsigned \c int is that more tracts with
* more points are in sight.
*/
boost::shared_ptr< std::vector< size_t > > m_startIndices;
};
inline size_t WTractData::numTracts() const
{
if( m_startIndices )
{
return m_startIndices->size();
}
return 0;
}
#endif // WTRACTDATA_H
......@@ -25,6 +25,8 @@
#include <string>
#include <vector>
#include "../../../common/WLimits.h"
#include "../../../common/datastructures/WDendrogram.h"
#include "../../../kernel/WKernel.h"
#include "../../emptyIcon.xpm" // Please put a real icon here.
#include "WMDetTractClusteringGP.h"
......@@ -99,7 +101,9 @@ void WMDetTractClusteringGP::moduleMain()
debugLog() << "Start Clustering";
m_maxSegmentLength = searchGlobalMaxSegementLength( dataSet );
computeDistanceMatrix( dataSet );
// computeDistanceMatrix( dataSet );
computeEMST( dataSet );
debugLog() << "done";
}
}
......@@ -112,6 +116,96 @@ double WMDetTractClusteringGP::searchGlobalMaxSegementLength( boost::shared_ptr<
// NOLINT return std::max_element( dataSet->begin(), dataSet->end(), [](WGaussProcess p1, WGaussProcess p2){ return p1.getMaxSegmentLength() < p2.getMaxSegmentLength(); } )->getMaxSegmentLength();
}
boost::shared_ptr< WMDetTractClusteringGP::MST > WMDetTractClusteringGP::computeEMST( boost::shared_ptr< const WDataSetGP > dataSet ) const
{
boost::shared_ptr< MST > edges( new MST() );
if( !dataSet )
{
return edges; // consider dataset as empty
}
boost::shared_ptr< WProgress > progress( new WProgress( "EMST computation", dataSet->size() - 1 ) );
m_progress->addSubProgress( progress );
// we need the diagonal elements: (i,i) of the similarity matrix for poper similarity compuation
std::vector< double > diagonal( dataSet->size() );
for( size_t i = 0; i < dataSet->size(); ++i )
{
diagonal[i] = std::sqrt( gauss::innerProduct( ( *dataSet )[i], ( *dataSet )[i] ) );
}
// initialize the first the similarities to the root node.
const WGaussProcess& root = dataSet->front(); // is the root vertex of the MST
std::vector< double > similarities( dataSet->size(), 0.0 );
similarities[0] = wlimits::MAX_DOUBLE; // root node has maximal similarity and is selected first
for( size_t i = 1; i < dataSet->size(); ++i )
{
const WGaussProcess& p = ( *dataSet )[i];
if( root.getBB().minDistance( p.getBB() ) < root.getMaxSegmentLength() + p.getMaxSegmentLength() )
{
similarities[i] = gauss::innerProduct( root, p ) / diagonal[0] / diagonal[i];
}
// Note: it is 0.0 otherwise as the initial value is 0.0
}
std::vector< size_t > queue( dataSet->size() - 1 );
for( size_t i = 0; i < dataSet->size() - 1; ++i )
{
queue[i] = i + 1;
}
std::vector< size_t > parent( dataSet->size(), 0 );
while( !queue.empty() )
{
// find maximal similarity which corresponds to the minimum distance.
size_t closestTractIndex = 0; // the tract index inside the queue where this tract has highest innerProduct score among all others
for( size_t i = 1; i < queue.size(); ++i )
{
if( similarities[ queue[ i ] ] > similarities[ queue[ closestTractIndex ] ] )
{
closestTractIndex = i;
}
}
// remove closest tract from queue
size_t closestTract = queue[ closestTractIndex ];
queue[ closestTractIndex ] = queue.back();
queue.pop_back();
// add edge to MST
edges->insert( std::pair< double, Edge >( similarities[ closestTract ], Edge( parent[ closestTract ], closestTract ) ) );
// update similaritys and parents
const WGaussProcess& v = ( *dataSet )[ closestTract ];
for( size_t i = 0; i < queue.size(); ++i )
{
// compute similarity to last inserted tract
const WGaussProcess& p = ( *dataSet )[ queue[ i ] ];
double similarity = 0.0;
if( v.getBB().minDistance( p.getBB() ) < v.getMaxSegmentLength() + p.getMaxSegmentLength() )
{
similarity = gauss::innerProduct( v, p ) / diagonal[closestTract] / diagonal[ queue[ i ] ];
}
// update similarities and parent array if the new edge is closer to the MST sofar
if( similarities[ queue[ i ] ] < similarity )
{
similarities[ queue[ i ] ] = similarity;
parent[ queue[ i ] ] = closestTract;
}
}
++*progress;
}
progress->finish();
return edges;
}
boost::shared_ptr< WDendrogram > WMDetTractClusteringGP::computeDendrogram( boost::shared_ptr< const WMDetTractClusteringGP::MST > edges ) const
{
boost::shared_ptr< WDendrogram > result( new WDendrogram() );
return result;
}
void WMDetTractClusteringGP::computeDistanceMatrix( boost::shared_ptr< const WDataSetGP > dataSet )
{
boost::shared_ptr< WProgress > progress( new WProgress( "Similarity matrix computation", ( dataSet->size()*dataSet->size() - dataSet->size() ) / 2 + dataSet->size() ) ); // NOLINT line length
......
......@@ -26,6 +26,8 @@
#define WMDETTRACTCLUSTERINGGP_H
#include <string>
#include <map>
#include <utility>
#include <osg/Geode>
......@@ -35,6 +37,8 @@
#include "../../../kernel/WModuleOutputData.h"
#include "../WDataSetGP.h"
class WDendrogram;
/**
* Module for clustering gaussian processes which representing deterministic tracts.
*
......@@ -80,6 +84,17 @@ public:
virtual const char** getXPMIcon() const;
protected:
/**
* Represents an edge from one vertex/tract to another one.
*/
typedef std::pair< size_t, size_t > Edge;
/**
* Implicit definition of an Minimum Spanning Tree with weighted edges. First is the weight of second the edge. The vertexes
* or tracts are just implicit given with the number.
*/
typedef std::multimap< double, Edge > MST;
/**
* Entry point after loading the module. Runs in separate thread.
*/
......@@ -113,6 +128,10 @@ protected:
*/
void computeDistanceMatrix( boost::shared_ptr< const WDataSetGP > dataSet );
boost::shared_ptr< WMDetTractClusteringGP::MST > computeEMST( boost::shared_ptr< const WDataSetGP > dataSet ) const;
boost::shared_ptr< WDendrogram > computeDendrogram<