Commit a4fb966e authored by Mathias Goldau's avatar Mathias Goldau
Browse files

[ADD #445] Now the Dendrogram is computed from the MST, but we need to apply...

[ADD #445] Now the Dendrogram is computed from the MST, but we need to apply ralphs dendrogram viewer for that. in order to see if the clustering will work.
parent 257cf6cf
......@@ -22,4 +22,36 @@
//
//---------------------------------------------------------------------------
#include <sstream>
#include "WDendrogram.h"
// Sets up the storage for a dendrogram over n leafs: one parent slot per leaf (initialized to 0)
// and one height slot per possible inner node (n-1 of them, initialized to 0.0).
//
// \param n The number of leafs. For n == 0 an empty dendrogram is created.
WDendrogram::WDendrogram( size_t n )
    : m_heights( n > 0 ? n - 1 : 0, 0.0 )
{
    // n leafs can lead to at most 2n-1 nodes; the guards avoid unsigned wrap-around for n == 0,
    // which would otherwise request an absurdly large allocation.
    m_parents.reserve( n > 0 ? 2 * n - 1 : 0 );
    m_parents.resize( n, 0 );
}
// Joins the two nodes i and j (leafs or inner nodes) below a newly created inner node.
//
// \param i Index of the first node to join.
// \param j Index of the second node to join.
// \param height The height stored for the newly created inner node.
//
// \return The index of the new inner node, which is now the parent of i and j.
size_t WDendrogram::merge( size_t i, size_t j, double height )
{
#ifdef DEBUG
    // m_heights holds one entry per inner node, i.e. n-1 entries for n leafs.
    std::stringstream ss;
    ss << "Bug: n=" << ( m_heights.size() + 1 ) << " many leafs can lead maximal to 2n-1 many nodes in a tree but this was violated now!" << std::endl;
    WAssert( m_parents.size() < 2 * m_heights.size() + 1, ss.str() );
#endif

    // The new node is appended at the end and initially references itself, as the root always does.
    const size_t newNode = m_parents.size();
    m_parents.push_back( newNode );

    // Inner nodes are stored after the m_heights.size() + 1 leafs, so subtracting the leaf count
    // yields the position of the new node inside m_heights.
    const size_t heightIndex = newNode - ( m_heights.size() + 1 );

#ifdef DEBUG
    // bounds-checked access in debug builds
    m_heights.at( heightIndex ) = height;
    m_parents.at( i ) = newNode;
    m_parents.at( j ) = newNode;
#else
    m_heights[ heightIndex ] = height;
    m_parents[ i ] = newNode;
    m_parents[ j ] = newNode;
#endif

    return newNode;
}
......@@ -33,77 +33,60 @@
* Hierarchical binary tree data structure with spatial layout information called dendrogram.
*
* The following description is taken from: http://en.wikipedia.org/wiki/Dendrogram A dendrogram (from Greek
* dendron "tree", -gramma "drawing") is a tree diagram frequently used to illustrate the arrangement of the
* clusters produced by hierarchical clustering.
* dendron "tree", -gramma "drawing") is a tree diagram frequently used to illustrate the arrangement of
* clusters produced by hierarchical clustering. Please note that each level has its height.
*
* This implementation is based on three arrays (\ref m_objectOrder, \ref m_branching, \ref m_levelHeight) and
* requires implicitly object labels from <dfn>0..n-1</dfn>. The idea is very similar to the idea described in
* the paper: <em>F.J. Rohlf, Algorithm 76: Hierarchical clustering using the minimum spanning tree. Comput.
* J. 16 (1973), pp. 93–95.</em>
\verbatim
|
,------'--. --- 4th level
| |
|```````| | --- 3rd level
| | |
| | ...'... --- 2nd level
| | | |
|''''''''| | | | --- 1st level
| | | | |
| | | | |
o o o o o --- 0 level
\endverbatim
*
* In order to use this class for your objects ensure that the objects are labeled from <dfn>0,...,n-1</dfn>.
*/
class WDendrogram
{
friend class WDendrogramTest;
public:
// Shared pointer to a vector of size_t values.
typedef boost::shared_ptr< std::vector< size_t > > LabelArray;
// Alias of LabelArray.
typedef LabelArray LevelArray;
// Shared pointer to a vector of double values.
typedef boost::shared_ptr< std::vector< double > > HeightArray;
// WDendrogram( LabelArray objectOrder, LevelArray branches, HeightArray heights );
protected:
private:
/**
* Since the dendrogram has nonintersecting edges as this:
*
\verbatim
|
,------'--. --- 4th level
| |
|```````| | --- 3rd level
| | |
| | ...'... --- 2nd level
| | | |
|''''''''| | | | --- 1st level
| | | | |
| | | | |
2 0 3 1 4
\endverbatim
* Constructs a new dendrogram for \e n many objects.
*
* we need to arrange the objects from left to right so merging will not produce intersections. For this
* ordering this array is used and contains the object labels from left to right and thus provides the
* special ordering.
* \param n The number of leafs.
*/
LabelArray m_objectOrder;
explicit WDendrogram( size_t n );
/**
* This array stores when the nodes will join or branch. Just imagine we rotate the dendrogram as follows:
* Merges two elements (either inner nodes or leafs) given via the indices \e i and \e j.
*
\verbatim
2 ----.
|
0 ----'-----------.
|
3 ----------------'-----.
1 ---------. |
| |
4 ---------'------------'-----
----+----+------+-----+--------> levels
1st 2nd 3rd 4th
* \param i The index referring either to an inner node or to a leaf.
* \param j The other index of a leaf or inner node.
* \param height The height at which those two elements join.
*
* \return The number of the inner node now representing the parent of \e i and \e j.
*/
size_t merge( size_t i, size_t j, double height );
\endverbatim
* so the array for the example above would be: <dfn>[ 1, 3, 4, 2, - ]</dfn>
protected:
private:
/**
* Stores the parents of leafs as well as of inner nodes. The first half of the array corresponds to the
* parents of the leafs and the second of the inner nodes. The last inner node is the top of the
* dendrogram.
*/
LevelArray m_branching;
std::vector< size_t > m_parents;
/**
* Stores for each join level its height which may be used for spatial layouting.
* Stores only for the inner nodes their heights.
*/
std::vector< double > m_levelHeight;
std::vector< double > m_heights;
};
#endif // WDENDROGRAM_H
......@@ -27,6 +27,7 @@
#include "../../../common/WLimits.h"
#include "../../../common/datastructures/WDendrogram.h"
#include "../../../common/datastructures/WUnionFind.h"
#include "../../../kernel/WKernel.h"
#include "../../emptyIcon.xpm" // Please put a real icon here.
#include "WMDetTractClusteringGP.h"
......@@ -102,7 +103,7 @@ void WMDetTractClusteringGP::moduleMain()
m_maxSegmentLength = searchGlobalMaxSegementLength( dataSet );
// computeDistanceMatrix( dataSet );
computeEMST( dataSet );
computeDendrogram( computeEMST( dataSet ) );
debugLog() << "done";
}
}
......@@ -202,7 +203,44 @@ boost::shared_ptr< WMDetTractClusteringGP::MST > WMDetTractClusteringGP::compute
// Builds a dendrogram out of the given spanning tree edges.
//
// The edges are visited from the last to the first one. For every edge (u,v) the clusters
// currently containing u and v are looked up via union-find, their dendrogram nodes are merged
// below a new inner node, and that inner node becomes the representative of the joined cluster.
//
// \param edges The edges of the spanning tree; each entry carries a similarity value (first) and
//              the pair of node indices (second). Must not be a null pointer.
//
// \return The dendrogram created from the edges.
boost::shared_ptr< WDendrogram > WMDetTractClusteringGP::computeDendrogram( boost::shared_ptr< const WMDetTractClusteringGP::MST > edges ) const
{
    const size_t numLeafs = edges->size() + 1; // there are exactly n-1 edges

    boost::shared_ptr< WProgress > progress( new WProgress( "MST => Dendrogram", edges->size() ) );
    m_progress->addSubProgress( progress );

    boost::shared_ptr< WDendrogram > result( new WDendrogram( numLeafs ) );
    WUnionFind uf( numLeafs );

    // The references from the canonical elements (cE) to the nodes of the dendrogram.
    std::vector< size_t > in( numLeafs );
    for( size_t i = 0; i < in.size(); ++i )
    {
        in[i] = i; // initialize them with their corresponding leafs.
    }

#ifdef DEBUG
    double similarity = wlimits::MAX_DOUBLE; // corresponds to the height, and enables the sorting check
#endif
    // note: reverse iterating since the edge with highest similarity is at the end
    for( MST::const_reverse_iterator cit = edges->rbegin(); cit != edges->rend(); ++cit )
    {
#ifdef DEBUG
        WAssert( cit->first <= similarity, "Bug: The edges aren't sorted!" );
        similarity = cit->first;
#endif
        // (u,v) - edge
        const size_t u = cit->second.first;
        const size_t v = cit->second.second;

        // u and v may already belong to a cluster, thus we need their canonical elements
        const size_t cEu = uf.find( u );
        const size_t cEv = uf.find( v );

        // get the references to their nodes inside the dendrogram and join them
        const size_t innerNodeU = in[ cEu ];
        const size_t innerNodeV = in[ cEv ];
        const size_t newInnerNode = result->merge( innerNodeU, innerNodeV, cit->first );

        // the joined cluster is from now on represented by the new inner node
        uf.merge( cEu, cEv );
        in[ uf.find( cEu ) ] = newInnerNode;

        ++*progress;
    }
    progress->finish();
    return result;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment