//---------------------------------------------------------------------------
//
// Project: OpenWalnut ( http://www.openwalnut.org )
//
// Copyright 2009 OpenWalnut Community, BSV-Leipzig and CNCF-CBS
// For more information see http://www.openwalnut.org/copying
//
// This file is part of OpenWalnut.
//
// OpenWalnut is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// OpenWalnut is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with OpenWalnut. If not, see .
//
//---------------------------------------------------------------------------
#include
#include
#include
#include
#include
#include "core/common/datastructures/WDendrogram.h"
#include "core/common/datastructures/WUnionFind.h"
#include "core/common/WLimits.h"
#include "core/kernel/WKernel.h"
#include "WMDetTractClusteringGP.h"
WMDetTractClusteringGP::WMDetTractClusteringGP():
WModule()
{
}
WMDetTractClusteringGP::~WMDetTractClusteringGP()
{
}
boost::shared_ptr< WModule > WMDetTractClusteringGP::factory() const
{
return boost::shared_ptr< WModule >( new WMDetTractClusteringGP() );
}
const std::string WMDetTractClusteringGP::getName() const
{
return "Deterministic Tract Clustering using Gaussian Proccesses";
}
const std::string WMDetTractClusteringGP::getDescription() const
{
return "Clusters Gaussian Processes which represents deterministic tracts with the Wassermann "
"approach described in the paper: http://dx.doi.org/10.1016/j.neuroimage.2010.01.004";
}
void WMDetTractClusteringGP::connectors()
{
m_gpIC = WModuleInputData< WDataSetGP >::createAndAdd( shared_from_this(), "gpInput", "WDataSetGP providing the Gaussian processes" );
m_dendOC = WModuleOutputData< WDendrogram >::createAndAdd( shared_from_this(), "dendrogramOutput", "WDendrogram as a result of this clustering" );
m_matrixOC = WModuleOutputData< WDataSetMatrixSymFLT >::createAndAdd( shared_from_this(), "similaritiesOutput",
"WDataSetMatrixSym containing the similarities" );
WModule::connectors();
}
void WMDetTractClusteringGP::properties()
{
WModule::properties();
}
void WMDetTractClusteringGP::moduleMain()
{
m_moduleState.setResetable( true, true ); // remember actions when actually not waiting for actions
m_moduleState.add( m_gpIC->getDataChangedCondition() );
ready();
while( !m_shutdownFlag() ) // loop until the module container requests the module to quit
{
debugLog() << "Waiting..";
m_moduleState.wait();
if( !m_gpIC->getData().get() ) // ok, the output has not yet sent data
{
continue;
}
boost::shared_ptr< WDataSetGP > dataSet = m_gpIC->getData();
if( !dataSet || dataSet->size() == 0 )
{
debugLog() << "Invalid data--> continue";
continue;
}
if( m_gpIC->handledUpdate() )
{
debugLog() << "Input has been updated...";
}
infoLog() << "Generating similarity matrix...";
computeDistanceMatrix( dataSet );
if( m_shutdownFlag() ) // since computeDistanceMatrix may quit earlier due to the m_shutdownFlag()
{
debugLog() << "Abort all computations since shutdown requested";
break;
}
infoLog() << "Building dendrogram...";
boost::shared_ptr< WDendrogram > result( computeDendrogram( dataSet->size() ) );
if( m_shutdownFlag() ) // since computeDendrogram may quit earlier due to the m_shutdownFlag()
{
debugLog() << "Abort all computations since shutdown requested";
break;
}
m_dendOC->updateData( result );
infoLog() << "Done.";
}
}
void WMDetTractClusteringGP::computeDistanceMatrix( boost::shared_ptr< const WDataSetGP > dataSet )
{
const size_t steps = dataSet->size() * ( dataSet->size() - 2 ) / 2; // n(n-2)/2
boost::shared_ptr< WProgress > progress( new WProgress( "Similarity matrix computation", steps ) );
m_progress->addSubProgress( progress );
m_similarities = WMatrixSymFLT::SPtr( new WMatrixSymFLT( dataSet->size() ) );
#pragma omp parallel for schedule(guided)
for( int i = 0; i < static_cast< int >( dataSet->size() ); ++i )
{
for( size_t j = i + 1; j < dataSet->size() && !m_shutdownFlag(); ++j )
{
const WGaussProcess& p1 = ( *dataSet )[i];
const WGaussProcess& p2 = ( *dataSet )[j];
bool computeInnerProduct = p1.getBB().minDistance( p2.getBB() ) < ( p1.getRadius() + p2.getRadius() );
float innerProduct = 0.0;
if( computeInnerProduct )
{
innerProduct = gauss::innerProduct( p1, p2 ); // As written in the paper, we don't use the normalized inner product
}
#pragma omp critical
{
(*m_similarities)( i, j ) = innerProduct;
}
}
#pragma omp critical
{
*progress = *progress + ( dataSet->size() - i - 1 );
}
}
m_matrixOC->updateData( WDataSetMatrixSymFLT::SPtr( new WDataSetMatrixSymFLT( m_similarities ) ) );
progress->finish();
m_progress->removeSubProgress( progress );
}
boost::shared_ptr< WDendrogram > WMDetTractClusteringGP::computeDendrogram( size_t n )
{
boost::shared_ptr< WDendrogram > dend( new WDendrogram( n ) );
boost::shared_ptr< WProgress > progress( new WProgress( "Matrix => Dendrogram", n - 1 ) );
m_progress->addSubProgress( progress );
WUnionFind uf( n );
std::vector< size_t > innerNode( n ); // The refernces from the canonical Elements (cE) to the inner nodes.
std::set< size_t > idx; // valid indexes, to keep trac of already erased columns
std::vector< size_t > clusterSize( n, 1 ); // to keep trac how many elements a cluster has.
for( size_t i = 0; i < n; ++i )
{
innerNode[i] = i; // initialize them with their corresponding leafs.
idx.insert( i );
}
SimSortArray sims;
IdxSimMap map;
for( size_t i = 0; i < n ; ++i ) {
for( size_t j = i + 1; j < n; ++j ) {
IdxSimSPtr current( new IndexSimilarity( (*m_similarities)( i, j ), i, j ) );
sims.push( current );
map[ ( i * n + j - ( i + 1 ) * ( i + 2 ) / 2 ) ] = current;
}
}
for( size_t i = 0; i < n - 1 && !m_shutdownFlag(); ++i )
{
while( !sims.empty() && sims.top() && ( idx.find( sims.top()->_i ) == idx.end() || idx.find( sims.top()->_j ) == idx.end() ) ) {
sims.pop();
}
if( sims.empty() || m_shutdownFlag() ) {
break;
}
float maxSim = sims.top()->_sim;
size_t p = sims.top()->_i;
size_t q = sims.top()->_j;
sims.pop();
uf.merge( p, q );
size_t newCE = uf.find( p );
innerNode[ newCE ] = dend->merge( innerNode[ p ], innerNode[ q ], maxSim );
idx.erase( newCE == p ? q : p );
// update the column where now the new cluster pq resides
for( std::set< size_t >::const_iterator it = idx.begin(); it != idx.end(); ++it )
{
if( *it != newCE )
{
// we have two Gauss processes p and q. We have merged p and q into pq. Hence for all valid indexes we must
// recompute < pq, k > where k is a GP identified through an valid index, where:
// < pq, k > = |p| / ( |p| + |q| ) < p, k > + |q| / (|p| + |q|) < q, k >
double firstFactor = static_cast< double >( clusterSize[ p ] ) / ( clusterSize[ p ] + clusterSize[ q ] );
double secondFactor = static_cast< double >( clusterSize[ q ] ) / ( clusterSize[ p ] + clusterSize[ q ] );
size_t r = newCE > *it ? *it : newCE;
size_t s = newCE > *it ? newCE : *it;
(*m_similarities)( newCE, *it ) = firstFactor * (*m_similarities)( p, *it ) + secondFactor * (*m_similarities)( q, *it );
IdxSimSPtr current( new IndexSimilarity( (*m_similarities)( r, s ), r, s ) );
IdxSimMap::iterator old = map.find( ( r * n + s - ( r + 1 ) * ( r + 2 ) / 2 ) );
if( old != map.end() && old->second ) {
old->second->_i = std::numeric_limits< size_t >::max();
old->second = current;
}
sims.push( current );
}
}
clusterSize[ newCE ] = clusterSize[ p ] + clusterSize[ q ];
++*progress;
}
progress->finish();
m_progress->removeSubProgress( progress );
return dend;
}