Commit 2fa8c39e authored by Sebastian Eichelbaum

[CHANGE] - rethought the value mapping in the histogram. The old mapping did...

[CHANGE] - rethought the value mapping in the histogram. The old mapping did not respect the last, open interval. This is now fixed.
parent dec823bf
......@@ -35,6 +35,7 @@ WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > value
m_maximum( valueSet->getMaximumValue() )
{
// create base histogram
WAssert( buckets > 1, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be larger than 1." );
m_nInitialBuckets = buckets - 1;
m_initialBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nInitialBuckets );
WAssert( m_initialBucketSize > 0.0, "WValueSetHistogram::WValueSetHistogram() : m_initialBucketSize to small." );
......@@ -69,6 +70,7 @@ WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, size_t bu
m_maximum( valueSet.getMaximumValue() )
{
// create base histogram
WAssert( buckets > 1, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be larger than 1." );
m_nInitialBuckets = buckets - 1;
m_initialBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nInitialBuckets );
WAssert( m_initialBucketSize > 0.0, "WValueSetHistogram::WValueSetHistogram() : m_initialBucketSize to small." );
......@@ -109,40 +111,40 @@ WValueSetHistogram::WValueSetHistogram( const WValueSetHistogram& histogram, siz
m_mappedBucketSize( histogram.m_mappedBucketSize )
{
// apply modification of the histogram bucket size?
if( buckets == 0 )
if( ( buckets == 0 ) || ( buckets == m_nMappedBuckets ) )
{
return;
}
WAssert( buckets > 0, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be larger than zero." );
WAssert( buckets > 1, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be larger than 1." );
WAssert( buckets < m_nInitialBuckets, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be smaller than the initial bucket count." );
// number of elements in the new mapped histogram = division + (round up)
m_nMappedBuckets = buckets;
m_nMappedBuckets = buckets - 1;
m_mappedBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nMappedBuckets );
size_t ratio = static_cast<size_t>( buckets / m_nInitialBuckets );
// map it
m_mappedBuckets.reset();
// NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the
// calculation of interval sizes, the value must not be incremented
m_nMappedBuckets++;
size_t ratio = static_cast<size_t>( m_nInitialBuckets / m_nMappedBuckets );
m_mappedBuckets.reset();
size_t* mappedBuckets = new size_t[ m_nMappedBuckets ];
memset( mappedBuckets, 0, m_nMappedBuckets * sizeof( size_t ) );
// *mappedBuckets = { 0 }; // works with C++0x
m_mappedBuckets = boost::shared_array< size_t >( mappedBuckets );
// map it
size_t index = 0;
for( size_t i = 0; i != m_nInitialBuckets; ++i )
for( size_t i = 0; i < m_nInitialBuckets; ++i )
{
if( i % ratio == 0 && i != 0 )
if( ( i % ratio == 0 ) && ( i != 0 ) && ( i != m_nInitialBuckets - 1 ) )
{
index++;
}
m_mappedBuckets[index] += m_initialBuckets[i];
m_mappedBuckets[ index ] += m_initialBuckets[i];
}
}
......
......@@ -37,7 +37,8 @@
/**
* Used to find the occurrence frequencies of values in a value set. It implements a classical histogram but allows easy modification of bucket
* sizes without unnecessary recalculation of the whole histogram.
* sizes without unnecessary recalculation of the whole histogram. This histogram uses right-open intervals for counting, which is why there
* always is a bucket at the end from max to infinity which holds all the max values.
*
* \note This histogram is different from WValueSetHistogram which is a generic histogram class.
*/
......@@ -49,16 +50,15 @@ public:
* Constructor. Creates the histogram for the specified value set.
*
* \param valueSet source of the data for the histogram
* \param buckets the number of buckets to use. If not specified, 1000 is used as default
* \param buckets the number of buckets to use. If not specified, 1000 is used as default. Must be larger than 1.
*/
explicit WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, size_t buckets = 1000 );
/**
* Constructor. Creates the histogram for the specified value set.
*
* \param valueSet source of the data for the histogram
* \param buckets the number of buckets to use. If not specified, 1000 is used as default
* \param buckets the number of buckets to use. If not specified, 1000 is used as default. Must be larger than 1.
*/
explicit WValueSetHistogram( const WValueSetBase& valueSet, size_t buckets = 1000 );
......@@ -67,7 +67,7 @@ public:
* \note this does not deep copy the m_initialBuckets and m_mappedBuckets array as these are shared_array instances.
*
* \param histogram another WValueSetHistogram
* \param buckets the new number of buckets.
* \param buckets the new number of buckets. Must be larger than 1 if specified.
*/
WValueSetHistogram( const WValueSetHistogram& histogram, size_t buckets = 0 );
......
......@@ -166,6 +166,7 @@ class WValueSetHistogramTest : public CxxTest::TestSuite
TS_ASSERT_EQUALS( hist.m_nInitialBuckets, hist3.m_nInitialBuckets );
TS_ASSERT_EQUALS( hist.m_mappedBuckets, hist3.m_mappedBuckets );
TS_ASSERT_EQUALS( hist.m_nMappedBuckets, hist3.m_nMappedBuckets );
TS_ASSERT_EQUALS( hist.m_nMappedBuckets, 10 );
TS_ASSERT_EQUALS( hist.m_mappedBucketSize, hist3.m_mappedBucketSize );
}
......@@ -174,6 +175,31 @@ class WValueSetHistogramTest : public CxxTest::TestSuite
**/
void testCopyWithIntervalChanges( void )
{
// Checks that copy-constructing a histogram with a different bucket count keeps the initial histogram data
// untouched while re-mapping the counts into the new, coarser buckets.

// create some test data: five values with minimum 0.0 and maximum 4.0
double a[5] = { 0.0, 4.0, 1.0, 2.0, 1.0 };
const std::vector< double > v( a, a + sizeof( a ) / sizeof( double ) );
// NOTE(review): valueSet is allocated with new and not deleted anywhere in this test - confirm whether this is intended.
WValueSet< double >* valueSet = new WValueSet< double >( 0, 1, v, W_DT_DOUBLE );
// create histogram
WValueSetHistogram hist( *valueSet, 4 );
WValueSetHistogram hist2( hist, 2 ); // create a copy of hist but change the number of intervals.
// requesting a single bucket has to fail: the number of buckets must be larger than 1.
// The hist2 declared inside the macro argument is a separate object from the hist2 above.
TS_ASSERT_THROWS_ANYTHING( WValueSetHistogram hist2( hist, 1 ) );
// it needs to keep the original initialBucket stuff
TS_ASSERT_EQUALS( hist.m_minimum, hist2.m_minimum );
TS_ASSERT_EQUALS( hist.m_maximum, hist2.m_maximum );
TS_ASSERT_EQUALS( hist.m_initialBucketSize, hist2.m_initialBucketSize );
TS_ASSERT_EQUALS( hist.m_initialBuckets.get(), hist2.m_initialBuckets.get() ); // initial buckets must be the same as it is a shared array
TS_ASSERT_EQUALS( hist.m_nInitialBuckets, hist2.m_nInitialBuckets );
// test the mapped stuff
// two requested buckets: one regular interval spanning the whole value range ( 4.0 - 0.0 ) plus the additional slot for the maximum
TS_ASSERT_EQUALS( hist2.m_mappedBucketSize, 4.0 );
TS_ASSERT_EQUALS( hist2.m_nMappedBuckets, 2 );
// the copy must have got its own mapped array instead of sharing the one of hist
TS_ASSERT( hist2.m_mappedBuckets != hist.m_mappedBuckets );
// test values
TS_ASSERT_EQUALS( hist2.at( 0 ), 4 ); // 0.0, 1.0, 2.0 and 1.0
TS_ASSERT_EQUALS( hist2.at( 1 ), 1 ); // 4.0, the maximum, counted in the last bucket
}
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment