Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
OpenWalnut Core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
44
Issues
44
List
Boards
Labels
Service Desk
Milestones
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OpenWalnut
OpenWalnut Core
Commits
92a308dd
Commit
92a308dd
authored
Aug 06, 2010
by
Sebastian Eichelbaum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CHANGE] - histogram now can clamp values to a certain user defined min-max interval
parent
46ee1e7d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
186 additions
and
34 deletions
+186
-34
src/dataHandler/datastructures/WValueSetHistogram.cpp
src/dataHandler/datastructures/WValueSetHistogram.cpp
+57
-34
src/dataHandler/datastructures/WValueSetHistogram.h
src/dataHandler/datastructures/WValueSetHistogram.h
+82
-0
src/dataHandler/datastructures/test/WValueSetHistogram_test.h
...dataHandler/datastructures/test/WValueSetHistogram_test.h
+47
-0
No files found.
src/dataHandler/datastructures/WValueSetHistogram.cpp
View file @
92a308dd
...
...
@@ -23,10 +23,12 @@
//---------------------------------------------------------------------------
#include <cstring> // memset()
#include <numeric>
#include <utility>
#include "../../common/WAssert.h"
#include "../../common/WLimits.h"
#include "../../common/exceptions/WOutOfBounds.h"
#include "../WDataHandlerEnums.h"
#include "WValueSetHistogram.h"
...
...
@@ -34,45 +36,36 @@
/**
 * Constructs a histogram over the given value set. The value set's own minimum and maximum are handed to the WHistogram base class as
 * the histogram range.
 *
 * \param valueSet the source data. It is dereferenced here, so it must not be a null pointer.
 * \param buckets the number of buckets, forwarded to the WHistogram base class
 */
WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, size_t buckets ):
    WHistogram( valueSet->getMinimumValue(), valueSet->getMaximumValue(), buckets )
{
    // The bucket set-up is delegated completely to buildHistogram(). Repeating it here would allocate a raw bucket array with new[] that
    // is never handed over to a shared_array and therefore leaks.
    buildHistogram( *valueSet );
}
// the array can be shared among several instances of WValueSetHistogram.
m_initialBuckets
=
boost
::
shared_array
<
size_t
>
(
initialBuckets
);
/**
 * Constructs a histogram over the given value set. The value set's own minimum and maximum are handed to the WHistogram base class as
 * the histogram range.
 *
 * \param valueSet the source data
 * \param buckets the number of buckets, forwarded to the WHistogram base class
 */
WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, size_t buckets )
    : WHistogram( valueSet.getMinimumValue(), valueSet.getMaximumValue(), buckets )
{
    // the shared build routine does the actual work
    this->buildHistogram( valueSet );
}
// no mapping applied yet. Initial and mapped are equal
m_nMappedBuckets
=
m_nInitialBuckets
;
m_mappedBuckets
=
m_initialBuckets
;
m_mappedBucketSize
=
m_initialBucketSize
;
/**
 * Constructs a histogram over the given value set, using the caller-supplied range instead of the value set's own minimum and maximum.
 *
 * \param valueSet the source data. It is dereferenced here, so it must not be a null pointer.
 * \param min the lower end of the range, forwarded to the WHistogram base class
 * \param max the upper end of the range, forwarded to the WHistogram base class
 * \param buckets the number of buckets, forwarded to the WHistogram base class
 */
WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, double min, double max, size_t buckets )
    : WHistogram( min, max, buckets )
{
    // the shared build routine does the actual work
    this->buildHistogram( *valueSet );
}
// and finally create the histogram
for
(
size_t
i
=
0
;
i
<
valueSet
->
size
();
++
i
)
{
double
tmp
=
valueSet
->
getScalarDouble
(
i
);
insert
(
tmp
);
}
/**
 * Constructs a histogram over the given value set, using the caller-supplied range instead of the value set's own minimum and maximum.
 *
 * \param valueSet the source data
 * \param min the lower end of the range, forwarded to the WHistogram base class
 * \param max the upper end of the range, forwarded to the WHistogram base class
 * \param buckets the number of buckets, forwarded to the WHistogram base class
 */
WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, double min, double max, size_t buckets )
    : WHistogram( min, max, buckets )
{
    // the shared build routine does the actual work
    this->buildHistogram( valueSet );
}
WValueSetHistogram
::
WValueSetHistogram
(
const
WValueSetBase
&
valueSet
,
size_t
buckets
)
:
WHistogram
(
valueSet
.
getMinimumValue
(),
valueSet
.
getMaximumValue
(),
buckets
)
void
WValueSetHistogram
::
buildHistogram
(
const
WValueSetBase
&
valueSet
)
{
m_nbTotalElements
=
0
;
// create base histogram
WAssert
(
buckets
>
1
,
"WValueSetHistogram::WValueSet
Histogram : number of buckets needs to be larger than 1."
);
m_nInitialBuckets
=
b
uckets
-
1
;
WAssert
(
m_nbBuckets
>
1
,
"WValueSetHistogram::build
Histogram : number of buckets needs to be larger than 1."
);
m_nInitialBuckets
=
m_nbB
uckets
-
1
;
m_initialBucketSize
=
(
m_maximum
-
m_minimum
)
/
static_cast
<
double
>
(
m_nInitialBuckets
);
WAssert
(
m_initialBucketSize
>
0.0
,
"WValueSetHistogram::
WValueSet
Histogram() : m_initialBucketSize to small."
);
WAssert
(
m_initialBucketSize
>
0.0
,
"WValueSetHistogram::
build
Histogram() : m_initialBucketSize to small."
);
// NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the
// calculation of interval sizes, the value must not be incremented
...
...
@@ -97,6 +90,8 @@ WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, size_t bu
double
tmp
=
valueSet
.
getScalarDouble
(
i
);
insert
(
tmp
);
}
m_nbTotalElements
=
valueSet
.
size
();
}
WValueSetHistogram
::
WValueSetHistogram
(
const
WValueSetHistogram
&
histogram
,
size_t
buckets
)
:
...
...
@@ -106,7 +101,8 @@ WValueSetHistogram::WValueSetHistogram( const WValueSetHistogram& histogram, siz
m_nInitialBuckets
(
histogram
.
m_nInitialBuckets
),
m_mappedBuckets
(
histogram
.
m_mappedBuckets
),
m_nMappedBuckets
(
histogram
.
m_nMappedBuckets
),
m_mappedBucketSize
(
histogram
.
m_mappedBucketSize
)
m_mappedBucketSize
(
histogram
.
m_mappedBucketSize
),
m_nbTotalElements
(
histogram
.
m_nbTotalElements
)
{
// apply modification of the histogram bucket size?
if
(
(
buckets
==
0
)
||
(
buckets
==
m_nMappedBuckets
)
)
...
...
@@ -193,8 +189,7 @@ double WValueSetHistogram::getBucketSize( size_t /* index */ ) const
/**
 * Counts one value by incrementing the mapped bucket whose index getIndexForValue() reports for it.
 *
 * \param value the value to sort into the histogram
 */
void WValueSetHistogram::insert( double value )
{
    // Increment exactly once per value and only via getIndexForValue(). A second increment through a directly computed, unclamped index
    // would count the value twice and could address a slot outside of the bucket array.
    m_mappedBuckets[ getIndexForValue( value ) ]++;
}
size_t
WValueSetHistogram
::
operator
[](
size_t
index
)
const
...
...
@@ -216,6 +211,11 @@ size_t WValueSetHistogram::size() const
return
m_nMappedBuckets
;
// overwrite the WHistogram::size here as we have our own size.
}
size_t
WValueSetHistogram
::
getTotalElementCount
()
const
{
return
m_nbTotalElements
;
}
std
::
pair
<
double
,
double
>
WValueSetHistogram
::
getIntervalForIndex
(
size_t
index
)
const
{
double
first
=
m_minimum
+
m_mappedBucketSize
*
index
;
...
...
@@ -223,6 +223,29 @@ std::pair< double, double > WValueSetHistogram::getIntervalForIndex( size_t inde
return
std
::
make_pair
(
first
,
second
);
}
/**
 * Sums the mapped bucket counts of the half-open index range between the two given indices. The indices may be passed in either order;
 * the smaller one is included, the larger one is excluded.
 *
 * \param startIndex one end of the index range
 * \param endIndex the other end of the index range
 *
 * \return the sum of the counts of all buckets in the range
 * \throw WOutOfBounds if the larger of the two indices exceeds size()
 */
size_t WValueSetHistogram::accumulate( size_t startIndex, size_t endIndex ) const
{
    // bring the bounds into ascending order first
    if( endIndex < startIndex )
    {
        std::swap( startIndex, endIndex );
    }

    // the upper bound is exclusive, so size() itself is still acceptable; anything beyond is not
    if( !( endIndex <= size() ) )
    {
        throw WOutOfBounds( "The specified endIndex is out of bounds." );
    }

    // summed by hand, as std::accumulate can not be applied to the shared_array
    size_t sum = 0;
    for( size_t bucket = startIndex; bucket != endIndex; ++bucket )
    {
        sum += m_mappedBuckets[ bucket ];
    }
    return sum;
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
WValueSetHistogram
&
h
)
{
for
(
size_t
i
=
0
;
i
<
h
.
size
()
-
1
;
++
i
)
...
...
src/dataHandler/datastructures/WValueSetHistogram.h
View file @
92a308dd
...
...
@@ -63,6 +63,30 @@ public:
*/
explicit
WValueSetHistogram
(
const
WValueSetBase
&
valueSet
,
size_t
buckets
=
1000
);
/**
* Constructor. Creates a histogram from the specified value set but allows cropping of values below the given min and above the given max.
* It actually interprets all values below min and above max to be exactly min and exactly max and sorts them into the appropriate bin. This
* is especially useful to filter out outliers in data.
*
* \param valueSet source data
* \param min the new minimum to use
* \param max the maximum to use
* \param buckets the number of buckets to use. If not specified, 1000 is used as default. Must be larger than 1.
*/
WValueSetHistogram
(
boost
::
shared_ptr
<
WValueSetBase
>
valueSet
,
double
min
,
double
max
,
size_t
buckets
=
1000
);
/**
* Constructor. Creates a histogram from the specified value set but allows cropping of values below the given min and above the given max.
* It actually interprets all values below min and above max to be exactly min and exactly max and sorts them into the appropriate bin. This
* is especially useful to filter out outliers in data.
*
* \param valueSet source data
* \param min the new minimum to use
* \param max the maximum to use
* \param buckets the number of buckets to use. If not specified, 1000 is used as default. Must be larger than 1.
*/
WValueSetHistogram
(
const
WValueSetBase
&
valueSet
,
double
min
,
double
max
,
size_t
buckets
=
1000
);
/**
* Copy constructor. If another interval size is given the histogram gets matched to it using the initial bucket data.
* \note this does not deep copy the m_initialBuckets and m_mappedBuckets array as these are shared_array instances.
...
...
@@ -132,6 +156,34 @@ public:
*/
virtual
std
::
pair
<
double
,
double
>
getIntervalForIndex
(
size_t
index
)
const
;
/**
* Returns the index of the bucket containing the given value. If the value is larger than the maximum, the index of the last bucket is
* returned. Likewise, if the value is smaller than the minimum, 0 is returned.
*
* \param value the value to search the index for
*
* \return the index of the bucket
*/
virtual
size_t
getIndexForValue
(
double
value
)
const
;
/**
* This returns the number of value set entries added to the histogram. This is especially useful to normalize the histogram counts.
*
* \return the number of elements distributed in the buckets.
*/
virtual
size_t
getTotalElementCount
()
const
;
/**
* Sums up the buckets in the specified interval. Especially useful for cumulative distribution functions or similar.
*
* \param startIndex the index where to start counting including this one
* \param endIndex the index where to end summing up excluding this one.
*
* \return the sum of all buckets in the interval.
* \throw WOutOfBounds if one of the indices is invalid.
*/
virtual
size_t
accumulate
(
size_t
startIndex
,
size_t
endIndex
)
const
;
protected:
/**
* Return the initial buckets.
...
...
@@ -193,6 +245,18 @@ private:
* Size of one bucket in the mapped histogram.
*/
double
m_mappedBucketSize
;
/**
* The number of elements distributed in the buckets.
*/
size_t
m_nbTotalElements
;
/**
* Actually builds the histogram. This function is simply used for avoiding code duplication in all these constructors.
*
* \param valueSet the value set.
*/
void
buildHistogram
(
const
WValueSetBase
&
valueSet
);
};
/**
...
...
@@ -200,5 +264,23 @@ private:
*/
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
WValueSetHistogram
&
h
);
/**
 * Maps a value to the index of the mapped bucket containing it. Values outside of the histogram range are clamped to the first or the
 * last bucket.
 *
 * \param value the value to look up
 *
 * \return 0 if the value is not above m_minimum (this includes NaN), m_nMappedBuckets - 1 if the value lies at or beyond the last bucket,
 *         otherwise the position of the value, measured in bucket sizes from m_minimum, truncated to an integer
 */
inline size_t WValueSetHistogram::getIndexForValue( double value ) const
{
    // the position on the scale, measured in bucket sizes from m_minimum
    const double pos = ( value - m_minimum ) / static_cast< double >( m_mappedBucketSize );

    // NOTE: the clamping has to happen BEFORE the conversion to size_t. Converting a floating point value whose truncated value is not
    // representable in the target type (negative, too large or NaN) is undefined behaviour.

    // not above the lower bound? The negated comparison also catches NaN.
    if( !( pos > 0.0 ) )
    {
        return 0;
    }

    // at or beyond the last bucket?
    if( pos >= static_cast< double >( m_nMappedBuckets ) )
    {
        return m_nMappedBuckets - 1;
    }

    // here 0 < pos < m_nMappedBuckets holds, so truncating yields a valid index
    return static_cast< size_t >( pos );
}
#endif // WVALUESETHISTOGRAM_H
src/dataHandler/datastructures/test/WValueSetHistogram_test.h
View file @
92a308dd
...
...
@@ -132,6 +132,53 @@ class WValueSetHistogramTest : public CxxTest::TestSuite
TS_ASSERT_EQUALS
(
hist
.
getBucketSize
(),
1.0
);
// 0.0, 1.0, 2.0, 3.0 and 4.0
}
/**
* Test getIndexForValue()
*/
void testIndex( void )
{
    // create some test data
    double a[ 5 ] = { 0.0, 4.0, 1.0, 2.0, 1.0 };
    const std::vector< double > v( a, a + sizeof( a ) / sizeof( double ) );

    // the value set is a local object, so it is released when the test returns instead of leaking from the heap
    WValueSet< double > valueSet( 0, 1, v, W_DT_DOUBLE );

    // create histogram
    WValueSetHistogram hist( valueSet, 5 );

    // bucket index = interval = number of test values in it
    // 0 = [0, 1) = 1
    // 1 = [1, 2) = 2
    // 2 = [2, 3) = 1
    // 3 = [3, 4) = 0
    // 4 = [4, inf) = 1

    // values inside of the range
    TS_ASSERT_EQUALS( hist.getIndexForValue( 4.0 ), 4 );
    TS_ASSERT_EQUALS( hist.getIndexForValue( 3.999 ), 3 );
    TS_ASSERT_EQUALS( hist.getIndexForValue( 0.0 ), 0 );

    // values outside of the range have to be clamped to the last respectively first bucket
    TS_ASSERT_EQUALS( hist.getIndexForValue( 122.0 ), 4 );  // test values above maximum
    TS_ASSERT_EQUALS( hist.getIndexForValue( -122.0 ), 0 );  // test values below minimum
}
/**
* Test accumulate
*/
void testAccum( void )
{
    // create some test data
    double a[ 5 ] = { 0.0, 4.0, 1.0, 2.0, 1.0 };
    const std::vector< double > v( a, a + sizeof( a ) / sizeof( double ) );

    // the value set is a local object, so it is released when the test returns instead of leaking from the heap
    WValueSet< double > valueSet( 0, 1, v, W_DT_DOUBLE );

    // create histogram
    WValueSetHistogram hist( valueSet, 5 );
    std::cout << hist << std::endl;

    // buckets 0 and 1 are summed up, bucket 2 is excluded
    TS_ASSERT_EQUALS( hist.accumulate( 0, 2 ), 3 );
    TS_ASSERT_EQUALS( hist.accumulate( 2, 0 ), 3 );  // it also needs to handle switched indices

    // an empty range must not count anything, especially not the bucket at the (exclusive) end index
    TS_ASSERT_EQUALS( hist.accumulate( 2, 2 ), 0 );  // exclude second index properly?
    TS_ASSERT( hist.accumulate( 2, 2 ) != hist[ 2 ] );  // the result must differ from the count of bucket 2

    // an end index beyond the number of buckets has to be rejected
    TS_ASSERT_THROWS_ANYTHING( hist.accumulate( 0, 123 ) );
}
/**
* Test copy construction.
**/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment