WValueSetHistogram.h 6.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
//---------------------------------------------------------------------------
//
// Project: OpenWalnut ( http://www.openwalnut.org )
//
// Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
// For more information see http://www.openwalnut.org/copying
//
// This file is part of OpenWalnut.
//
// OpenWalnut is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// OpenWalnut is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
//
//---------------------------------------------------------------------------

#ifndef WVALUESETHISTOGRAM_H
#define WVALUESETHISTOGRAM_H

#include <map>
#include <list>
#include <utility>

#include <boost/shared_ptr.hpp>
#include <boost/scoped_array.hpp>
#include <boost/shared_array.hpp>

36
#include "../../common/WHistogram.h"
37 38 39 40
#include "../WValueSet.h"

/**
 * Used to find the occurrence frequencies of values in a value set. It implements a classical histogram but allows easy modification of bucket
41 42
 * sizes without unnecessary recalculation of the whole histogram. This histogram uses right-open intervals for counting, which is why there
 * always is a bucket at the end from max to infinity which holds all the max values.
43 44 45
 *
 * \note This histogram is different from from WValueSetHistogram which is a generic histogram class.
 */
46
class WValueSetHistogram: public WHistogram
47
{
48
friend class WValueSetHistogramTest;
49 50 51 52 53
public:
    /**
     * Constructor. Creates the histogram for the specified value set.
     *
     * \param valueSet source of the data for the histogram
54
     * \param buckets the number of buckets to use. If not specified, 1000 is used as default. Must be larger than 1.
55
     */
56
    explicit WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, size_t buckets = 1000 );
57 58 59 60 61

    /**
     * Constructor. Creates the histogram for the specified value set.
     *
     * \param valueSet source of the data for the histogram
62
     * \param buckets the number of buckets to use. If not specified, 1000 is used as default. Must be larger than 1.
63
     */
64
    explicit WValueSetHistogram( const WValueSetBase& valueSet, size_t buckets = 1000 );
65 66

    /**
67
     * Copy constructor. If another interval size is given the histogram gets matched to it using the initial bucket data.
68
     * \note this does not deep copy the m_initialBuckets and m_mappedBuckets array as these are shared_array instances.
69 70
     *
     * \param histogram another WValueSetHistogram
71
     * \param buckets the new number of buckets. Must be larger than 1 if specified.
72
     */
73
    WValueSetHistogram( const WValueSetHistogram& histogram, size_t buckets = 0 );
74 75 76 77 78 79

    /**
     * Destructor.
     */
    ~WValueSetHistogram();

80 81 82 83 84 85 86 87 88 89
    /**
     * Copy assignment. Copies the contents of the specified histogram to this instance.
     *
     * \param other the other instance
     *
     * \return this instance with the contents of the other one.
     * \note this does not deep copy the m_initialBuckets and m_mappedBuckets array as these are shared_array instances.
     */
    WValueSetHistogram& operator=( const WValueSetHistogram& other );

90
    /**
91
     * Get the count of the bucket.
92
     *
93
     * \param index which buckets count is to be returned; starts with 0 which is the bucket containing the smallest values.
94 95 96
     *
     * \return elements in the bucket.
     */
97
    virtual size_t operator[]( size_t index ) const;
98 99

    /**
100
     * Get the count of the bucket. Testing if the position is valid.
101
     *
102
     * \param index which buckets count is to be returned; starts with 0 which is the bucket containing the smallest values.
103 104 105
     *
     * \return elements in the bucket
     */
106
    virtual size_t at( size_t index ) const;
107 108

    /**
109
     * Returns the number of buckets in the histogram with the actual mapping.
110 111 112
     *
     * \return number of buckets
     */
113
    virtual size_t size() const;
114 115 116 117

    /**
     * Return the size of one bucket.
     *
118 119
     * \param index the width for this bucket is queried.
     *
120 121
     * \return the size of a bucket.
     */
122
    virtual double getBucketSize( size_t index = 0 ) const;
123

124
    /**
125 126 127
     * Returns the actual interval associated with the given index. The interval is open, meaning that
     * getIntervalForIndex( i ).second == getIntervalForIndex( i + 1 ).first but does not belong anymore to the interval itself but every value
     * smaller than getIntervalForIndex( i ).second.
128 129 130
     *
     * \param index the intex
     *
131
     * \return the open interval.
132
     */
133
    virtual std::pair< double, double > getIntervalForIndex( size_t index ) const;
134

135 136 137 138 139 140
protected:
    /**
     * Return the initial buckets.
     *
     * \return m_initialBuckets
     */
141
    boost::shared_array< size_t > getInitialBuckets() const;
142 143 144 145 146 147

    /**
     * Return the number of initial buckets.
     *
     * \return m_nInitialBuckets
     */
148
    size_t getNInitialBuckets() const;
149 150 151 152 153 154

    /**
     * Return the size of one initial bucket.
     *
     * \return m_bucketSize
     */
155
    double getInitialBucketSize() const;
156 157

    /**
158 159 160 161
     * increment the value by one, contains the logic to find the element place in the array.
     * Should only be used in the constructor i.e. while iterating over WValueSet.
     *
     * \param value value to increment
162
     */
163
    virtual void insert( double value );
164

165
private:
166 167 168 169

    /**
     * Size of one bucket in the initial histogram.
     */
170
    double m_initialBucketSize;
171 172 173 174

    /**
     * Pointer to all initial buckets of the histogram.
     */
175
    boost::shared_array< size_t > m_initialBuckets;
176 177 178 179

    /**
     * Number of buckets in the initial histogram.
     */
180
    size_t m_nInitialBuckets;
181 182

    /**
183
     * Pointer to all initial buckets of the histogram.
184
     */
185
    boost::shared_array< size_t > m_mappedBuckets;
186 187 188 189

    /**
     * Tracks the number of a buckets in the mapped histogram.
     */
190 191 192 193 194 195
    size_t m_nMappedBuckets;

    /**
     * Size of one bucket in the mapped histogram.
     */
    double m_mappedBucketSize;
196 197
};

198 199 200 201 202
/**
 * Write a histogram in string representation to the given output stream.
 */
std::ostream& operator<<( std::ostream& out, const WValueSetHistogram& h );

203 204
#endif  // WVALUESETHISTOGRAM_H