Commit 59b3b482 authored by daniel.bub's avatar daniel.bub
Browse files

[ADD #132] detect column types from data cell values

 * also refactor WCsvConverter unittest
 * add fixtures folder in filterProtonData/test
parent c791bd16
......@@ -60,6 +60,11 @@ public:
*/
typedef std::vector< std::string > ContentElem;
/**
* represents a shared pointer to a ContentElem object.
*/
typedef boost::shared_ptr< std::vector< std::string > > ContentElemSPtr;
/**
* Construct WDataSetCSV object
*
......
......@@ -89,9 +89,6 @@ boost::shared_ptr< WDataSetCSV > WReaderCSV::read()
file.close();
header->push_back( data->at( 0 ) );
data->erase( data->begin() );
boost::shared_ptr< WDataSetCSV > datasetcsv = boost::shared_ptr< WDataSetCSV >( new WDataSetCSV( header, data ) );
datasetcsv->setRawDataSet( rawRow );
......
......@@ -22,8 +22,10 @@
//
//---------------------------------------------------------------------------
#include <regex>
#include <string>
#include <vector>
#include <boost/lexical_cast.hpp>
#include "WProtonData.h"
......@@ -65,6 +67,13 @@ void WProtonData::setCSVData( WDataSetCSV::ContentSPtr csvData )
throw WException( "Can not set data! No data specified!" );
}
if( csvData->empty() )
{
throw WException( "Can not set data! No data content found!" );
}
detectColumnTypesFromCsvData( csvData );
m_csvData = csvData;
}
......@@ -104,3 +113,44 @@ int WProtonData::getColumnIndex( std::string columnName )
return m_columnMap[ columnName ];
}
/**
 * Provides access to the column types stored in this object.
 *
 * \return a copy of the shared pointer held in m_columnTypes
 */
WDataSetCSV::ContentElemSPtr WProtonData::getColumnTypes()
{
    WDataSetCSV::ContentElemSPtr columnTypes( m_columnTypes );
    return columnTypes;
}
/**
 * Detects the type of every column from the first row of the csv data and stores the result in m_columnTypes.
 *
 * Only the first data row is inspected; later rows are not taken into account.
 *
 * \param csvData the input csv data; must contain at least one row
 * \throw WException if csvData is null or contains no rows
 */
void WProtonData::detectColumnTypesFromCsvData( WDataSetCSV::ContentSPtr csvData )
{
    // Guard here as well, so the first row is never dereferenced on a missing or empty container.
    if( !csvData || csvData->empty() )
    {
        throw WException( "Can not detect column types! No data content found!" );
    }

    const auto& firstRow = *( csvData->begin() );

    // Build the result locally and publish it afterwards, so m_columnTypes is never left half-filled.
    WDataSetCSV::ContentElemSPtr detectedTypes( new std::vector< std::string >() );
    detectedTypes->reserve( firstRow.size() );

    // determine column types based on first csv data row
    for( const auto& cell : firstRow )
    {
        detectedTypes->push_back( determineColumnTypeByString( cell ) );
    }

    m_columnTypes = detectedTypes;

    assert( m_columnTypes != nullptr );
    assert( !m_columnTypes->empty() );
    // one detected type per header column
    assert( m_columnTypes->size() == m_csvHeader->at( 0 ).size() );
}
/**
 * Classifies a single csv cell value as "int", "double" or "string".
 *
 * An optionally signed run of digits is reported as "int". A decimal number with optional sign,
 * fraction and exponent is reported as "double". Everything else, including the empty string,
 * is reported as "string".
 *
 * \param cellValue the textual content of one csv cell
 * \return either "int", "double" or "string"
 */
std::string WProtonData::determineColumnTypeByString( std::string cellValue )
{
    // Compiling a std::regex is expensive, so both patterns are built once instead of once per cell.
    // A leading '+' is accepted for integers too, matching the sign handling of the double pattern.
    static const std::regex regexInt( R"(^[+-]?[[:d:]]+$)" );
    static const std::regex regexDouble( R"(^([+-]?(?:[[:d:]]+\.?|[[:d:]]*\.[[:d:]]+))(?:[Ee][+-]?[[:d:]]+)?$)" );

    // The integer pattern is tested first because every integer also matches the double pattern.
    if( std::regex_search( cellValue, regexInt ) )
    {
        return "int";
    }
    if( std::regex_search( cellValue, regexDouble ) )
    {
        return "double";
    }
    return "string";
}
......@@ -25,6 +25,7 @@
#ifndef WPROTONDATA_H
#define WPROTONDATA_H
#include <regex>
#include <map>
#include <string>
......@@ -113,6 +114,13 @@ public:
*/
bool isColumnAvailable( std::string columnName );
/**
* Get column types, stored in a string vector.
* Positions within this vector are linked to positions in m_csvHeader
* \return a shared pointer to m_columnTypes
*/
WDataSetCSV::ContentElemSPtr getColumnTypes();
private:
/**
* Stores column index of data.
......@@ -133,6 +141,24 @@ private:
* Stores index of the selected single-selector (ColumnPropertyHandler)
*/
std::map< std::string, int > m_ColumnMapSelectedIndex;
/**
* Stores the data type ("int", "double" or "string") detected for each column
*/
WDataSetCSV::ContentElemSPtr m_columnTypes;
/**
* Inspects the first row of the csv data and stores the detected column types in m_columnTypes
* \param csvData the input csv data
*/
void detectColumnTypesFromCsvData( WDataSetCSV::ContentSPtr csvData );
/**
* Determines the column type based on cellValue
* \param cellValue the value of a cell on the basis of which the column type is to be determined
* \return either "int", "double" or "string"
*/
std::string determineColumnTypeByString( std::string cellValue );
};
#endif // WPROTONDATA_H
......@@ -76,7 +76,7 @@ public:
boost::make_shared< WVisualizationPropertyHandler>( tmpVisualizationPropertyHandler )
);
TS_ASSERT_THROWS_ANYTHING( WCsvConverter( boost::make_shared< WProtonData >( protonData ),
TS_ASSERT_THROWS_NOTHING( WCsvConverter( boost::make_shared< WProtonData >( protonData ),
tmpPropertyStatus,
tmpColorBar )
);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment