Commit 4f103eab authored by Daniel Bub's avatar Daniel Bub
Browse files

Merge branch 'top2021s5/#132_detect_column_types' into 'sprint_6'

[MERGE #132] detect column types

See merge request top/21s/sivert-vis/sivert-vis-project!90
parents f9eb1377 19aa16bc
......@@ -60,6 +60,11 @@ public:
*/
typedef std::vector< std::string > ContentElem;
/**
* represents a shared pointer to a ContentElem object.
*/
typedef boost::shared_ptr< std::vector< std::string > > ContentElemSPtr;
/**
* Construct WDataSetCSV object
*
......
......@@ -89,9 +89,6 @@ boost::shared_ptr< WDataSetCSV > WReaderCSV::read()
file.close();
header->push_back( data->at( 0 ) );
data->erase( data->begin() );
boost::shared_ptr< WDataSetCSV > datasetcsv = boost::shared_ptr< WDataSetCSV >( new WDataSetCSV( header, data ) );
datasetcsv->setRawDataSet( rawRow );
......
//---------------------------------------------------------------------------
//
// Project: OpenWalnut ( http://www.openwalnut.org )
//
// Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
// For more information see http://www.openwalnut.org/copying
//
// This file is part of OpenWalnut.
//
// OpenWalnut is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// OpenWalnut is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
//
//---------------------------------------------------------------------------
#include <string>
#include "WDataType.h"
// Name under which integer columns are reported.
std::string WDataType::getInt()
{
    return std::string( "int" );
}
// Name under which floating point columns are reported.
std::string WDataType::getDouble()
{
    return std::string( "double" );
}
// Name under which textual columns are reported.
std::string WDataType::getString()
{
    return std::string( "string" );
}
// Name of the pseudo type "default".
std::string WDataType::getDefault()
{
    return std::string( "default" );
}
//---------------------------------------------------------------------------
//
// Project: OpenWalnut ( http://www.openwalnut.org )
//
// Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
// For more information see http://www.openwalnut.org/copying
//
// This file is part of OpenWalnut.
//
// OpenWalnut is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// OpenWalnut is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
//
//---------------------------------------------------------------------------
#ifndef WDATATYPE_H
#define WDATATYPE_H
#include <string>
/**
* Provides the names of the column data types as strings.
* All members are static; the class only serves as a central place for these names.
*/
class WDataType
{
public:
/**
* Name of the integer data type.
* \return the string "int"
*/
static std::string getInt();
/**
* Name of the floating point data type.
* \return the string "double"
*/
static std::string getDouble();
/**
* Name of the textual data type.
* \return the string "string"
*/
static std::string getString();
/**
* Name of the pseudo data type "default", which does not describe a concrete column type.
* \return the string "default"
*/
static std::string getDefault();
};
#endif // WDATATYPE_H
......@@ -22,8 +22,11 @@
//
//---------------------------------------------------------------------------
#include <cmath>
#include <limits>
#include <list>
#include <regex>
#include <string>
#include <vector>
#include <boost/lexical_cast.hpp>
#include "WProtonData.h"
......@@ -66,7 +69,14 @@ void WProtonData::setCSVData( WDataSetCSV::ContentSPtr csvData )
throw WException( "Can not set data! No data specified!" );
}
if( csvData->empty() )
{
throw WException( "Can not set data! No data content found!" );
}
m_csvData = csvData;
detectColumnTypesFromCsvData( csvData );
}
WDataSetCSV::ContentSPtr WProtonData::getCSVData()
......@@ -105,3 +115,102 @@ int WProtonData::getColumnIndex( std::string columnName )
return m_columnMap[ columnName ];
}
// Returns the shared pointer m_columnTypes itself, i.e. the caller shares the vector and gets no copy.
// NOTE(review): m_columnTypes is assigned in detectColumnTypesFromCsvData(); presumably it is still
// unset before the first setCSVData() call - confirm against the constructor.
WDataSetCSV::ContentElemSPtr WProtonData::getColumnTypes()
{
return m_columnTypes;
}
// Detects the data type of every csv column and stores the type names in m_columnTypes.
//
// The type of a column is guessed from the cell of the first data row only. Every column that was
// classified as "double" this way is afterwards checked over all rows of m_csvData and is changed
// to "int" if none of its values has a fractional part.
//
// NOTE(review): columns classified as "int" or "string" by the first row are not validated against
// the remaining rows.
//
// csvData: the input csv data; the caller (setCSVData) guarantees that it is non-null and not empty.
void WProtonData::detectColumnTypesFromCsvData( WDataSetCSV::ContentSPtr csvData )
{
    m_columnTypes = WDataSetCSV::ContentElemSPtr( new std::vector< std::string >() );

    // determine column types based on first csv data row
    const auto& firstRow = *csvData->begin();
    m_columnTypes->reserve( firstRow.size() );
    for( const auto& cell : firstRow )
    {
        m_columnTypes->push_back( determineColumnTypeByString( cell ) );
    }

    assert( !m_columnTypes->empty() );
    assert( m_columnTypes->size() == m_csvHeader->at( 0 ).size() );

    // a "double" column whose values are all whole numbers is treated as "int"
    for( size_t idx = 0; idx < m_columnTypes->size(); idx++ )
    {
        std::string& columnType = m_columnTypes->at( idx );
        if( columnType == WDataType::getDouble() && checkIfDoubleColumnCanBeInteger( static_cast< int >( idx ) ) )
        {
            columnType = WDataType::getInt();
        }
    }
}
// Classifies a single csv cell as "int", "double" or "string" (names taken from WDataType).
//
// "int":    optional sign, at least one digit, optionally followed by an exponent that consists of
//           'e' or 'E', an optional '+', an optional '0' and exactly one digit from 1 to 9.
// "double": optional sign, a decimal number with or without decimal point, optionally followed by
//           an exponent with optional sign.
// Everything else is a "string". The integer pattern is tested first.
//
// NOTE(review): every classification is also written to std::cout - this looks like debug output.
//
// cellValue: the value of a cell on the basis of which the column type is to be determined
// returns:   WDataType::getInt(), WDataType::getDouble() or WDataType::getString()
std::string WProtonData::determineColumnTypeByString( std::string cellValue )
{
    // Compiling a std::regex is expensive, so both patterns are built only once (on first use)
    // instead of once per call.
    static const std::regex regexInt( R"(^[-\+]?[[:d:]]+([eE]\+?0?[1-9])?$)" );
    static const std::regex regexDouble( R"(^([+-]?(?:[[:d:]]+\.?|[[:d:]]*\.[[:d:]]+))(?:[Ee][+-]?[[:d:]]+)?$)" );

    if( std::regex_search( cellValue, regexInt ) )
    {
        std::cout << cellValue << "\t INT" << std::endl;
        return WDataType::getInt();
    }

    if( std::regex_search( cellValue, regexDouble ) )
    {
        std::cout << cellValue << "\t DOUBLE" << std::endl;
        return WDataType::getDouble();
    }

    std::cout << cellValue << "\t STRING" << std::endl;
    return WDataType::getString();
}
bool WProtonData::checkIfDoubleColumnCanBeInteger( int columnNumber )
{
double doubleValue;
int intValue;
for( auto row : *m_csvData )
{
doubleValue = boost::lexical_cast< double >( row.at( columnNumber ) );
intValue = ( int )doubleValue;
if( doubleValue - intValue != 0 )
{
return false;
}
}
return true;
}
// Returns the header names of all columns whose detected type is contained in typeNames.
//
// If typeNames contains WDataType::getDefault(), no filtering takes place and the complete header
// (first row of m_csvHeader) is returned.
//
// typeNames: names of the data types (see WDataType) a column may have to be part of the result
// returns:   the matching header names in column order; every column appears at most once
std::vector< std::string > WProtonData::getHeaderFromType( std::list< std::string > typeNames )
{
    const std::vector< std::string >& header = m_csvHeader->at( 0 );

    // the default type disables the filter; this does not depend on the columns, so check it once up front
    const std::string defaultType = WDataType::getDefault();
    for( const std::string& type : typeNames )
    {
        if( type == defaultType )
        {
            return header;
        }
    }

    const std::vector< std::string >& columnTypes = *m_columnTypes;
    std::vector< std::string > filterHeader;

    // never index past the end of the header, even if header and column types differ in size
    for( size_t i = 0; i < columnTypes.size() && i < header.size(); i++ )
    {
        for( const std::string& type : typeNames )
        {
            if( columnTypes[ i ] == type )
            {
                filterHeader.push_back( header[ i ] );
                break; // report a column only once, even if typeNames contains a type twice
            }
        }
    }

    return filterHeader;
}
......@@ -25,10 +25,14 @@
#ifndef WPROTONDATA_H
#define WPROTONDATA_H
#include <regex>
#include <list>
#include <map>
#include <string>
#include <vector>
#include "core/dataHandler/WDataSetCSV.h"
#include "WDataType.h"
/**
......@@ -113,6 +117,20 @@ public:
*/
bool isColumnAvailable( std::string columnName );
/**
* Get column types, stored in a string vector.
* Positions within this vector are linked to positions in m_csvHeader
* \return a shared pointer to m_columnTypes
*/
WDataSetCSV::ContentElemSPtr getColumnTypes();
/**
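* Returns the header names of all columns whose type is contained in typeNames.
* If typeNames contains the default type (WDataType::getDefault()), the complete header is returned.
* \param typeNames names of the data types to filter by
* \return a vector of the filtered header names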
*/
std::vector< std::string > getHeaderFromType( std::list< std::string > typeNames );
private:
/**
* Stores column index of data.
......@@ -133,6 +151,31 @@ private:
* Stores index of the selected single-selector (ColumnPropertyHandler)
*/
std::map< std::string, int > m_ColumnMapSelectedIndex;
/**
* Stores the information, which data type is stored in associated column
*/
WDataSetCSV::ContentElemSPtr m_columnTypes;
/**
* Reads csv data and stores column types in m_columnTypes
* \param csvData the input csv data
*/
void detectColumnTypesFromCsvData( WDataSetCSV::ContentSPtr csvData );
/**
* Determines column type due to cellValue
* \param cellValue the value of a cell on the basis of which the column type is to be determined
* \return either "int", "double" or "string"
*/
std::string determineColumnTypeByString( std::string cellValue );
/**
* Checks, if values of a column, containing double values, can be converted to integers
* \param columnNumber the column number within m_csvHeader
* \return true, if no value of the column has a fractional part; false otherwise
*/
bool checkIfDoubleColumnCanBeInteger( int columnNumber );
};
#endif // WPROTONDATA_H
......@@ -25,6 +25,7 @@
#include <list>
#include <string>
#include "WDataType.h"
#include "WSingleSelectorName.h"
std::string WSingleSelectorName::getX()
......@@ -62,61 +63,96 @@ std::string WSingleSelectorName::getParentId()
return "Parent id";
}
WSingleSelectorName::NameDescriptionSearch WSingleSelectorName::getXwithDescription()
WSingleSelectorName::NameDescriptionSearchTyp WSingleSelectorName::getXwithDescription()
{
return WSingleSelectorName::NameDescriptionSearch( getX(),
"Choose the column which should be used to determine the x coordinate.",
"posX" );
return WSingleSelectorName::NameDescriptionSearchTyp(
getX(),
"Choose the column which should be used to determine the x coordinate.",
"posX",
std::list< std::string >
{
WDataType::getInt(), WDataType::getDouble()
} );
}
WSingleSelectorName::NameDescriptionSearch WSingleSelectorName::getYwithDescription()
WSingleSelectorName::NameDescriptionSearchTyp WSingleSelectorName::getYwithDescription()
{
return WSingleSelectorName::NameDescriptionSearch( getY(),
"Choose the column which should be used to determine the y coordinate.",
"posY" );
return WSingleSelectorName::NameDescriptionSearchTyp(
getY(),
"Choose the column which should be used to determine the y coordinate.",
"posY",
std::list< std::string >
{
WDataType::getInt(), WDataType::getDouble()
} );
}
WSingleSelectorName::NameDescriptionSearch WSingleSelectorName::getZwithDescription()
WSingleSelectorName::NameDescriptionSearchTyp WSingleSelectorName::getZwithDescription()
{
return WSingleSelectorName::NameDescriptionSearch( getZ(),
"Choose the column which should be used to determine the z coordinate.",
"posZ" );
return WSingleSelectorName::NameDescriptionSearchTyp(
getZ(),
"Choose the column which should be used to determine the z coordinate.",
"posZ",
std::list< std::string >
{
WDataType::getInt(), WDataType::getDouble()
} );
}
WSingleSelectorName::NameDescriptionSearch WSingleSelectorName::getPDGwithDescription()
WSingleSelectorName::NameDescriptionSearchTyp WSingleSelectorName::getPDGwithDescription()
{
return WSingleSelectorName::NameDescriptionSearch( getPDG(),
"Choose the column which should be used to determine the particle data group.",
"PDGEncoding" );
return WSingleSelectorName::NameDescriptionSearchTyp(
getPDG(),
"Choose the column which should be used to determine the particle data group.",
"PDGEncoding",
std::list< std::string >
{
WDataType::getInt()
} );
}
WSingleSelectorName::NameDescriptionSearch WSingleSelectorName::getEdepWithDescription()
WSingleSelectorName::NameDescriptionSearchTyp WSingleSelectorName::getEdepWithDescription()
{
return WSingleSelectorName::NameDescriptionSearch( getEdep(),
"Choose the column which should be used to determine the energy deposition.",
"edep" );
return WSingleSelectorName::NameDescriptionSearchTyp(
getEdep(),
"Choose the column which should be used to determine the energy deposition.",
"edep",
std::list< std::string >
{
WDataType::getInt(), WDataType::getDouble()
} );
}
// Content of the event id single-selector: name, description, searched header value and allowed types.
WSingleSelectorName::NameDescriptionSearchTyp WSingleSelectorName::getEventIdWithDescription()
{
    // adjacent string literals are concatenated without separator, so the blank after the first
    // sentence has to be part of the literal
    return WSingleSelectorName::NameDescriptionSearchTyp(
        getEventId(),
        "Choose the column which should be used to determine the event id. "
        "Tracks will be drawn based on the event id, all particles with the same event id will be connected.",
        "eventID",
        std::list< std::string >
        {
            WDataType::getInt()
        } );
}
// Content of the parent id single-selector: name, description, searched header value and allowed types.
WSingleSelectorName::NameDescriptionSearchTyp WSingleSelectorName::getParentIdWithDescription()
{
    // adjacent string literals are concatenated without separator, so the blank after the first
    // sentence has to be part of the literal
    return WSingleSelectorName::NameDescriptionSearchTyp(
        getParentId(),
        "Choose the column which should be used to determine the parent id. "
        "Primaries and secondaries filtering is based on that id, if a "
        "particle has the parent id 0 it is a primary otherwise it is a secondary.",
        "parentID",
        std::list< std::string >
        {
            WDataType::getInt()
        } );
}
std::list< WSingleSelectorName::NameDescriptionSearch > WSingleSelectorName::getListOfSelectorContent()
std::list< WSingleSelectorName::NameDescriptionSearchTyp > WSingleSelectorName::getListOfSelectorContent()
{
std::list< WSingleSelectorName::NameDescriptionSearch > list;
std::list< WSingleSelectorName::NameDescriptionSearchTyp > list;
list.push_back( getXwithDescription() );
list.push_back( getYwithDescription() );
......
......@@ -29,6 +29,8 @@
#include <list>
#include <string>
#include "WDataType.h"
/**
* Holds the single selector content.
......@@ -42,7 +44,7 @@ public:
* 2. Description of single-selector,
* 3. Value that is searched for in the csv header,
* 4. List of data type names (see WDataType) associated with the single-selector
*/
typedef std::tuple< std::string, std::string , std::string > NameDescriptionSearch;
typedef std::tuple< std::string, std::string, std::string, std::list < std::string > > NameDescriptionSearchTyp;
/**
* getter
......@@ -90,49 +92,49 @@ public:
* getter
* \return the name of the single-selector (X) with Description
*/
static WSingleSelectorName::NameDescriptionSearch getXwithDescription();
static WSingleSelectorName::NameDescriptionSearchTyp getXwithDescription();
/**
* getter
* \return the name of the single-selector (Y) with Description
*/
static WSingleSelectorName::NameDescriptionSearch getYwithDescription();
static WSingleSelectorName::NameDescriptionSearchTyp getYwithDescription();
/**
* getter
* \return the name of the single-selector (Z) with Description
*/
static WSingleSelectorName::NameDescriptionSearch getZwithDescription();
static WSingleSelectorName::NameDescriptionSearchTyp getZwithDescription();
/**
* getter
* \return the name of the single-selector (pdg) with Description
*/
static WSingleSelectorName::NameDescriptionSearch getPDGwithDescription();
static WSingleSelectorName::NameDescriptionSearchTyp getPDGwithDescription();
/**
* getter
* \return the name of the single-selector (edep) with Description
*/
static WSingleSelectorName::NameDescriptionSearch getEdepWithDescription();
static WSingleSelectorName::NameDescriptionSearchTyp getEdepWithDescription();
/**
* getter
* \return the name of the single-selector (Event id) with Description
*/
static WSingleSelectorName::NameDescriptionSearch getEventIdWithDescription();
static WSingleSelectorName::NameDescriptionSearchTyp getEventIdWithDescription();
/**
* getter
* \return the name of the single-selector (Parent id) with Description
*/
static WSingleSelectorName::NameDescriptionSearch getParentIdWithDescription();
static WSingleSelectorName::NameDescriptionSearchTyp getParentIdWithDescription();
/**
* getter
* \return List of names of the single-selectors with Description and Value that is searched for in the csv header
*/
static std::list< WSingleSelectorName::NameDescriptionSearch > getListOfSelectorContent();
static std::list< WSingleSelectorName::NameDescriptionSearchTyp > getListOfSelectorContent();
};
#endif // WSINGLESELECTORNAME_H
......@@ -44,19 +44,15 @@ void WColumnPropertyHandler::createProperties()
WPropertyBase::PropertyChangeNotifierType notifier = boost::bind( &WColumnPropertyHandler::propertyNotifier,
this, boost::placeholders::_1 );
InitializeSelectionItem();
m_columnSelectionGroup = m_properties->addPropertyGroup( "Select columns", "Select the columns which should be used" );
std::list< std::tuple< std::string, std::string, std::string > > names = WSingleSelectorName::getListOfSelectorContent();
for( std::tuple< std::string, std::string, std::string > selectorElement : names )
std::list< WColumnPropertyHandler::NameDescriptionSearchTyp > names = WSingleSelectorName::getListOfSelectorContent();
for( WColumnPropertyHandler::NameDescriptionSearchTyp selectorElement : names )
{
std::string columnName = std::get< 0 >( selectorElement );
std::string desciption = std::get< 1 >( selectorElement );
std::string defName = std::get< 2 >( selectorElement );
mapPropSelectionsToString.insert(
std::map< WPropSelection, std::string >::value_type( addHeaderProperty( columnName, desciption, defName, notifier ), columnName )
std::map< WPropSelection, std::string >::value_type( addHeaderProperty( selectorElement, notifier ), columnName )
);
}
}
......@@ -70,27 +66,63 @@ void WColumnPropertyHandler::updateProperty()
{
}
void WColumnPropertyHandler::InitializeSelectionItem()
boost::shared_ptr< WItemSelection > WColumnPropertyHandler::initializeSelectionItem( std::list< std::string > typeNames )
{
m_possibleSelectionsUsingTypes = WItemSelection::SPtr( new WItemSelection() );
boost::shared_ptr< WItemSelection > possibleSelectionsUsingTypes = WItemSelection::SPtr( new WItemSelection() );
std::vector< std::string > header = m_protonData->getHeaderFromType( typeNames );
std::vector< std::string > header = m_protonData->getCSVHeader()->at( 0 );
for( std::vector<std::string>::iterator colName = header.begin(); colName != header.end(); colName++ )
{
m_possibleSelectionsUsingTypes->addItem( ItemType::create( *colName, *colName, "", NULL ) );
possibleSelectionsUsingTypes->addItem( ItemType::create( *colName, *colName, "", NULL ) );
}
possibleSelectionsUsingTypes->addItem( ItemType::create( "- no selection -", "- no selection -", "", NULL ) );
return possibleSelectionsUsingTypes;
}
int WColumnPropertyHandler::getFilterIndex( int index, std::list< std::string > typeName )
{
std::vector< std::string > headerToSearch = m_protonData->getCSVHeader()->at( 0 );
std::string refheader = headerToSearch.at( index );
std::vector< std::string > singleSelectorContent = m_protonData->getHeaderFromType( typeName );
size_t indexCounter = 0;
for( std::vector<std::string>::iterator colName = singleSelectorContent.begin(); colName != singleSelectorContent.end(); colName++ )
{
if( *colName == refheader )
{
return indexCounter;
}