#include <stdlib.h>
#include <iostream>
#include <chrono>
#include <omp.h>

/// Converts an arbitrary std::chrono duration into whole milliseconds.
///
/// @tparam T   A std::chrono::duration specialization (e.g. the result of
///             subtracting two time points).
/// @param dur  The duration to convert.
/// @return     The same time span expressed in std::chrono::milliseconds;
///             any sub-millisecond part is discarded by duration_cast.
template<typename T>
std::chrono::milliseconds PerfClockDurationMs(const T &dur) {
	using std::chrono::duration_cast;
	using std::chrono::milliseconds;

	const milliseconds asMilliseconds = duration_cast<milliseconds>(dur);
	return asMilliseconds;
}

/// Counts how many elements of an array are strictly greater than zero.
///
/// @param pD    Pointer to the first element of the array to scan.
/// @param size  Number of elements readable through pD.
/// @return      Number of elements for which `value > 0.0f` holds.
size_t CountPositiveValues(const float *pD, size_t size)
{
    const float *const pEnd = pD + size;
    size_t positives = 0;

    for(const float *pCur = pD; pCur != pEnd; ++pCur)
    {
        // Deliberately written as a conditional branch: this file must be
        // compiled *without* optimization so the compiler does not replace
        // the branch with branch-free code.
        if(*pCur > 0.0f)
            ++positives;
    }

    return positives;
}

// Runs CountPositiveValues() `repeats` times over the first `size` elements
// of `pD`, then prints the elapsed time in milliseconds and the count
// returned by the last repetition.
static void RunCountBenchmark(const float *pD, size_t size, unsigned repeats)
{
    using PerfClock_t = std::chrono::steady_clock;

    std::cout << "Counting positive values: ";
    const auto startTime = PerfClock_t::now();

    size_t count = 0;
    for(unsigned i = 0; i < repeats; ++i)
        count = CountPositiveValues(pD, size);

    const auto elapsed = PerfClock_t::now() - startTime;
    std::cout << PerfClockDurationMs(elapsed).count() << " ms (" << count << ")" << std::endl;
}

// Benchmark entry point: times the same counting loop twice, first over
// random values around zero and then over an array of ones, so that the two
// timings can be compared.
int main(int argc, char *argv[])
{
    // Allocate array of 256*(2^10) floats (1 MB)
    const size_t size = 256 * 1024;
    const unsigned repeats = 1024;
    float *pD = new float[size];

    // Fill array with random values in roughly the -0.5 to +0.5 range.
    // The "+ 1.0" is done in double on purpose: computing RAND_MAX + 1 in int
    // overflows (undefined behaviour) where RAND_MAX equals INT_MAX, which in
    // practice makes every generated value negative.
    for(size_t i = 0; i < size; ++i)
        pD[i] = static_cast<float>(rand() / (RAND_MAX + 1.0) - 0.5);

    RunCountBenchmark(pD, size, repeats);

    // Fill array with ones instead and try again
    for(size_t i = 0; i < size; ++i)
        pD[i] = 1.0f;

    RunCountBenchmark(pD, size, repeats);

    delete[] pD;

    return 0;
}