/**
* @file    kS3dPhaseSampler.x.h
*
* @internal
* Copyright (C) 2015-2022 by LMI Technologies Inc.  All rights reserved.
*/
#ifndef kS3D_PHASE_SAMPLER_X_H
#define kS3D_PHASE_SAMPLER_X_H

// Number of fractional bits in the interpolation weights read from the fraction LUT.
#define kS3D_PHASE_SAMPLER_FRACTION_LUT_SHIFT            (8)
// Full-scale interpolation weight (1 << SHIFT); a sample's two weights sum to this value.
#define kS3D_PHASE_SAMPLER_FRACTION_LUT_MAX              (1 << kS3D_PHASE_SAMPLER_FRACTION_LUT_SHIFT)
// Row stride of the fraction LUT; kS3dPhaseSampler_ProcessYProjectionPixel() indexes it as [offset * COUNT + span].
#define kS3D_PHASE_SAMPLER_FRACTION_LUT_COUNT            (kS3D_PHASE_SAMPLER_FRACTION_LUT_MAX + 1)
// Step thresholds for X and Y. Not referenced in this header; presumably these are the
// xProjThreshold / yProjThreshold limits passed to kS3dPhaseSampler_ProcessYProjectionPixel() -- TODO confirm.
#define kS3D_PHASE_SAMPLER_STEP_THRESHOLD_X              (5)
#define kS3D_PHASE_SAMPLER_STEP_THRESHOLD_Y              (2)

// Evaluates to true when neither the code nor the phase of pixel P (passed by pointer) holds its null value.
#define kS3dStripPhasePixel_IsValid_(P)                  (((P)->code != k8U_NULL) && ((P)->phase != k16S_NULL))

//////////////////////////////////////////////////////////////////////////

// Unpacked row sample produced by kS3dPhaseSampler_ProcessPhaseRow(): one entry per output phase position.
typedef struct kS3dPhaseRowSample
{
    k32s x;             // Interpolated image column in fixed point (kSPOT_CENTRE_SHIFT fractional bits), or k32S_NULL when no sample exists.
    k8u intensity;      // Intensity interpolated between the two neighbouring source pixels; 0 for null samples.
} kS3dPhaseRowSample;

// Packed sample: X and intensity share a single k32s so the whole sample is one 32-bit word.
// Field layout is given by the kS3D_PHASE_SAMPLE_X_* / kS3D_PHASE_SAMPLE_I_* mask and shift definitions.
// A content value of k32S_NULL marks a null sample (see kS3dPhaseSample_IsNull()).
typedef struct kS3dPhaseSample
{
    k32s content; 
} kS3dPhaseSample;


// X field of kS3dPhaseSample::content: the upper 24 bits (bits 8..31).
// The kS3D_PHASE_SAMPLE_X name is handed to kField_InsertNamed_ / kField_ExtractNamed_, which
// presumably append _MASK / _SHIFT to find these definitions -- confirm against the kField macros.
#define kS3D_PHASE_SAMPLE_X_MASK      (0xFFFFFF00)
#define kS3D_PHASE_SAMPLE_X_SHIFT     (8)

// Intensity field of kS3dPhaseSample::content: the lower 8 bits (bits 0..7).
#define kS3D_PHASE_SAMPLE_I_MASK      (0x000000FF)
#define kS3D_PHASE_SAMPLE_I_SHIFT     (0)

/**
* Initializes a packed sample from a coordinate value and an intensity.
*
* @param   sample      Sample to initialize; its previous content is discarded.
* @param   phase       Value stored in the X field (kS3D_PHASE_SAMPLE_X) of the packed word.
* @param   intensity   Value stored in the intensity field (kS3D_PHASE_SAMPLE_I) of the packed word.
*/
kCudaInlineFx(void) kS3dPhaseSample_Init(kS3dPhaseSample* sample, k32u phase, k8u intensity)
{
    // Give the packed word a defined starting value. The field inserts below operate on the
    // existing content (presumably as a read-modify-write), which is indeterminate for a freshly
    // declared sample. The X and I masks together cover all 32 bits, so the result is unchanged.
    sample->content = 0;

    kField_InsertNamed_(&sample->content, kS3D_PHASE_SAMPLE_X, phase);
    kField_InsertNamed_(&sample->content, kS3D_PHASE_SAMPLE_I, intensity);
}

/**
* Reads the X field of a packed sample.
*
* @param   sample      Packed sample to read.
* @return              Content of the kS3D_PHASE_SAMPLE_X field, converted to k32s.
*/
kCudaInlineFx(k32s) kS3dPhaseSample_Coord(const kS3dPhaseSample* sample)
{
    const k32s packed = sample->content;

    return (k32s)kField_ExtractNamed_(packed, kS3D_PHASE_SAMPLE_X);
}

/**
* Reads the intensity field of a packed sample.
*
* @param   sample      Packed sample to read.
* @return              Content of the kS3D_PHASE_SAMPLE_I field, converted to k8u.
*/
kCudaInlineFx(k8u) kS3dPhaseSample_Intensity(const kS3dPhaseSample* sample)
{
    const k32s packed = sample->content;

    return (k8u)kField_ExtractNamed_(packed, kS3D_PHASE_SAMPLE_I);
}

/**
* Reports whether a packed sample is null.
*
* @param   sample      Packed sample to test.
* @return              True when the whole packed word equals k32S_NULL.
*/
kCudaInlineFx(kBool) kS3dPhaseSample_IsNull(const kS3dPhaseSample* sample)
{
    const k32s packed = sample->content;

    return packed == k32S_NULL;
}

//////////////////////////////////////////////////////////////////////////


// Running state carried between successive calls to kS3dPhaseSampler_ProcessYProjectionPixel().
// It holds the previously processed sample and the current write position in the output row.
typedef struct kS3dPhaseYSampleContext 
{
    k32s x;                     // x of the previously processed row sample (may be k32S_NULL).
    k8u  intensity;             // Intensity of the previously processed row sample.
    k16s xProj;                 // Rectified X of the previous sample, or k16S_NULL.
    k16s yProj;                 // Rectified Y of the previous sample, or k16S_NULL.

    k16s yProjPosition;         // Y sampling position that corresponds to the current destination iterators.
    k16s yProjMaxFilled;        // Highest Y sampling position written so far; used to detect backtracking.

    k16s* xDstIt;               // Next X-projection output element.
    k16s* xDstEnd;              // One past the last X-projection output element.
    k8u* intensityDstIt;        // Next intensity output element; null when no intensity output is produced.
    kPoint32s* textureDstIt;    // Next texture output element; null when no texture output is produced.

} kS3dPhaseYSampleContext;

// Private instance data of the kS3dPhaseSampler class.
typedef struct kS3dPhaseSamplerClass
{
    kObjectClass base;
    kS3dStereoProfiler profiler;        // Profiler supplied to kS3dPhaseSampler_Init().

    // Algorithm parameters
    kSize imageWidth;                   // Number of source pixels per row scanned by kS3dPhaseSampler_ProcessPhaseRow().
    kSize imageHeight;
    kSize phasePeriod;                  // Phase period; half of it is the largest accepted phase step between neighbouring pixels.

    kSize phaseBegin;                   // Phase value of the first output sample in a row.
    kSize phaseStepShift;               // log2 of the phase step between consecutive output samples.
    kSize outputHeight;                 // Number of phase samples produced per source row.

    kSSize ySampleBegin;                // NOTE(review): presumably the first Y sampling position of the projection stage -- confirm.
    kSize ySampleStep;                  // NOTE(review): presumably the Y sampling step of the projection stage -- confirm.
    kSize outputWidth;

    // Internal data
    kVsJobQueue jobQueue;

    kArray2 fractionLut;
    kArray2 rowSamples;
    kArray1 ySampleContext;

    // Input and output handles
    kSize inputViewIndex;
    kArray2 inputStripePhaseMap;
    kArray2 outputXProj;
    kArray2 outputIntensity;
    kArray2 outputTexture;

    // Cuda buffers for kS3dPhaseSampler_RunCuda() //
    kArray2 cudaRowSamples; // kArray2<kS3dPhaseSample> packed for atomic
    kArray2 cudaOutXProj;   // kArray2<kS3dPhaseSample> packed

    kCudaStream cudaStream;

} kS3dPhaseSamplerClass;

// Declares the kS3dPhaseSampler class with kObject as its base.
kDeclareClassEx(kVs, kS3dPhaseSampler, kObject)

// Semi-private exported functions (virtual method overrides)

kVsFx(kStatus) kS3dPhaseSampler_VInitClone(kS3dPhaseSampler sampler, kS3dPhaseSampler source, kAlloc allocator);
kVsFx(kStatus) kS3dPhaseSampler_VRelease(kS3dPhaseSampler sampler);
kVsFx(kSize)   kS3dPhaseSampler_VSize(kS3dPhaseSampler sampler);

// Non-exported (private) methods
kStatus kS3dPhaseSampler_Init(kS3dPhaseSampler sampler, kS3dStereoProfiler profiler, kAlloc allocator);

// CUDA //////////////////////////////////////////////////////////////////
// With K_HAVE_CUDA defined, the CUDA entry points are declared here and implemented elsewhere.
// Without it, setup and release become no-op stubs returning kOK and the internals are not declared.
#if defined (K_HAVE_CUDA)
kStatus kS3dPhaseSampler_SetupCuda(kS3dPhaseSampler sampler);
kStatus kS3dPhaseSampler_ReleaseCuda(kS3dPhaseSampler sampler);

// Cuda internals //
kStatus kS3dPhaseSampler_NullSamplesCuda(kS3dPhaseSampler sampler, kArray2 phaseSamples);
kStatus kS3dPhaseSampler_ProcessPhaseCuda(kS3dPhaseSampler sampler, kSize viewIndex, kArray2 inStripePhaseMap, kArray2 outRowSamples, kBool intensityFlag);
kStatus kS3dPhaseSampler_ProcessYProjectionCuda(kS3dPhaseSampler sampler, kSize viewIndex, kArray2 inRowSamples, kArray2 outXProj, kBool intensityFlag);
kStatus kS3dPhaseSampler_ExportSamplesCuda(kS3dPhaseSampler sampler, kArray2 cudaInXProj, kArray2 cudaOutXProj, kArray2 cudaOutIntensity, kBool intensityFlag);

#else
// No CUDA support: nothing to set up or release.
kInlineFx(kStatus) kS3dPhaseSampler_SetupCuda(kS3dPhaseSampler sampler) { return kOK; }
kInlineFx(kStatus) kS3dPhaseSampler_ReleaseCuda(kS3dPhaseSampler sampler) { return kOK; }
#endif

// HOST //////////////////////////////////////////////////////////////////

// Host processing stages; each call covers a sub-range (rows, or phase indices) of the full data set.
kStatus kS3dPhaseSampler_ProcessPhase(kS3dPhaseSampler sampler, kSize viewIndex, kArray2 stripePhaseMap, kArray2 rowSamples, kSize startRow, kSize rowCount);
kStatus kS3dPhaseSampler_ProcessYProjection(kS3dPhaseSampler sampler, kSize viewIndex, kArray2 rowSamples, kArray2 xProj, kArray2 intensity, kArray2 texture, kSize startPhaseIndex, kSize phaseCount);

// Handlers taking an id (NOTE(review): presumably job-queue callbacks for the two stages above -- confirm) //

kStatus kCall kS3dPhaseSampler_ProcessPhaseHandler(kS3dPhaseSampler sampler, k64u id);
kStatus kCall kS3dPhaseSampler_ProcessYProjectionHandler(kS3dPhaseSampler sampler, k64u id);


// Semi-private methods (API can change any time)
kVsFx(kStatus) kS3dPhaseSampler_SetJobQueue(kS3dPhaseSampler sampler, kVsJobQueue queue);

//////////////////////////////////////////////////////////////////////////
//
// Inline 
//
//////////////////////////////////////////////////////////////////////////

/**
* Resamples one row of the stripe/phase map onto a regular phase grid.
*
* Walks neighbouring source pixel pairs (j-1, j). For every output phase position that falls between
* the two pixel phases, it linearly interpolates the sub-pixel column and the intensity. Output
* positions with no covering pixel pair are set to null (x = k32S_NULL, intensity = 0). When the
* source phase steps back into an already filled range, the affected output entries are revisited
* and the candidate with the larger x * zCompareSign is kept.
*
* @param   obj                 Sampler instance data (imageWidth, outputHeight, phaseBegin, phaseStepShift, phasePeriod are read).
* @param   zCompareSign        Sign applied to x when choosing between an existing and a new sample while backtracking.
* @param   stripePhaseMapRow   Source row; obj->imageWidth pixels are read.
* @param   rowSamplesRow       Destination row; obj->outputHeight entries are written.
* @return                      Always kOK.
*/
kCudaInlineFx(kStatus) kS3dPhaseSampler_ProcessPhaseRow(kS3dPhaseSamplerClass* obj, k32s zCompareSign, const kPhasePixel2* stripePhaseMapRow, kS3dPhaseRowSample* rowSamplesRow)
{
    // Source iterator starts at pixel 1 so that srcIt[-1] is always a valid left neighbour.
    const kpp::PhasePixel2* RESTRICTED srcIt = (const kpp::PhasePixel2*)stripePhaseMapRow + 1;
    kS3dPhaseRowSample* RESTRICTED dstBegin = rowSamplesRow;
    kS3dPhaseRowSample* RESTRICTED dstIt = dstBegin;
    kS3dPhaseRowSample* RESTRICTED dstEnd = dstIt + obj->outputHeight;

    k32s phaseBegin = (k32s)obj->phaseBegin;
    k32s phaseStep = 1 << obj->phaseStepShift;
    // Phase value of the last output sample.
    k32s phaseMax = phaseBegin + ((k32s)(obj->outputHeight - 1))* phaseStep;
    k32s phasePeriod = (k32s)obj->phasePeriod;
    // Neighbouring pixels whose phase differs by this much or more are not interpolated across.
    k32s phaseDeltaMax = phasePeriod / 2;
    k32s phase0, phase1;
    k32s backtrackIndex;
    k32s fract, dstX;
    k8u dstIntensity;

    // dstValue is the phase value that corresponds to dstIt; both are always advanced together.
    k32s dstValue = phaseBegin;
    k32s dstFilledMax = k32S_MIN; // keep track of the maximum filled value for backtracking 

    // NOTE(review): project-defined macro, presumably a compiler trip-count hint -- confirm.
    LOOP_MUST_ITERATE(8, , 8);
    for (kSize j = 1; j < obj->imageWidth; j++, srcIt++)
    {
        // Both pixels of the pair must be valid.
        if (!srcIt[0].IsValid() || !srcIt[-1].IsValid()) continue;

        // reverse phase values for sampling (raw phase decreases as stripes increase)
        // NOTE(review): no reversal is visible here; presumably Phase() already returns the reversed value -- confirm.
        phase0 = srcIt[-1].Phase();
        phase1 = srcIt[0].Phase();

        // Only strictly increasing phase pairs with a step below half a period are used.
        if (phase1 <= phase0 || (phase1 - phase0) >= phaseDeltaMax) continue;

        if (phase0 >= phaseBegin && phase0 < dstFilledMax && phase1 < phaseMax)  // Source data backtracked due to occlusion
        {
            //Check if the existing value is closer or farther to the camera
            // Rewind the destination iterator to the output sample at or just below phase0.
            backtrackIndex = (phase0 - phaseBegin) >> obj->phaseStepShift;
            dstIt = &dstBegin[backtrackIndex];
            dstValue = phaseBegin + backtrackIndex * phaseStep;
        }

        // filling new data
        // Advance to phase0; output positions that were never filled before become null.
        while (dstIt != dstEnd && dstValue < phase0)
        {
            if (dstValue > dstFilledMax)    // avoid overwriting previously filled data with nulls
            {
                dstIt->x = k32S_NULL;
                dstIt->intensity = 0;
                dstFilledMax = dstValue;
            }

            dstValue += phaseStep;
            dstIt++;
        }

        // Interpolate every output position that lies in [phase0, phase1).
        while (dstIt != dstEnd && dstValue < phase1)
        {
            // fract: position between the two pixels, in fixed point with kSPOT_CENTRE_SHIFT fractional bits.
            fract = ((dstValue - phase0) << kSPOT_CENTRE_SHIFT) / (phase1 - phase0);
            dstX = ((((k32s)j) - 1) << kSPOT_CENTRE_SHIFT) + fract;
            dstIntensity = (k8u)(srcIt[-1].Intensity() + (((((k32s)(srcIt->Intensity())) - ((k32s)(srcIt[-1].Intensity()))) * fract) >> kSPOT_CENTRE_SHIFT));

            if (dstValue > dstFilledMax)
            {
                // New territory: write unconditionally.
                dstIt->x = dstX;
                dstIt->intensity = dstIntensity;
                dstFilledMax = dstValue;
            }
            else if (dstIt->x == k32S_NULL || (dstX*zCompareSign > dstIt->x*zCompareSign))
            {
                // Backtracking: replace a null entry, or an entry that loses the sign-weighted x comparison.
                dstIt->x = dstX;
                dstIt->intensity = dstIntensity;
            }

            dstIt++;
            dstValue += phaseStep;
        }
    }

    // Null the remaining output positions that were never filled.
    while (dstIt != dstEnd)
    {
        if (dstValue > dstFilledMax)
        {
            dstIt->x = k32S_NULL;
            dstIt->intensity = 0;
        }

        dstValue += phaseStep;
        dstIt++;
    }

    return kOK;
}

//////////////////////////////////////////////////////////////////////////
// split by columns of rowSamples
//////////////////////////////////////////////////////////////////////////

/**
* Processes one row sample of a column and resamples it onto the regular Y sampling grid.
*
* The current sample is rectified to (xProj1, yProj1) and paired with the previous sample stored in
* samplingContext. If the pair is close enough (within the X/Y thresholds), every Y sampling position
* in [yProj0, yProj1) receives a linearly interpolated X projection, and optionally intensity and
* texture coordinates. Sampling positions skipped on the way are set to null. When the pair starts
* below the highest position already filled, the output iterators are rewound; an existing sample is
* then only replaced if it is null or loses the zCompareSign-weighted X comparison.
*
* The destination iterators in samplingContext always correspond to samplingContext->yProjPosition;
* every code path that advances the position must also advance the iterators.
*
* @param   srcIt               Current row sample (x, intensity).
* @param   scaledRow           Row coordinate of srcIt, passed to rectification and used for the texture Y output.
* @param   viewIndex           View index passed to rectification.
* @param   zCompareSign        Sign applied to X projections when choosing between existing and new samples.
* @param   ySampleBegin        Y sampling position of the first output element.
* @param   ySampleStep         Distance between consecutive Y sampling positions.
* @param   ySampleMax          Upper limit on yProj1 for backtracking to be performed.
* @param   outputWidth         Not used by this function.
* @param   xProjThreshold      Pairs with |xProj1 - xProj0| at or above this value are not interpolated.
* @param   yProjThreshold      Pairs with (yProj1 - yProj0) at or above this value are not interpolated.
* @param   fractionLut         Flat weight table indexed as [offset * kS3D_PHASE_SAMPLER_FRACTION_LUT_COUNT + span].
* @param   samplingContext     Per-column running state; updated with the current sample on return.
* @param   hwContext           Context used for rectification.
* @param   xProjRow            Start of the X projection output row (used when backtracking).
* @param   intensityRow        Start of the intensity output row, or null.
* @param   textureRow          Start of the texture output row, or null.
* @return                      Always kOK.
*/
kCudaInlineFx(kStatus) kS3dPhaseSampler_ProcessYProjectionPixel(
    kS3dPhaseRowSample* srcIt, k32s scaledRow, 
    kSize viewIndex, k32s zCompareSign,
    k16s ySampleBegin, k16s ySampleStep, k16s ySampleMax, kSize outputWidth,
    k32s xProjThreshold, k32s yProjThreshold, 
    k16s* fractionLut,
    kS3dPhaseYSampleContext* samplingContext,
    kS3dStereoProfilerHwContext hwContext,
    k16s* xProjRow, k8u* intensityRow, kPoint32s* textureRow)
{
    k16s xProj0, yProj0;
    k16s xProj1, yProj1;
    k32s fract0, fract1;
    k32s backtrackIndex;
    k16s xDst;
    k8u intensityDst;
    kPoint32s nullTexturePt = { k32S_NULL, k32S_NULL };

    // Rectify the current sample; a null sample yields null projections.
    if (srcIt->x != k32S_NULL)
    {
        kS3dStereoProfilerHwContext_RectifyLinear(hwContext, viewIndex, srcIt->x, scaledRow, &xProj1, &yProj1);
    }
    else
    {
        xProj1 = yProj1 = k16S_NULL;
    }

    // Projection of the previously processed sample.
    xProj0 = samplingContext->xProj;
    yProj0 = samplingContext->yProj;

    if (yProj0 != k16S_NULL && yProj1 != k16S_NULL && (yProj1 - yProj0) < yProjThreshold && kAbs_(xProj1 - xProj0) < xProjThreshold)
    {
        if (yProj0 < samplingContext->yProjMaxFilled && yProj0 >= ySampleBegin && yProj1 < ySampleMax)  // backtrack
        {
            // Rewind the iterators and the position together to the sampling position at or just below yProj0.
            backtrackIndex = (yProj0 - ySampleBegin) / ySampleStep;
            samplingContext->xDstIt = xProjRow + backtrackIndex;
            samplingContext->intensityDstIt = (k8u*)(kIsNull(intensityRow) ? kNULL : intensityRow + backtrackIndex);
            samplingContext->textureDstIt = (kPoint32s*)(kIsNull(textureRow) ? kNULL : textureRow + backtrackIndex);
            samplingContext->yProjPosition = ySampleBegin + (k16s)(backtrackIndex*ySampleStep);
        }

        // Advance to yProj0; positions that were never filled before become null.
        while (samplingContext->xDstIt != samplingContext->xDstEnd && samplingContext->yProjPosition < yProj0)
        {
            if (samplingContext->yProjPosition > samplingContext->yProjMaxFilled)
            {
                samplingContext->yProjMaxFilled = samplingContext->yProjPosition;

                if (!kIsNull(samplingContext->intensityDstIt))    *samplingContext->intensityDstIt++ = 0;
                if (!kIsNull(samplingContext->textureDstIt))      *samplingContext->textureDstIt++ = nullTexturePt;

                *samplingContext->xDstIt++ = k16S_NULL;
                samplingContext->yProjPosition += ySampleStep;
            }
            else
            {
                // Previously filled data: step over it without overwriting.
                if (!kIsNull(samplingContext->intensityDstIt))    samplingContext->intensityDstIt++;
                if (!kIsNull(samplingContext->textureDstIt))      samplingContext->textureDstIt++;

                samplingContext->xDstIt++;
                samplingContext->yProjPosition += ySampleStep;
            }
        }

        // Interpolate every sampling position in [yProj0, yProj1).
        while (samplingContext->xDstIt != samplingContext->xDstEnd && samplingContext->yProjPosition < yProj1)
        {
            // fract1 weights the current sample, fract0 the previous one; they sum to FRACTION_LUT_MAX.
            fract1 = fractionLut[(samplingContext->yProjPosition - yProj0)*kS3D_PHASE_SAMPLER_FRACTION_LUT_COUNT + (yProj1 - yProj0)];
            fract0 = kS3D_PHASE_SAMPLER_FRACTION_LUT_MAX - fract1;
            xDst = (k16s)((((k32s)xProj0)*fract0 + ((k32s)xProj1)*fract1) >> kS3D_PHASE_SAMPLER_FRACTION_LUT_SHIFT);

            if ((samplingContext->yProjPosition > samplingContext->yProjMaxFilled)      // new data. fill unconditionally
                || ((*samplingContext->xDstIt) == k16S_NULL)                      // or backtracking and filling previously unfilled data 
                || (zCompareSign*xDst > zCompareSign*(*samplingContext->xDstIt))) // or backtracking and replacing data that's further from the camera
            {
                *samplingContext->xDstIt++ = xDst;
                samplingContext->yProjMaxFilled = kMax_(samplingContext->yProjPosition, samplingContext->yProjMaxFilled);

                if (!kIsNull(samplingContext->intensityDstIt))
                {
                    intensityDst = (k8u)((((k32s)samplingContext->intensity)*fract0 + ((k32s)srcIt->intensity)*fract1) >> kS3D_PHASE_SAMPLER_FRACTION_LUT_SHIFT);
                    *samplingContext->intensityDstIt++ = intensityDst;
                }

                if (!kIsNull(samplingContext->textureDstIt))
                {
                    if (samplingContext->x != k32S_NULL && srcIt->x != k32S_NULL)
                    {
                        // The previous sample is taken to lie one row (kSPOT_CENTRE_SCALE) before scaledRow.
                        samplingContext->textureDstIt->x = (samplingContext->x * fract0 + srcIt->x * fract1) >> kS3D_PHASE_SAMPLER_FRACTION_LUT_SHIFT;
                        samplingContext->textureDstIt->y = ((scaledRow - kSPOT_CENTRE_SCALE) * fract0 + scaledRow * fract1) >> kS3D_PHASE_SAMPLER_FRACTION_LUT_SHIFT;
                    }
                    else
                    {
                        *samplingContext->textureDstIt = nullTexturePt;
                    }
                    samplingContext->textureDstIt++;
                }
            }
            else
            {
                // Backtracking over a sample that wins the comparison: keep it, but still step past it.
                // Without this the iterators fall behind yProjPosition and all later samples of this
                // column are written to the wrong output elements.
                samplingContext->xDstIt++;

                if (!kIsNull(samplingContext->intensityDstIt))    samplingContext->intensityDstIt++;
                if (!kIsNull(samplingContext->textureDstIt))      samplingContext->textureDstIt++;
            }

            samplingContext->yProjPosition += ySampleStep;
        }
    }

    // save last row's value
    samplingContext->x = srcIt->x;
    samplingContext->intensity = srcIt->intensity;
    samplingContext->xProj = xProj1;
    samplingContext->yProj = yProj1;

    return kOK;
}

//////////////////////////////////////////////////////////////////////////
// Save rowSamples for debug (currently disabled)
//
// packedFlag(true)  == kS3dPhaseSample
// packedFlag(false) == kS3dPhaseRowSample
//
//////////////////////////////////////////////////////////////////////////

//template<kBool intensityFlag, kBool packedFlag>
//kInlineFx(kStatus) kS3dPhaseSampler_SavePhaseSamples(kArray2 rowSamples, const kText256 folderName)
//{
//    kArray2 xSamples, iSamples;
//
//    const kSize height = kArray2_Length(rowSamples, 0);
//    const kSize width = kArray2_Length(rowSamples, 1);
//
//    kCheck(kArray2_Construct(&xSamples, kTypeOf(k32s), height, width, kNULL));
//    kCheck(kArray2_Construct(&iSamples, kTypeOf(k8u), height, width, kNULL));
//
//    for (kSize h = 0; h < height; ++h)
//    {
//        for (kSize w = 0; w < width; ++w)
//        {
//
//            if (packedFlag) // Cuda
//            {
//                const kS3dPhaseSample* sample = (kS3dPhaseSample*)kArray2_At(rowSamples, h, w);
//
//                if (kS3dPhaseSample_IsNull(sample))
//                {
//                    kArray2_AsT(iSamples, h, w, k8u) = 0;
//                    kArray2_AsT(xSamples, h, w, k32s) = k32S_NULL;
//                }
//                else if (intensityFlag)
//                {
//                    kArray2_AsT(iSamples, h, w, k8u) = kS3dPhaseSample_Intensity(sample);
//                    kArray2_AsT(xSamples, h, w, k32s) = kS3dPhaseSample_Coord(sample);
//                }
//                else
//                {
//                    kArray2_AsT(iSamples, h, w, k8u) = 0;
//                    kArray2_AsT(xSamples, h, w, k32s) = sample->content;
//                }
//            } // CPU
//            else
//            {
//                const kS3dPhaseRowSample* sample = (kS3dPhaseRowSample*)kArray2_At(rowSamples, h, w);
//
//                kArray2_AsT(xSamples, h, w, k32s) = sample->x;
//                kArray2_AsT(iSamples, h, w, k8u) = sample->intensity;
//            }
//        }
//    }
//
//    // //
//    kChar fileName[kPATH_MAX];
//
//    kCheck(kPath_Combine(folderName, "rowSamplesX.kdat", fileName, kCountOf(fileName)));
//
//    kCheck(kSave5(xSamples, fileName));
//
//    kCheck(kPath_Combine(folderName, "rowSamplesI.kdat", fileName, kCountOf(fileName)));
//
//    kCheck(kSave5(iSamples, fileName));
//
//    // //
//    kCheck(kDisposeRef(&xSamples));
//    kCheck(kDisposeRef(&iSamples));
//
//    return kOK;
//}
//
////////////////////////////////////////////////////////////////////////////
////
////////////////////////////////////////////////////////////////////////////
//
//kInlineFx(kStatus) kS3dPhaseSampler_SavePhaseSamples(kArray2 rowSamples, const kText256 folderName, kBool intensityFlag)
//{
//    kStatus status;
//
//    if (intensityFlag)
//        status = kS3dPhaseSampler_SavePhaseSamples<kTRUE, kTRUE>(rowSamples, folderName);
//    else
//        status = kS3dPhaseSampler_SavePhaseSamples<kFALSE, kTRUE>(rowSamples, folderName);
//
//    return status;
//}

//////////////////////////////////////////////////////////////////////////
//
//////////////////////////////////////////////////////////////////////////

#endif  /* #ifndef kS3D_PHASE_SAMPLER_X_H */
