//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#pragma once

#include <functional>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

#include "gpu_compiled_function.hpp"
#include "gpu_tensor_wrapper.hpp"
#include "ngraph/node.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace gpu
        {
            /// \brief Collection of static code-emission entry points for the GPU backend.
            ///
            /// Every emitter takes the common parameter list EMIT_ARGS (a macro defined
            /// outside this header) and returns a std::string.
            class GPU_Emitter
            {
            public:
                /// \brief Obtain the emit callable associated with \p node.
                /// \param node the node an emitter is requested for
                /// \return a callable taking EMIT_ARGS and returning a std::string
                static std::function<std::string(EMIT_ARGS)> get_emit_function(const Node& node);

// This defines a collection of function declarations like this
// static std::string emit_Abs(EMIT_ARGS);
// static std::string emit_Acos(EMIT_ARGS);
#define NGRAPH_OP(OP, NAMESPACE) static std::string emit_##OP(EMIT_ARGS);
#include "op/op_tbl.hpp"
#undef NGRAPH_OP

                /// \brief Build an elementwise primitive for op type \p T and add it to the
                ///    runtime of the compiled function.
                ///
                /// Uses compiled_function, function_name, args and out from EMIT_ARGS.
                /// \return an empty string when the first output has size 0, otherwise the
                ///    value returned by compiled_function->add_to_runtime(...)
                /// \throws std::runtime_error if out is empty or holds more than one tensor
                template <typename T>
                static std::string emit_elementwise(EMIT_ARGS)
                {
                    // out[0] is dereferenced below; indexing an empty vector is undefined
                    // behaviour, so reject that case explicitly first.
                    if (out.empty())
                    {
                        throw std::runtime_error("Elementwise ops require exactly one output.");
                    }
                    // Nothing to emit for a zero-sized output.
                    if (out[0].get_size() == 0)
                    {
                        return "";
                    }
                    else if (out.size() > 1)
                    {
                        throw std::runtime_error(
                            "Multi-output elementwise ops are not currently supported.");
                    }
                    auto& cuda_emitter =
                        compiled_function->get_primitive_emitter()->get_cuda_emitter();

                    // One type name per input followed by one for the output.
                    std::vector<std::string> dtypes;
                    dtypes.reserve(args.size() + 1);
                    for (auto& arg : args)
                    {
                        dtypes.push_back(arg.get_type());
                    }
                    // Special case for bool data type: the literal name "bool" is used
                    // instead of the string reported by get_type().
                    if (out[0].get_element_type() == element::boolean)
                    {
                        dtypes.push_back("bool");
                    }
                    else
                    {
                        dtypes.push_back(out[0].get_type());
                    }
                    auto ew_index = cuda_emitter->build_elementwise<T>(dtypes, out[0].get_shape());

                    return compiled_function->add_to_runtime(ew_index, function_name, args, out);
                }

                // Additional emitters that are not generated from op_tbl.hpp.
                static std::string emit_ArgReduce(EMIT_ARGS, cudnnReduceTensorOp_t);
                static std::string emit_Sum_0(EMIT_ARGS);
                static std::string emit_Sum_1(EMIT_ARGS);

                /// \brief Create a list of node names for each arg in args
                /// \param args list of tensor arguments
                /// \param arg_indexes a list of indexes into args for which args to include in
                ///    the output list, so {1, 2} will include args 1 and 2 and skip 0.
                /// \return returns a string containing "arg0_name, arg1_name, etc."
                static std::string node_names(const std::vector<GPUTensorWrapper>& args,
                                              std::initializer_list<int> arg_indexes = {});
            };

            /// \brief Declaration only; the definition is not part of this header.
            ///
            /// NOTE(review): judging by the name and parameter names, this presumably
            /// returns the shape obtained from input_shape after applying the given
            /// below/above/interior padding -- confirm against the definition.
            /// \param input_shape shape before padding
            /// \param padding_below presumably per-axis padding at the low end -- TODO confirm
            /// \param padding_above presumably per-axis padding at the high end -- TODO confirm
            /// \param padding_interior presumably per-axis padding between elements -- TODO confirm
            /// \return a Shape by value
            Shape get_padded_shape(const Shape& input_shape,
                                   const Shape& padding_below,
                                   const Shape& padding_above,
                                   const Shape& padding_interior);
        }
    }
}
