package sproto

import (
	"context"
	"strconv"
	"time"

	"golang.org/x/exp/maps"
	"google.golang.org/protobuf/types/known/timestamppb"

	"github.com/determined-ai/determined/master/pkg/aproto"
	"github.com/determined-ai/determined/master/pkg/cproto"
	"github.com/determined-ai/determined/master/pkg/device"
	"github.com/determined-ai/determined/master/pkg/logger"
	"github.com/determined-ai/determined/master/pkg/model"
	"github.com/determined-ai/determined/master/pkg/schemas/expconf"
	"github.com/determined-ai/determined/master/pkg/syncx/queue"
	"github.com/determined-ai/determined/master/pkg/tasks"
	"github.com/determined-ai/determined/proto/pkg/taskv1"
)

// Task-related cluster level messages.
type (
	// AllocateRequest notifies resource managers to assign resources to a task.
	// Its fields are grouped into identifying information, resource configuration,
	// behavioral configuration and logging context.
	AllocateRequest struct {
		// Identifying information.
		AllocationID      model.AllocationID
		TaskID            model.TaskID
		JobID             model.JobID
		RequestTime       time.Time
		JobSubmissionTime time.Time
		// IsUserVisible determines whether the AllocateRequest should
		// be considered in user-visible reports.
		IsUserVisible bool
		State         SchedulingState
		Name          string

		// Resource configuration.
		SlotsNeeded         int
		ResourcePool        string
		FittingRequirements FittingRequirements

		// Behavioral configuration.
		Preemption  PreemptionConfig
		IdleTimeout *IdleTimeoutConfig
		ProxyPorts  []*ProxyPortConfig
		Restore     bool
		ProxyTLS    bool

		// Logging context of the allocation actor.
		LogContext logger.Context

		// BlockedNodes is a list of node names.
		// NOTE(review): presumably nodes this request must not be scheduled on — confirm
		// against the resource manager implementations.
		BlockedNodes []string
	}

	// IdleTimeoutConfig configures how idle timeouts should behave.
	IdleTimeoutConfig struct {
		ServiceID       string
		UseProxyState   bool
		UseRunnerState  bool
		TimeoutDuration time.Duration
		Debug           bool
	}

	// PreemptionConfig configures task preemption.
	PreemptionConfig struct {
		Preemptible     bool
		TimeoutDuration time.Duration
	}

	// ProxyPortConfig configures a proxy the allocation should start. It can be converted
	// to its proto representation with Proto.
	ProxyPortConfig struct {
		ServiceID       string `json:"service_id"`
		Port            int    `json:"port"`
		ProxyTCP        bool   `json:"proxy_tcp"`
		Unauthenticated bool   `json:"unauthenticated"`
	}

	// ResourcesReleased notifies resource providers to return resources from a task.
	// NOTE(review): ResourcesID is a pointer; presumably nil means "all resources of the
	// allocation" — confirm against the handlers of this message.
	ResourcesReleased struct {
		AllocationID model.AllocationID
		ResourcesID  *ResourcesID
		ResourcePool string
	}

	// AllocationSummary contains information about a task for external display. It can be
	// converted to its proto representation with Proto.
	AllocationSummary struct {
		TaskID         model.TaskID       `json:"task_id"`
		AllocationID   model.AllocationID `json:"allocation_id"`
		Name           string             `json:"name"`
		RegisteredTime time.Time          `json:"registered_time"`
		ResourcePool   string             `json:"resource_pool"`
		SlotsNeeded    int                `json:"slots_needed"`
		Resources      []ResourcesSummary `json:"resources"`
		SchedulerType  string             `json:"scheduler_type"`
		Priority       *int               `json:"priority"`
		ProxyPorts     []*ProxyPortConfig `json:"proxy_ports,omitempty"`
	}

	// ValidateResourcesRequest is a message asking the resource manager whether the given
	// resource pool can (or, rather, whether it is not impossible to) fulfill a request
	// for the given number of slots.
	ValidateResourcesRequest struct {
		ResourcePool string
		Slots        int
		IsSingleNode bool
		TaskID       *model.TaskID
	}

	// ValidateResourcesResponse is the response to ValidateResourcesRequest.
	ValidateResourcesResponse struct {
		// Fulfillable values:
		// - false: impossible to fulfill
		// - true: ok or unknown
		Fulfillable bool
	}
)

// ResourcesEvent describes a change in status or state of an allocation's resources.
// It is a marker interface: its single method takes no arguments and returns nothing,
// so implementing it only tags a type as deliverable through a ResourcesSubscription.
type ResourcesEvent interface{ ResourcesEvent() }

// ResourcesReleasedEvent notes when the RM has acknowledged resources are released.
// It carries no data.
type ResourcesReleasedEvent struct{}

// ResourcesEvent implements ResourcesEvent. Note the value receiver: both
// ResourcesReleasedEvent and *ResourcesReleasedEvent satisfy the interface.
func (ResourcesReleasedEvent) ResourcesEvent() {}

// ResourcesEvent marks *ResourcesAllocated as a ResourcesEvent.
func (*ResourcesAllocated) ResourcesEvent() {}

// ResourcesEvent marks *InvalidResourcesRequestError as a ResourcesEvent.
func (*InvalidResourcesRequestError) ResourcesEvent() {}

// ResourcesEvent marks *ReleaseResources as a ResourcesEvent.
func (*ReleaseResources) ResourcesEvent() {}

// ResourcesEvent marks *ResourcesStateChanged as a ResourcesEvent.
func (*ResourcesStateChanged) ResourcesEvent() {}

// ResourcesEvent marks *ResourcesFailedError as a ResourcesEvent.
func (*ResourcesFailedError) ResourcesEvent() {}

// ResourcesEvent marks *ContainerLog as a ResourcesEvent.
func (*ContainerLog) ResourcesEvent() {}

// ResourcesUnsubscribeFn closes a subscription. It takes no arguments and returns nothing;
// ResourcesSubscription.Close invokes it.
type ResourcesUnsubscribeFn func()

// ResourcesSubscription is a subscription for streaming ResourcesEvents. It must be closed when
// you are finished consuming events. Blocking on the subscription forever (without closing it)
// can cause the publisher to back up and adversely affect the system.
type ResourcesSubscription struct {
	// inbox is the queue that Get, GetWithContext and Len read from.
	// NOTE(review): the previous comment here said "C is never closed, because only the consumer
	// knows, by aggregating events, when events stop", referring to a field C that no longer
	// exists in this struct; presumably the same reasoning applies to inbox — confirm.
	inbox *queue.Queue[ResourcesEvent]
	// unsub is invoked by Close.
	unsub ResourcesUnsubscribeFn
}

// NewAllocationSubscription creates a new subscription that reads its events from inbox
// and calls cl when the subscription is closed.
func NewAllocationSubscription(
	inbox *queue.Queue[ResourcesEvent],
	cl ResourcesUnsubscribeFn,
) *ResourcesSubscription {
	sub := new(ResourcesSubscription)
	sub.inbox = inbox
	sub.unsub = cl
	return sub
}

// Get blocks until an event is published for our subscription's topic and returns it.
// The publisher is expected to deliver a ResourcesReleasedEvent when the subscription is
// closed (that behavior lives in the publisher, not in this file).
func (a *ResourcesSubscription) Get() ResourcesEvent {
	event := a.inbox.Get()
	return event
}

// GetWithContext blocks until an event is published for our subscription's topic or the
// context is canceled, forwarding whatever event and error the underlying queue reports.
// The publisher is expected to deliver a ResourcesReleasedEvent when the subscription is
// closed (that behavior lives in the publisher, not in this file).
func (a *ResourcesSubscription) GetWithContext(ctx context.Context) (ResourcesEvent, error) {
	event, err := a.inbox.GetWithContext(ctx)
	return event, err
}

// Close unsubscribes us from further updates by invoking the unsubscribe function the
// subscription was created with.
func (a *ResourcesSubscription) Close() {
	unsubscribe := a.unsub
	unsubscribe()
}

// Len returns the count of pending events, as reported by the underlying inbox queue.
func (a *ResourcesSubscription) Len() int {
	pending := a.inbox.Len()
	return pending
}

// Proto returns the proto representation of ProxyPortConfig. A nil receiver yields a nil
// proto message. Port is narrowed from int to int32.
func (p *ProxyPortConfig) Proto() *taskv1.ProxyPortConfig {
	if p == nil {
		return nil
	}

	pb := new(taskv1.ProxyPortConfig)
	pb.ServiceId = p.ServiceID
	pb.Port = int32(p.Port)
	pb.ProxyTcp = p.ProxyTCP
	pb.Unauthenticated = p.Unauthenticated
	return pb
}

// Proto returns the proto representation of AllocationSummary.
//
// A nil receiver yields a nil proto message. Resources is always emitted as a non-nil
// (possibly empty) slice, Priority is only set when the source pointer is non-nil, and
// ProxyPorts is only set when the source slice is non-nil. SlotsNeeded and Priority are
// narrowed from int to int32.
func (a *AllocationSummary) Proto() *taskv1.AllocationSummary {
	if a == nil {
		return nil
	}

	// The final length is known, so allocate once. Index into the slice rather than ranging
	// by value to avoid copying each ResourcesSummary just to take its address.
	pbResources := make([]*taskv1.ResourcesSummary, 0, len(a.Resources))
	for i := range a.Resources {
		pbResources = append(pbResources, a.Resources[i].Proto())
	}

	pbAllocationSummary := &taskv1.AllocationSummary{
		TaskId:         string(a.TaskID),
		AllocationId:   string(a.AllocationID),
		Name:           a.Name,
		RegisteredTime: timestamppb.New(a.RegisteredTime),
		ResourcePool:   a.ResourcePool,
		SlotsNeeded:    int32(a.SlotsNeeded),
		Resources:      pbResources,
		SchedulerType:  a.SchedulerType,
	}

	if a.Priority != nil {
		pbPriority := int32(*a.Priority)
		pbAllocationSummary.Priority = &pbPriority
	}

	// Keep the nil/non-nil distinction of the source slice: a nil ProxyPorts stays unset,
	// while an empty non-nil one becomes an empty non-nil slice.
	if a.ProxyPorts != nil {
		pbProxyPorts := make([]*taskv1.ProxyPortConfig, 0, len(a.ProxyPorts))
		for _, proxyPortConfig := range a.ProxyPorts {
			pbProxyPorts = append(pbProxyPorts, proxyPortConfig.Proto())
		}
		pbAllocationSummary.ProxyPorts = pbProxyPorts
	}

	return pbAllocationSummary
}

// Incoming task actor messages; task actors must accept these messages.
type (
	// ChangeRP notifies the task actor to set itself up for a new resource pool.
	ChangeRP struct {
		ResourcePool string
	}
	// ResourcesAllocated notifies the task actor of assigned resources. Use Clone to
	// obtain a copy that does not share the Resources map with the original.
	ResourcesAllocated struct {
		ID                model.AllocationID
		ResourcePool      string
		Resources         ResourceList
		JobSubmissionTime time.Time
		Recovered         bool
	}
	// PendingPreemption notifies the task actor that it should release
	// resources due to a pending system-triggered preemption.
	PendingPreemption struct {
		AllocationID model.AllocationID
	}

	// NotifyContainerRunning notifies the launcher (dispatcher) resource
	// manager that the container is running.
	NotifyContainerRunning struct {
		AllocationID model.AllocationID
		Rank         int32
		NumPeers     int32
		NodeName     string
	}

	// ReleaseResources notifies the task actor to release resources.
	ReleaseResources struct {
		Reason string
		// If ForcePreemption is specified as true (default false), the requestor wants to
		// force a preemption attempt instead of an immediate kill.
		// NOTE(review): ForceKill is undocumented in the original; presumably it requests
		// an immediate kill — confirm with the handler of this message.
		ForcePreemption bool
		ForceKill       bool
	}
	// ResourcesRuntimeInfo is all the information provided at runtime to make a task spec.
	ResourcesRuntimeInfo struct {
		Token        string
		AgentRank    int
		IsMultiAgent bool
	}
)

// Environment variable names (untyped string constants) and ResourcesType values.
const (
	// ResourcesTypeEnvVar is the name of the env var indicating the resource type to a task.
	ResourcesTypeEnvVar = "DET_RESOURCES_TYPE"
	// SlurmRendezvousIfaceEnvVar is the name of the env var for indicating the net iface on which
	// to rendezvous (horovodrun will use the IPs of the nodes on this interface to launch).
	SlurmRendezvousIfaceEnvVar = "DET_SLURM_RENDEZVOUS_IFACE"
	// SlurmProxyIfaceEnvVar is the env var for overriding the net iface used to proxy between
	// the master and agents.
	SlurmProxyIfaceEnvVar = "DET_SLURM_PROXY_IFACE"
	// ResourcesTypeK8sJob indicates the resources are a handle for a k8s job.
	// NOTE(review): this comment previously said "k8s pod" while the name and value say
	// job — confirm which is accurate.
	ResourcesTypeK8sJob ResourcesType = "k8s-job"
	// ResourcesTypeDockerContainer indicates the resources are a handle for a docker container.
	ResourcesTypeDockerContainer ResourcesType = "docker-container"
	// ResourcesTypeSlurmJob indicates the resources are a handle for a slurm job.
	ResourcesTypeSlurmJob ResourcesType = "slurm-job"
)

// Clone clones ResourcesAllocated. Used to not pass mutable refs to other actors.
// The Resources map itself is duplicated, but the copy is shallow: the Resources values
// stored in it are shared with the original.
func (ra ResourcesAllocated) Clone() *ResourcesAllocated {
	// ra is already a by-value copy of the receiver; only the map needs detaching.
	cloned := ra
	cloned.Resources = maps.Clone(ra.Resources)
	return &cloned
}

// ResourcesSummary provides a summary of the resources comprising what we know at the time the
// allocation is granted, but for k8s it is granted before being scheduled so it isn't really much
// and `agent_devices` are missing for k8s.
type ResourcesSummary struct {
	ResourcesID   ResourcesID                   `json:"resources_id"`
	ResourcesType ResourcesType                 `json:"resources_type"`
	AllocationID  model.AllocationID            `json:"allocation_id"`
	AgentDevices  map[aproto.ID][]device.Device `json:"agent_devices"`

	// Available if the RM can give information on the container level.
	ContainerID *cproto.ID `json:"container_id"`

	// Available if the RM knows the resource is already started / exited.
	// Unlike the fields above, these carry no json tags, so encoding/json uses the Go
	// field names ("Started", "Exited") as keys.
	Started *ResourcesStarted
	Exited  *ResourcesStopped
}

// Proto returns the proto representation of ResourcesSummary.
//
// A nil receiver yields a nil proto message. AgentDevices is always emitted as a non-nil
// map with one entry per agent, and ContainerId is only set when ContainerID is non-nil.
// Started and Exited are converted through their own Proto methods, which are invoked
// even when the pointers are nil (presumably nil-safe — confirm in their definitions).
func (s *ResourcesSummary) Proto() *taskv1.ResourcesSummary {
	if s == nil {
		return nil
	}

	// One entry per agent, so the final size is known up front.
	pbAgentDevices := make(map[string]*taskv1.ResourcesSummary_Devices, len(s.AgentDevices))
	for agentID, devices := range s.AgentDevices {
		pbDevices := &taskv1.ResourcesSummary_Devices{}
		// The loop variable is deliberately not named "device" so that it does not shadow
		// the imported device package.
		for _, d := range devices {
			pbDevices.Devices = append(pbDevices.Devices, d.Proto())
		}
		pbAgentDevices[string(agentID)] = pbDevices
	}

	pbResourcesSummary := &taskv1.ResourcesSummary{
		ResourcesId:   string(s.ResourcesID),
		ResourcesType: string(s.ResourcesType),
		AllocationId:  string(s.AllocationID),
		AgentDevices:  pbAgentDevices,
		Started:       s.Started.Proto(),
		Exited:        s.Exited.Proto(),
	}

	if s.ContainerID != nil {
		pbContainerID := string(*s.ContainerID)
		pbResourcesSummary.ContainerId = &pbContainerID
	}

	return pbResourcesSummary
}

// Slots returns slot count for the resources, computed as the total number of devices
// across all agents in AgentDevices. A nil or empty map yields 0.
func (s ResourcesSummary) Slots() int {
	total := 0
	for _, assigned := range s.AgentDevices {
		total += len(assigned)
	}
	return total
}

// Resources is an interface that provides function for task actors
// to start tasks on assigned resources.
type Resources interface {
	// Summary returns a ResourcesSummary describing these resources.
	Summary() ResourcesSummary
	// Start starts a task described by the given task spec and runtime info on these
	// resources, reporting failure through the returned error.
	Start(logger.Context, tasks.TaskSpec, ResourcesRuntimeInfo) error
	// Kill kills whatever is running on these resources. It returns nothing, so failures
	// are not reported to the caller through this method.
	Kill(logger.Context)
}

// ResourceList is a collection of resources keyed by their ResourcesID. Despite the name,
// it is a map, so iteration order is not defined.
type ResourceList map[ResourcesID]Resources

// NewProxyPortConfig converts expconf proxy configs into internal representation.
//
// One ProxyPortConfig is produced per input entry, in input order. The service ID is the
// task ID itself when the entry reports DefaultServiceID, and "<taskID>:<port>" otherwise.
// The returned slice is never nil, even for empty input.
func NewProxyPortConfig(input expconf.ProxyPortsConfig, taskID model.TaskID) []*ProxyPortConfig {
	// The output has exactly one element per input entry, so allocate once.
	out := make([]*ProxyPortConfig, 0, len(input))
	for _, epp := range input {
		port := epp.ProxyPort()

		serviceID := string(taskID)
		if !epp.DefaultServiceID() {
			serviceID += ":" + strconv.Itoa(port)
		}

		out = append(out, &ProxyPortConfig{
			ServiceID:       serviceID,
			Port:            port,
			ProxyTCP:        epp.ProxyTCP(),
			Unauthenticated: epp.Unauthenticated(),
		})
	}

	return out
}
