In [28]:
# pylint: disable=broad-exception-caught

# Startup integration test
- Sequence of steps to start up LOW CSP (https://skaoffice.jamacloud.com/perspective.req#/testCases/1085128?projectId=328).
- Check what to expect for commands available via the `csp_controller`.

References:
- [LOW.CSP LMC Documentation](https://developer.skatelescope.org/projects/ska-csp-lmc-low/en/latest/lmc/low_csp_lmc.html)
- [LOW.CSP LMC Tango Clients Examples](https://developer.skatelescope.org/projects/ska-csp-lmc-low/en/latest/example/example.html)
- [CSP LMC commands for AA05](https://confluence.skatelescope.org/display/SE/CSP+LMC+commands+for+AA05)

**User note**:
- As stated in https://confluence.skatelescope.org/display/SE/Standby, the Standby command is not yet supported in the current version of LOW CSP.LMC, so this notebook will fail.    
The choice is to retain the notebook test describing the expected functionality and allow the steps that are still under development to fail.
Failures in LOW CSP test cases are allowed and considered valid when the behaviour deviates from what is expected/anticipated.
In the case of this notebook test the failure does not prevent integration, as workarounds are available; all workarounds and alternatives are provided in separate notebooks to allow integration to continue.
- Currently the test is restricted to testing only a single subarray: subarray 1

Although this test fails, it does not currently affect the fundamental operation of the LOW CSP.    
These mode status changes are not currently a requirement for the correlator functionality tests.

### Viewing Tango attributes

The notebook will interrogate device states and report back attribute values as part of the verification output.    
For visual inspection of device attributes the Taranta API interface is used.    
You can access the interface via a web browser by pointing the URL to the appropriate namespace on your k8s cluster    
`http://<k8s_CLUSTER>/<KUBE_NAMESPACE>/taranta/devices/low-csp/`    
e.g. for a deployment on the CLP    
`http://k8s.clp.skao.int/ska-low-csp-baseline/taranta/devices/low-csp/`

### Prerequisites
- All necessary equipment is installed and verified
- Assume a network is available and all equipment/systems are powered
- P4 switch is configured in order to control CBF
- LOW CSP has been deployed to the k8s cluster

### Imports

In [1]:
import json
import os
import time
from contextlib import suppress
from typing import Any, Callable, Optional

from ska_control_model import AdminMode, ObsState
from tango import ConnectionFailed, DeviceProxy, DevState

### Tango config

This section links the notebook execution to the tango devices on the cluster.
The most important parameter is the namespace name: KUBE_NAMESPACE.    
This identifies the k8s namespace with which you intend to interact.

In [2]:
# Namespace of the LOW CSP deployment to connect to in this cluster.
# For the no-hardware baseline deployment use "ska-low-csp-baseline-no-hw" instead.
KUBE_NAMESPACE = "ci-ska-low-csp-top-683-init-function-investigation"
# Name of the databaseds service inside that namespace.
DATABASEDS_NAME = "ska-low-csp-databaseds"

# Finally set TANGO_HOST; this is done before the device proxies are created
# in the following cells.
os.environ["TANGO_HOST"] = f"{DATABASEDS_NAME}.{KUBE_NAMESPACE}.svc.cluster.local:10000"

### Tango proxy devices

In [3]:
# Subarray under test; it is also reused in the AssignResources payload later on.
subarray_id = 1
# Proxy to the LOW CSP controller device.
csp_controller = DeviceProxy("low-csp/control/0")
# Proxy to the LOW CSP subarray; the id is zero-padded to two digits ("01").
csp_subarray_1 = DeviceProxy(f"low-csp/subarray/{subarray_id:02}")

In [4]:
# Proxy to the LOW CBF controller device.
cbf_controller = DeviceProxy("low-cbf/control/0")
# Proxy to the LOW CBF subarray with the same (zero-padded) id as the CSP subarray.
cbf_subarray_1 = DeviceProxy(f"low-cbf/subarray/{subarray_id:02}")

In [5]:
# Group the proxies so the helpers below can iterate over them.
csp_devices = (csp_controller, csp_subarray_1)
cbf_devices = (cbf_controller, cbf_subarray_1)
# Order matters for reporting: CSP devices first, then CBF devices.
all_devices = csp_devices + cbf_devices

### Helper functions

In [6]:
def wait_until(
    predicate: Callable[[], Any],
    message_on_fail: Optional[str] = None,
    timeout: int = 300,
    poll_frequency: int = 2,
) -> Any:
    """
    Poll a predicate until it returns a truthy value.

    :param predicate: Zero-argument callable to evaluate. Any exception it
    raises is treated as "not ready yet" and polling continues.
    :param message_on_fail: Text included in the time-out error message.
    :param timeout: Time-out period in seconds.
    :param poll_frequency: Delay in seconds between two evaluations of the
    predicate (an interval, despite the name).
    :return: The first truthy value returned by the predicate.
    :raises TimeoutError: if no truthy value was seen before timing out. If the
    most recent evaluation raised, that exception is attached as the cause.
    """
    # Monotonic clock: not affected by wall-clock adjustments during the wait.
    start = time.monotonic()
    last_error = None
    while True:
        try:
            return_val = predicate()
            if return_val:
                return return_val
            last_error = None
        except Exception as err:
            # Remember why the predicate failed so a time-out can report it.
            last_error = err
        # The predicate is always evaluated at least once before timing out.
        if time.monotonic() - start > timeout:
            raise TimeoutError(f"Timeout occurred: {message_on_fail}") from last_error
        time.sleep(poll_frequency)

In [7]:
def wait_for_attribute_value(
    device: DeviceProxy,
    attribute: str,
    value: Any = True,
    failure_message: str = "Timed out waiting for attribute value",
    timeout_sec: int = 120,
) -> None:
    """
    Wait until an attribute has a certain value

    The attribute is always read at least once, even when ``timeout_sec`` is
    zero or negative.

    :param device: Tango device proxy with the attribute to check
    :param attribute: The name of the attribute
    :param value: Expected value (defaults to True)
    :param failure_message: Message for the exception on failure.
    Defaults to "Timed out waiting for attribute value".
    A note about duration is appended.
    :param timeout_sec: Approximate time-out period  in seconds (in reality
    it could be longer due to delays waiting for each attribute read)
    :raises RuntimeError: if expected value not seen before timing out
    """
    # Monotonic clock: not affected by wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout_sec
    poll_interval_seconds = 2
    while True:
        if device.read_attribute(attribute).value == value:
            return
        # Check the deadline only after a read, so the value is always sampled
        # at least once before giving up.
        if time.monotonic() >= deadline:
            raise RuntimeError(f"{failure_message} after {timeout_sec} sec")
        time.sleep(poll_interval_seconds)

In [8]:
def wait_for_device_response(
    device: DeviceProxy,
    failure_message: str = "Timed out waiting for device to respond",
    timeout_sec: int = 120,
) -> None:
    """
    Wait until a device responds.

    The device is always pinged at least once, even when ``timeout_sec`` is
    zero or negative.

    :param device: Tango device proxy to wait for
    :param failure_message: Message for the exception on failure.
    Defaults to "Timed out waiting for device to respond".
    A note about duration is appended.
    :param timeout_sec: Approximate time-out period in seconds
    :raises RuntimeError: if the device does not respond in time. The last
    connection failure is attached as the cause.
    """
    # Monotonic clock: not affected by wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout_sec
    poll_interval_seconds = 2
    while True:
        try:
            device.ping()
            return
        except ConnectionFailed as err:
            # Only connection failures mean "not up yet"; anything else propagates.
            if time.monotonic() >= deadline:
                raise RuntimeError(f"{failure_message} after {timeout_sec} sec") from err
        time.sleep(poll_interval_seconds)

In [9]:
# Colored printing functions for strings that use universal ANSI escape sequences.
# fail: bold red, pass: bold green, warn: bold yellow,
# debug: bold blue, info: plain (no styling), bold: bold white


def _print_styled(message, sgr, start, end):
    """Print the stripped message after ``start``, wrapped in the SGR code ``sgr``.

    When ``sgr`` is None the text is printed without any escape sequence.
    """
    text = message.strip()
    if sgr is not None:
        # "\x1b[<sgr>m" switches the style on, "\x1b[0m" resets it.
        text = f"\x1b[{sgr}m{text}\x1b[0m"
    print(f"{start}{text}", end=end)


def print_fail(message, start="", end="\n"):
    """Print a stripped message in bold red."""
    _print_styled(message, "1;31", start, end)


def print_pass(message, start="", end="\n"):
    """Print a stripped message in bold green."""
    _print_styled(message, "1;32", start, end)


def print_warn(message, start="", end="\n"):
    """Print a stripped message in bold yellow."""
    _print_styled(message, "1;33", start, end)


def print_debug(message, start="", end="\n"):
    """Print a stripped message in bold blue."""
    _print_styled(message, "1;34", start, end)


def print_info(message, start="", end="\n"):
    """Print a stripped message without any styling."""
    _print_styled(message, None, start, end)


def print_bold(message, start="", end="\n"):
    """Print a stripped message in bold white."""
    _print_styled(message, "1;37", start, end)

In [10]:
# show current state of device


def color_print(device):
    """
    Print the status text of a device, colored by its current state.

    FAULT is printed with print_fail, ALARM with print_warn and every other
    state with print_info. The line is prefixed with a tab.

    :param device: Tango device proxy to report on
    :raises RuntimeError: if the device does not respond (raised by
    wait_for_device_response)
    """
    wait_for_device_response(device)
    # Read the state once so both comparisons see the same value.
    state = device.state()
    status = f"{device.status()}"
    if state == DevState.FAULT:
        print_fail(status, start="\t")
    elif state == DevState.ALARM:
        print_warn(status, start="\t")
    else:
        print_info(status, start="\t")


def show_state():
    """
    Report on every proxy in ``all_devices``.

    For each device this prints its name, its colored status line, its
    adminMode and its healthState. obsState is printed too, unless accessing
    it raises AttributeError. A failure while reading adminMode is reported
    as a plain message instead of stopping the report.
    """
    for proxy in all_devices:
        print(f"TANGO device: {proxy.name()}")
        color_print(proxy)
        try:
            admin_mode = proxy.adminMode
            print(f"\t{admin_mode!s}")
        except Exception:
            print("raises error in this state")
        health_state = proxy.healthState
        print(f"\t{health_state!s}")
        with suppress(AttributeError):
            obs_state = proxy.obsState
            print(f"\t{obs_state!s}")

### Init devices

**WARNING**:    
Initialisation of the system only happens once after a fresh deployment.    
Only run the cell below if the test is running on a freshly deployed system that has not been initialised yet.
Rerunning Init() on an already deployed system may lead to errors and FAULT conditions

In [11]:
# Only initialise when the CSP controller still reports adminMode OFFLINE
# (see the warning above about re-running Init()).
if csp_controller.adminMode == AdminMode.OFFLINE:
    for device in all_devices:
        print(f"Initializing TANGO device: {device.name()}")
        # Set the client-side time-out of the proxy to 60 s before calling Init().
        device.set_timeout_millis(60_000)
        device.Init()

In [12]:
# Report the state of all devices after the (conditional) initialisation.
show_state()

TANGO device: low-csp/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-csp/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.IDLE
TANGO device: low-cbf/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-cbf/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.IDLE


### Set CSP to ONLINE
Anticipated behaviour is that Init() will bring the system ONLINE (controllers to ON).    
But just in case, because of the active development, check whether the low-csp adminMode is perhaps still OFFLINE.    
If so, execute the next block; otherwise ignore it.

In [13]:
# If Init() left the CSP controller OFFLINE, switch it to ONLINE and wait
# (default time-out of wait_for_attribute_value: 120 s) until it reports
# isCommunicating == True.
if csp_controller.adminMode == AdminMode.OFFLINE:
    csp_controller.adminMode = AdminMode.ONLINE
    wait_for_attribute_value(csp_controller, "isCommunicating", True)

In [14]:
# Show the controller's isCommunicating attribute (tab-indented).
print(f"\t{csp_controller.isCommunicating}")

	True


In [15]:
# Report the state of all devices after the adminMode check.
show_state()

TANGO device: low-csp/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-csp/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.IDLE
TANGO device: low-cbf/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-cbf/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.IDLE


### Transition LOW CSP to STANDBY state

In [16]:
# Ask the CSP controller to go to STANDBY.
# NOTE(review): the empty list presumably addresses all subsystems - confirm
# against the CSP.LMC documentation.
csp_controller.standby([])
# Poll until element [1] of longRunningCommandResult contains "standby completed".
# No time-out is passed, so wait_until's default of 300 s applies (hence the message).
wait_until(
    lambda: "standby completed" in csp_controller.longRunningCommandResult[1],
    "Standby is not completed after 300s",
)
# Stops the run here if the CBF controller did not reach STANDBY.
assert cbf_controller.state() == DevState.STANDBY, "cbf controller is not in STANDBY state"

AssertionError: cbf controller is not in STANDBY state

Expected Results:   
- LOW CSP.LMC and LOW CBF controllers and subarrays reports STANDBY state   
- LOW CSP.LMC and LOW CBF obsState reports EMPTY    
- The LOW CSP healthState is OK

In [17]:
# Read each state once so the verdict and the printed values agree.
cbf_state = cbf_controller.state()
csp_state = csp_controller.state()
# The verdict is based on the CBF controller state only.
if cbf_state == DevState.STANDBY:
    print_pass(f"{csp_controller.name()} is {csp_state}")
    print_pass(f"{cbf_controller.name()} is {cbf_state}")
    print_pass("LOW CSP reports STANDBY")
    print_pass("Test passed")
else:
    print_fail(f"{csp_controller.name()} is {csp_state}")
    print_fail(f"{cbf_controller.name()} is {cbf_state}")
    print_fail("LOW CSP not in STANDBY")
    print_fail("Test failed")

[1;31mlow-csp/control/0 is ON[0m
[1;31mlow-cbf/control/0 is ON[0m
[1;31mLOW CSP not in STANDBY[0m
[1;31mTest failed[0m


In [18]:
# Report the state of all devices after the STANDBY request.
show_state()

TANGO device: low-csp/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-csp/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.EMPTY
TANGO device: low-cbf/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-cbf/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.EMPTY


### Verify STANDBY state
Verify signal processing commands are not accepted by assigning resources to a subarray

In [19]:
print("Assign resources")
# resources can only be assigned if the array is empty
print(f"{csp_subarray_1.dev_name()} in {str(csp_subarray_1.obsState)}")
assert csp_subarray_1.obsState == ObsState.EMPTY

# Payload for AssignResources; it is serialised with json.dumps() when sent.
# Only the subarray id is filled in, the "lowcbf" section is left empty.
assign_resources_json = {
    "interface": "https://schema.skao.int/ska-low-csp-assignresources/2.0",
    "common": {
        "subarray_id": subarray_id,
    },
    "lowcbf": {},
}
print(assign_resources_json)

Assign resources
low-csp/subarray/01 in obsState.EMPTY
{'interface': 'https://schema.skao.int/ska-low-csp-assignresources/2.0', 'common': {'subarray_id': 1}, 'lowcbf': {}}


In [20]:
# Attempt the assignment; any exception raised by the call is reported as
# "not assigned" instead of stopping the notebook.
try:
    csp_subarray_1.AssignResources(json.dumps(assign_resources_json))
except Exception:
    print("Resources were not assigned")

Expected Results:   
All operational states remain the same as in the previous step and all subarrays remain EMPTY

In [21]:
# Expected result: the subarray remains EMPTY. Any other obsState, not only
# IDLE, means the assignment was acted upon, so it is reported as a failure.
# The obsState is read once so the verdict and the printed value agree.
subarray_obs_state = csp_subarray_1.obsState
if subarray_obs_state == ObsState.EMPTY:
    print_pass(f"{csp_controller.name()} is {csp_controller.state()}")
    print_pass(f"{csp_subarray_1.name()} is {csp_subarray_1.state()}")
    print_pass(f"{csp_subarray_1.name()} ignored transitioned from EMPTY to IDLE")
    print_pass(f"{csp_subarray_1.name()} in {str(subarray_obs_state)} state")
    print_pass("Test passed")
else:
    print_fail(f"{csp_controller.name()} is {csp_controller.state()}")
    print_fail(f"{csp_subarray_1.name()} is {csp_subarray_1.state()}")
    print_fail(f"{csp_subarray_1.name()} transitioned from EMPTY to {str(subarray_obs_state)}")
    print_fail("Test failed")

[1;31mlow-csp/control/0 is ON[0m
[1;31mlow-csp/subarray/01 is ON[0m
[1;31mlow-csp/subarray/01 transitioned from EMPTY to obsState.IDLE[0m
[1;31mTest failed[0m


In [22]:
# Report the state of all devices after the AssignResources attempt.
show_state()

TANGO device: low-csp/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-csp/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.IDLE
TANGO device: low-cbf/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-cbf/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.IDLE


### Transition LOW CSP to ON state

In [23]:
# Ask the CSP controller to switch ON.
# NOTE(review): the empty list presumably addresses all subsystems - confirm
# against the CSP.LMC documentation.
csp_controller.on([])
# Wait (default time-out 120 s) until the controller reports isCommunicating == True.
wait_for_attribute_value(csp_controller, "isCommunicating", True)
# Stops the run here if the controller is not in the ON state.
assert csp_controller.state() == DevState.ON, "CSP was not transited to ON state"

Expected results:
- LOW CSP.LMC and LOW CBF controllers and subarrays reports ON state    
- LOW CSP.LMC and LOW CBF obsState reports EMPTY      
- The LOW CSP healthState is OK

In [24]:
# Read the state once so the verdict and the printed value agree.
csp_state = csp_controller.state()
if csp_state == DevState.ON:
    print_pass(f"{csp_controller.name()} is {csp_state}")
    print_pass("Test passed")
else:
    # Report the failure explicitly instead of printing nothing.
    print_fail(f"{csp_controller.name()} is {csp_state}")
    print_fail("Test failed")

[1;32mlow-csp/control/0 is ON[0m
[1;32mTest passed[0m


In [25]:
# Report the state of all devices after the ON command.
show_state()

TANGO device: low-csp/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-csp/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.IDLE
TANGO device: low-cbf/control/0
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
TANGO device: low-cbf/subarray/01
	The device is in ON state.
	adminMode.ONLINE
	healthState.UNKNOWN
	obsState.IDLE


In [26]:
# Create a proxy to a second CSP subarray, initialise it and ask the
# controller to switch that subarray on.
csp_subarray_2 = DeviceProxy("low-csp/subarray/02")
csp_subarray_2.Init()
csp_controller.on(["low-csp/subarray/02"])
wait_for_attribute_value(csp_controller, "isCommunicating", True)

# Print name and status of the controller and both subarrays, each followed
# by blank lines.
for csp_device in (csp_controller, csp_subarray_1, csp_subarray_2):
    print(f"TANGO device: {csp_device.name()}")
    print(csp_device.status())
    print("\n")

TANGO device: low-csp/control/0
The device is in ON state.


TANGO device: low-csp/subarray/01
The device is in ON state.


TANGO device: low-csp/subarray/02
The device is in DISABLE state.




In [27]:
# Send On directly to the second subarray and wait until it reports
# isCommunicating == True.
csp_subarray_2.On()
wait_for_attribute_value(csp_subarray_2, "isCommunicating", True)

# Print name and status of the controller and both subarrays, each followed
# by blank lines.
for csp_device in (csp_controller, csp_subarray_1, csp_subarray_2):
    print(f"TANGO device: {csp_device.name()}")
    print(csp_device.status())
    print("\n")

DevFailed: DevFailed[
DevError[
    desc = Command On not allowed when the device is in DISABLE state
  origin = CORBA::Any* Tango::DeviceClass::command_handler(Tango::DeviceImpl*, const string&, const CORBA::Any&) at (/src/cppTango/cppapi/server/deviceclass.cpp:1139)
  reason = API_CommandNotAllowed
severity = ERR]

DevError[
    desc = Failed to execute command_inout on device low-csp/subarray/02, command On
  origin = virtual Tango::DeviceData Tango::Connection::command_inout(const string&, const Tango::DeviceData&) at (/src/cppTango/cppapi/client/devapi_base.cpp:1338)
  reason = API_CommandFailed
severity = ERR]
]

### Verify ON state
Verify signal processing commands are accepted by assigning resources to a subarray

In [28]:
# Send the assignment request; as the last expression of the cell, the
# command's return value is displayed as the cell output.
csp_subarray_1.AssignResources(json.dumps(assign_resources_json))

[array([2], dtype=int32),
 ['1697205239.4301665_76108799261637_AssignResources']]

In [29]:
# Show the subarray's validScanConfiguration attribute.
print(csp_subarray_1.validScanConfiguration)




Expected results:    
CSP and CBF subarray state transitions to IDLE and resources are assigned to the subarray

In [30]:
# Wait (default time-out 120 s) for the CSP subarray to reach IDLE. Only the
# CSP subarray is waited on; the CBF subarray obsState is just printed.
wait_for_attribute_value(csp_subarray_1, "obsState", ObsState.IDLE, "Assignment not finished")
print(f"{csp_subarray_1.dev_name()} in {str(csp_subarray_1.obsState)}")
print(f"{cbf_subarray_1.dev_name()} in {str(cbf_subarray_1.obsState)}")

low-csp/subarray/01 in obsState.IDLE
low-cbf/subarray/01 in obsState.IDLE


### Release resources

In [31]:
# Request release of all resources; as the last expression of the cell, the
# command's return value is displayed as the cell output.
csp_subarray_1.ReleaseAllResources()

[array([2], dtype=int32),
 ['1697205245.4517508_5655379500085_ReleaseAllResources']]

### Verify resources released
Subarray resources are released and the subarray state transitions to EMPTY

In [32]:
# Wait (default time-out 120 s) for the CSP subarray to return to EMPTY. Only
# the CSP subarray is waited on; the CBF subarray obsState is just printed.
wait_for_attribute_value(
    csp_subarray_1, "obsState", ObsState.EMPTY, "Release resources is not finished"
)
print(f"{csp_subarray_1.dev_name()} in {str(csp_subarray_1.obsState)}")
print(f"{cbf_subarray_1.dev_name()} in {str(cbf_subarray_1.obsState)}")

low-csp/subarray/01 in obsState.EMPTY
low-cbf/subarray/01 in obsState.EMPTY
