May
23

Nutanix Calm Blueprint for Auto Scaling Compute Resources for Cloudera

Add additional compute resources by first looking at seasonal trending data from Prism Central. Auto scaling is done using Nutanix Calm.

Save the code below as a JSON file.
cdh_parcel.version:\n cdh_parcel = p\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" downloaded\"\n\n# distribute the parcel\nprint \"Starting parcel distribution. This might take a while.\"\ncmd = cdh_parcel.start_distribution()\nif cmd.success != True:\n print \"Parcel distribution failed!\"\n exit(0)\n\nprint 'wait 4 min'\n# make sure the distribution finishes\n#while cdh_parcel.stage != \"DISTRIBUTED\":\nsleep(200)\n# cdh_parcel = get_parcel(api, cdh_parcel.product, '5.13.3-1.cdh5.13.3.p0.2', cluster_name)\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" distributed\"\n\n# activate the parcel\ncmd = cdh_parcel.activate()\nif cmd.success != True:\n print \"Parcel activation failed!\"\n exit(0)\n\n# make sure the activation finishes\n#while cdh_parcel.stage != \"ACTIVATED\":\n# cdh_parcel = get_parcel(api, cdh_parcel.product, '5.13.3-1.cdh5.13.3.p0.2', cluster_name)\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" activated\"\n\n\nyarn = cluster.get_service('yarn')\nprint yarn.name\nhostsid = api.get_all_hosts()\n#range = len(hostsid)\n#print 'range: ' + range\n\nfor current in hostsid:\n if current.rackId == '/default/default':\n print 'host name to add' + current.hostname\n currentshort1 = current.hostname\n currentshort = currentshort1.split(\".\", 1)\n short = currentshort[0]\n print short\n yarn.create_role(short, 'NODEMANAGER', current.hostId)\n\ncluster.restart(restart_only_stale_services=True, redeploy_client_configuration=True)\n\nexit(0)", "script_type": "sh", "type": "", "command_line_args": "", "login_credential_local_reference": {"kind": "app_credential", "name": "go", "uuid": "f356d2b6-2fac-453c-84ad-d91bc21bb1ba"}}, "timeout_secs": "", "type": "EXEC", "variable_list": [], "uuid": "b24dd948-e65d-2c1e-52d8-c4d21d615274"}], "description": "", "name": "7da2879a_runbook_cloned_1", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "6405be68_dag", "uuid": "7ec268af-4e53-3c55-930a-740501d41cfb"}, 
"message_list": [], "variable_list": [], "uuid": "2363d280-af23-b97a-557f-1e6a3d156fbb"}, "type": "", "uninstall_runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "de4520ca_dag", "state": "ACTIVE", "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "aabae04b-08bb-466e-76a2-1fcfc3c816cd"}], "description": "", "name": "339b443d_runbook_cloned_1", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "de4520ca_dag", "uuid": "aabae04b-08bb-466e-76a2-1fcfc3c816cd"}, "message_list": [], "variable_list": [], "uuid": "ff185ae5-bc26-095b-6840-9fc957638621"}}, "timeout_secs": "", "type": "CUSTOM_PACKAGE_INSTALL", "variable_list": [], "uuid": "47831e03-3fa0-4a8e-b26c-8244da30ab82"}], "description": "", "name": "SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "SYS_GEN__DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "078dca59-9b50-4ad8-b048-2ca148f8251e"}, "message_list": [], "variable_list": [], "uuid": "b81bf28e-424c-4daa-9beb-8ae9a62855ba"}, "message_list": [], "name": "action_install"}, {"description": "System action for creating an application", "type": "system", "uuid": "563a264b-ca8e-4279-ad98-96e4e8d77282", "state": "ACTIVE", "critical": true, "attrs": {}, "runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [{"kind": "app_task", "name": "SYS_GEN__install_CRb_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "d4857a2b-32f6-471d-9e05-295df9e45c87"}, {"kind": "app_task", "name": 
"SYS_GEN__create_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "55697484-6734-4bdd-afde-75e8c171697e"}, {"kind": "app_task", "name": "SYS_GEN__start_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "d5751cd2-f72a-4f35-9fd6-104079471e54"}], "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"edges": [{"from_task_reference": {"kind": "app_task", "name": "SYS_GEN__create_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "55697484-6734-4bdd-afde-75e8c171697e"}, "edge_type": "inherent", "type": "", "uuid": "873c853d-d849-46a7-9344-625a6b9c0898", "to_task_reference": {"kind": "app_task", "name": "SYS_GEN__start_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "d5751cd2-f72a-4f35-9fd6-104079471e54"}}, {"from_task_reference": {"kind": "app_task", "name": "SYS_GEN__install_CRb_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "d4857a2b-32f6-471d-9e05-295df9e45c87"}, "edge_type": "inherent", "type": "", "uuid": "d316c8f7-435c-4b1c-ad0c-7b1d02696dcd", "to_task_reference": {"kind": "app_task", "name": "SYS_GEN__create_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "55697484-6734-4bdd-afde-75e8c171697e"}}], "type": "DAG"}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "dcc6c9a0-ef16-406c-8960-b313800452b3"}, {"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__install_CRb_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "b81bf28e-424c-4daa-9beb-8ae9a62855ba"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "d4857a2b-32f6-471d-9e05-295df9e45c87"}, 
{"target_any_local_reference": {"kind": "app_service", "name": "code", "uuid": "0afc9748-88f7-8e8c-f7fe-b2b2cd6669e1"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__create_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "eba78c76_runbook_cloned_1", "uuid": "e3ec95e5-6ede-c168-b629-600d771f5803"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "55697484-6734-4bdd-afde-75e8c171697e"}, {"target_any_local_reference": {"kind": "app_service", "name": "code", "uuid": "0afc9748-88f7-8e8c-f7fe-b2b2cd6669e1"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__start_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "3cedb317_runbook_cloned_1", "uuid": "aeedc907-4495-18e4-e7aa-93b781b90737"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "d5751cd2-f72a-4f35-9fd6-104079471e54"}], "description": "", "name": "SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "dcc6c9a0-ef16-406c-8960-b313800452b3"}, "message_list": [], "variable_list": [], "uuid": "daf9342b-98d9-4877-aee9-b322ed207462"}, "message_list": [], "name": "action_create"}, {"description": "System action for uninstalling an application", "type": "system", "uuid": "3255d19b-f2af-4d1f-a517-f91bf14bdc93", "state": "ACTIVE", "critical": true, "attrs": {}, "runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], 
"child_tasks_local_reference_list": [{"kind": "app_task", "name": "PE_Uninstall_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "aa9bdd7f-4d40-4d9c-af4d-13091c066f70"}], "name": "SYS_GEN__DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"edges": [], "type": "DAG"}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "92cd3b4b-fc1a-4dc3-9f17-6a5de9a2473e"}, {"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "PE_Uninstall_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"install_runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [{"kind": "app_task", "name": "PackageInstallTask", "uuid": "b24dd948-e65d-2c1e-52d8-c4d21d615274"}], "name": "6405be68_dag", "state": "ACTIVE", "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "7ec268af-4e53-3c55-930a-740501d41cfb"}, {"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "PackageInstallTask", "state": "ACTIVE", "attrs": {"exit_status": [], "script": "#!/usr/bin/env python\nfrom cm_api.api_client import ApiResource\nfrom cm_api.endpoints.clusters import ApiCluster\nfrom cm_api.endpoints.clusters import create_cluster\nfrom cm_api.endpoints.parcels import ApiParcel\nfrom cm_api.endpoints.parcels import get_parcel\nfrom cm_api.endpoints.cms import ClouderaManager\nfrom cm_api.endpoints.services import ApiService, ApiServiceSetupInfo\nfrom cm_api.endpoints.services import create_service\nfrom cm_api.endpoints.types import 
ApiCommand, ApiRoleConfigGroupRef\nfrom cm_api.endpoints.role_config_groups import get_role_config_group\nfrom cm_api.endpoints.role_config_groups import ApiRoleConfigGroup\nfrom cm_api.endpoints.roles import ApiRole\nfrom time import sleep\n\nhost_username = 'root'\ncm_repo_url = None\ncdh_version = \"CDH5\" # also valid: \"CDH4\"\ncdh_version_number = \"5\"\n#240\nsleep(240)\n\nhostsIP = [ ]\nhostNames = [ ]\nhostNamesLong = [ ]\nthostNames = [ ]\nthostNames = \"@@{compute.name}@@\"\n#hostsIP = thostsIP.split(,)\nhostNames = thostNames.split(\",\")\n\n#for name in hostName:\n # hostNamesLong = hostName.append(name + '.rf9.local')\n\nfor name in hostNames:\n hostNamesLong.append(name + '.rf9.local')\n print hostNamesLong\n \nprint \"@@{code.cmip}@@\"\nprint \"@@{code.cmPassword}@@\"\n\napi = ApiResource(\"@@{code.cmip}@@\", '7180', \"@@{code.cmAdmin}@@\", \"@@{code.cmPassword}@@\", version=\"@@{code.APIversion}@@\")\n\n#get the CM instance\ncm = api.get_cluster(\"@@{code.clusterName}@@\")\ncm = ClouderaManager(api)\n\n\ncmd = cm.host_install(\"@@{code.hostAdmin}@@\", hostNamesLong, password=\"@@{code.hostPassword}@@\", cm_repo_url=cm_repo_url)\nprint \"Installing hosts. 
This might take a while.\"\nwhile cmd.success == None:\n sleep(15)\n cmd = cmd.fetch()\n\nif cmd.success != True:\n print \"cm_host_install failed: \" + cmd.resultMessage\n# exit(0)\nprint \"cm_host_install succeeded\"\n\ncluster = api.get_cluster(\"@@{code.clusterName}@@\")\n\nhostsadded = cluster.add_hosts(hostNamesLong)\n\nparcels_list = []\n# get and list all available parcels\nprint \"Available parcels:\"\nfor p in cluster.get_all_parcels():\n print p.version\n if p.version.startswith(\"@@{code.cmPackage}@@\") and p.product == \"CDH\":\n parcels_list.append(p)\n\nif len(parcels_list) == 0:\n print \"No \"\n print cdh_version\n print \" parcel found!\"\n exit(0)\n\ncdh_parcel = parcels_list[0]\n\nfor p in parcels_list:\n if p.version > cdh_parcel.version:\n cdh_parcel = p\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" downloaded\"\n\n# distribute the parcel\nprint \"Starting parcel distribution. This might take a while.\"\ncmd = cdh_parcel.start_distribution()\nif cmd.success != True:\n print \"Parcel distribution failed!\"\n exit(0)\n\nprint 'wait 4 min'\n# make sure the distribution finishes\n#while cdh_parcel.stage != \"DISTRIBUTED\":\nsleep(200)\n# cdh_parcel = get_parcel(api, cdh_parcel.product, '5.13.3-1.cdh5.13.3.p0.2', cluster_name)\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" distributed\"\n\n# activate the parcel\ncmd = cdh_parcel.activate()\nif cmd.success != True:\n print \"Parcel activation failed!\"\n exit(0)\n\n# make sure the activation finishes\n#while cdh_parcel.stage != \"ACTIVATED\":\n# cdh_parcel = get_parcel(api, cdh_parcel.product, '5.13.3-1.cdh5.13.3.p0.2', cluster_name)\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" activated\"\n\n\nyarn = cluster.get_service('yarn')\nprint yarn.name\nhostsid = api.get_all_hosts()\n#range = len(hostsid)\n#print 'range: ' + range\n\nfor current in hostsid:\n if current.rackId == '/default/default':\n print 'host name to add' + current.hostname\n currentshort1 = 
current.hostname\n currentshort = currentshort1.split(\".\", 1)\n short = currentshort[0]\n print short\n yarn.create_role(short, 'NODEMANAGER', current.hostId)\n\ncluster.restart(restart_only_stale_services=True, redeploy_client_configuration=True)\n\nexit(0)", "script_type": "sh", "type": "", "command_line_args": "", "login_credential_local_reference": {"kind": "app_credential", "name": "go", "uuid": "f356d2b6-2fac-453c-84ad-d91bc21bb1ba"}}, "timeout_secs": "", "type": "EXEC", "variable_list": [], "uuid": "b24dd948-e65d-2c1e-52d8-c4d21d615274"}], "description": "", "name": "7da2879a_runbook_cloned_1", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "6405be68_dag", "uuid": "7ec268af-4e53-3c55-930a-740501d41cfb"}, "message_list": [], "variable_list": [], "uuid": "2363d280-af23-b97a-557f-1e6a3d156fbb"}, "type": "", "uninstall_runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "de4520ca_dag", "state": "ACTIVE", "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "aabae04b-08bb-466e-76a2-1fcfc3c816cd"}], "description": "", "name": "339b443d_runbook_cloned_1", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "de4520ca_dag", "uuid": "aabae04b-08bb-466e-76a2-1fcfc3c816cd"}, "message_list": [], "variable_list": [], "uuid": "ff185ae5-bc26-095b-6840-9fc957638621"}}, "timeout_secs": "", "type": "CUSTOM_PACKAGE_UNINSTALL", "variable_list": [], "uuid": "aa9bdd7f-4d40-4d9c-af4d-13091c066f70"}], "description": "", "name": "SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "SYS_GEN__DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "92cd3b4b-fc1a-4dc3-9f17-6a5de9a2473e"}, 
"message_list": [], "variable_list": [], "uuid": "3efda7af-c2c5-4527-91b8-0c58ae037827"}, "message_list": [], "name": "action_uninstall"}, {"description": "System action for deleting an application. Deletes physical machines as well", "type": "system", "uuid": "7ad1b2bf-b87c-4d3d-8da5-33abeb3cb757", "state": "ACTIVE", "critical": true, "attrs": {}, "runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [{"kind": "app_task", "name": "SYS_GEN__stop_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "5b2b1a62-4e83-4d1b-934b-862d89fc58f9"}, {"kind": "app_task", "name": "SYS_GEN__delete_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "2ce000b8-1296-4f5d-a8ae-8da96433a1cf"}, {"kind": "app_task", "name": "SYS_GEN__uninstall_CRb_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "1a05ced5-575f-4daf-ae7c-033403a0e77d"}], "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"edges": [{"from_task_reference": {"kind": "app_task", "name": "SYS_GEN__stop_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "5b2b1a62-4e83-4d1b-934b-862d89fc58f9"}, "edge_type": "inherent", "type": "", "uuid": "60c3c99d-672f-49af-b994-89bd9f92909e", "to_task_reference": {"kind": "app_task", "name": "SYS_GEN__delete_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "2ce000b8-1296-4f5d-a8ae-8da96433a1cf"}}, {"from_task_reference": {"kind": "app_task", "name": "SYS_GEN__delete_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "2ce000b8-1296-4f5d-a8ae-8da96433a1cf"}, "edge_type": "inherent", "type": "", "uuid": "be945022-5672-4d55-9a03-54584bb4b48f", "to_task_reference": {"kind": "app_task", "name": "SYS_GEN__uninstall_CRb_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "1a05ced5-575f-4daf-ae7c-033403a0e77d"}}], 
"type": "DAG"}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "24022115-efdb-4ecf-b9b2-6275a9944739"}, {"target_any_local_reference": {"kind": "app_service", "name": "code", "uuid": "0afc9748-88f7-8e8c-f7fe-b2b2cd6669e1"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__stop_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "5be55dea_runbook_cloned_1", "uuid": "aec97efe-ee30-0743-f90f-192698db865c"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "5b2b1a62-4e83-4d1b-934b-862d89fc58f9"}, {"target_any_local_reference": {"kind": "app_service", "name": "code", "uuid": "0afc9748-88f7-8e8c-f7fe-b2b2cd6669e1"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__delete_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "740b4861_runbook_cloned_1", "uuid": "3b9eb309-30d3-a2ae-137b-9d1552ea6038"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "2ce000b8-1296-4f5d-a8ae-8da96433a1cf"}, {"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__uninstall_CRb_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "3efda7af-c2c5-4527-91b8-0c58ae037827"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "1a05ced5-575f-4daf-ae7c-033403a0e77d"}], "description": "", "name": 
"SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "24022115-efdb-4ecf-b9b2-6275a9944739"}, "message_list": [], "variable_list": [], "uuid": "13878dbf-d8a6-46ae-b2ba-91b2d94c80b6"}, "message_list": [], "name": "action_delete"}, {"description": "System action for deleting an application. Does not delete physical machines", "type": "system", "uuid": "514bcd42-76cc-4588-aaca-d8c090d2b394", "state": "ACTIVE", "critical": true, "attrs": {}, "runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [{"kind": "app_task", "name": "SYS_GEN__soft_delete_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "4378066f-61a2-455f-bac8-42fded9ad756"}, {"kind": "app_task", "name": "SYS_GEN__Package_Element_Delete_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "c3dcc740-7e1c-40b9-9c5f-80243f049090"}], "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"edges": [{"from_task_reference": {"kind": "app_task", "name": "SYS_GEN__soft_delete_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "4378066f-61a2-455f-bac8-42fded9ad756"}, "edge_type": "inherent", "type": "", "uuid": "eaa03831-dedd-430b-a855-b2f31ce3a575", "to_task_reference": {"kind": "app_task", "name": "SYS_GEN__Package_Element_Delete_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "c3dcc740-7e1c-40b9-9c5f-80243f049090"}}], "type": "DAG"}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "b0c7ab87-e166-4b64-8e34-ecff50898741"}, {"target_any_local_reference": {"kind": "app_service", "name": "code", "uuid": "0afc9748-88f7-8e8c-f7fe-b2b2cd6669e1"}, "description": "", "message_list": [], 
"child_tasks_local_reference_list": [], "name": "SYS_GEN__soft_delete_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "SYS_GEN__Runbook_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "d9c0f039-0740-41ee-9d53-d5846fb8aa33"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "4378066f-61a2-455f-bac8-42fded9ad756"}, {"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__Package_Element_Delete_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"type": ""}, "timeout_secs": "", "type": "SOFT_DELETE_ELEMENT", "variable_list": [], "uuid": "c3dcc740-7e1c-40b9-9c5f-80243f049090"}], "description": "", "name": "SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "b0c7ab87-e166-4b64-8e34-ecff50898741"}, "message_list": [], "variable_list": [], "uuid": "a33c5528-ee5d-45c1-a762-1130af869bfd"}, "message_list": [], "name": "action_soft_delete"}, {"description": "System action for starting an application", "type": "system", "uuid": "a85a6591-bc55-4b22-9057-dc49a156d6d0", "state": "ACTIVE", "critical": true, "attrs": {}, "runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [{"kind": "app_task", "name": "SYS_GEN__start_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "4fbf8a01-298d-4363-8968-6f4b034ee440"}], "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": 
"ACTIVE", "attrs": {"edges": [], "type": "DAG"}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "c340dcbc-2937-40b0-82a9-e82c9ea49f06"}, {"target_any_local_reference": {"kind": "app_service", "name": "code", "uuid": "0afc9748-88f7-8e8c-f7fe-b2b2cd6669e1"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__start_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "3cedb317_runbook_cloned_1", "uuid": "aeedc907-4495-18e4-e7aa-93b781b90737"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "4fbf8a01-298d-4363-8968-6f4b034ee440"}], "description": "", "name": "SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "c340dcbc-2937-40b0-82a9-e82c9ea49f06"}, "message_list": [], "variable_list": [], "uuid": "12813311-3bf3-486f-b59a-ec1087c2f54d"}, "message_list": [], "name": "action_start"}, {"description": "System action for stopping an application", "type": "system", "uuid": "cf8d0527-c638-4d40-8947-a8bf1cb1d051", "state": "ACTIVE", "critical": true, "attrs": {}, "runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [{"kind": "app_task", "name": "SYS_GEN__stop_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "uuid": "9626db53-15f7-4f28-83b0-70061870f14f"}], "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "attrs": {"edges": [], "type": "DAG"}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "fd0d81b5-563f-46bc-b001-b3e967bf45a0"}, 
{"target_any_local_reference": {"kind": "app_service", "name": "code", "uuid": "0afc9748-88f7-8e8c-f7fe-b2b2cd6669e1"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "SYS_GEN__stop_CRb_Service_0afc9748_88f7_8e8c_f7fe_b2b2cd6669e1", "state": "ACTIVE", "attrs": {"type": "CALL_RUNBOOK", "inarg_list": [], "runbook_reference": {"kind": "app_runbook", "name": "5be55dea_runbook_cloned_1", "uuid": "aec97efe-ee30-0743-f90f-192698db865c"}}, "timeout_secs": "", "type": "CALL_RUNBOOK", "variable_list": [], "uuid": "9626db53-15f7-4f28-83b0-70061870f14f"}], "description": "", "name": "SYS_GEN__Runbook_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "SYS_GEN__Composite_DAG_Package_44bbdab6_b71e_8989_d19b_aeb8c940122c", "uuid": "fd0d81b5-563f-46bc-b001-b3e967bf45a0"}, "message_list": [], "variable_list": [], "uuid": "6715e54e-21f8-4206-9d27-bb4518c0a012"}, "message_list": [], "name": "action_stop"}], "type": "DEB", "service_local_reference_list": [{"kind": "app_service", "name": "code", "uuid": "0afc9748-88f7-8e8c-f7fe-b2b2cd6669e1"}], "name": "cloudera", "state": "ACTIVE", "version": "", "editables": {}, "message_list": [], "options": {"install_runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [{"kind": "app_task", "name": "PackageInstallTask", "uuid": "b24dd948-e65d-2c1e-52d8-c4d21d615274"}], "name": "6405be68_dag", "state": "ACTIVE", "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "7ec268af-4e53-3c55-930a-740501d41cfb"}, {"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": 
"PackageInstallTask", "state": "ACTIVE", "attrs": {"exit_status": [], "script": "#!/usr/bin/env python\nfrom cm_api.api_client import ApiResource\nfrom cm_api.endpoints.clusters import ApiCluster\nfrom cm_api.endpoints.clusters import create_cluster\nfrom cm_api.endpoints.parcels import ApiParcel\nfrom cm_api.endpoints.parcels import get_parcel\nfrom cm_api.endpoints.cms import ClouderaManager\nfrom cm_api.endpoints.services import ApiService, ApiServiceSetupInfo\nfrom cm_api.endpoints.services import create_service\nfrom cm_api.endpoints.types import ApiCommand, ApiRoleConfigGroupRef\nfrom cm_api.endpoints.role_config_groups import get_role_config_group\nfrom cm_api.endpoints.role_config_groups import ApiRoleConfigGroup\nfrom cm_api.endpoints.roles import ApiRole\nfrom time import sleep\n\nhost_username = 'root'\ncm_repo_url = None\ncdh_version = \"CDH5\" # also valid: \"CDH4\"\ncdh_version_number = \"5\"\n#240\nsleep(240)\n\nhostsIP = [ ]\nhostNames = [ ]\nhostNamesLong = [ ]\nthostNames = [ ]\nthostNames = \"@@{compute.name}@@\"\n#hostsIP = thostsIP.split(,)\nhostNames = thostNames.split(\",\")\n\n#for name in hostName:\n # hostNamesLong = hostName.append(name + '.rf9.local')\n\nfor name in hostNames:\n hostNamesLong.append(name + '.rf9.local')\n print hostNamesLong\n \nprint \"@@{code.cmip}@@\"\nprint \"@@{code.cmPassword}@@\"\n\napi = ApiResource(\"@@{code.cmip}@@\", '7180', \"@@{code.cmAdmin}@@\", \"@@{code.cmPassword}@@\", version=\"@@{code.APIversion}@@\")\n\n#get the CM instance\ncm = api.get_cluster(\"@@{code.clusterName}@@\")\ncm = ClouderaManager(api)\n\n\ncmd = cm.host_install(\"@@{code.hostAdmin}@@\", hostNamesLong, password=\"@@{code.hostPassword}@@\", cm_repo_url=cm_repo_url)\nprint \"Installing hosts. 
This might take a while.\"\nwhile cmd.success == None:\n sleep(15)\n cmd = cmd.fetch()\n\nif cmd.success != True:\n print \"cm_host_install failed: \" + cmd.resultMessage\n# exit(0)\nprint \"cm_host_install succeeded\"\n\ncluster = api.get_cluster(\"@@{code.clusterName}@@\")\n\nhostsadded = cluster.add_hosts(hostNamesLong)\n\nparcels_list = []\n# get and list all available parcels\nprint \"Available parcels:\"\nfor p in cluster.get_all_parcels():\n print p.version\n if p.version.startswith(\"@@{code.cmPackage}@@\") and p.product == \"CDH\":\n parcels_list.append(p)\n\nif len(parcels_list) == 0:\n print \"No \"\n print cdh_version\n print \" parcel found!\"\n exit(0)\n\ncdh_parcel = parcels_list[0]\n\nfor p in parcels_list:\n if p.version > cdh_parcel.version:\n cdh_parcel = p\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" downloaded\"\n\n# distribute the parcel\nprint \"Starting parcel distribution. This might take a while.\"\ncmd = cdh_parcel.start_distribution()\nif cmd.success != True:\n print \"Parcel distribution failed!\"\n exit(0)\n\nprint 'wait 4 min'\n# make sure the distribution finishes\n#while cdh_parcel.stage != \"DISTRIBUTED\":\nsleep(200)\n# cdh_parcel = get_parcel(api, cdh_parcel.product, '5.13.3-1.cdh5.13.3.p0.2', cluster_name)\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" distributed\"\n\n# activate the parcel\ncmd = cdh_parcel.activate()\nif cmd.success != True:\n print \"Parcel activation failed!\"\n exit(0)\n\n# make sure the activation finishes\n#while cdh_parcel.stage != \"ACTIVATED\":\n# cdh_parcel = get_parcel(api, cdh_parcel.product, '5.13.3-1.cdh5.13.3.p0.2', cluster_name)\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" activated\"\n\n\nyarn = cluster.get_service('yarn')\nprint yarn.name\nhostsid = api.get_all_hosts()\n#range = len(hostsid)\n#print 'range: ' + range\n\nfor current in hostsid:\n if current.rackId == '/default/default':\n print 'host name to add' + current.hostname\n currentshort1 = 
current.hostname\n currentshort = currentshort1.split(\".\", 1)\n short = currentshort[0]\n print short\n yarn.create_role(short, 'NODEMANAGER', current.hostId)\n\ncluster.restart(restart_only_stale_services=True, redeploy_client_configuration=True)\n\nexit(0)", "script_type": "sh", "type": "", "command_line_args": "", "login_credential_local_reference": {"kind": "app_credential", "name": "go", "uuid": "f356d2b6-2fac-453c-84ad-d91bc21bb1ba"}}, "timeout_secs": "", "type": "EXEC", "variable_list": [], "uuid": "b24dd948-e65d-2c1e-52d8-c4d21d615274"}], "description": "", "name": "7da2879a_runbook_cloned_1", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "6405be68_dag", "uuid": "7ec268af-4e53-3c55-930a-740501d41cfb"}, "message_list": [], "variable_list": [], "uuid": "2363d280-af23-b97a-557f-1e6a3d156fbb"}, "type": "", "uninstall_runbook": {"task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera", "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}, "description": "", "message_list": [], "child_tasks_local_reference_list": [], "name": "de4520ca_dag", "state": "ACTIVE", "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "uuid": "aabae04b-08bb-466e-76a2-1fcfc3c816cd"}], "description": "", "name": "339b443d_runbook_cloned_1", "state": "ACTIVE", "main_task_local_reference": {"kind": "app_task", "name": "de4520ca_dag", "uuid": "aabae04b-08bb-466e-76a2-1fcfc3c816cd"}, "message_list": [], "variable_list": [], "uuid": "ff185ae5-bc26-095b-6840-9fc957638621"}}, "variable_list": [], "uuid": "44bbdab6-b71e-8989-d19b-aeb8c940122c"}]}, "name": "computeexpand"}, "spec": {"description": "", "resources": {"service_definition_list": [{"singleton": false, "name": "compute", "action_list": [{"critical": false, "type": "system", "description": "System action for creating an application", "name": "action_create", "runbook": {"variable_list": [], "task_definition_list": 
[{"target_any_local_reference": {"kind": "app_service", "name": "compute"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "b3c287b5_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "b3c287b5_dag"}, "name": "eba78c76_runbook"}}, {"critical": false, "type": "system", "description": "System action for deleting an application. Deletes physical machines as well", "name": "action_delete", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "compute"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "1744848a_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "1744848a_dag"}, "name": "740b4861_runbook"}}, {"critical": false, "type": "system", "description": "System action for starting an application", "name": "action_start", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "compute"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "87c10021_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "87c10021_dag"}, "name": "3cedb317_runbook"}}, {"critical": false, "type": "system", "description": "System action for stopping an application", "name": "action_stop", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "compute"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "c68f8d5e_dag"}], "description": "", 
"main_task_local_reference": {"kind": "app_task", "name": "c68f8d5e_dag"}, "name": "5be55dea_runbook"}}, {"critical": false, "type": "system", "description": "System action for restarting an application", "name": "action_restart", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "compute"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "5869d3a7_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "5869d3a7_dag"}, "name": "4ebd0004_runbook"}}], "description": "", "port_list": [], "tier": "", "variable_list": [], "depends_on_list": []}, {"singleton": false, "name": "code", "action_list": [{"critical": false, "type": "system", "description": "System action for creating an application", "name": "action_create", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "code"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "b3c287b5_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "b3c287b5_dag"}, "name": "eba78c76_runbook_cloned_1"}}, {"critical": false, "type": "system", "description": "System action for deleting an application. 
Deletes physical machines as well", "name": "action_delete", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "code"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "1744848a_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "1744848a_dag"}, "name": "740b4861_runbook_cloned_1"}}, {"critical": false, "type": "system", "description": "System action for starting an application", "name": "action_start", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "code"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "87c10021_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "87c10021_dag"}, "name": "3cedb317_runbook_cloned_1"}}, {"critical": false, "type": "system", "description": "System action for stopping an application", "name": "action_stop", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "code"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "c68f8d5e_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "c68f8d5e_dag"}, "name": "5be55dea_runbook_cloned_1"}}, {"critical": false, "type": "system", "description": "System action for restarting an application", "name": "action_restart", "runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_service", "name": "code"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", 
"type": "DAG", "variable_list": [], "name": "5869d3a7_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "5869d3a7_dag"}, "name": "4ebd0004_runbook_cloned_1"}}], "description": "", "port_list": [], "tier": "", "variable_list": [{"val_type": "STRING", "description": "", "value": "admin", "label": "", "attrs": {"type": ""}, "type": "LOCAL", "name": "cmAdmin"}, {"val_type": "STRING", "description": "", "value": "10.15.80.180", "label": "", "attrs": {"type": ""}, "type": "LOCAL", "name": "cmip"}, {"val_type": "STRING", "description": "", "value": "", "label": "", "attrs": {"is_secret_modified": false, "secret_reference": {}, "type": ""}, "type": "SECRET", "name": "cmPassword"}, {"val_type": "STRING", "description": "", "value": "18", "label": "", "attrs": {"type": ""}, "type": "LOCAL", "name": "APIversion"}, {"val_type": "STRING", "description": "", "value": "Cluster 1", "label": "", "attrs": {"type": ""}, "type": "LOCAL", "name": "clusterName"}, {"val_type": "STRING", "description": "", "value": "5.13.3-1.cdh5.13.3", "label": "", "attrs": {"type": ""}, "type": "LOCAL", "name": "cmPackage"}, {"val_type": "STRING", "description": "", "value": "", "label": "", "attrs": {"is_secret_modified": false, "secret_reference": {}, "type": ""}, "type": "SECRET", "name": "hostPassword"}, {"val_type": "STRING", "description": "", "value": "root", "label": "", "attrs": {"type": ""}, "type": "LOCAL", "name": "hostAdmin"}], "depends_on_list": []}], "substrate_definition_list": [{"description": "", "action_list": [], "readiness_probe": {"connection_type": "SSH", "address": "@@{platform.status.resources.nic_list[0].ip_endpoint_list[0].ip}@@", "disable_readiness_probe": false, "timeout_secs": "60", "connection_port": 22}, "editables": {"create_spec": {"resources": {}}}, "os_type": "Linux", "type": "AHV_VM", "create_spec": {"backup_policy": null, "type": "", "name": "yarn@@{calm_application_name}@@-@@{calm_random}@@", "resources": 
{"hardware_clock_timezone": "", "type": "", "boot_config": null, "power_state": "ON", "guest_tools": null}, "availability_zone_reference": null}, "variable_list": [], "name": "yarn"}, {"description": "", "action_list": [], "readiness_probe": {"connection_type": "SSH", "address": "@@{platform.status.resources.nic_list[0].ip_endpoint_list[0].ip}@@", "disable_readiness_probe": false, "timeout_secs": "60", "connection_port": 22}, "editables": {"create_spec": {"resources": {}}}, "os_type": "Linux", "type": "AHV_VM", "create_spec": {"backup_policy": null, "type": "", "name": "run@@{calm_application_name}@@-@@{calm_random}@@", "resources": {"hardware_clock_timezone": "", "type": "", "boot_config": {"boot_device": {"type": "", "disk_address": {"adapter_type": "SCSI", "device_index": 0, "type": ""}}, "type": "", "mac_address": ""}, "power_state": "ON", "guest_tools": null}, "availability_zone_reference": null}, "variable_list": [], "name": "runner"}], "credential_definition_list": [{"username": "root", "secret": {"attrs": {"is_secret_modified": false, "secret_reference": {}}}, "type": "PASSWORD", "description": "", "name": "go"}], "package_definition_list": [{"description": "", "action_list": [], "service_local_reference_list": [{"kind": "app_service", "name": "compute"}], "version": "", "type": "DEB", "options": {"install_runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "deployCPUa"}, "description": "", "child_tasks_local_reference_list": [{"kind": "app_task", "name": "PackageInstallTask"}], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "6405be68_dag"}, {"target_any_local_reference": {"kind": "app_package", "name": "deployCPUa"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"exit_status": [], "script": "#!/usr/bin/env bash\n\nhostnamectl set-hostname @@{name}@@.rf9.local\n\nsystemctl restart network.service", "script_type": 
"sh", "type": "", "command_line_args": "", "login_credential_local_reference": {"kind": "app_credential", "name": "go"}}, "timeout_secs": "", "type": "EXEC", "variable_list": [], "name": "PackageInstallTask"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "6405be68_dag"}, "name": "7da2879a_runbook"}, "type": "", "uninstall_runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "deployCPUa"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "de4520ca_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "de4520ca_dag"}, "name": "339b443d_runbook"}}, "variable_list": [], "name": "deployCPUa"}, {"description": "", "action_list": [], "service_local_reference_list": [{"kind": "app_service", "name": "code"}], "version": "", "type": "DEB", "options": {"install_runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera"}, "description": "", "child_tasks_local_reference_list": [{"kind": "app_task", "name": "PackageInstallTask"}], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "6405be68_dag"}, {"target_any_local_reference": {"kind": "app_package", "name": "cloudera"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"exit_status": [], "script": "#!/usr/bin/env python\nfrom cm_api.api_client import ApiResource\nfrom cm_api.endpoints.clusters import ApiCluster\nfrom cm_api.endpoints.clusters import create_cluster\nfrom cm_api.endpoints.parcels import ApiParcel\nfrom cm_api.endpoints.parcels import get_parcel\nfrom cm_api.endpoints.cms import ClouderaManager\nfrom cm_api.endpoints.services import ApiService, ApiServiceSetupInfo\nfrom cm_api.endpoints.services import create_service\nfrom 
cm_api.endpoints.types import ApiCommand, ApiRoleConfigGroupRef\nfrom cm_api.endpoints.role_config_groups import get_role_config_group\nfrom cm_api.endpoints.role_config_groups import ApiRoleConfigGroup\nfrom cm_api.endpoints.roles import ApiRole\nfrom time import sleep\n\nhost_username = 'root'\ncm_repo_url = None\ncdh_version = \"CDH5\" # also valid: \"CDH4\"\ncdh_version_number = \"5\"\n#240\nsleep(240)\n\nhostsIP = [ ]\nhostNames = [ ]\nhostNamesLong = [ ]\nthostNames = [ ]\nthostNames = \"@@{compute.name}@@\"\n#hostsIP = thostsIP.split(,)\nhostNames = thostNames.split(\",\")\n\n#for name in hostName:\n # hostNamesLong = hostName.append(name + '.rf9.local')\n\nfor name in hostNames:\n hostNamesLong.append(name + '.rf9.local')\n print hostNamesLong\n \nprint \"@@{code.cmip}@@\"\nprint \"@@{code.cmPassword}@@\"\n\napi = ApiResource(\"@@{code.cmip}@@\", '7180', \"@@{code.cmAdmin}@@\", \"@@{code.cmPassword}@@\", version=\"@@{code.APIversion}@@\")\n\n#get the CM instance\ncm = api.get_cluster(\"@@{code.clusterName}@@\")\ncm = ClouderaManager(api)\n\n\ncmd = cm.host_install(\"@@{code.hostAdmin}@@\", hostNamesLong, password=\"@@{code.hostPassword}@@\", cm_repo_url=cm_repo_url)\nprint \"Installing hosts. 
This might take a while.\"\nwhile cmd.success == None:\n sleep(15)\n cmd = cmd.fetch()\n\nif cmd.success != True:\n print \"cm_host_install failed: \" + cmd.resultMessage\n# exit(0)\nprint \"cm_host_install succeeded\"\n\ncluster = api.get_cluster(\"@@{code.clusterName}@@\")\n\nhostsadded = cluster.add_hosts(hostNamesLong)\n\nparcels_list = []\n# get and list all available parcels\nprint \"Available parcels:\"\nfor p in cluster.get_all_parcels():\n print p.version\n if p.version.startswith(\"@@{code.cmPackage}@@\") and p.product == \"CDH\":\n parcels_list.append(p)\n\nif len(parcels_list) == 0:\n print \"No \"\n print cdh_version\n print \" parcel found!\"\n exit(0)\n\ncdh_parcel = parcels_list[0]\n\nfor p in parcels_list:\n if p.version > cdh_parcel.version:\n cdh_parcel = p\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" downloaded\"\n\n# distribute the parcel\nprint \"Starting parcel distribution. This might take a while.\"\ncmd = cdh_parcel.start_distribution()\nif cmd.success != True:\n print \"Parcel distribution failed!\"\n exit(0)\n\nprint 'wait 4 min'\n# make sure the distribution finishes\n#while cdh_parcel.stage != \"DISTRIBUTED\":\nsleep(200)\n# cdh_parcel = get_parcel(api, cdh_parcel.product, '5.13.3-1.cdh5.13.3.p0.2', cluster_name)\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" distributed\"\n\n# activate the parcel\ncmd = cdh_parcel.activate()\nif cmd.success != True:\n print \"Parcel activation failed!\"\n exit(0)\n\n# make sure the activation finishes\n#while cdh_parcel.stage != \"ACTIVATED\":\n# cdh_parcel = get_parcel(api, cdh_parcel.product, '5.13.3-1.cdh5.13.3.p0.2', cluster_name)\n\nprint cdh_parcel.product + ' ' + cdh_parcel.version + \" activated\"\n\n\nyarn = cluster.get_service('yarn')\nprint yarn.name\nhostsid = api.get_all_hosts()\n#range = len(hostsid)\n#print 'range: ' + range\n\nfor current in hostsid:\n if current.rackId == '/default/default':\n print 'host name to add' + current.hostname\n currentshort1 = 
current.hostname\n currentshort = currentshort1.split(\".\", 1)\n short = currentshort[0]\n print short\n yarn.create_role(short, 'NODEMANAGER', current.hostId)\n\ncluster.restart(restart_only_stale_services=True, redeploy_client_configuration=True)\n\nexit(0)", "script_type": "sh", "type": "", "command_line_args": "", "login_credential_local_reference": {"kind": "app_credential", "name": "go"}}, "timeout_secs": "", "type": "EXEC", "variable_list": [], "name": "PackageInstallTask"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "6405be68_dag"}, "name": "7da2879a_runbook_cloned_1"}, "type": "", "uninstall_runbook": {"variable_list": [], "task_definition_list": [{"target_any_local_reference": {"kind": "app_package", "name": "cloudera"}, "description": "", "child_tasks_local_reference_list": [], "attrs": {"edges": [], "type": ""}, "timeout_secs": "", "type": "DAG", "variable_list": [], "name": "de4520ca_dag"}], "description": "", "main_task_local_reference": {"kind": "app_task", "name": "de4520ca_dag"}, "name": "339b443d_runbook_cloned_1"}}, "variable_list": [], "name": "cloudera"}], "app_profile_list": [{"deployment_create_list": [{"name": "76df5aad_deployment", "action_list": [], "package_local_reference_list": [{"kind": "app_package", "name": "deployCPUa"}], "editables": {"min_replicas": true}, "max_replicas": "1", "substrate_local_reference": {"kind": "app_substrate", "name": "yarn"}, "min_replicas": "1", "variable_list": [], "description": ""}, {"description": "", "action_list": [], "package_local_reference_list": [{"kind": "app_package", "name": "cloudera"}], "max_replicas": "1", "substrate_local_reference": {"kind": "app_substrate", "name": "runner"}, "min_replicas": "1", "variable_list": [], "name": "76df5aad_deployment_cloned_1"}], "variable_list": [], "description": "", "action_list": [], "name": "Default1"}], "default_credential_local_reference": {"kind": "app_credential", "name": "go"}, "client_attrs": {"None": {"y": 450, 
"x": 810}, "code": {"y": 380, "x": 760}, "compute": {"y": 380, "x": 580}}}, "name": "computeexpand"}, "api_version": "3.0", "metadata": {"last_update_time": "1526503361119604", "creation_time": "1520027297574039", "kind": "blueprint", "spec_version": 85, "name": "computeexpand"}}

Feb
05

Demo: ESXi Backup with Nutanix Snapshots with HYCU

In addition to supporting Nutanix clusters that use the native Nutanix AHV hypervisor, HYCU introduces support for Nutanix environments that use VMware ESXi hypervisors. By using the native Nutanix storage layer snapshot technology, VMware snapshot stuns are avoided.

Jan
31

HYCU – Backing up virtual machines from Replicated Nutanix Snapshots

In remote office/branch office (ROBO) environments, HYCU allows you to back up virtual machines from their replicas on the central site Nutanix cluster. Backing up data from replicas without having to transfer the virtual machine data twice frees up WAN bandwidth for other business processes.

To be able to back up virtual machines from their replicas, make sure that the replication retention on the Nutanix cluster is adjusted to the backup policy’s RPO. This allows HYCU to use the Changed Region Tracking (CRT) feature to get a list of changed data since the last snapshot and perform an incremental backup. For example, if the Nutanix schedule interval is two hours and the RPO of the HYCU backup policy is eight hours, the retention policy for the remote site must be set to 4 or more snapshots (that is, at least the last four snapshots must be kept).

    Dec
    15

    Nutanix Calm Blueprints Overview

    Nutanix Calm Overview

    A blueprint is the framework for every application that you model by using Nutanix Calm. Blueprints are templates that describe all the steps that are required to provision, configure, and execute tasks on the services and applications that are created. You can create a blueprint to represent the architecture of your application and then run the blueprint repeatedly to create an instance, provision, and launch your applications. A blueprint also defines the lifecycle of an application and its underlying infrastructure starting from the creation of the application to the actions that are carried out on a blueprint until the termination of the application.

    You can use blueprints to model applications of various complexities, from simply provisioning a single virtual machine to provisioning and managing a multi-node, multi-tier application.

    The blueprint editor provides a graphical representation of various components that enables you to visualize and configure the components and their dependencies in your environment.

    repeatable and auditable automation

    Dec
    12

    Running IT: Docker and Cilium for Enterprise Network Security for Micro-Services

    Well, I think 40 min is about as long as I can last watching an IT-related video while running; after that I need music! This time I watched another video from DockerCon: Cilium – Kernel Native Security & DDOS Mitigation for Microservices with BPF.

    Skip to 7:23: The quick overview of the presentation is that managing IP Tables to lock down micro-services isn’t going to scale and will be almost impossible to manage. Cilium is open source software for providing and transparently securing network connectivity and load balancing between application workloads such as application containers or processes. Cilium operates at Layer 3/4 to provide traditional networking and security services as well as Layer 7 to protect and secure use of modern application protocols such as HTTP, gRPC and Kafka. BPF is used by a lot of the big web-scale properties like Facebook and Netflix to secure their environments and to provide troubleshooting. Like anything with a lot of options, there are a lot of ways to shoot yourself in the foot, so Cilium provides the wrapper to get it easily deployed and configured.

    The presentation uses the example of locking down a Kafka cluster via Layer 7 instead of having the whole API left wide open, which would happen if you were only using IP tables. Kafka is used for building real-time pipelines and streaming apps. Kafka is horizontally scalable and fault-tolerant, so it’s a good choice to run in Docker. Kafka is used by 1/3 of Fortune 500 companies.

    Cilium Architecture

    Cilium Integrates with:

    Docker
    Kubernetes
    Mesos

    Cilium runs as an agent on every host.
    Cilium can provide policy for Host to Docker micro-service and even between two containers on the same host.

    The demo didn’t pan out, but the 2nd half of the presentation talks about Cilium using BPF with XDP. XDP is a further step in evolution and makes it possible to run a specific flavor of BPF programs from the network driver with direct access to the packet’s DMA buffer. This is, by definition, the earliest possible point in the software stack where programs can be attached, allowing for a programmable, high-performance packet processor in the Linux kernel networking data path.

    Since XDP can act earlier, at the NIC, versus iptables with ipset, CPU can be saved, rules load faster, and latency under load is a lot better with XDP.

    Dec
    05

    Handling Network Partition with Near-Sync

    Near-Sync is GA!!!

    Part 1: Near-Sync Primer on Nutanix
    Part 2: Recovery Points and Schedules with Near-Sync

    Perform the following procedure if a network partition (network isolation) between the primary and remote site occurs.

    The following scenarios may occur when a network partition occurs.

    1. The network between the primary site (site A) and the remote site (site B) is restored and both sites are working.
    Primary site tries to transition into NearSync automatically between site A and site B. No manual intervention is required.

    2. Site B is not working or is destroyed (for whatever reason), and you create a new site (site C) and want to establish a sub-hourly schedule from A to C.
    Configure sub-hourly schedule from A to C.
    The configuration between A to C should succeed. No other manual intervention is required.

    3. Site A is not working or is destroyed (for whatever reason), and you create a new site (site C) and try to configure a sub-hourly schedule from B to C.
    Activate the protection domain on site B and set up the schedule between site B and site C.

    Dec
    01

    Supported Anti-Virus Offload for Nutanix Native File Services(AFS)


    As the list grows with releases I will try to keep this updated.

    As of AFS 2.2.1 supported AV ICAP based vendors:

    McAfee Virus Scan Enterprise for Storage 1.2.0

    Symantec Protection Engine 7.9.0

    Kaspersky Security 10

    Sophos Antivirus

    Nutanix recommends that the following file extensions for user profiles be added to the exclusion list when using the AFS Antivirus scanning:
    .dat
    .ini
    .pol

    Symantec Pre-Req

    Each Symantec ICAP server needs the hot fix (SPE_7.9.0_HF03.zip) installed from http://www.symantec.com/docs/TECH216348.

    Kaspersky Pre-Req
    When running the Database Update task with the network folder as an update source, you might encounter an error after entering credentials.

    Solution

    To resolve, download and install the critical fix 13017 provided by Kaspersky

    Download Link:

    https://support.kaspersky.com/13017

    Nov
    19

    Nutanix Additional Cluster Health Tooling: Panacea

    There are over 450 health checks in the Cluster Health UI inside of Prism Element. To provide additional help, a new script called “panacea” has been added. Panacea is bundled with NCC 3.5 and later to provide a user-friendly interface for very advanced troubleshooting. The Nutanix Support team can take these logs and correlate results so you don’t have to wait for the problem to recur before fixing the issue.

    The ability to quickly track retransmissions with a very low granularity for a distributed system is very important. I am hoping in the future this new tooling will play into Nutanix’s ability for degraded node detection. Panacea can be run for a specific time interval during which logs will be analyzed; possible options are:
    --last_no_of_hours
    --last_no_of_days
    --start_time
    --end_time

    Log in to any CVM within the cluster; the command can be run from /home/nutanix/ncc/panacea/

    The below output is from using the tool when digging for network information.

    Network outage can cause degraded performance. Cluster network outage
    detection is based on following schemes:
    1) Cassandra Paxos Request Timeout Exceptions/Message Drops
    2) CVM Degraded node scoring
    3) Ping latency

    In some cases, intermittent network issue might NOT be reflected in ping latency, but it does have impact on TCP throughput and packet
    retransmission, leading to more request timeout exceptions.

    TCP Retransmission:
    -------------------
    By default, Panacea tracks the TCP connections(destination port 7000) used by Cassandra between peer CVMs. This table displays stats of
    packet Retransmissions per min in TCP socket. Frequent retransmission could cause delay in application, and may reflect the congestion status on the host or in the network.
    1) Local: Local CVM IP address
    2) Remote: Remote CVM IP address
    3) Max/Mean/Min/STD: number of retransmissions/min, calcuated from
    samples where retransmission happened.
    4) %: Value distribution, % of samples is less than the value
    = 25, 50, and 75
    5) Ratio: N/M, N = number of samples where retransmission happened
    M = total samples in the entire data set

    +--------------+--------------+-------+------+------+------+------+------+------+---------+
    | Local | Remote | Max | Mean | Min | STD | 25% | 50% | 75% | Ratio |
    +--------------+--------------+-------+------+------+------+------+------+------+---------+
    | XX.X.XXX.110 | XX.X.XXX.109 | 19.00 | 1.61 | 1.00 | 1.90 | 1.00 | 1.00 | 2.00 | 133/279 |
    | XX.X.XXX.111 | XX.X.XXX.109 | 11.00 | 2.41 | 1.00 | 1.54 | 1.00 | 2.00 | 3.00 | 236/280 |
    | XX.X.XXX.112 | XX.X.XXX.109 | 12.00 | 2.40 | 1.00 | 1.59 | 1.00 | 2.00 | 3.00 | 235/279 |
    | XX.X.XXX.109 | XX.X.XXX.110 | 32.00 | 3.04 | 1.00 | 2.70 | 1.00 | 2.00 | 4.00 | 252/279 |
    | XX.X.XXX.111 | XX.X.XXX.110 | 9.00 | 1.51 | 1.00 | 1.02 | 1.00 | 1.00 | 2.00 | 152/280 |
    | XX.X.XXX.112 | XX.X.XXX.110 | 11.00 | 2.21 | 1.00 | 1.31 | 1.00 | 2.00 | 3.00 | 231/279 |
    | XX.X.XXX.109 | XX.X.XXX.111 | 9.00 | 2.01 | 1.00 | 1.20 | 1.00 | 2.00 | 2.00 | 202/279 |
    | XX.X.XXX.110 | XX.X.XXX.111 | 10.00 | 2.70 | 1.00 | 1.68 | 1.00 | 2.00 | 3.00 | 244/279 |
    | XX.X.XXX.112 | XX.X.XXX.111 | 4.00 | 1.46 | 1.00 | 0.76 | 1.00 | 1.00 | 2.00 | 135/279 |
    | XX.X.XXX.109 | XX.X.XXX.112 | 5.00 | 1.56 | 1.00 | 0.85 | 1.00 | 1.00 | 2.00 | 150/279 |
    | XX.X.XXX.110 | XX.X.XXX.112 | 6.00 | 2.05 | 1.00 | 1.18 | 1.00 | 2.00 | 3.00 | 234/279 |
    | XX.X.XXX.111 | XX.X.XXX.112 | 16.00 | 3.26 | 1.00 | 2.24 | 2.00 | 3.00 | 4.00 | 261/280 |
    +--------------+--------------+-------+------+------+------+------+------+------+---------+

    Most of the 450 Cluster Health checks inside of Prism with automatic alerting

    CVM | CPU
    CPU Utilization

    Load Level

    Node Avg Load – Critical

    CVM | Disk
    Boot RAID Health

    Disk Configuration

    Disk Diagnostic Status

    Disk Metadata Usage

    Disk Offline Status

    HDD Disk Usage

    HDD I/O Latency

    HDD S.M.A.R.T Health Status

    Metadata Disk Mounted Check

    Metro Vstore Mount Status

    Non SED Disk Inserted Check

    Nutanix System Partitions Usage High

    Password Protected Disk Status

    Physical Disk Remove Check

    Physical Disk Status

    SED Operation Status

    SSD I/O Latency

    CVM | Hardware
    Agent VM Restoration

    FT2 Configuration

    Host Evacuation Status

    Node Status

    VM HA Healing Status

    VM HA Status

    VMs Restart Status

    CVM | Memory
    CVM Memory Pinned Check

    CVM Memory Usage

    Kernel Memory Usage

    CVM | Network
    CVM IP Address Configuration

    CVM NTP Time Synchronization

    Duplicate Remote Cluster ID Check

    Host IP Pingable

    IP Configuration

    SMTP Configuration

    Subnet Configuration

    Virtual IP Configuration

    vCenter Connection Check

    CVM | Protection Domain
    Entities Restored Check

    Restored Entities Protected

    CVM | Services
    Admin User API Authentication Check

    CVM Rebooted Check

    CVM Services Status

    Cassandra Waiting For Disk Replacement

    Certificate Creation Status

    Cluster In Override Mode

    Cluster In Read-Only Mode

    Curator Job Status

    Curator Scan Status

    Kerberos Clock Skew Status

    Metadata Drive AutoAdd Disabled Check

    Metadata Drive Detached Check

    Metadata Drive Failed Check

    Metadata Drive Ring Check

    Metadata DynRingChangeOp Slow Check

    Metadata DynRingChangeOp Status

    Metadata Imbalance Check

    Metadata Size

    Node Degradation Status

    RemoteSiteHighLatency

    Stargate Responsive

    Stargate Status

    Upgrade Bundle Available

    CVM | Storage Capacity
    Compression Status

    Finger Printing Status

    Metadata Usage

    NFS Metadata Size Overshoot

    On-Disk Dedup Status

    Space Reservation Status

    vDisk Block Map Usage

    vDisk Block Map Usage Warning

    Cluster | CPU
    CPU type on chassis check

    Cluster | Disk
    CVM startup dependency check

    Disk online check

    Duplicate disk id check

    Flash Mode Configuration

    Flash Mode Enabled VM Power Status

    Flash Mode Usage

    Incomplete disk removal

    Storage Pool Flash Mode Configuration

    System Defined Flash Mode Usage Limit

    Cluster | Hardware
    Power Supply Status

    Cluster | Network
    CVM Passwordless Connectivity Check

    CVM to CVM Connectivity

    Duplicate CVM IP check

    NIC driver and firmware version check

    Time Drift

    Cluster | Protection Domain
    Duplicate VM names

    Internal Consistency Groups Check

    Linked Clones in high frequency snapshot schedule

    SSD Snapshot reserve space check

    Snapshot file location check

    Cluster | Remote Site
    Cloud Remote Alert

    Remote Site virtual external IP(VIP)

    Cluster | Services
    AWS Instance Check

    AWS Instance Type Check

    Acropolis Dynamic Scheduler Status

    Alert Manager Service Check

    Automatic Dedup disabled check

    Automatic disabling of Deduplication

    Backup snapshots on metro secondary check

    CPS Deployment Evaluation Mode

    CVM same timezone check

    CVM virtual hardware version check

    Cassandra Similar Token check

    Cassandra metadata balanced across CVMs

    Cassandra nodes up

    Cassandra service status check

    Cassandra tokens consistent

    Check that cluster virtual IP address is part of cluster external subnet

    Checkpoint snapshot on Metro configured Protection Domain

    Cloud Gflags Check

    Cloud Remote Version Check

    Cloud remote check

    Cluster NCC version check

    Cluster version check

    Compression disabled check

    Curator scan time elapsed check

    Datastore VM Count Check

    E-mail alerts check

    E-mail alerts contacts configuration

    HTTP proxy check

    Hardware configuration validation

    High disk space usage

    Hypervisor version check

    LDAP configuration

    Linked clones on Dedup check

    Multiple vCenter Servers Discovered

    NGT CA Setup Check

    Oplog episodes check

    Pulse configuration

    RPO script validation on storage heavy cluster

    Remote Support Status

    Report Generation Failure

    Report Quota Scan Failure

    Send Report Through E-mail Failure

    Snapshot chain height check

    Snapshots space utilization status

    Storage Pool SSD tier usage

    Stretch Connectivity Lost

    VM group Snapshot and Current Mismatch

    Zookeeper active on all CVMs

    Zookeeper fault tolerance check

    Zookeeper nodes distributed in multi-block cluster

    vDisk Count Check

    Cluster | Storage Capacity
    Erasure Code Configuration

    Erasure Code Garbage

    Erasure coding pending check

    Erasure-Code-Delay Configuration

    High Space Usage on Storage Container

    Storage Container RF Status

    Storage Container Space Usage

    StoragePool Space Usage

    Volume Group Space Usage

    Data Protection | Protection Domain
    Aged Third-party Backup Snapshot Check

    Check VHDX Disks

    Clone Age Check

    Clone Count Check

    Consistency Group Configuration

    Cross Hypervisor NGT Installation Check

    EntityRestoreAbort

    External iSCSI Attachments Not Snapshotted

    Failed To Mount NGT ISO On Recovery of VM

    Failed To Recover NGT Information

    Failed To Recover NGT Information for VM

    Failed To Snapshot Entities

    Incorrect Cluster Information in Remote Site

    Metadata Volume Snapshot Persistent

    Metadata Volume Snapshot Status

    Metro Availability

    Metro Availability Prechecks Failed

    Metro Availability Secondary PD sync check

    Metro Old Primary Site Hosting VMs

    Metro Protection domain VMs running at Sub-optimal performance

    Metro Vstore Symlinks Check

    Metro/Vstore Consistency Group File Count Check

    Metro/Vstore Protection Domain File Count Check

    NGT Configuration

    PD Active

    PD Change Mode Status

    PD Full Replication Status

    PD Replication Expiry Status

    PD Replication Skipped Status

    PD Snapshot Retrieval

    PD Snapshot Status

    PD VM Action Status

    PD VM Registration Status

    Protected VM CBR Capability

    Protected VM Not Found

    Protected VMs Not Found

    Protected VMs Storage Configuration

    Protected Volume Group Not Found

    Protected Volume Groups Not Found

    Protection Domain Decoupled Status

    Protection Domain Initial Replication Pending to Remote Site

    Protection Domain Replication Stuck

    Protection Domain Snapshots Delayed

    Protection Domain Snapshots Queued for Replication to Remote Site

    Protection Domain VM Count Check

    Protection Domain fallback to lower frequency replications to remote

    Protection Domain transitioning to higher frequency snapshot schedule

    Protection Domain transitioning to lower frequency snapshot schedule

    Protection Domains sharing VMs

    Related Entity Protection Status

    Remote Site NGT Support

    Remote Site Snapshot Replication Status

    Remote Stargate Version Check

    Replication Of Deduped Entity

    Self service restore operation Failed

    Snapshot Crash Consistent

    Snapshot Symlink Check

    Storage Container Mount

    Updating Metro Failure Handling Failed

    Updating Metro Failure Handling Remote Failed

    VM Registration Failure

    VM Registration Warning

    VSS Scripts Not Installed

    VSS Snapshot Status

    VSS VM Reachable

    VStore Snapshot Status

    Volume Group Action Status

    Volume Group Attachments Not Restored

    Vstore Replication To Backup Only Remote

    Data Protection | Remote Site
    Automatic Promote Metro Availability

    Cloud Remote Operation Failure

    Cloud Remote Site failed to start

    LWS store allocation in remote too long

    Manual Break Metro Availability

    Manual Promote Metro Availability

    Metro Connectivity

    Remote Site Health

    Remote Site Network Configuration

    Remote Site Network Mapping Configuration

    Remote Site Operation Mode ReadOnly

    Remote Site Tunnel Status

    Data Protection | Witness
    Authentication Failed in Witness

    Witness Not Configured

    Witness Not Reachable

    File server | Host
    File Server Upgrade Task Stuck Check

    File Server VM Status

    Multiple File Server Versions Check

    File server | Network
    File Server Entities Not Protected

    File Server Invalid Snapshot Warning

    File Server Network Reachable

    File Server PD Active On Multiple Sites

    File Server Reachable

    File Server Status

    Remote Site Not File Server Capable

    File server | Services
    Failed to add one or more file server admin users or groups

    File Server AntiVirus – All ICAP Servers Down

    File Server AntiVirus – Excessive Quarantined / Unquarantined Files

    File Server AntiVirus – ICAP Server Down

    File Server AntiVirus – Quarantined / Unquarantined Files Limit Reached

    File Server AntiVirus – Scan Queue Full on FSVM

    File Server AntiVirus – Scan Queue Piling Up on FSVM

    File Server Clone – Snapshot invalid

    File Server Clone Failed

    File Server Rename Failed

    Maximum connections limit reached on a file server VM

    Skipped File Server Compatibility Check

    File server | Storage Capacity
    FSVM Time Drift Status

    Failed To Run File Server Metadata Fixer Successfully

    Failed To Set VM-to-VM Anti Affinity Rule

    File Server AD Connectivity Failure

    File Server Activation Failed

    File Server CVM IP update failed

    File Server DNS Updates Pending

    File Server Home Share Creation Failed

    File Server In Heterogeneous State

    File Server Iscsi Discovery Failure

    File Server Join Domain Status

    File Server Network Change Failed

    File Server Node Join Domain Status

    File Server Performance Optimization Recommended

    File Server Quota allocation failed for user

    File Server Scale-out Status

    File Server Share Deletion Failed

    File Server Site Not Found

    File Server Space Usage

    File Server Space Usage Critical

    File Server Storage Cleanup Failure

    File Server Storage Status

    File Server Unavailable Check

    File Server Upgrade Failed

    Incompatible File Server Activation

    Share Utilization Reached Configured Limit

    Host | CPU
    CPU Utilization

    Host | Disk
    All-flash Node Intermixed Check

    Host disk usage high

    NVMe Status Check

    SATA DOM 3ME Date and Firmware Status

    SATA DOM Guest VM Check

    SATADOM Connection Status

    SATADOM Status

    SATADOM Wearout Status

    SATADOM-SL 3IE3 Wearout Status

    Samsung PM1633 FW Version

    Samsung PM1633 Version Compatibility

    Samsung PM1633 Wearout Status

    Samsung PM863a config check

    Toshiba PM3 Status

    Toshiba PM4 Config

    Toshiba PM4 FW Version

    Toshiba PM4 Status

    Toshiba PM4 Version Compatibility

    Host | Hardware
    CPU Temperature Fetch

    CPU Temperature High

    CPU Voltage

    CPU-VRM Temperature

    Correctable ECC Errors 10 Days

    Correctable ECC Errors One Day

    DIMM Voltage

    DIMM temperature high

    DIMM-VRM Temperature

    Fan Speed High

    Fan Speed Low

    GPU Status

    GPU Temperature High

    Hardware Clock Status

    IPMI SDR Status

    SAS Connectivity

    System temperature high

    Host | Memory
    Memory Swap Rate

    Ram Fault Status

    Host | Network
    10 GbE Compliance

    Hypervisor IP Address Configuration

    IPMI IP Address Configuration

    Mellanox NIC Mixed Family check

    Mellanox NIC Status check

    NIC Flapping Check

    NIC Link Down

    Node NIC Error Rate High

    Receive Packet Loss

    Transmit Packet Loss

    Host | Services
    Datastore Remount Status

    Node | Disk
    Boot device connection check

    Boot device status check

    Descriptors to deleted files check

    FusionIO PCIE-SSD: ECC errors check

    Intel Drive: ECC errors

    Intel SSD Configuration

    LSI Disk controller firmware status

    M.2 Boot Disk change check

    M.2 Intel S3520 host boot drive status check

    M.2 Micron5100 host boot drive status check

    SATA controller

    SSD Firmware Check

    Samsung PM863a FW version check

    Samsung PM863a status check

    Samsung PM863a version compatibility check

    Samsung SM863 SSD status check

    Samsung SM863a version compatibility check

    Node | Hardware
    IPMI connectivity check

    IPMI sel assertions check

    IPMI sel log fetch check

    IPMI sel power failure check

    IPMI sensor values check

    M10 GPU check

    M10 and M60 GPU Mixed check

    M60 GPU check

    Node | Network
    CVM 10 GB uplink check

    Inter-CVM connectivity check

    NTP configuration check

    Storage routed to alternate CVM check

    Node | Protection Domain
    ESX VM Virtual Hardware Version Compatible

    Node | Services
    .dvsData directory in local datastore

    Advanced Encryption Standard (AES) enabled

    Autobackup check

    BMC BIOS version check

    CVM memory check

    CVM port group renamed

    Cassandra Keyspace/Column family check

    Cassandra memory usage

    Cassandra service restarts check

    Cluster Services Down Check

    DIMM Config Check

    DIMMs Interoperability Check

    Deduplication efficiency check

    Degraded Node check

    Detected VMs with non local data

    EOF check

    ESXi AHCI Driver version check

    ESXi APD handling check

    ESXi CPU model and UVM EVC mode check

    ESXi Driver compatibility check

    ESXi NFS heartbeat timeout check

    ESXi RAM disk full check

    ESXi RAM disk root usage

    ESXi Scratch Configuration

    ESXi TCP delayed ACK check

    ESXi VAAI plugin enabled

    ESXi VAAI plugin installed

    ESXi configured VMK check

    ESXi services check

    ESXi version compatibility

    File permissions check

    Files in a stretched VM should be in the same Storage Container

    GPU drivers installed

    Garbage egroups check

    Host passwordless SSH

    Ivy Bridge performance check

    Mellanox NIC Driver version check

    NFS file count check

    NSC(Nutanix Service Center) server FQDN resolution

    NTP server FQDN resolution

    Network adapter setting check

    Non default gflags check

    Notifications dropped check

    PYNFS dependency check

    RC local script exit statement present

    Remote syslog server check

    SMTP server FQDN resolution

    Sanity check on local.sh

    VM IDE bus check

    VMKNICs subnets check

    VMware hostd service check

    Virtual IP check

    Zookeeper Alias Check

    localcli check

    vim command check

    Nutanix Guest Tools | VM
    PostThaw Script Execution Failed

    Other Checks
    LWS Store Full

    LWS store allocation too long

    Recovery Point Objective Cannot Be Met

    VM | CPU
    CPU Utilization

    VM | Disk
    I/O Latency

    Orphan VM Snapshot Check

    VM | Memory
    Memory Pressure

    Memory Swap Rate

    VM | Network
    Memory Usage

    Receive Packet Loss

    Transmit Packet Loss

    VM | Nutanix Guest Tools
    Disk Configuration Update Failed

    VM Guest Power Op Failed

    iSCSI Configuration Failed

    VM | Remote Site
    VM Virtual Hardware Version Compatible

    VM | Services
    VM Action Status

    VM | Virtual Machine
    Application Consistent Snapshot Skipped

    NGT Mount Failure

    NGT Version Incompatible

    Temporary Hypervisor Snapshot Cleanup Failed

    VSS Snapshot Aborted

    VSS Snapshot Not Supported

    host | Network
    Hypervisor time synchronized

    Sep
    07

    Windows Get Some Love with #Docker EE 17.06

    With the new release of Docker 17.06 EE, Windows containers get lots of added features. First up is the ability to run Windows and Linux worker nodes in the same cluster. This is great because you have centralized security and logging across your whole environment. Your .NET and Java teams can live in peace and consolidate your infrastructure instead of spinning up separate environments.

    Continuous scanning for vulnerabilities in Windows images was added if you have an Advanced EE license. Not only does it scan images, it will also alert when new vulnerabilities are found in existing images.

    Bringing everything together, you can use the same overlay networks to connect your application, for example SQL Server on Windows and web servers running on Linux. Your developers can create a single compose file covering both SQL and web servers.

    Other New Windows related features in Docker 17.06:

    Windows Server 2016 support
    Windows 10586 is marked as deprecated; it will not be supported going forward in stable releases
    Integration with Docker Cloud, with the ability to control remote Swarms from the local command line interface (CLI) and view your repositories
    Unified login between the Docker CLI and Docker Hub, Docker Cloud.
    Sharing a drive can be done on demand, the first time a mount is requested
    Add an experimental DNS name for the host: docker.for.win.localhost
    Support for client (i.e. “login”) certificates for authenticating registry access (fixes docker/for-win#569)
    New installer experience

    Aug
    29

    VMworld attendees get to the Docker booth to save money & time like Visa.

    The Docker booth is right beside the Nutanix booth at VMworld this year, so I have seen lots of people there, but not 23,000, though there should be. Docker has been a part of all the announcements, whether you realized it or not. There has been lots of talk about Google with Kubernetes. Kubernetes still requires Docker as the container engine, so whether it’s Swarm or Kubernetes you’re going to be using Docker. If you want Enterprise support, Docker is the booth you want to be visiting to learn what they can do to help you develop better end-to-end software while saving you money.

    Docker EE has been in production at Visa for over 6 months, and Visa is seeing improvements in a number of ways:

    Provisioning time: Visa can now provision in seconds rather than days even while more application teams join the effort. They can also deliver just-in-time infrastructure across multiple datacenters around the world with a standardized format that works across their diverse set of applications.
    Patching & maintenance: With Docker, Visa can simply redeploy an application with a new image. This also allows Visa to respond quickly to new threats as they can deploy patches across their entire environment at one time.
    Tech Refresh: Once applications are containerized with Docker, developers do not have to worry about the underlying infrastructure; the infrastructure is invisible.
    Multi-tenancy: Docker containers provide both space and time division multiplexing by allowing Visa to provision and deprovision microservices quickly as needed. This allows them to strategically place new services into the available infrastructure, which has allowed the team to support 10x the scale they could previously.

    Visa moved a VM-based environment to containers running on bare metal and cut the time to provision and decommission its first containerized app by 50%. By saving time and money on the existing infrastructure and applications, organizations can reinvest the savings — both the time and money — in transforming the business.

    BTW, Nutanix can do bare metal or run AHV to provide a great experience for containers with our own Docker Volume plugin.