From 2cf4061388df1a8ad02264094a1553b4f83ff23a Mon Sep 17 00:00:00 2001 From: Yuri Date: Wed, 28 May 2025 13:28:33 -0700 Subject: [PATCH 01/10] Add worker visibility API - heartbeat and list worker status --- openapi/openapiv2.json | 319 ++++++++++++++++++ openapi/openapiv3.yaml | 241 +++++++++++++ temporal/api/worker/v1/message.proto | 59 ++++ .../workflowservice/v1/request_response.proto | 28 ++ temporal/api/workflowservice/v1/service.proto | 21 ++ 5 files changed, 668 insertions(+) create mode 100644 temporal/api/worker/v1/message.proto diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index bd0bc435..9e96b438 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -2115,6 +2115,46 @@ ] } }, + "/api/v1/namespaces/{namespace}/worker-heartbeat": { + "post": { + "summary": "WorkerHeartbeat receive heartbeat request from the worker.", + "operationId": "WorkerHeartbeat2", + "responses": { + "200": { + "description": "A successful response.", + "schema": { + "$ref": "#/definitions/v1WorkerHeartbeatResponse" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/rpcStatus" + } + } + }, + "parameters": [ + { + "name": "namespace", + "description": "Namespace of the workflow which scheduled this activity.", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "body", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/WorkflowServiceWorkerHeartbeatBody" + } + } + ], + "tags": [ + "WorkflowService" + ] + } + }, "/api/v1/namespaces/{namespace}/worker-task-reachability": { "get": { "summary": "Deprecated. 
Use `DescribeTaskQueue`.", @@ -2184,6 +2224,57 @@ ] } }, + "/api/v1/namespaces/{namespace}/workers": { + "get": { + "summary": "ListWorkerStatus is a visibility API to list worker status information in a specific namespace.", + "operationId": "ListWorkerStatus2", + "responses": { + "200": { + "description": "A successful response.", + "schema": { + "$ref": "#/definitions/v1ListWorkerStatusResponse" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/rpcStatus" + } + } + }, + "parameters": [ + { + "name": "namespace", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "pageSize", + "in": "query", + "required": false, + "type": "integer", + "format": "int32" + }, + { + "name": "nextPageToken", + "in": "query", + "required": false, + "type": "string", + "format": "byte" + }, + { + "name": "query", + "in": "query", + "required": false, + "type": "string" + } + ], + "tags": [ + "WorkflowService" + ] + } + }, "/api/v1/namespaces/{namespace}/workflow-count": { "get": { "summary": "CountWorkflowExecutions is a visibility API to count of workflow executions in a specific namespace.", @@ -5624,6 +5715,46 @@ ] } }, + "/namespaces/{namespace}/worker-heartbeat": { + "post": { + "summary": "WorkerHeartbeat receive heartbeat request from the worker.", + "operationId": "WorkerHeartbeat", + "responses": { + "200": { + "description": "A successful response.", + "schema": { + "$ref": "#/definitions/v1WorkerHeartbeatResponse" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/rpcStatus" + } + } + }, + "parameters": [ + { + "name": "namespace", + "description": "Namespace of the workflow which scheduled this activity.", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "body", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/WorkflowServiceWorkerHeartbeatBody" + } + } + ], + "tags": [ + 
"WorkflowService" + ] + } + }, "/namespaces/{namespace}/worker-task-reachability": { "get": { "summary": "Deprecated. Use `DescribeTaskQueue`.", @@ -5693,6 +5824,57 @@ ] } }, + "/namespaces/{namespace}/workers": { + "get": { + "summary": "ListWorkerStatus is a visibility API to list worker status information in a specific namespace.", + "operationId": "ListWorkerStatus", + "responses": { + "200": { + "description": "A successful response.", + "schema": { + "$ref": "#/definitions/v1ListWorkerStatusResponse" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/rpcStatus" + } + } + }, + "parameters": [ + { + "name": "namespace", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "pageSize", + "in": "query", + "required": false, + "type": "integer", + "format": "int32" + }, + { + "name": "nextPageToken", + "in": "query", + "required": false, + "type": "string", + "format": "byte" + }, + { + "name": "query", + "in": "query", + "required": false, + "type": "string" + } + ], + "tags": [ + "WorkflowService" + ] + } + }, "/namespaces/{namespace}/workflow-count": { "get": { "summary": "CountWorkflowExecutions is a visibility API to count of workflow executions in a specific namespace.", @@ -8353,6 +8535,18 @@ }, "description": "Keep the parameters in sync with:\n - temporal.api.batch.v1.BatchOperationUpdateWorkflowExecutionOptions.\n - temporal.api.workflow.v1.PostResetOperation.UpdateWorkflowOptions." }, + "WorkflowServiceWorkerHeartbeatBody": { + "type": "object", + "properties": { + "identity": { + "type": "string", + "description": "The identity of the client who initiated this request." 
+ }, + "workerStatus": { + "$ref": "#/definitions/v1WorkerStatus" + } + } + }, "apicommonv1Link": { "type": "object", "properties": { @@ -11174,6 +11368,23 @@ } } }, + "v1ListWorkerStatusResponse": { + "type": "object", + "properties": { + "workerStatus": { + "type": "array", + "items": { + "type": "object", + "$ref": "#/definitions/v1WorkerStatus" + } + }, + "nextPageToken": { + "type": "string", + "format": "byte", + "title": "Next page token" + } + } + }, "v1ListWorkflowExecutionsResponse": { "type": "object", "properties": { @@ -15053,6 +15264,114 @@ "default": "WORKER_DEPLOYMENT_VERSION_STATUS_UNSPECIFIED", "description": "Specify the status of a Worker Deployment Version.\nExperimental. Worker Deployments are experimental and might significantly change in the future.\n\n - WORKER_DEPLOYMENT_VERSION_STATUS_INACTIVE: The Worker Deployment Version has been created inside the Worker Deployment but is not used by any\nworkflow executions. These Versions can still have workflows if they have an explicit Versioning Override targeting\nthis Version. Such Versioning Override could be set at workflow start time, or at a later time via `UpdateWorkflowExecutionOptions`.\n - WORKER_DEPLOYMENT_VERSION_STATUS_CURRENT: The Worker Deployment Version is the current version of the Worker Deployment. All new workflow executions \nand tasks of existing unversioned or AutoUpgrade workflows are routed to this version.\n - WORKER_DEPLOYMENT_VERSION_STATUS_RAMPING: The Worker Deployment Version is the ramping version of the Worker Deployment. A subset of new Pinned workflow executions are \nrouted to this version. Moreover, a portion of existing unversioned or AutoUpgrade workflow executions are also routed to this version.\n - WORKER_DEPLOYMENT_VERSION_STATUS_DRAINING: The Worker Deployment Version is not used by new workflows but is still used by\nopen pinned workflows. 
The version cannot be decommissioned safely.\n - WORKER_DEPLOYMENT_VERSION_STATUS_DRAINED: The Worker Deployment Version is not used by new or open workflows, but might be still needed by\nQueries sent to closed workflows. The version can be decommissioned safely if user does\nnot query closed workflows. If the user does query closed workflows for some time x after\nworkflows are closed, they should decommission the version after it has been drained for that duration." }, + "v1WorkerHeartbeatResponse": { + "type": "object" + }, + "v1WorkerStatus": { + "type": "object", + "properties": { + "namespaceId": { + "type": "string" + }, + "workerId": { + "type": "string" + }, + "hostId": { + "type": "string" + }, + "taskQueue": { + "type": "string" + }, + "deploymentName": { + "type": "string" + }, + "buildId": { + "type": "string" + }, + "sdkName": { + "type": "string" + }, + "sdkVersion": { + "type": "string" + }, + "workerIdentity": { + "type": "string" + }, + "workerStatus": { + "type": "integer", + "format": "int32" + }, + "uptime": { + "type": "string" + }, + "lastHeartbeatTime": { + "type": "string", + "format": "date-time" + }, + "workflowTaskStatus": { + "$ref": "#/definitions/v1WorkerTaskStatus" + }, + "activityTaskStatus": { + "$ref": "#/definitions/v1WorkerTaskStatus" + }, + "nexusTaskStatus": { + "$ref": "#/definitions/v1WorkerTaskStatus" + }, + "cpuUsagePercent": { + "type": "number", + "format": "float" + }, + "memoryUsageBytes": { + "type": "string", + "format": "int64" + }, + "cacheHitRatio": { + "type": "number", + "format": "float" + }, + "cacheSize": { + "type": "number", + "format": "float" + } + } + }, + "v1WorkerTaskStatus": { + "type": "object", + "properties": { + "lastSuccessfulTaskPollTime": { + "type": "string", + "format": "date-time" + }, + "taskPollers": { + "type": "integer", + "format": "int32" + }, + "slotsAvailable": { + "type": "integer", + "format": "int32" + }, + "slotsUsed": { + "type": "integer", + "format": "int32" + }, + 
"processedTasks": { + "type": "integer", + "format": "int32" + }, + "failedTasks": { + "type": "integer", + "format": "int32" + }, + "processRateMin": { + "type": "number", + "format": "float" + }, + "failureRateMin": { + "type": "number", + "format": "float" + } + } + }, "v1WorkerVersionCapabilities": { "type": "object", "properties": { diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index a0dfa04d..95c12e13 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -1902,6 +1902,38 @@ paths: application/json: schema: $ref: '#/components/schemas/Status' + /api/v1/namespaces/{namespace}/worker-heartbeat: + post: + tags: + - WorkflowService + description: WorkerHeartbeat receive heartbeat request from the worker. + operationId: WorkerHeartbeat + parameters: + - name: namespace + in: path + description: Namespace of the workflow which scheduled this activity. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/WorkerHeartbeatRequest' + required: true + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/WorkerHeartbeatResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' /api/v1/namespaces/{namespace}/worker-task-reachability: get: tags: @@ -1982,6 +2014,45 @@ paths: application/json: schema: $ref: '#/components/schemas/Status' + /api/v1/namespaces/{namespace}/workers: + get: + tags: + - WorkflowService + description: ListWorkerStatus is a visibility API to list worker status information in a specific namespace. 
+ operationId: ListWorkerStatus + parameters: + - name: namespace + in: path + required: true + schema: + type: string + - name: pageSize + in: query + schema: + type: integer + format: int32 + - name: nextPageToken + in: query + schema: + type: string + format: bytes + - name: query + in: query + schema: + type: string + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListWorkerStatusResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' /api/v1/namespaces/{namespace}/workflow-count: get: tags: @@ -5049,6 +5120,38 @@ paths: application/json: schema: $ref: '#/components/schemas/Status' + /namespaces/{namespace}/worker-heartbeat: + post: + tags: + - WorkflowService + description: WorkerHeartbeat receive heartbeat request from the worker. + operationId: WorkerHeartbeat + parameters: + - name: namespace + in: path + description: Namespace of the workflow which scheduled this activity. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/WorkerHeartbeatRequest' + required: true + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/WorkerHeartbeatResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' /namespaces/{namespace}/worker-task-reachability: get: tags: @@ -5129,6 +5232,45 @@ paths: application/json: schema: $ref: '#/components/schemas/Status' + /namespaces/{namespace}/workers: + get: + tags: + - WorkflowService + description: ListWorkerStatus is a visibility API to list worker status information in a specific namespace. 
+ operationId: ListWorkerStatus + parameters: + - name: namespace + in: path + required: true + schema: + type: string + - name: pageSize + in: query + schema: + type: integer + format: int32 + - name: nextPageToken + in: query + schema: + type: string + format: bytes + - name: query + in: query + schema: + type: string + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListWorkerStatusResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' /namespaces/{namespace}/workflow-count: get: tags: @@ -8288,6 +8430,17 @@ components: - $ref: '#/components/schemas/WorkerDeploymentInfo_WorkerDeploymentVersionSummary' description: Summary of the ramping version of the Worker Deployment. description: A subset of WorkerDeploymentInfo + ListWorkerStatusResponse: + type: object + properties: + workerStatus: + type: array + items: + $ref: '#/components/schemas/WorkerStatus' + nextPageToken: + type: string + description: Next page token + format: bytes ListWorkflowExecutionsResponse: type: object properties: @@ -12467,6 +12620,94 @@ components: - TASK_QUEUE_TYPE_NEXUS type: string format: enum + WorkerHeartbeatRequest: + type: object + properties: + namespace: + type: string + description: Namespace of the workflow which scheduled this activity. + identity: + type: string + description: The identity of the client who initiated this request. 
+ workerStatus: + $ref: '#/components/schemas/WorkerStatus' + WorkerHeartbeatResponse: + type: object + properties: {} + WorkerStatus: + type: object + properties: + namespaceId: + type: string + workerId: + type: string + hostId: + type: string + taskQueue: + type: string + deploymentName: + type: string + buildId: + type: string + sdkName: + type: string + sdkVersion: + type: string + workerIdentity: + type: string + workerStatus: + type: integer + format: int32 + uptime: + pattern: ^-?(?:0|[1-9][0-9]{0,11})(?:\.[0-9]{1,9})?s$ + type: string + lastHeartbeatTime: + type: string + format: date-time + workflowTaskStatus: + $ref: '#/components/schemas/WorkerTaskStatus' + activityTaskStatus: + $ref: '#/components/schemas/WorkerTaskStatus' + nexusTaskStatus: + $ref: '#/components/schemas/WorkerTaskStatus' + cpuUsagePercent: + type: number + format: float + memoryUsageBytes: + type: string + cacheHitRatio: + type: number + format: float + cacheSize: + type: number + format: float + WorkerTaskStatus: + type: object + properties: + lastSuccessfulTaskPollTime: + type: string + format: date-time + taskPollers: + type: integer + format: int32 + slotsAvailable: + type: integer + format: int32 + slotsUsed: + type: integer + format: int32 + processedTasks: + type: integer + format: int32 + failedTasks: + type: integer + format: int32 + processRateMin: + type: number + format: float + failureRateMin: + type: number + format: float WorkerVersionCapabilities: type: object properties: diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto new file mode 100644 index 00000000..327b87d7 --- /dev/null +++ b/temporal/api/worker/v1/message.proto @@ -0,0 +1,59 @@ +syntax = "proto3"; + +package temporal.api.worker.v1; + +option go_package = "go.temporal.io/api/worker/v1;worker"; +option java_package = "io.temporal.api.worker.v1"; +option java_multiple_files = true; +option java_outer_classname = "MessageProto"; +option ruby_package = 
"Temporalio::Api::Worker::V1"; +option csharp_namespace = "Temporalio.Api.Worker.V1"; + + +import "google/protobuf/duration.proto"; +import "google/protobuf/timestamp.proto"; + + +message WorkerTaskStatus { + google.protobuf.Timestamp last_successful_task_poll_time = 1; + + int32 task_pollers = 2; + + int32 slots_available = 3; + int32 slots_used = 4; + + int32 processed_tasks = 5; + int32 failed_tasks = 6; + + float process_rate_min = 7; + float failure_rate_min = 8; +} + +message WorkerStatus { + string namespace_id = 1; + // Unique identifier for the namespace. + string worker_id = 2; + + string host_id = 3; + string task_queue = 4; + string deployment_name = 5; + string build_id = 6; + string sdk_name = 7; + string sdk_version = 8; + + string worker_identity = 9; + + int32 worker_status = 10; + google.protobuf.Duration uptime = 11; + + google.protobuf.Timestamp last_heartbeat_time = 12; + + WorkerTaskStatus workflow_task_status = 13; + WorkerTaskStatus activity_task_status = 14; + WorkerTaskStatus nexus_task_status = 15; + + float cpu_usage_percent = 16; + int64 memory_usage_bytes = 17; + float cache_hit_ratio = 18; + float cache_size = 19; +} diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 544499b7..18f3b3f1 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -40,6 +40,7 @@ import "temporal/api/batch/v1/message.proto"; import "temporal/api/sdk/v1/task_complete_metadata.proto"; import "temporal/api/sdk/v1/user_metadata.proto"; import "temporal/api/nexus/v1/message.proto"; +import "temporal/api/worker/v1/message.proto"; import "google/protobuf/duration.proto"; import "google/protobuf/field_mask.proto"; @@ -2346,3 +2347,30 @@ message TriggerWorkflowRuleResponse { // True is the rule was applied, based on the rule conditions (predicate/visibility_query). 
bool applied = 1; } +message WorkerHeartbeatRequest { + // Namespace of the workflow which scheduled this activity. + string namespace = 1; + + // The identity of the client who initiated this request. + string identity = 2; + + temporal.api.worker.v1.WorkerStatus worker_status = 3; +} + +message WorkerHeartbeatResponse { + +} + +message ListWorkerStatusRequest { + string namespace = 1; + int32 page_size = 2; + bytes next_page_token = 3; + string query = 4; +} + +message ListWorkerStatusResponse { + repeated temporal.api.worker.v1.WorkerStatus worker_status = 1; + + // Next page token + bytes next_page_token = 2; +} diff --git a/temporal/api/workflowservice/v1/service.proto b/temporal/api/workflowservice/v1/service.proto index 86538650..476b8a78 100644 --- a/temporal/api/workflowservice/v1/service.proto +++ b/temporal/api/workflowservice/v1/service.proto @@ -1173,4 +1173,25 @@ service WorkflowService { }; } + // WorkerHeartbeat receive heartbeat request from the worker. + rpc WorkerHeartbeat(WorkerHeartbeatRequest) returns (WorkerHeartbeatResponse) { + option (google.api.http) = { + post: "/namespaces/{namespace}/worker-heartbeat" + body: "*" + additional_bindings { + post: "/api/v1/namespaces/{namespace}/worker-heartbeat" + body: "*" + } + }; + }; + + // ListWorkerStatus is a visibility API to list worker status information in a specific namespace. 
+ rpc ListWorkerStatus (ListWorkerStatusRequest) returns (ListWorkerStatusResponse) { + option (google.api.http) = { + get: "/namespaces/{namespace}/workers" + additional_bindings { + get: "/api/v1/namespaces/{namespace}/workers" + } + }; + } } From 56bc12d9004803652cf40bec9d328d9e9d61ca0d Mon Sep 17 00:00:00 2001 From: Yuri Date: Wed, 28 May 2025 19:15:40 -0700 Subject: [PATCH 02/10] work on comments --- temporal/api/worker/v1/message.proto | 57 ++++++++++++------- .../workflowservice/v1/request_response.proto | 21 +++++-- temporal/api/workflowservice/v1/service.proto | 8 +-- 3 files changed, 58 insertions(+), 28 deletions(-) diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto index 327b87d7..39e7ee8f 100644 --- a/temporal/api/worker/v1/message.proto +++ b/temporal/api/worker/v1/message.proto @@ -12,6 +12,7 @@ option csharp_namespace = "Temporalio.Api.Worker.V1"; import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; +import "temporal/api/deployment/v1/message.proto"; message WorkerTaskStatus { @@ -29,31 +30,47 @@ message WorkerTaskStatus { float failure_rate_min = 8; } +// Holds everything needed to identify the host/process context +message WorkerHostInfo { + // worker host identifier, should be unique for the namespace. + string host_id = 1; + + // worker process identifier, should be unique for the host. + string process_id = 2; + + // freeform (e.g. "k8s container abc123", etc.) + string host_context = 3; + + // Worker identity, set by the worker, may not be unique. + string worker_identity = 7; +} + message WorkerStatus { - string namespace_id = 1; - // Unique identifier for the namespace. - string worker_id = 2; + // Worker identifier, should be unique for the namespace. Required. + string worker_id = 1; + + // Worker host information. Required. 
+ WorkerHostInfo host_info = 2; - string host_id = 3; - string task_queue = 4; - string deployment_name = 5; - string build_id = 6; - string sdk_name = 7; - string sdk_version = 8; + // Task queue this worker is polling for tasks. Required. + string task_queue = 3; - string worker_identity = 9; + // Required. + temporal.api.deployment.v1.WorkerDeploymentVersion deployment_version = 4; - int32 worker_status = 10; - google.protobuf.Duration uptime = 11; + string sdk_name = 6; + string sdk_version = 7; - google.protobuf.Timestamp last_heartbeat_time = 12; + google.protobuf.Duration uptime = 9; + google.protobuf.Timestamp last_heartbeat_time = 10; - WorkerTaskStatus workflow_task_status = 13; - WorkerTaskStatus activity_task_status = 14; - WorkerTaskStatus nexus_task_status = 15; + WorkerTaskStatus workflow_task_status = 11; + WorkerTaskStatus activity_task_status = 12; + WorkerTaskStatus nexus_task_status = 13; - float cpu_usage_percent = 16; - int64 memory_usage_bytes = 17; - float cache_hit_ratio = 18; - float cache_size = 19; + float cpu_usage_percent = 14; + int64 memory_usage_bytes = 15; + float cache_hit_ratio = 16; + float max_cache_size = 17; + float available_cache_size = 18; } diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 18f3b3f1..9920ee74 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -2347,7 +2347,7 @@ message TriggerWorkflowRuleResponse { // True is the rule was applied, based on the rule conditions (predicate/visibility_query). bool applied = 1; } -message WorkerHeartbeatRequest { +message RecordWorkerHeartbeatRequest { // Namespace of the workflow which scheduled this activity. 
string namespace = 1; @@ -2357,18 +2357,31 @@ message WorkerHeartbeatRequest { temporal.api.worker.v1.WorkerStatus worker_status = 3; } -message WorkerHeartbeatResponse { +message RecordWorkerHeartbeatResponse { } -message ListWorkerStatusRequest { +// `query` in ListWorkers is used to filter workers based on worker status info. +// The following worker status attributes are expected are supported as part of the query: +//* WorkerId +//* HostId +//* TaskQueue +//* DeploymentName +//* BuildId +//* SdkName +//* SdkVersion +//* Uptime +//* LastHeartbeatTime +//* Status +// Currently metrics are not supported as a part of ListWorkers query. +message ListWorkersRequest { string namespace = 1; int32 page_size = 2; bytes next_page_token = 3; string query = 4; } -message ListWorkerStatusResponse { +message ListWorkersResponse { repeated temporal.api.worker.v1.WorkerStatus worker_status = 1; // Next page token diff --git a/temporal/api/workflowservice/v1/service.proto b/temporal/api/workflowservice/v1/service.proto index 476b8a78..45bcfa5c 100644 --- a/temporal/api/workflowservice/v1/service.proto +++ b/temporal/api/workflowservice/v1/service.proto @@ -1174,19 +1174,19 @@ service WorkflowService { } // WorkerHeartbeat receive heartbeat request from the worker. - rpc WorkerHeartbeat(WorkerHeartbeatRequest) returns (WorkerHeartbeatResponse) { + rpc RecordWorkerHeartbeat(RecordWorkerHeartbeatRequest) returns (RecordWorkerHeartbeatResponse) { option (google.api.http) = { post: "/namespaces/{namespace}/worker-heartbeat" body: "*" additional_bindings { - post: "/api/v1/namespaces/{namespace}/worker-heartbeat" + post: "/api/v1/namespaces/{namespace}/workers/heartbeat" body: "*" } }; }; - // ListWorkerStatus is a visibility API to list worker status information in a specific namespace. - rpc ListWorkerStatus (ListWorkerStatusRequest) returns (ListWorkerStatusResponse) { + // ListWorkers is a visibility API to list worker status information in a specific namespace. 
+ rpc ListWorkers (ListWorkersRequest) returns (ListWorkersResponse) { option (google.api.http) = { get: "/namespaces/{namespace}/workers" additional_bindings { From f360b84672e544c505ac100a83e721865ae8745e Mon Sep 17 00:00:00 2001 From: Yuri Date: Wed, 28 May 2025 19:32:48 -0700 Subject: [PATCH 03/10] regenerate openapi after merge --- openapi/openapiv2.json | 185 ++++++++++++++++++++++------------------- openapi/openapiv3.yaml | 148 ++++++++++++++++++--------------- 2 files changed, 183 insertions(+), 150 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index 9e96b438..db147ee8 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -2115,46 +2115,6 @@ ] } }, - "/api/v1/namespaces/{namespace}/worker-heartbeat": { - "post": { - "summary": "WorkerHeartbeat receive heartbeat request from the worker.", - "operationId": "WorkerHeartbeat2", - "responses": { - "200": { - "description": "A successful response.", - "schema": { - "$ref": "#/definitions/v1WorkerHeartbeatResponse" - } - }, - "default": { - "description": "An unexpected error response.", - "schema": { - "$ref": "#/definitions/rpcStatus" - } - } - }, - "parameters": [ - { - "name": "namespace", - "description": "Namespace of the workflow which scheduled this activity.", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "body", - "in": "body", - "required": true, - "schema": { - "$ref": "#/definitions/WorkflowServiceWorkerHeartbeatBody" - } - } - ], - "tags": [ - "WorkflowService" - ] - } - }, "/api/v1/namespaces/{namespace}/worker-task-reachability": { "get": { "summary": "Deprecated. 
Use `DescribeTaskQueue`.", @@ -2226,13 +2186,13 @@ }, "/api/v1/namespaces/{namespace}/workers": { "get": { - "summary": "ListWorkerStatus is a visibility API to list worker status information in a specific namespace.", - "operationId": "ListWorkerStatus2", + "summary": "ListWorkers is a visibility API to list worker status information in a specific namespace.", + "operationId": "ListWorkers2", "responses": { "200": { "description": "A successful response.", "schema": { - "$ref": "#/definitions/v1ListWorkerStatusResponse" + "$ref": "#/definitions/v1ListWorkersResponse" } }, "default": { @@ -2275,6 +2235,46 @@ ] } }, + "/api/v1/namespaces/{namespace}/workers/heartbeat": { + "post": { + "summary": "WorkerHeartbeat receive heartbeat request from the worker.", + "operationId": "RecordWorkerHeartbeat2", + "responses": { + "200": { + "description": "A successful response.", + "schema": { + "$ref": "#/definitions/v1RecordWorkerHeartbeatResponse" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/rpcStatus" + } + } + }, + "parameters": [ + { + "name": "namespace", + "description": "Namespace of the workflow which scheduled this activity.", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "body", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/WorkflowServiceRecordWorkerHeartbeatBody" + } + } + ], + "tags": [ + "WorkflowService" + ] + } + }, "/api/v1/namespaces/{namespace}/workflow-count": { "get": { "summary": "CountWorkflowExecutions is a visibility API to count of workflow executions in a specific namespace.", @@ -5718,12 +5718,12 @@ "/namespaces/{namespace}/worker-heartbeat": { "post": { "summary": "WorkerHeartbeat receive heartbeat request from the worker.", - "operationId": "WorkerHeartbeat", + "operationId": "RecordWorkerHeartbeat", "responses": { "200": { "description": "A successful response.", "schema": { - "$ref": 
"#/definitions/v1WorkerHeartbeatResponse" + "$ref": "#/definitions/v1RecordWorkerHeartbeatResponse" } }, "default": { @@ -5746,7 +5746,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/WorkflowServiceWorkerHeartbeatBody" + "$ref": "#/definitions/WorkflowServiceRecordWorkerHeartbeatBody" } } ], @@ -5826,13 +5826,13 @@ }, "/namespaces/{namespace}/workers": { "get": { - "summary": "ListWorkerStatus is a visibility API to list worker status information in a specific namespace.", - "operationId": "ListWorkerStatus", + "summary": "ListWorkers is a visibility API to list worker status information in a specific namespace.", + "operationId": "ListWorkers", "responses": { "200": { "description": "A successful response.", "schema": { - "$ref": "#/definitions/v1ListWorkerStatusResponse" + "$ref": "#/definitions/v1ListWorkersResponse" } }, "default": { @@ -7567,6 +7567,18 @@ } } }, + "WorkflowServiceRecordWorkerHeartbeatBody": { + "type": "object", + "properties": { + "identity": { + "type": "string", + "description": "The identity of the client who initiated this request." + }, + "workerStatus": { + "$ref": "#/definitions/v1WorkerStatus" + } + } + }, "WorkflowServiceRequestCancelWorkflowExecutionBody": { "type": "object", "properties": { @@ -8535,18 +8547,6 @@ }, "description": "Keep the parameters in sync with:\n - temporal.api.batch.v1.BatchOperationUpdateWorkflowExecutionOptions.\n - temporal.api.workflow.v1.PostResetOperation.UpdateWorkflowOptions." }, - "WorkflowServiceWorkerHeartbeatBody": { - "type": "object", - "properties": { - "identity": { - "type": "string", - "description": "The identity of the client who initiated this request." 
- }, - "workerStatus": { - "$ref": "#/definitions/v1WorkerStatus" - } - } - }, "apicommonv1Link": { "type": "object", "properties": { @@ -11368,7 +11368,7 @@ } } }, - "v1ListWorkerStatusResponse": { + "v1ListWorkersResponse": { "type": "object", "properties": { "workerStatus": { @@ -12718,6 +12718,9 @@ } } }, + "v1RecordWorkerHeartbeatResponse": { + "type": "object" + }, "v1RegisterNamespaceRequest": { "type": "object", "properties": { @@ -15264,29 +15267,46 @@ "default": "WORKER_DEPLOYMENT_VERSION_STATUS_UNSPECIFIED", "description": "Specify the status of a Worker Deployment Version.\nExperimental. Worker Deployments are experimental and might significantly change in the future.\n\n - WORKER_DEPLOYMENT_VERSION_STATUS_INACTIVE: The Worker Deployment Version has been created inside the Worker Deployment but is not used by any\nworkflow executions. These Versions can still have workflows if they have an explicit Versioning Override targeting\nthis Version. Such Versioning Override could be set at workflow start time, or at a later time via `UpdateWorkflowExecutionOptions`.\n - WORKER_DEPLOYMENT_VERSION_STATUS_CURRENT: The Worker Deployment Version is the current version of the Worker Deployment. All new workflow executions \nand tasks of existing unversioned or AutoUpgrade workflows are routed to this version.\n - WORKER_DEPLOYMENT_VERSION_STATUS_RAMPING: The Worker Deployment Version is the ramping version of the Worker Deployment. A subset of new Pinned workflow executions are \nrouted to this version. Moreover, a portion of existing unversioned or AutoUpgrade workflow executions are also routed to this version.\n - WORKER_DEPLOYMENT_VERSION_STATUS_DRAINING: The Worker Deployment Version is not used by new workflows but is still used by\nopen pinned workflows. 
The version cannot be decommissioned safely.\n - WORKER_DEPLOYMENT_VERSION_STATUS_DRAINED: The Worker Deployment Version is not used by new or open workflows, but might be still needed by\nQueries sent to closed workflows. The version can be decommissioned safely if user does\nnot query closed workflows. If the user does query closed workflows for some time x after\nworkflows are closed, they should decommission the version after it has been drained for that duration." }, - "v1WorkerHeartbeatResponse": { - "type": "object" + "v1WorkerHostInfo": { + "type": "object", + "properties": { + "hostId": { + "type": "string", + "description": "worker host identifier, should be unique for the namespace." + }, + "processId": { + "type": "string", + "description": "worker process identifier, should be unique for the host." + }, + "hostContext": { + "type": "string", + "title": "freeform (e.g. \"k8s container abc123\", etc.)" + }, + "workerIdentity": { + "type": "string", + "description": "Worker identity, set by the worker, may not be unique." + } + }, + "title": "Holds everything needed to identify the host/process context" }, "v1WorkerStatus": { "type": "object", "properties": { - "namespaceId": { - "type": "string" - }, "workerId": { - "type": "string" + "type": "string", + "description": "Worker identifier, should be unique for the namespace. Required." }, - "hostId": { - "type": "string" + "hostInfo": { + "$ref": "#/definitions/v1WorkerHostInfo", + "description": "Worker host information. Required." }, "taskQueue": { - "type": "string" - }, - "deploymentName": { - "type": "string" + "type": "string", + "description": "Task queue this worker is polling for tasks. Required." }, - "buildId": { - "type": "string" + "deploymentVersion": { + "$ref": "#/definitions/v1WorkerDeploymentVersion", + "description": "Required." 
}, "sdkName": { "type": "string" @@ -15294,13 +15314,6 @@ "sdkVersion": { "type": "string" }, - "workerIdentity": { - "type": "string" - }, - "workerStatus": { - "type": "integer", - "format": "int32" - }, "uptime": { "type": "string" }, @@ -15329,7 +15342,11 @@ "type": "number", "format": "float" }, - "cacheSize": { + "maxCacheSize": { + "type": "number", + "format": "float" + }, + "availableCacheSize": { "type": "number", "format": "float" } diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index 95c12e13..717df818 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -1902,38 +1902,6 @@ paths: application/json: schema: $ref: '#/components/schemas/Status' - /api/v1/namespaces/{namespace}/worker-heartbeat: - post: - tags: - - WorkflowService - description: WorkerHeartbeat receive heartbeat request from the worker. - operationId: WorkerHeartbeat - parameters: - - name: namespace - in: path - description: Namespace of the workflow which scheduled this activity. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/WorkerHeartbeatRequest' - required: true - responses: - "200": - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/WorkerHeartbeatResponse' - default: - description: Default error response - content: - application/json: - schema: - $ref: '#/components/schemas/Status' /api/v1/namespaces/{namespace}/worker-task-reachability: get: tags: @@ -2018,8 +1986,8 @@ paths: get: tags: - WorkflowService - description: ListWorkerStatus is a visibility API to list worker status information in a specific namespace. - operationId: ListWorkerStatus + description: ListWorkers is a visibility API to list worker status information in a specific namespace. 
+ operationId: ListWorkers parameters: - name: namespace in: path @@ -2046,7 +2014,39 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ListWorkerStatusResponse' + $ref: '#/components/schemas/ListWorkersResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' + /api/v1/namespaces/{namespace}/workers/heartbeat: + post: + tags: + - WorkflowService + description: WorkerHeartbeat receive heartbeat request from the worker. + operationId: RecordWorkerHeartbeat + parameters: + - name: namespace + in: path + description: Namespace of the workflow which scheduled this activity. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RecordWorkerHeartbeatRequest' + required: true + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RecordWorkerHeartbeatResponse' default: description: Default error response content: @@ -5125,7 +5125,7 @@ paths: tags: - WorkflowService description: WorkerHeartbeat receive heartbeat request from the worker. - operationId: WorkerHeartbeat + operationId: RecordWorkerHeartbeat parameters: - name: namespace in: path @@ -5137,7 +5137,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/WorkerHeartbeatRequest' + $ref: '#/components/schemas/RecordWorkerHeartbeatRequest' required: true responses: "200": @@ -5145,7 +5145,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/WorkerHeartbeatResponse' + $ref: '#/components/schemas/RecordWorkerHeartbeatResponse' default: description: Default error response content: @@ -5236,8 +5236,8 @@ paths: get: tags: - WorkflowService - description: ListWorkerStatus is a visibility API to list worker status information in a specific namespace. 
- operationId: ListWorkerStatus + description: ListWorkers is a visibility API to list worker status information in a specific namespace. + operationId: ListWorkers parameters: - name: namespace in: path @@ -5264,7 +5264,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ListWorkerStatusResponse' + $ref: '#/components/schemas/ListWorkersResponse' default: description: Default error response content: @@ -8430,7 +8430,7 @@ components: - $ref: '#/components/schemas/WorkerDeploymentInfo_WorkerDeploymentVersionSummary' description: Summary of the ramping version of the Worker Deployment. description: A subset of WorkerDeploymentInfo - ListWorkerStatusResponse: + ListWorkersResponse: type: object properties: workerStatus: @@ -9651,6 +9651,20 @@ components: description: |- Will be set to true if the activity was reset. Applies only to the current run. + RecordWorkerHeartbeatRequest: + type: object + properties: + namespace: + type: string + description: Namespace of the workflow which scheduled this activity. + identity: + type: string + description: The identity of the client who initiated this request. + workerStatus: + $ref: '#/components/schemas/WorkerStatus' + RecordWorkerHeartbeatResponse: + type: object + properties: {} RegisterNamespaceRequest: type: object properties: @@ -12620,44 +12634,43 @@ components: - TASK_QUEUE_TYPE_NEXUS type: string format: enum - WorkerHeartbeatRequest: + WorkerHostInfo: type: object properties: - namespace: + hostId: type: string - description: Namespace of the workflow which scheduled this activity. - identity: + description: worker host identifier, should be unique for the namespace. + processId: type: string - description: The identity of the client who initiated this request. - workerStatus: - $ref: '#/components/schemas/WorkerStatus' - WorkerHeartbeatResponse: - type: object - properties: {} + description: worker process identifier, should be unique for the host. 
+ hostContext: + type: string + description: freeform (e.g. "k8s container abc123", etc.) + workerIdentity: + type: string + description: Worker identity, set by the worker, may not be unique. + description: Holds everything needed to identify the host/process context WorkerStatus: type: object properties: - namespaceId: - type: string workerId: type: string - hostId: - type: string + description: Worker identifier, should be unique for the namespace. Required. + hostInfo: + allOf: + - $ref: '#/components/schemas/WorkerHostInfo' + description: Worker host information. Required. taskQueue: type: string - deploymentName: - type: string - buildId: - type: string + description: Task queue this worker is polling for tasks. Required. + deploymentVersion: + allOf: + - $ref: '#/components/schemas/WorkerDeploymentVersion' + description: Required. sdkName: type: string sdkVersion: type: string - workerIdentity: - type: string - workerStatus: - type: integer - format: int32 uptime: pattern: ^-?(?:0|[1-9][0-9]{0,11})(?:\.[0-9]{1,9})?s$ type: string @@ -12678,7 +12691,10 @@ components: cacheHitRatio: type: number format: float - cacheSize: + maxCacheSize: + type: number + format: float + availableCacheSize: type: number format: float WorkerTaskStatus: From 71b35c241de16be734683ba03f2146c7e4547ac4 Mon Sep 17 00:00:00 2001 From: Yuri Date: Thu, 29 May 2025 14:41:00 -0700 Subject: [PATCH 04/10] work on comments --- openapi/openapiv2.json | 141 ++++++++++------ openapi/openapiv3.yaml | 155 +++++++++++++----- temporal/api/worker/v1/message.proto | 99 +++++++---- .../workflowservice/v1/request_response.proto | 32 ++-- 4 files changed, 280 insertions(+), 147 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index db147ee8..e18cdd94 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -2225,6 +2225,7 @@ }, { "name": "query", + "description": "`query` in ListWorkers is used to filter workers based on worker status info.\nThe following worker status 
attributes are expected are supported as part of the query:\n* WorkerId\n* WorkerIdentity\n* HostId\n* TaskQueue\n* DeploymentName\n* BuildId\n* SdkName\n* SdkVersion\n* Uptime\n* LastHeartbeatTime\n* Status\nCurrently metrics are not supported as a part of ListWorkers query.", "in": "query", "required": false, "type": "string" @@ -5865,6 +5866,7 @@ }, { "name": "query", + "description": "`query` in ListWorkers is used to filter workers based on worker status info.\nThe following worker status attributes are expected are supported as part of the query:\n* WorkerId\n* WorkerIdentity\n* HostId\n* TaskQueue\n* DeploymentName\n* BuildId\n* SdkName\n* SdkVersion\n* Uptime\n* LastHeartbeatTime\n* Status\nCurrently metrics are not supported as a part of ListWorkers query.", "in": "query", "required": false, "type": "string" @@ -7574,8 +7576,8 @@ "type": "string", "description": "The identity of the client who initiated this request." }, - "workerStatus": { - "$ref": "#/definitions/v1WorkerStatus" + "workerInfo": { + "$ref": "#/definitions/v1WorkerInfo" } } }, @@ -11371,11 +11373,11 @@ "v1ListWorkersResponse": { "type": "object", "properties": { - "workerStatus": { + "workerInfo": { "type": "array", "items": { "type": "object", - "$ref": "#/definitions/v1WorkerStatus" + "$ref": "#/definitions/v1WorkerInfo" } }, "nextPageToken": { @@ -15270,43 +15272,38 @@ "v1WorkerHostInfo": { "type": "object", "properties": { - "hostId": { + "hostName": { "type": "string", - "description": "worker host identifier, should be unique for the namespace." + "description": "Worker host identifier, should be unique for the namespace." }, "processId": { "type": "string", - "description": "worker process identifier, should be unique for the host." - }, - "hostContext": { - "type": "string", - "title": "freeform (e.g. \"k8s container abc123\", etc.)" + "description": "Worker process identifier, should be unique for the host." 
}, "workerIdentity": { "type": "string", - "description": "Worker identity, set by the worker, may not be unique." + "description": "Worker identity, set by the client, may not be unique.\nUsually host_name+(user group name)+process_id, but can be overwritten by the user." } }, - "title": "Holds everything needed to identify the host/process context" + "title": "Holds everything needed to identify the worker host/process context" }, - "v1WorkerStatus": { + "v1WorkerInfo": { "type": "object", "properties": { "workerId": { "type": "string", - "description": "Worker identifier, should be unique for the namespace. Required." + "description": "Worker identifier, should be unique for the namespace.\nIt is different from worker identity, which is usually a combination of host_name and process_id." }, "hostInfo": { "$ref": "#/definitions/v1WorkerHostInfo", - "description": "Worker host information. Required." + "description": "Worker host information." }, "taskQueue": { "type": "string", - "description": "Task queue this worker is polling for tasks. Required." + "description": "Task queue this worker is polling for tasks." }, "deploymentVersion": { - "$ref": "#/definitions/v1WorkerDeploymentVersion", - "description": "Required." + "$ref": "#/definitions/v1WorkerDeploymentVersion" }, "sdkName": { "type": "string" @@ -15314,21 +15311,42 @@ "sdkVersion": { "type": "string" }, + "status": { + "type": "string", + "description": "Worker status. Possible values - \"running\", \"shutting_down\", \"shutdown\"." + }, "uptime": { - "type": "string" + "type": "string", + "title": "Uptime of the worker, since it started.\nThis is the time since the worker started, not the time since the last heartbeat.\nIt can be used to determine worker start time. (current time - uptime)" }, "lastHeartbeatTime": { "type": "string", - "format": "date-time" + "format": "date-time", + "description": "Last heartbeat time, coming from the worker. Worker should set it to \"now\"." 
+ }, + "workflowTaskSlotsInfo": { + "$ref": "#/definitions/v1WorkerSlotsInfo" + }, + "activityTaskSlotsInfo": { + "$ref": "#/definitions/v1WorkerSlotsInfo" + }, + "nexusTaskSlotsInfo": { + "$ref": "#/definitions/v1WorkerSlotsInfo" + }, + "localActivitySlotsInfo": { + "$ref": "#/definitions/v1WorkerSlotsInfo" }, - "workflowTaskStatus": { - "$ref": "#/definitions/v1WorkerTaskStatus" + "workflowPollerInfo": { + "$ref": "#/definitions/v1WorkerPollerInfo" }, - "activityTaskStatus": { - "$ref": "#/definitions/v1WorkerTaskStatus" + "workflowStickyPollerInfo": { + "$ref": "#/definitions/v1WorkerPollerInfo" }, - "nexusTaskStatus": { - "$ref": "#/definitions/v1WorkerTaskStatus" + "activityPollerInfo": { + "$ref": "#/definitions/v1WorkerPollerInfo" + }, + "nexusPollerInfo": { + "$ref": "#/definitions/v1WorkerPollerInfo" }, "cpuUsagePercent": { "type": "number", @@ -15338,54 +15356,73 @@ "type": "string", "format": "int64" }, - "cacheHitRatio": { - "type": "number", - "format": "float" + "stickyCacheHit": { + "type": "integer", + "format": "int32", + "description": "A Workflow Task found a cached Workflow Execution to run against." }, - "maxCacheSize": { - "type": "number", - "format": "float" + "stickyCacheMiss": { + "type": "integer", + "format": "int32", + "description": "A Workflow Task did not find a cached Workflow execution to run against." }, - "availableCacheSize": { - "type": "number", - "format": "float" + "stickyCacheSize": { + "type": "integer", + "format": "int32", + "description": "Current cache size, expressed in number of Workflow Executions." } - } + }, + "description": "Worker info message, contains information about the worker and its current state.\nAll information is provided by the worker itself." 
}, - "v1WorkerTaskStatus": { + "v1WorkerPollerInfo": { "type": "object", "properties": { - "lastSuccessfulTaskPollTime": { - "type": "string", - "format": "date-time" + "activePollers": { + "type": "integer", + "format": "int32" }, - "taskPollers": { + "availablePollers": { "type": "integer", "format": "int32" }, + "lastSuccessfulPollTime": { + "type": "string", + "format": "date-time" + } + } + }, + "v1WorkerSlotsInfo": { + "type": "object", + "properties": { "slotsAvailable": { "type": "integer", - "format": "int32" + "format": "int32", + "description": "Number of slots available for the worker to specific tasks." }, "slotsUsed": { "type": "integer", - "format": "int32" + "format": "int32", + "description": "Number of slots used by the worker for specific tasks." }, "processedTasks": { "type": "integer", - "format": "int32" + "format": "int32", + "description": "Total number of tasks processed by the worker so far." }, "failedTasks": { "type": "integer", - "format": "int32" + "format": "int32", + "description": "Total number of failed tasks processed by the worker so far." }, - "processRateMin": { - "type": "number", - "format": "float" + "processedTasksLastMinute": { + "type": "integer", + "format": "int32", + "description": "Number of tasks processed in the last minute." }, - "failureRateMin": { - "type": "number", - "format": "float" + "failureTasksLastMinute": { + "type": "integer", + "format": "int32", + "description": "Number of failed tasks processed in the last minute." } } }, diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index 717df818..b83f1696 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -2006,6 +2006,21 @@ paths: format: bytes - name: query in: query + description: |- + `query` in ListWorkers is used to filter workers based on worker status info. 
+ The following worker status attributes are expected are supported as part of the query: + * WorkerId + * WorkerIdentity + * HostId + * TaskQueue + * DeploymentName + * BuildId + * SdkName + * SdkVersion + * Uptime + * LastHeartbeatTime + * Status + Currently metrics are not supported as a part of ListWorkers query. schema: type: string responses: @@ -5256,6 +5271,21 @@ paths: format: bytes - name: query in: query + description: |- + `query` in ListWorkers is used to filter workers based on worker status info. + The following worker status attributes are expected are supported as part of the query: + * WorkerId + * WorkerIdentity + * HostId + * TaskQueue + * DeploymentName + * BuildId + * SdkName + * SdkVersion + * Uptime + * LastHeartbeatTime + * Status + Currently metrics are not supported as a part of ListWorkers query. schema: type: string responses: @@ -8433,10 +8463,10 @@ components: ListWorkersResponse: type: object properties: - workerStatus: + workerInfo: type: array items: - $ref: '#/components/schemas/WorkerStatus' + $ref: '#/components/schemas/WorkerInfo' nextPageToken: type: string description: Next page token @@ -9660,8 +9690,8 @@ components: identity: type: string description: The identity of the client who initiated this request. - workerStatus: - $ref: '#/components/schemas/WorkerStatus' + workerInfo: + $ref: '#/components/schemas/WorkerInfo' RecordWorkerHeartbeatResponse: type: object properties: {} @@ -12637,93 +12667,128 @@ components: WorkerHostInfo: type: object properties: - hostId: + hostName: type: string - description: worker host identifier, should be unique for the namespace. + description: Worker host identifier, should be unique for the namespace. processId: type: string - description: worker process identifier, should be unique for the host. - hostContext: - type: string - description: freeform (e.g. "k8s container abc123", etc.) + description: Worker process identifier, should be unique for the host. 
workerIdentity: type: string - description: Worker identity, set by the worker, may not be unique. - description: Holds everything needed to identify the host/process context - WorkerStatus: + description: |- + Worker identity, set by the client, may not be unique. + Usually host_name+(user group name)+process_id, but can be overwritten by the user. + description: Holds everything needed to identify the worker host/process context + WorkerInfo: type: object properties: workerId: type: string - description: Worker identifier, should be unique for the namespace. Required. + description: |- + Worker identifier, should be unique for the namespace. + It is different from worker identity, which is usually a combination of host_name and process_id. hostInfo: allOf: - $ref: '#/components/schemas/WorkerHostInfo' - description: Worker host information. Required. + description: Worker host information. taskQueue: type: string - description: Task queue this worker is polling for tasks. Required. + description: Task queue this worker is polling for tasks. deploymentVersion: - allOf: - - $ref: '#/components/schemas/WorkerDeploymentVersion' - description: Required. + $ref: '#/components/schemas/WorkerDeploymentVersion' sdkName: type: string sdkVersion: type: string + status: + type: string + description: Worker status. Possible values - "running", "shutting_down", "shutdown". uptime: pattern: ^-?(?:0|[1-9][0-9]{0,11})(?:\.[0-9]{1,9})?s$ type: string + description: |- + Uptime of the worker, since it started. + This is the time since the worker started, not the time since the last heartbeat. + It can be used to determine worker start time. (current time - uptime) lastHeartbeatTime: type: string + description: Last heartbeat time, coming from the worker. Worker should set it to "now". 
format: date-time - workflowTaskStatus: - $ref: '#/components/schemas/WorkerTaskStatus' - activityTaskStatus: - $ref: '#/components/schemas/WorkerTaskStatus' - nexusTaskStatus: - $ref: '#/components/schemas/WorkerTaskStatus' + workflowTaskSlotsInfo: + $ref: '#/components/schemas/WorkerSlotsInfo' + activityTaskSlotsInfo: + $ref: '#/components/schemas/WorkerSlotsInfo' + nexusTaskSlotsInfo: + $ref: '#/components/schemas/WorkerSlotsInfo' + localActivitySlotsInfo: + $ref: '#/components/schemas/WorkerSlotsInfo' + workflowPollerInfo: + $ref: '#/components/schemas/WorkerPollerInfo' + workflowStickyPollerInfo: + $ref: '#/components/schemas/WorkerPollerInfo' + activityPollerInfo: + $ref: '#/components/schemas/WorkerPollerInfo' + nexusPollerInfo: + $ref: '#/components/schemas/WorkerPollerInfo' cpuUsagePercent: type: number format: float memoryUsageBytes: type: string - cacheHitRatio: - type: number - format: float - maxCacheSize: - type: number - format: float - availableCacheSize: - type: number - format: float - WorkerTaskStatus: + stickyCacheHit: + type: integer + description: A Workflow Task found a cached Workflow Execution to run against. + format: int32 + stickyCacheMiss: + type: integer + description: A Workflow Task did not find a cached Workflow execution to run against. + format: int32 + stickyCacheSize: + type: integer + description: Current cache size, expressed in number of Workflow Executions. + format: int32 + description: |- + Worker info message, contains information about the worker and its current state. + All information is provided by the worker itself. 
+ WorkerPollerInfo: type: object properties: - lastSuccessfulTaskPollTime: - type: string - format: date-time - taskPollers: + activePollers: + type: integer + format: int32 + availablePollers: type: integer format: int32 + lastSuccessfulPollTime: + type: string + format: date-time + WorkerSlotsInfo: + type: object + properties: slotsAvailable: type: integer + description: Number of slots available for the worker to specific tasks. format: int32 slotsUsed: type: integer + description: Number of slots used by the worker for specific tasks. format: int32 processedTasks: type: integer + description: Total number of tasks processed by the worker so far. format: int32 failedTasks: type: integer + description: Total number of failed tasks processed by the worker so far. + format: int32 + processedTasksLastMinute: + type: integer + description: Number of tasks processed in the last minute. + format: int32 + failureTasksLastMinute: + type: integer + description: Number of failed tasks processed in the last minute. 
format: int32 - processRateMin: - type: number - format: float - failureRateMin: - type: number - format: float WorkerVersionCapabilities: type: object properties: diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto index 39e7ee8f..0b1f3371 100644 --- a/temporal/api/worker/v1/message.proto +++ b/temporal/api/worker/v1/message.proto @@ -14,63 +14,92 @@ import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; import "temporal/api/deployment/v1/message.proto"; +message WorkerPollerInfo { -message WorkerTaskStatus { - google.protobuf.Timestamp last_successful_task_poll_time = 1; + int32 active_pollers = 1; - int32 task_pollers = 2; + int32 available_pollers = 2; - int32 slots_available = 3; - int32 slots_used = 4; + google.protobuf.Timestamp last_successful_poll_time = 3; +} + +message WorkerSlotsInfo { - int32 processed_tasks = 5; - int32 failed_tasks = 6; + // Number of slots available for the worker to specific tasks. + int32 slots_available = 1; + // Number of slots used by the worker for specific tasks. + int32 slots_used = 2; - float process_rate_min = 7; - float failure_rate_min = 8; + // Total number of tasks processed by the worker so far. + int32 processed_tasks = 3; + // Total number of failed tasks processed by the worker so far. + int32 failed_tasks = 4; + + // Number of tasks processed in the last minute. + int32 processed_tasks_last_minute = 5; + // Number of failed tasks processed in the last minute. + int32 failure_tasks_last_minute = 6; } -// Holds everything needed to identify the host/process context +// Holds everything needed to identify the worker host/process context message WorkerHostInfo { - // worker host identifier, should be unique for the namespace. - string host_id = 1; + // Worker host identifier, should be unique for the namespace. + string host_name = 1; - // worker process identifier, should be unique for the host. 
+ // Worker process identifier, should be unique for the host. string process_id = 2; - // freeform (e.g. "k8s container abc123", etc.) - string host_context = 3; - - // Worker identity, set by the worker, may not be unique. - string worker_identity = 7; + // Worker identity, set by the client, may not be unique. + // Usually host_name+(user group name)+process_id, but can be overwritten by the user. + string worker_identity = 3; } -message WorkerStatus { - // Worker identifier, should be unique for the namespace. Required. +// Worker info message, contains information about the worker and its current state. +// All information is provided by the worker itself. +message WorkerInfo { + // Worker identifier, should be unique for the namespace. + // It is different from worker identity, which is usually a combination of host_name and process_id. string worker_id = 1; - // Worker host information. Required. + // Worker host information. WorkerHostInfo host_info = 2; - // Task queue this worker is polling for tasks. Required. + // Task queue this worker is polling for tasks. string task_queue = 3; - // Required. temporal.api.deployment.v1.WorkerDeploymentVersion deployment_version = 4; - string sdk_name = 6; - string sdk_version = 7; + string sdk_name = 5; + string sdk_version = 6; + + // Worker status. Possible values - "running", "shutting_down", "shutdown". + string status = 7; + + // Uptime of the worker, since it started. + // This is the time since the worker started, not the time since the last heartbeat. + // It can be used to determine worker start time. (current time - uptime) + google.protobuf.Duration uptime = 8; + + // Last heartbeat time, coming from the worker. Worker should set it to "now". 
+ google.protobuf.Timestamp last_heartbeat_time = 9; + + WorkerSlotsInfo workflow_task_slots_info = 10; + WorkerSlotsInfo activity_task_slots_info = 11; + WorkerSlotsInfo nexus_task_slots_info = 12; + WorkerSlotsInfo local_activity_slots_info = 13; - google.protobuf.Duration uptime = 9; - google.protobuf.Timestamp last_heartbeat_time = 10; + WorkerPollerInfo workflow_poller_info = 14; + WorkerPollerInfo workflow_sticky_poller_info = 15; + WorkerPollerInfo activity_poller_info = 16; + WorkerPollerInfo nexus_poller_info = 17; - WorkerTaskStatus workflow_task_status = 11; - WorkerTaskStatus activity_task_status = 12; - WorkerTaskStatus nexus_task_status = 13; + float cpu_usage_percent = 18; + int64 memory_usage_bytes = 19; - float cpu_usage_percent = 14; - int64 memory_usage_bytes = 15; - float cache_hit_ratio = 16; - float max_cache_size = 17; - float available_cache_size = 18; + // A Workflow Task found a cached Workflow Execution to run against. + int32 sticky_cache_hit = 20; + // A Workflow Task did not find a cached Workflow execution to run against. + int32 sticky_cache_miss = 21; + // Current cache size, expressed in number of Workflow Executions. + int32 sticky_cache_size = 22; } diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 9920ee74..7adb9b60 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -2354,35 +2354,37 @@ message RecordWorkerHeartbeatRequest { // The identity of the client who initiated this request. string identity = 2; - temporal.api.worker.v1.WorkerStatus worker_status = 3; + temporal.api.worker.v1.WorkerInfo worker_info = 3; } message RecordWorkerHeartbeatResponse { } -// `query` in ListWorkers is used to filter workers based on worker status info. 
-// The following worker status attributes are expected are supported as part of the query: -//* WorkerId -//* HostId -//* TaskQueue -//* DeploymentName -//* BuildId -//* SdkName -//* SdkVersion -//* Uptime -//* LastHeartbeatTime -//* Status -// Currently metrics are not supported as a part of ListWorkers query. message ListWorkersRequest { string namespace = 1; int32 page_size = 2; bytes next_page_token = 3; + + // `query` in ListWorkers is used to filter workers based on worker status info. + // The following worker status attributes are expected are supported as part of the query: + //* WorkerId + //* WorkerIdentity + //* HostId + //* TaskQueue + //* DeploymentName + //* BuildId + //* SdkName + //* SdkVersion + //* Uptime + //* LastHeartbeatTime + //* Status + // Currently metrics are not supported as a part of ListWorkers query. string query = 4; } message ListWorkersResponse { - repeated temporal.api.worker.v1.WorkerStatus worker_status = 1; + repeated temporal.api.worker.v1.WorkerInfo worker_info = 1; // Next page token bytes next_page_token = 2; From bc1cbc918572b0aba1c6a236f20a2451afe4d778 Mon Sep 17 00:00:00 2001 From: Yuri Date: Thu, 29 May 2025 15:07:40 -0700 Subject: [PATCH 05/10] work on comments- extract status to enum --- openapi/openapiv2.json | 12 +++++++++++- openapi/openapiv3.yaml | 6 ++++++ temporal/api/enums/v1/common.proto | 11 ++++++++++- temporal/api/worker/v1/message.proto | 3 ++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index e18cdd94..ad9948d2 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -15312,7 +15312,7 @@ "type": "string" }, "status": { - "type": "string", + "$ref": "#/definitions/v1WorkerStatus", "description": "Worker status. Possible values - \"running\", \"shutting_down\", \"shutdown\"." 
}, "uptime": { @@ -15426,6 +15426,16 @@ } } }, + "v1WorkerStatus": { + "type": "string", + "enum": [ + "WORKER_STATUS_UNSPECIFIED", + "WORKER_STATUS_STATUS_RUNNING", + "WORKER_STATUS_SHUTTING_DOWN", + "WORKER_STATUS_SHUTDOWN" + ], + "default": "WORKER_STATUS_UNSPECIFIED" + }, "v1WorkerVersionCapabilities": { "type": "object", "properties": { diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index b83f1696..fb15c29b 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -12701,8 +12701,14 @@ components: sdkVersion: type: string status: + enum: + - WORKER_STATUS_UNSPECIFIED + - WORKER_STATUS_STATUS_RUNNING + - WORKER_STATUS_SHUTTING_DOWN + - WORKER_STATUS_SHUTDOWN type: string description: Worker status. Possible values - "running", "shutting_down", "shutdown". + format: enum uptime: pattern: ^-?(?:0|[1-9][0-9]{0,11})(?:\.[0-9]{1,9})?s$ type: string diff --git a/temporal/api/enums/v1/common.proto b/temporal/api/enums/v1/common.proto index c45174b7..06eb53ff 100644 --- a/temporal/api/enums/v1/common.proto +++ b/temporal/api/enums/v1/common.proto @@ -96,4 +96,13 @@ enum ApplicationErrorCategory { APPLICATION_ERROR_CATEGORY_UNSPECIFIED = 0; // Expected application error with little/no severity. APPLICATION_ERROR_CATEGORY_BENIGN = 1; -} \ No newline at end of file +} + +// (-- api-linter: core::0216::synonyms=disabled +// aip.dev/not-precedent: It seems we have both state and status, and status is a better fit for workers. 
--) +enum WorkerStatus { + WORKER_STATUS_UNSPECIFIED = 0; + WORKER_STATUS_STATUS_RUNNING = 1; + WORKER_STATUS_SHUTTING_DOWN = 2; + WORKER_STATUS_SHUTDOWN = 3; +} diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto index 0b1f3371..a006c4e1 100644 --- a/temporal/api/worker/v1/message.proto +++ b/temporal/api/worker/v1/message.proto @@ -13,6 +13,7 @@ option csharp_namespace = "Temporalio.Api.Worker.V1"; import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; import "temporal/api/deployment/v1/message.proto"; +import "temporal/api/enums/v1/common.proto"; message WorkerPollerInfo { @@ -73,7 +74,7 @@ message WorkerInfo { string sdk_version = 6; // Worker status. Possible values - "running", "shutting_down", "shutdown". - string status = 7; + temporal.api.enums.v1.WorkerStatus status = 7; // Uptime of the worker, since it started. // This is the time since the worker started, not the time since the last heartbeat. From 46cd5194676c0358ccfcd73c6f17531cf5853a60 Mon Sep 17 00:00:00 2001 From: Yuri Date: Fri, 30 May 2025 12:15:56 -0700 Subject: [PATCH 06/10] work on comments --- openapi/openapiv2.json | 13 +++++++------ openapi/openapiv3.yaml | 19 ++++++++++--------- temporal/api/worker/v1/message.proto | 14 ++++++-------- .../workflowservice/v1/request_response.proto | 2 +- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index ad9948d2..e894d867 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -2225,7 +2225,7 @@ }, { "name": "query", - "description": "`query` in ListWorkers is used to filter workers based on worker status info.\nThe following worker status attributes are expected are supported as part of the query:\n* WorkerId\n* WorkerIdentity\n* HostId\n* TaskQueue\n* DeploymentName\n* BuildId\n* SdkName\n* SdkVersion\n* Uptime\n* LastHeartbeatTime\n* Status\nCurrently metrics are not supported as a part of ListWorkers query.", 
+ "description": "`query` in ListWorkers is used to filter workers based on worker status info.\nThe following worker status attributes are expected are supported as part of the query:\n* WorkerId\n* WorkerIdentity\n* HostId\n* TaskQueue\n* DeploymentName\n* BuildId\n* SdkName\n* SdkVersion\n* StartTime\n* LastHeartbeatTime\n* Status\nCurrently metrics are not supported as a part of ListWorkers query.", "in": "query", "required": false, "type": "string" @@ -5866,7 +5866,7 @@ }, { "name": "query", - "description": "`query` in ListWorkers is used to filter workers based on worker status info.\nThe following worker status attributes are expected are supported as part of the query:\n* WorkerId\n* WorkerIdentity\n* HostId\n* TaskQueue\n* DeploymentName\n* BuildId\n* SdkName\n* SdkVersion\n* Uptime\n* LastHeartbeatTime\n* Status\nCurrently metrics are not supported as a part of ListWorkers query.", + "description": "`query` in ListWorkers is used to filter workers based on worker status info.\nThe following worker status attributes are expected are supported as part of the query:\n* WorkerId\n* WorkerIdentity\n* HostId\n* TaskQueue\n* DeploymentName\n* BuildId\n* SdkName\n* SdkVersion\n* StartTime\n* LastHeartbeatTime\n* Status\nCurrently metrics are not supported as a part of ListWorkers query.", "in": "query", "required": false, "type": "string" @@ -15290,7 +15290,7 @@ "v1WorkerInfo": { "type": "object", "properties": { - "workerId": { + "workerInstanceKey": { "type": "string", "description": "Worker identifier, should be unique for the namespace.\nIt is different from worker identity, which is usually a combination of host_name and process_id." }, @@ -15315,9 +15315,10 @@ "$ref": "#/definitions/v1WorkerStatus", "description": "Worker status. Possible values - \"running\", \"shutting_down\", \"shutdown\"." 
}, - "uptime": { + "startTime": { "type": "string", - "title": "Uptime of the worker, since it started.\nThis is the time since the worker started, not the time since the last heartbeat.\nIt can be used to determine worker start time. (current time - uptime)" + "format": "date-time", + "title": "Worker start time.\nIt can be used to determine worker uptime. (current time - start time)" }, "lastHeartbeatTime": { "type": "string", @@ -15407,7 +15408,7 @@ "processedTasks": { "type": "integer", "format": "int32", - "description": "Total number of tasks processed by the worker so far." + "description": "Total number of tasks processed (completed both successfully and unsuccesfully)\nby the worker since the worker started. This is a cumulative counter." }, "failedTasks": { "type": "integer", diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index fb15c29b..a0b908a7 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -2017,7 +2017,7 @@ paths: * BuildId * SdkName * SdkVersion - * Uptime + * StartTime * LastHeartbeatTime * Status Currently metrics are not supported as a part of ListWorkers query. @@ -5282,7 +5282,7 @@ paths: * BuildId * SdkName * SdkVersion - * Uptime + * StartTime * LastHeartbeatTime * Status Currently metrics are not supported as a part of ListWorkers query. @@ -12682,7 +12682,7 @@ components: WorkerInfo: type: object properties: - workerId: + workerInstanceKey: type: string description: |- Worker identifier, should be unique for the namespace. @@ -12709,13 +12709,12 @@ components: type: string description: Worker status. Possible values - "running", "shutting_down", "shutdown". format: enum - uptime: - pattern: ^-?(?:0|[1-9][0-9]{0,11})(?:\.[0-9]{1,9})?s$ + startTime: type: string description: |- - Uptime of the worker, since it started. - This is the time since the worker started, not the time since the last heartbeat. - It can be used to determine worker start time. (current time - uptime) + Worker start time. 
+ It can be used to determine worker uptime. (current time - start time) + format: date-time lastHeartbeatTime: type: string description: Last heartbeat time, coming from the worker. Worker should set it to "now". @@ -12781,7 +12780,9 @@ components: format: int32 processedTasks: type: integer - description: Total number of tasks processed by the worker so far. + description: |- + Total number of tasks processed (completed both successfully and unsuccesfully) + by the worker since the worker started. This is a cumulative counter. format: int32 failedTasks: type: integer diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto index a006c4e1..ee54f133 100644 --- a/temporal/api/worker/v1/message.proto +++ b/temporal/api/worker/v1/message.proto @@ -9,8 +9,6 @@ option java_outer_classname = "MessageProto"; option ruby_package = "Temporalio::Api::Worker::V1"; option csharp_namespace = "Temporalio.Api.Worker.V1"; - -import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; import "temporal/api/deployment/v1/message.proto"; import "temporal/api/enums/v1/common.proto"; @@ -31,7 +29,8 @@ message WorkerSlotsInfo { // Number of slots used by the worker for specific tasks. int32 slots_used = 2; - // Total number of tasks processed by the worker so far. + // Total number of tasks processed (completed both successfully and unsuccesfully) + // by the worker since the worker started. This is a cumulative counter. int32 processed_tasks = 3; // Total number of failed tasks processed by the worker so far. int32 failed_tasks = 4; @@ -60,7 +59,7 @@ message WorkerHostInfo { message WorkerInfo { // Worker identifier, should be unique for the namespace. // It is different from worker identity, which is usually a combination of host_name and process_id. - string worker_id = 1; + string worker_instance_key = 1; // Worker host information. WorkerHostInfo host_info = 2; @@ -76,10 +75,9 @@ message WorkerInfo { // Worker status. 
Possible values - "running", "shutting_down", "shutdown". temporal.api.enums.v1.WorkerStatus status = 7; - // Uptime of the worker, since it started. - // This is the time since the worker started, not the time since the last heartbeat. - // It can be used to determine worker start time. (current time - uptime) - google.protobuf.Duration uptime = 8; + // Worker start time. + // It can be used to determine worker uptime. (current time - start time) + google.protobuf.Timestamp start_time = 8; // Last heartbeat time, coming from the worker. Worker should set it to "now". google.protobuf.Timestamp last_heartbeat_time = 9; diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 7adb9b60..3c795dc4 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -2376,7 +2376,7 @@ message ListWorkersRequest { //* BuildId //* SdkName //* SdkVersion - //* Uptime + //* StartTime //* LastHeartbeatTime //* Status // Currently metrics are not supported as a part of ListWorkers query. 
From b8fdf1bcdbd8410a37da7d5e6d644a0a43c0a0f0 Mon Sep 17 00:00:00 2001 From: Yuri Date: Mon, 2 Jun 2025 09:59:42 -0700 Subject: [PATCH 07/10] work on comments --- openapi/openapiv2.json | 20 +++++++++---------- openapi/openapiv3.yaml | 20 +++++++++---------- temporal/api/worker/v1/message.proto | 20 +++++++++---------- .../workflowservice/v1/request_response.proto | 2 ++ 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index e894d867..c56f2991 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -15349,25 +15349,25 @@ "nexusPollerInfo": { "$ref": "#/definitions/v1WorkerPollerInfo" }, - "cpuUsagePercent": { + "currentHostCpuUsage": { "type": "number", "format": "float" }, - "memoryUsageBytes": { + "currentHostMemUsage": { "type": "string", "format": "int64" }, - "stickyCacheHit": { + "totalStickyCacheHit": { "type": "integer", "format": "int32", "description": "A Workflow Task found a cached Workflow Execution to run against." }, - "stickyCacheMiss": { + "totalStickyCacheMiss": { "type": "integer", "format": "int32", "description": "A Workflow Task did not find a cached Workflow execution to run against." }, - "stickyCacheSize": { + "currentStickyCacheSize": { "type": "integer", "format": "int32", "description": "Current cache size, expressed in number of Workflow Executions." @@ -15395,22 +15395,22 @@ "v1WorkerSlotsInfo": { "type": "object", "properties": { - "slotsAvailable": { + "currentSlotsAvailable": { "type": "integer", "format": "int32", "description": "Number of slots available for the worker to specific tasks." }, - "slotsUsed": { + "currentSlotsUsed": { "type": "integer", "format": "int32", "description": "Number of slots used by the worker for specific tasks." 
}, - "processedTasks": { + "totalProcessedTasks": { "type": "integer", "format": "int32", - "description": "Total number of tasks processed (completed both successfully and unsuccesfully)\nby the worker since the worker started. This is a cumulative counter." + "description": "Total number of tasks processed (completed both successfully and unsuccessfully, or any other way)\nby the worker since the worker started. This is a cumulative counter." }, - "failedTasks": { + "totalFailedTasks": { "type": "integer", "format": "int32", "description": "Total number of failed tasks processed by the worker so far." diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index a0b908a7..0a3f68e5 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -12735,20 +12735,20 @@ components: $ref: '#/components/schemas/WorkerPollerInfo' nexusPollerInfo: $ref: '#/components/schemas/WorkerPollerInfo' - cpuUsagePercent: + currentHostCpuUsage: type: number format: float - memoryUsageBytes: + currentHostMemUsage: type: string - stickyCacheHit: + totalStickyCacheHit: type: integer description: A Workflow Task found a cached Workflow Execution to run against. format: int32 - stickyCacheMiss: + totalStickyCacheMiss: type: integer description: A Workflow Task did not find a cached Workflow execution to run against. format: int32 - stickyCacheSize: + currentStickyCacheSize: type: integer description: Current cache size, expressed in number of Workflow Executions. format: int32 @@ -12770,21 +12770,21 @@ components: WorkerSlotsInfo: type: object properties: - slotsAvailable: + currentSlotsAvailable: type: integer description: Number of slots available for the worker to specific tasks. format: int32 - slotsUsed: + currentSlotsUsed: type: integer description: Number of slots used by the worker for specific tasks.
format: int32 - processedTasks: + totalProcessedTasks: type: integer description: |- - Total number of tasks processed (completed both successfully and unsuccesfully) + Total number of tasks processed (completed both successfully and unsuccessfully, or any other way) by the worker since the worker started. This is a cumulative counter. format: int32 - failedTasks: + totalFailedTasks: type: integer description: Total number of failed tasks processed by the worker so far. format: int32 diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto index ee54f133..f2d95090 100644 --- a/temporal/api/worker/v1/message.proto +++ b/temporal/api/worker/v1/message.proto @@ -25,15 +25,15 @@ message WorkerPollerInfo { message WorkerSlotsInfo { // Number of slots available for the worker to specific tasks. - int32 slots_available = 1; + int32 current_slots_available = 1; // Number of slots used by the worker for specific tasks. - int32 slots_used = 2; + int32 current_slots_used = 2; - // Total number of tasks processed (completed both successfully and unsuccesfully) + // Total number of tasks processed (completed both successfully and unsuccessfully, or any other way) // by the worker since the worker started. This is a cumulative counter. - int32 processed_tasks = 3; + int32 total_processed_tasks = 3; // Total number of failed tasks processed by the worker so far. - int32 failed_tasks = 4; + int32 total_failed_tasks = 4; // Number of tasks processed in the last minute. int32 processed_tasks_last_minute = 5; @@ -92,13 +92,13 @@ message WorkerInfo { WorkerPollerInfo activity_poller_info = 16; WorkerPollerInfo nexus_poller_info = 17; - float cpu_usage_percent = 18; - int64 memory_usage_bytes = 19; + float current_host_cpu_usage = 18; + int64 current_host_mem_usage = 19; // A Workflow Task found a cached Workflow Execution to run against.
- int32 sticky_cache_hit = 20; + int32 total_sticky_cache_hit = 20; // A Workflow Task did not find a cached Workflow execution to run against. - int32 sticky_cache_miss = 21; + int32 total_sticky_cache_miss = 21; // Current cache size, expressed in number of Workflow Executions. - int32 sticky_cache_size = 22; + int32 current_sticky_cache_size = 22; } diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 3c795dc4..caaf6b1d 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -994,6 +994,8 @@ message ShutdownWorkerRequest { string sticky_task_queue = 2; string identity = 3; string reason = 4; + + temporal.api.worker.v1.WorkerInfo worker_info = 5; } message ShutdownWorkerResponse { From 119a6d467e88b3e3622c4bda0d4a83a1abe81c76 Mon Sep 17 00:00:00 2001 From: Yuri Date: Mon, 2 Jun 2025 13:02:12 -0700 Subject: [PATCH 08/10] work on comments --- openapi/openapiv2.json | 12 +++++--- openapi/openapiv3.yaml | 17 +++++++++--- temporal/api/worker/v1/message.proto | 41 ++++++++++++++++------------ 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index c56f2991..83cfa1da 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -15325,6 +15325,10 @@ "format": "date-time", "description": "Last heartbeat time, coming from the worker. Worker should set it to \"now\"." }, + "elapsedSinceLastHeartbeat": { + "type": "string", + "description": "Elapsed time since the last heartbeat from the worker." + }, "workflowTaskSlotsInfo": { "$ref": "#/definitions/v1WorkerSlotsInfo" }, @@ -15415,15 +15419,15 @@ "format": "int32", "description": "Total number of failed tasks processed by the worker so far." 
}, - "processedTasksLastMinute": { + "processedTasksLastInterval": { "type": "integer", "format": "int32", - "description": "Number of tasks processed in the last minute." + "description": "Number of tasks processed since the last heartbeat from the worker.\nThis is a cumulative counter, and it is reset to 0 each time the worker sends a heartbeat.\nContains both successful and failed tasks." }, - "failureTasksLastMinute": { + "failureTasksLastInterval": { "type": "integer", "format": "int32", - "description": "Number of failed tasks processed in the last minute." + "description": "Number of failed tasks processed since the last heartbeat from the worker." } } }, diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index 0a3f68e5..3e84cd97 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -12719,6 +12719,10 @@ components: type: string description: Last heartbeat time, coming from the worker. Worker should set it to "now". format: date-time + elapsedSinceLastHeartbeat: + pattern: ^-?(?:0|[1-9][0-9]{0,11})(?:\.[0-9]{1,9})?s$ + type: string + description: Elapsed time since the last heartbeat from the worker. workflowTaskSlotsInfo: $ref: '#/components/schemas/WorkerSlotsInfo' activityTaskSlotsInfo: @@ -12755,6 +12759,8 @@ components: description: |- Worker info message, contains information about the worker and its current state. All information is provided by the worker itself. + (-- api-linter: core::0140::prepositions=disabled + aip.dev/not-precedent: Removing those words make names less clear. --) WorkerPollerInfo: type: object properties: @@ -12788,13 +12794,16 @@ components: type: integer description: Total number of failed tasks processed by the worker so far. format: int32 - processedTasksLastMinute: + processedTasksLastInterval: type: integer - description: Number of tasks processed in the last minute. + description: |- + Number of tasks processed since the last heartbeat from the worker.
+ This is a cumulative counter, and it is reset to 0 each time the worker sends a heartbeat. + Contains both successful and failed tasks. format: int32 - failureTasksLastMinute: + failureTasksLastInterval: type: integer - description: Number of failed tasks processed in the last minute. + description: Number of failed tasks processed since the last heartbeat from the worker. format: int32 WorkerVersionCapabilities: type: object diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto index f2d95090..cadba406 100644 --- a/temporal/api/worker/v1/message.proto +++ b/temporal/api/worker/v1/message.proto @@ -9,6 +9,7 @@ option java_outer_classname = "MessageProto"; option ruby_package = "Temporalio::Api::Worker::V1"; option csharp_namespace = "Temporalio.Api.Worker.V1"; +import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; import "temporal/api/deployment/v1/message.proto"; import "temporal/api/enums/v1/common.proto"; @@ -35,10 +36,12 @@ message WorkerSlotsInfo { // Total number of failed tasks processed by the worker so far. int32 total_failed_tasks = 4; - // Number of tasks processed in the last minute. - int32 processed_tasks_last_minute = 5; - // Number of failed tasks processed in the last minute. - int32 failure_tasks_last_minute = 6; + // Number of tasks processed since the last heartbeat from the worker. + // This is a cumulative counter, and it is reset to 0 each time the worker sends a heartbeat. + // Contains both successful and failed tasks. + int32 processed_tasks_last_interval = 5; + // Number of failed tasks processed since the last heartbeat from the worker. + int32 failure_tasks_last_interval = 6; } // Holds everything needed to identify the worker host/process context message WorkerHostInfo { @@ -56,6 +59,8 @@ message WorkerHostInfo { // Worker info message, contains information about the worker and its current state. // All information is provided by the worker itself.
+// (-- api-linter: core::0140::prepositions=disabled +// aip.dev/not-precedent: Removing those words make names less clear. --) message WorkerInfo { // Worker identifier, should be unique for the namespace. // It is different from worker identity, which is usually a combination of host_name and process_id. @@ -81,24 +86,26 @@ message WorkerInfo { // Last heartbeat time, coming from the worker. Worker should set it to "now". google.protobuf.Timestamp last_heartbeat_time = 9; + // Elapsed time since the last heartbeat from the worker. + google.protobuf.Duration elapsed_since_last_heartbeat = 10; - WorkerSlotsInfo workflow_task_slots_info = 10; - WorkerSlotsInfo activity_task_slots_info = 11; - WorkerSlotsInfo nexus_task_slots_info = 12; - WorkerSlotsInfo local_activity_slots_info = 13; + WorkerSlotsInfo workflow_task_slots_info = 11; + WorkerSlotsInfo activity_task_slots_info = 12; + WorkerSlotsInfo nexus_task_slots_info = 13; + WorkerSlotsInfo local_activity_slots_info = 14; - WorkerPollerInfo workflow_poller_info = 14; - WorkerPollerInfo workflow_sticky_poller_info = 15; - WorkerPollerInfo activity_poller_info = 16; - WorkerPollerInfo nexus_poller_info = 17; + WorkerPollerInfo workflow_poller_info = 15; + WorkerPollerInfo workflow_sticky_poller_info = 16; + WorkerPollerInfo activity_poller_info = 17; + WorkerPollerInfo nexus_poller_info = 18; - float current_host_cpu_usage = 18; - int64 current_host_mem_usage = 19; + float current_host_cpu_usage = 19; + int64 current_host_mem_usage = 20; // A Workflow Task found a cached Workflow Execution to run against. - int32 total_sticky_cache_hit = 20; + int32 total_sticky_cache_hit = 21; // A Workflow Task did not find a cached Workflow execution to run against. - int32 total_sticky_cache_miss = 21; + int32 total_sticky_cache_miss = 22; // Current cache size, expressed in number of Workflow Executions. 
- int32 current_sticky_cache_size = 22; + int32 current_sticky_cache_size = 23; } From d377c3c760e882b50f581b6267e14ba8cf8cc57c Mon Sep 17 00:00:00 2001 From: Yuri Date: Mon, 2 Jun 2025 14:58:42 -0700 Subject: [PATCH 09/10] work on comments --- openapi/openapiv2.json | 2 +- openapi/openapiv3.yaml | 2 +- temporal/api/worker/v1/message.proto | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index 83cfa1da..6472e946 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -15274,7 +15274,7 @@ "properties": { "hostName": { "type": "string", - "description": "Worker host identifier, should be unique for the namespace." + "description": "Worker host identifier." }, "processId": { "type": "string", diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index 3e84cd97..173bba28 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -12669,7 +12669,7 @@ components: properties: hostName: type: string - description: Worker host identifier, should be unique for the namespace. + description: Worker host identifier. processId: type: string description: Worker process identifier, should be unique for the host. diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto index cadba406..a387190f 100644 --- a/temporal/api/worker/v1/message.proto +++ b/temporal/api/worker/v1/message.proto @@ -46,7 +46,7 @@ message WorkerSlotsInfo { // Holds everything needed to identify the worker host/process context message WorkerHostInfo { - // Worker host identifier, should be unique for the namespace. + // Worker host identifier. string host_name = 1; // Worker process identifier, should be unique for the host. 
From c90de22cc2a91a87aa30583f08350610f537625a Mon Sep 17 00:00:00 2001 From: Yuri Date: Mon, 2 Jun 2025 18:41:56 -0700 Subject: [PATCH 10/10] work on comments --- openapi/openapiv2.json | 12 ++++-------- openapi/openapiv3.yaml | 13 ++++++------- temporal/api/enums/v1/common.proto | 2 +- temporal/api/worker/v1/message.proto | 9 ++++----- 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index 6472e946..13909d08 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -15292,7 +15292,7 @@ "properties": { "workerInstanceKey": { "type": "string", - "description": "Worker identifier, should be unique for the namespace.\nIt is different from worker identity, which is usually a combination of host_name and process_id." + "description": "Worker identifier, should be unique for the namespace.\nIt is distinct from worker identity, which is not necessarily namespace-unique." }, "hostInfo": { "$ref": "#/definitions/v1WorkerHostInfo", @@ -15313,7 +15313,7 @@ }, "status": { "$ref": "#/definitions/v1WorkerStatus", - "description": "Worker status. Possible values - \"running\", \"shutting_down\", \"shutdown\"." + "description": "Worker status. Defined by SDK." }, "startTime": { "type": "string", @@ -15386,10 +15386,6 @@ "type": "integer", "format": "int32" }, - "availablePollers": { - "type": "integer", - "format": "int32" - }, "lastSuccessfulPollTime": { "type": "string", "format": "date-time" @@ -15402,7 +15398,7 @@ "currentSlotsAvailable": { "type": "integer", "format": "int32", - "description": "Number of slots available for the worker to specific tasks." + "description": "Number of slots available for the worker to specific tasks.\nMay be -1 if the upper bound is not known." 
}, "currentSlotsUsed": { "type": "integer", @@ -15435,7 +15431,7 @@ "type": "string", "enum": [ "WORKER_STATUS_UNSPECIFIED", - "WORKER_STATUS_STATUS_RUNNING", + "WORKER_STATUS_RUNNING", "WORKER_STATUS_SHUTTING_DOWN", "WORKER_STATUS_SHUTDOWN" ], diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index 173bba28..e801910c 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -12686,7 +12686,7 @@ components: type: string description: |- Worker identifier, should be unique for the namespace. - It is different from worker identity, which is usually a combination of host_name and process_id. + It is distinct from worker identity, which is not necessarily namespace-unique. hostInfo: allOf: - $ref: '#/components/schemas/WorkerHostInfo' @@ -12703,11 +12703,11 @@ components: status: enum: - WORKER_STATUS_UNSPECIFIED - - WORKER_STATUS_STATUS_RUNNING + - WORKER_STATUS_RUNNING - WORKER_STATUS_SHUTTING_DOWN - WORKER_STATUS_SHUTDOWN type: string - description: Worker status. Possible values - "running", "shutting_down", "shutdown". + description: Worker status. Defined by SDK. format: enum startTime: type: string @@ -12767,9 +12767,6 @@ components: activePollers: type: integer format: int32 - availablePollers: - type: integer - format: int32 lastSuccessfulPollTime: type: string format: date-time @@ -12778,7 +12775,9 @@ components: properties: currentSlotsAvailable: type: integer - description: Number of slots available for the worker to specific tasks. + description: |- + Number of slots available for the worker to specific tasks. + May be -1 if the upper bound is not known. 
format: int32 currentSlotsUsed: type: integer diff --git a/temporal/api/enums/v1/common.proto b/temporal/api/enums/v1/common.proto index 06eb53ff..192c1d75 100644 --- a/temporal/api/enums/v1/common.proto +++ b/temporal/api/enums/v1/common.proto @@ -102,7 +102,7 @@ enum ApplicationErrorCategory { // aip.dev/not-precedent: It seems we have both state and status, and status is a better fit for workers. --) enum WorkerStatus { WORKER_STATUS_UNSPECIFIED = 0; - WORKER_STATUS_STATUS_RUNNING = 1; + WORKER_STATUS_RUNNING = 1; WORKER_STATUS_SHUTTING_DOWN = 2; WORKER_STATUS_SHUTDOWN = 3; } diff --git a/temporal/api/worker/v1/message.proto b/temporal/api/worker/v1/message.proto index a387190f..d19a9a30 100644 --- a/temporal/api/worker/v1/message.proto +++ b/temporal/api/worker/v1/message.proto @@ -18,14 +18,13 @@ message WorkerPollerInfo { int32 active_pollers = 1; - int32 available_pollers = 2; - - google.protobuf.Timestamp last_successful_poll_time = 3; + google.protobuf.Timestamp last_successful_poll_time = 2; } message WorkerSlotsInfo { // Number of slots available for the worker to specific tasks. + // May be -1 if the upper bound is not known. int32 current_slots_available = 1; // Number of slots used by the worker for specific tasks. int32 current_slots_used = 2; @@ -63,7 +62,7 @@ message WorkerHostInfo { // aip.dev/not-precedent: Removing those words make names less clear. --) message WorkerInfo { // Worker identifier, should be unique for the namespace. - // It is different from worker identity, which is usually a combination of host_name and process_id. + // It is distinct from worker identity, which is not necessarily namespace-unique. string worker_instance_key = 1; // Worker host information. @@ -77,7 +76,7 @@ message WorkerInfo { string sdk_name = 5; string sdk_version = 6; - // Worker status. Possible values - "running", "shutting_down", "shutdown". + // Worker status. Defined by SDK. temporal.api.enums.v1.WorkerStatus status = 7; // Worker start time.