Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Redesign the task manager metrics page. This would allow users to understand the relationship between these metrics more clearly.

REST API Design

...

Memory Configuration

Jira
serverASF JIRA
serverId5aa69414-a9e9-3523-82ec-879b028fb15b
keyFLINK-14435

The TaskManager's memory configuration will be exposed through {{

...

/taskmanagers/:taskmanagerid}}. A proposed REST response is shown in the code block below:

Code Block
languagejs
titleJSON Schema of response
collapsetrue
{
  "type" : "object",
  "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskManagerDetailsInfo",
  "properties" : {
    "id" : {
      "type" : "any"
    },
    "path" : {
      "type" : "string"
    },
    "dataPort" : {
      "type" : "integer"
    },
    "timeSinceLastHeartbeat" : {
      "type" : "integer"
    },
    "slotsNumber" : {
      "type" : "integer"
    },
    "freeSlots" : {
      "type" : "integer"
    },
    "hardware" : {
      "type" : "object",
      "id" : "urn:jsonschema:org:apache:flink:runtime:instance:HardwareDescription",
      "properties" : {
        "cpuCores" : {
          "type" : "integer"
        },
        "physicalMemory" : {
          "type" : "integer"
        },
        "freeMemory" : {
          "type" : "integer"
        },
        "managedMemory" : {
          "type" : "integer"
        }
      }
    },
    "memoryConfiguration" : {
      "type" : "object",
      "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskExecutorMemoryConfiguration",
      "properties" : {
        "frameworkHeap" : {
          "type" : "long"
        },
        "frameworkOffHeap" : {
          "type" : "long"
        },
        "taskHeap" : {
          "type" : "long"
        },
        "taskOffHeap" : {
          "type" : "long"
        },
        "networkMemory" : {
          "type" : "long"
        },
        "managedMemory" : {
          "type" : "long"
        },
        "jvmMetaspace" : {
          "type" : "long"
        },
        "jvmOverhead" : {
          "type" : "long"
        },
        "totalFlinkMemory" : {
          "type" : "long"
        },
        "totalProcessMemory" : {
          "type" : "long"
        }
      }
    },
    "metrics" : {
      "type" : "object",
      "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskManagerMetricsInfo",
      "properties" : {
        "heapUsed" : {
          "type" : "integer"
        },
        "heapCommitted" : {
          "type" : "integer"
        },
        "heapMax" : {
          "type" : "integer"
        },
        "metaspaceUsed" : {
          "type" : "integer"
        },
        "metaspaceCommitted" : {
          "type" : "integer"
        },
        "metaspaceMax" : {
          "type" : "integer"
        },
        "nonHeapUsed" : {
          "type" : "integer"
        },
        "nonHeapCommitted" : {
          "type" : "integer"
        },
        "nonHeapMax" : {
          "type" : "integer"
        },
        "directCount" : {
          "type" : "integer"
        },
        "directUsed" : {
          "type" : "integer"
        },
        "directMax" : {
          "type" : "integer"
        },
        "mappedCount" : {
          "type" : "integer"
        },
        "mappedUsed" : {
          "type" : "integer"
        },
        "mappedMax" : {
          "type" : "integer"
        },
        "memorySegmentsAvailable" : {
          "type" : "integer"
        },
        "memorySegmentsTotal" : {
          "type" : "integer"
        },
        "managedMemoryUsed" : {
          "type" : "long"
        },
        "managedMemoryTotal" : {
          "type" : "long"
        },
        "networkMemoryUsed" : {
          "type" : "long"
        },
        "networkMemoryTotal" : {
          "type" : "long"
        },
        "garbageCollectors" : {
          "type" : "array",
          "items" : {
            "type" : "object",
            "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskManagerMetricsInfo:GarbageCollectorInfo",
            "properties" : {
              "name" : {
                "type" : "string"
              },
              "count" : {
                "type" : "integer"
              },
              "time" : {
                "type" : "integer"
              }
            }
          }
        }
      }
    }
  }
}

Metrics exposure

The newly introduced metrics can be accessed through the metrics REST endpoint.

Implementation Proposal

Step 1: Expose effective configuration parameters of TaskExecutor

...

  • Create a separate independent endpoint for the effective memory configuration.
  • Deprecate the metrics sub-record returned by /taskmanagers/:taskmanagerid. The metrics endpoint can be used instead. This would simplify the TaskManagerDetailsHandler.

Test Plan

Existing tests are updated to verify the feature.