Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Status

Current state[Under Discussion]

...

Page properties


Discussion thread
Vote threadhttps://

...

...

Jira
serverASF JIRA
serverId5aa69414-a9e9-3523-82ec-879b028fb15b
keyFLINK-16050

...

Release


Please keep the discussion on the mailing list rather than commenting on the wiki (wiki discussions get unwieldy fast).

...

When a user clicks a vertex in the timeline, display the timeline of all its subtask attempts, each labeled as subtaskId-host-attemptId.

REST API Design

   

  • ArchivedExecutionVertex adds a method to return prior executions.
  • get prior execution attempt

...

// Gather the details of every execution attempt of this subtask: the current
// attempt first, followed by the prior attempts from newest to oldest.
AccessExecution execution = executionVertex.getCurrentExecutionAttempt();
int currentAttemptNum = execution.getAttemptNumber();

JobID jobID = request.getPathParameter(JobIDPathParameter.class);
JobVertexID jobVertexID = request.getPathParameter(JobVertexIdPathParameter.class);

List<SubtaskExecutionAttemptDetailsInfo> allAttempts = new ArrayList<>();

// The current attempt always heads the list.
allAttempts.add(SubtaskExecutionAttemptDetailsInfo.create(execution, metricFetcher, jobID, jobVertexID));

// Count down through the earlier attempt numbers (currentAttemptNum - 1 ... 0).
// Nothing is visited when the current attempt number is not positive.
int attemptNumber = currentAttemptNum;
while (attemptNumber > 0) {
  attemptNumber--;
  AccessExecution priorAttempt = executionVertex.getPriorExecutionAttempt(attemptNumber);
  if (priorAttempt == null) {
    // No execution was returned for this attempt number; skip it.
    continue;
  }
  allAttempts.add(SubtaskExecutionAttemptDetailsInfo.create(priorAttempt, metricFetcher, jobID, jobVertexID));
}

  • by ArchivedExecutionVertex.getPriorExecutionAttempts()
  • add SubtaskAllExecutionAttemptsDetailsHandler for failed attempt
  • url /jobs/:jobid/vertices/:vertexid/subtasks/:subtaskIndex/attempts
  • response:

{
  "type" : "object",
  "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:SubtaskAllExecutionAttemptsDetailsInfo",
  "properties" : {
    "attempts" : {
      "type" : "array",
      "items" : {
        "type" : "object",
        "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:SubtaskExecutionAttemptDetailsInfo",
        "properties" : {
          "subtask" : {
            "type" : "integer"
          },
          "status" : {
            "type" : "string",
            "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ]
          },
          "attempt" : {
            "type" : "integer"
          },
          "host" : {
            "type" : "string"
          },
          "start-time" : {
            "type" : "integer"
          },
          "end-time" : {
            "type" : "integer"
          },
          "duration" : {
            "type" : "integer"
          },
          "metrics" : {
            "type" : "object",
            "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:metrics:IOMetricsInfo",
            "properties" : {
              "read-bytes" : {
                "type" : "integer"
              },
              "read-bytes-complete" : {
                "type" : "boolean"
              },
              "write-bytes" : {
                "type" : "integer"
              },
              "write-bytes-complete" : {
                "type" : "boolean"
              },
              "read-records" : {
                "type" : "integer"
              },
              "read-records-complete" : {
                "type" : "boolean"
              },
              "write-records" : {
                "type" : "integer"
              },
              "write-records-complete" : {
                "type" : "boolean"
              }
            }
          },
          "taskmanager-id" : {
            "type" : "string"
          },
          "start_time" : {
            "type" : "integer"
          }
        }
      }
    }
  }
}

  • In the 'subtasks' array we have objects of type SubtaskTimeInfo with only one added field 'attempt-num'.
  • Add a query parameter show-history (default value: false). If show-history is true, information for all attempts, including
    previous ones, will be returned.
  • url: /jobs/:jobid/vertices/:vertexid/subtasktimes?show-history=true
  • response:

...

{
  "type" : "object",
  "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:SubtasksTimesInfo",
  "properties" : {
    "id" : {
      "type" : "string"
    },
    "name" : {
      "type" : "string"
    },
    "now" : {
      "type" : "integer"
    },
    "subtasks" : {
      "type" : "array",
      "items" : {
        "type" : "object",
        "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:SubtasksTimesInfo:SubtaskTimeInfo",
        "properties" : {
          "subtask" : {
            "type" : "integer"
          },
          "host" : {
            "type" : "string"
          },
          "duration" : {
            "type" : "integer"
          },
          "timestamps" : {
            "type" : "object",
            "additionalProperties" : {
              "type" : "integer"
            }
          },
          "attempt-num" : {
            "type" : "integer"
          }
        }
      }
    }
  }
}

...