Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
languagejava
/**
 * A Graph acts as an Estimator. It consists of a DAG of stages, each of which is either an
 * Estimator or Transformer.
 *
 * <p>This is an API sketch: bodies written as {@code {...}} are intentionally elided.
 */
@PublicEvolving
public final class Graph implements Estimator<Graph, GraphModel> {
    /** Constructor; its parameter list and body are elided in this sketch. */
    public Graph(...) {...}

    /**
     * Fits this Graph on the given input tables.
     *
     * @param inputs the input tables passed to the fit call
     * @return the resulting GraphModel (body elided in this sketch)
     */
    @Override
    public GraphModel fit(Table... inputs) {...}

    /**
     * Returns the given schemas unchanged.
     *
     * @param schemas the input table schemas
     * @return the same array that was passed in
     */
    @Override
    public TableSchema[] transformSchemas(TableSchema... schemas) {
        return schemas;
    }

    /** Skipped a few methods, including the implementations of some Estimator APIs. */
}

/**
 * A GraphModel acts as a Transformer. It consists of a DAG of Transformers.
 *
 * <p>This is an API sketch; the class body is elided. NOTE(review): presumably this is the model
 * type produced by fitting a Graph, since Graph::fit is declared to return GraphModel — confirm
 * against the full implementation.
 */
@PublicEvolving
public final class GraphModel implements Transformer<GraphModel> {
    /** Skipped a few methods, including the implementations of the Transformer APIs. */
}

/**
 * A GraphBuilder helps connect Stage instances into a Graph or GraphModel.
 *
 * <p>This is an API sketch: bodies written as {@code {...}} are intentionally elided.
 */
@PublicEvolving
public final class GraphBuilder {
    /**
     * Specifies the upper bound (could be loose) of the number of output tables that can be
     * returned by the Transformer::getStateStreams and Transformer::transform methods, for any
     * stage involved in this Graph.
     *
     * <p>The default upper bound is 20.
     *
     * @param maxOutputLength the upper bound on the number of output tables of any stage
     * @return a GraphBuilder (presumably this builder, to allow chaining — body elided)
     */
    public GraphBuilder setMaxOutputLength(int maxOutputLength) {...}

    /**
     * Creates a TableId associated with this GraphBuilder. It can be used to specify the passing of
     * tables between stages, as well as the input/output tables of the Graph/GraphModel generated
     * by this builder.
     *
     * @return a TableId associated with this builder
     */
    public TableId createTableId() {...}

    /**
     * Registers a stage whose fit and transform calls share the same inputs.
     *
     * <p>If the stage is an Estimator, both its fit method and the transform method of its fitted
     * Transformer would be invoked with the given inputs when the graph runs.
     *
     * <p>If the stage is a Transformer, its transform method would be invoked with the given
     * inputs when the graph runs.
     *
     * @param stage the Estimator or Transformer to add to the graph
     * @param inputs the TableIds of the tables fed to the stage
     * @return a list of TableIds, which represents outputs of the Transformer::transform
     *     invocation; in this sketch it is a freshly allocated array of length maxOutputLength
     */
    public TableId[] getOutputs(Stage<?> stage, TableId... inputs) {
        return new TableId[maxOutputLength];
    }

    /**
     * Registers an Estimator whose fit call takes different inputs from the transform call of its
     * fitted Transformer.
     *
     * <p>If the stage is an Estimator, its fit method would be invoked with estimatorInputs, and
     * the transform method of its fitted Transformer would be invoked with transformerInputs, when
     * the graph runs.
     *
     * <p>This is useful when the stage is an Estimator AND the Estimator::fit needs to take a
     * different list of Tables from the Transformer::transform of the fitted Transformer.
     *
     * @param stage the Estimator to add to the graph
     * @param estimatorInputs the TableIds of the tables fed to Estimator::fit
     * @param transformerInputs the TableIds of the tables fed to Transformer::transform of the
     *     fitted Transformer
     * @return a list of TableIds, which represents outputs of the Transformer::transform
     *     invocation; in this sketch it is a freshly allocated array of length maxOutputLength
     * @throws Exception if the stage is a Transformer
     */
    public TableId[] getOutputs(Stage<?> stage, TableId[] estimatorInputs, TableId[] transformerInputs) {
        return new TableId[maxOutputLength];
    }

    /**
     * The GraphModel::setStateStreams should invoke the setStateStreams of the corresponding stage
     * with the corresponding inputs.
     *
     * @param stage the stage whose setStateStreams should be invoked
     * @param inputs the TableIds of the state tables passed to that stage
     */
    void setStateStreams(Stage<?> stage, TableId... inputs) {...}

    /**
     * The GraphModel::getStateStreams should invoke the getStateStreams of the corresponding stage.
     *
     * @param stage the stage whose getStateStreams should be invoked
     * @return a list of TableIds, which represents outputs of the getStateStreams invocation
     */
    TableId[] getStateStreams(Stage<?> stage) {...}

    /**
     * Returns a Graph instance with the following API specification:
     *
     * <p>1) Graph::fit should take inputs and return a GraphModel with the following
     * specification.
     *
     * <p>2) GraphModel::transform should take inputs and return outputs.
     *
     * <p>3) GraphModel::setStateStreams should take inputStates.
     *
     * <p>4) GraphModel::getStateStreams should return outputStates.
     *
     * <p>The fit/transform/setStateStreams/getStateStreams should invoke the APIs of the internal
     * stages in the order specified by the DAG of stages.
     *
     * @param inputs the TableIds taken by Graph::fit and GraphModel::transform
     * @param outputs the TableIds returned by GraphModel::transform
     * @param inputStates the TableIds taken by GraphModel::setStateStreams
     * @param outputStates the TableIds returned by GraphModel::getStateStreams
     * @return a Graph that follows the specification above
     */
    Graph build(TableId[] inputs, TableId[] outputs, TableId[] inputStates, TableId[] outputStates) {...}

    /**
     * Returns a GraphModel instance with the following API specification:
     *
     * <p>1) GraphModel::transform should take inputs and return outputs.
     *
     * <p>2) GraphModel::setStateStreams should take inputStates.
     *
     * <p>3) GraphModel::getStateStreams should return outputStates.
     *
     * <p>The transform/setStateStreams/getStateStreams should invoke the APIs of the internal
     * stages in the order specified by the DAG of stages.
     *
     * <p>This method throws exception if any stage of this graph is an Estimator.
     *
     * @param inputs the TableIds taken by GraphModel::transform
     * @param outputs the TableIds returned by GraphModel::transform
     * @param inputStates the TableIds taken by GraphModel::setStateStreams
     * @param outputStates the TableIds returned by GraphModel::getStateStreams
     * @return a GraphModel that follows the specification above
     */
    GraphModel buildModel(TableId[] inputs, TableId[] outputs, TableId[] inputStates, TableId[] outputStates) {...}

    // The TableId is necessary to pass the inputs/outputs of various API calls across the
    // Graph/GraphModel stages.
    static class TableId {}

}

...