Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Current state: Under discussion.

Discussion thread: To be added

...

Code Block
languagejava
/**
 * A node of the stage DAG: a single stage together with the TableIds of the tables it reads and
 * the tables it produces.
 */
public class GraphNode {
    /** The stage wrapped by this node. */
    public final Stage<?> stage;

    /**
     * Ids of the tables passed to the stage's fit step. Presumably only meaningful when the stage
     * is an Estimator -- confirm against the code that builds the nodes.
     */
    public final TableId[] estimatorInputs;

    /** Ids of the tables passed to the transform step of the stage (or of its fitted Model). */
    public final TableId[] algoInputs;

    /** Ids of the tables produced by this node. */
    public final TableId[] outputs;
}

...

Code Block
languagejava
/**
 * A Graph acts as an Estimator. A Graph consists of a DAG of stages, each of which could be an
 * Estimator, Model, Transformer or AlgoOperator. When `Graph::fit` is called, the stages are
 * executed in a topologically-sorted order. If a stage is an Estimator, its `Estimator::fit` method
 * will be called on the input tables (from the input edges) to fit a Model. Then the Model will be
 * used to transform the input tables and produce output tables to the output edges. If a stage is
 * an AlgoOperator, its `AlgoOperator::transform` method will be called on the input tables and
 * produce output tables to the output edges. The GraphModel fitted from a Graph consists of the
 * fitted Models and AlgoOperators, corresponding to the Graph's stages.
 */
@PublicEvolving
public final class Graph implements Estimator<Graph, GraphModel> {
    /**
     * Creates a Graph from its nodes and the TableIds that bind the graph-level inputs, outputs
     * and model data to tables flowing between those nodes.
     *
     * @param nodes the nodes (stages plus their input/output TableIds) that make up the DAG
     * @param estimatorInputIds ids of the tables supplied to `Graph::fit`
     * @param algoInputs ids of the tables supplied to the transform method of the fitted
     *     GraphModel
     * @param outputs ids of the tables returned by the transform method of the fitted GraphModel
     * @param inputModelData ids of the model-data tables accepted by the fitted GraphModel
     *     (presumably via setModelData -- confirm)
     * @param outputModelData ids of the model-data tables exposed by the fitted GraphModel
     *     (presumably via getModelData -- confirm)
     */
    public Graph(
            List<GraphNode> nodes,
            TableId[] estimatorInputIds,
            TableId[] algoInputs,
            TableId[] outputs,
            TableId[] inputModelData,
            TableId[] outputModelData) {...}

    /** Runs the stages in topologically-sorted order on the given inputs and returns the fitted GraphModel. */
    @Override
    public GraphModel fit(Table... inputs) {...}

    /** Saves this Graph to the given path. */
    @Override
    public void save(String path) throws IOException {...}

    /**
     * Loads a Graph from the given path.
     *
     * <p>This is a static factory, so it deliberately carries no {@code @Override}: a static
     * method cannot override an interface method, and the annotation would not compile.
     */
    public static Graph load(String path) throws IOException {...}
}

...

Code Block
languagejava
/**
 * A GraphBuilder provides APIs to build Estimator/Model/AlgoOperator from a DAG of stages, each of
 * which could be an Estimator, Model, Transformer or AlgoOperator.
 */
@PublicEvolving
public final class GraphBuilder {
    private int maxOutputLength = 20;

    public GraphBuilder() {}

    /**
     * Specifies the upper bound (could be loose) of the number of output tables that can be
     * returned by the Model::getModelData and AlgoOperator::transform methods, for any stage
     * involved in this Graph.
     *
     * <p>The default upper bound is 20.
     */
    public GraphBuilder setMaxOutputLength(int maxOutputLength) {...}

    /**
     * Creates a TableId associated with this GraphBuilder. It can be used to specify the passing of
     * tables between stages, as well as the input/output tables of the Graph/GraphModel generated
     * by this builder.
     */
    public TableId createTableId() {...}

    /**
     * If the stage is an Estimator, both its fit method and the transform method of its fitted
     * Model would be invoked with the given inputs when the graph runs.
     *
     * <p>If the stage is a Model, Transformer or AlgoOperator, its transform method would be
     * invoked with the given inputs when the graph runs.
     *
     * <p>Returns a list of TableIds, which represents the outputs of AlgoOperator::transform of
     * the given stage.
     */
    public TableId[] getOutputs(Stage<?> stage, TableId... inputs) {...}

    /**
     * If the stage is an Estimator, its fit method would be invoked with estimatorInputs, and the
     * transform method of its fitted Model would be invoked with algoInputs.
     *
     * <p>This method throws Exception if the stage is not an Estimator.
     *
     * <p>This method is useful when the stage is an Estimator AND the Estimator::fit needs to take
     * a different list of Tables from the Model::transform of the fitted Model.
     *
     * <p>Returns a list of TableIds, which represents the outputs of Model::transform of the
     * fitted Model.
     */
    public TableId[] getOutputs(Stage<?> stage, TableId[] estimatorInputs, TableId[] algoInputs) {...}

    /**
     * The setModelData() of the fitted GraphModel should invoke the setModelData() of the given
     * stage with the given inputs.
     */
    public void setModelData(Stage<?> stage, TableId... inputs) {...}

    /**
     * The getModelData() of the fitted GraphModel should invoke the getModelData() of the given
     * stage.
     *
     * <p>Returns a list of TableIds, which represents the outputs of getModelData() of the given
     * stage.
     */
    public TableId[] getModelData(Stage<?> stage) {...}

    /**
     * Returns an Estimator instance with the following behavior:
     *
     * <p>1) Estimator::fit should take the given inputs and return a Model with the following
     * behavior.
     *
     * <p>2) Model::transform should take the given inputs and return the given outputs.
     *
     * <p>The fit method of the returned Estimator and the transform method of the fitted Model
     * should invoke the corresponding methods of the internal stages as specified by the
     * GraphBuilder.
     */
    public Estimator<?, ?> buildEstimator(TableId[] inputs, TableId[] outputs) {...}

    /**
     * Returns an Estimator instance with the following behavior:
     *
     * <p>1) Estimator::fit should take the given inputs and return a Model with the following
     * behavior.
     *
     * <p>2) Model::transform should take the given inputs and return the given outputs.
     *
     * <p>3) Model::setModelData should take the given inputModelData.
     *
     * <p>4) Model::getModelData should return the given outputModelData.
     *
     * <p>The fit method of the returned Estimator and the transform/setModelData/getModelData
     * methods of the fitted Model should invoke the corresponding methods of the internal stages as
     * specified by the GraphBuilder.
     */
    public Estimator<?, ?> buildEstimator(
            TableId[] inputs,
            TableId[] outputs,
            TableId[] inputModelData,
            TableId[] outputModelData) {...}

    /**
     * Returns an Estimator instance with the following behavior:
     *
     * <p>1) Estimator::fit should take the given estimatorInputs and return a Model with the
     * following behavior.
     *
     * <p>2) Model::transform should take the given algoInputs and return the given outputs.
     *
     * <p>3) Model::setModelData should take the given inputModelData.
     *
     * <p>4) Model::getModelData should return the given outputModelData.
     *
     * <p>The fit method of the returned Estimator and the transform/setModelData/getModelData
     * methods of the fitted Model should invoke the corresponding methods of the internal stages as
     * specified by the GraphBuilder.
     */
    public Estimator<?, ?> buildEstimator(
            TableId[] estimatorInputs,
            TableId[] algoInputs,
            TableId[] outputs,
            TableId[] inputModelData,
            TableId[] outputModelData) {...}

    /**
     * Returns an AlgoOperator instance with the following behavior:
     *
     * <p>1) AlgoOperator::transform should take the given inputs and return the given outputs.
     *
     * <p>The transform method of the returned AlgoOperator should invoke the corresponding methods
     * of the internal stages as specified by the GraphBuilder.
     */
    public AlgoOperator<?> buildAlgoOperator(TableId[] inputs, TableId[] outputs) {...}

    /**
     * Returns a Model instance with the following behavior:
     *
     * <p>1) Model::transform should take the given inputs and return the given outputs.
     *
     * <p>The transform method of the returned Model should invoke the corresponding methods of the
     * internal stages as specified by the GraphBuilder.
     */
    public Model<?> buildModel(TableId[] inputs, TableId[] outputs) {...}

    /**
     * Returns a Model instance with the following behavior:
     *
     * <p>1) Model::transform should take the given inputs and return the given outputs.
     *
     * <p>2) Model::setModelData should take the given inputModelData.
     *
     * <p>3) Model::getModelData should return the given outputModelData.
     *
     * <p>The transform/setModelData/getModelData methods of the returned Model should invoke the
     * corresponding methods of the internal stages as specified by the GraphBuilder.
     */
    public Model<?> buildModel(
            TableId[] inputs,
            TableId[] outputs,
            TableId[] inputModelData,
            TableId[] outputModelData) {...}
}

...