Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
/**
 * Job lineage is built according to StreamGraph. Users can get sources, sinks and relationships from lineage.
 */
@PublicEvolvig
public interface LineageGraph {
    /* Source lineage vertex list. */
    List<SourceLineageVertex> sources();

    /* Sink lineage vertex list. */
    List<LineageVertex> sinks();

    /* lineage edges from sources to sinks. */
    List<LineageEdge> relations();
}

/** Lineage vertex represents the connectors in lineage graph, including source and sink. */
@PublicEvolving
public interface LineageVertex {
    /* List of input (for source) or output (for sink) datasets interacted with by the connector */
    List<LineageDataset> datasets;
}

/** Lineage dataset represents the source or sink in the job. */
@PublicEvolving
public interface LineageDataset {
    /* Name for this particular dataset. */
    String name;
    /* Unique name for this dataset's storage, for example, url for jdbc connector and location for lakehouse connector. */
    String namespace;
    /* Facets for the lineage vertex to describe the particular information of dataset, such as schema and config. */ 
    Map<String, Facet> facets;
}

/** Facet interface for dataset. */
@PublicEvolving
public interface LineageDatasetFacet {
    /** Name for the facet which will be used as key in facets of LineageDataset. */
    String name();
}

/** Builtin config facet for dataset. */
@PublicEvolving
public interface DatasetConfigFacet extends LineageDatasetFacet {
    Map<String, String> config();
}

/** Field for schema in dataset. */
public interface DatasetSchemaField<T> {
    /** The name of the field. */
    String name();
    /** The type of the field. */
    T type();
}

/** Builtin schema facet for dataset. */
@PublicEvolving
public interface DatasetSchemaFacet extends LineageDatasetFacet {
    <T> Map<String, String>DatasetSchemaField<T>> fields();
}

/** Lineage vertex for source which has boundedness. */
@PublicEvolving
public interface SourceLineageVertex extends LineageVertex {
    /**
     * The boundedness for the source connector, users can get boundedness for each sources in
     * the lineage and determine the job execution mode with RuntimeExecutionMode.
     */
    Boundedness boundedness();
}

/** Lineage edge from sources to sink. */
@PublicEvolving
public interface LineageEdge {
    LineageVertex source();
    LineageVertex sink();
}

...

Code Block
/** Basic table lineage vertex which has catalog context and table in it. */
public interface TableLineageVertex extends LineageVertex {
    /* The catalog context of the table lineage vertex. */
    public CatalogContext catalogContext();

    /* The table of the lineage vertex. */
    public CatalogBaseTable table();

    /* Database name and table name for the table lineage vertex. */
    public ObjectPath objectPath();
}

/** Schema field for sql/table jobs which has `LogicalType` for field type. */
public interface TableSchemaField extends DatasetSchemaField<LogicalType> {
}

/** Source lineage vertex for table. */
@PublicEvolving
public interface TableSourceLineageVertex extends TableLineageVertex, SourceLineageVertex {
}

/** Sink lineage vertex for table. */
@PublicEvolving
public interface TableSinkLineageVertex extends TableLineageVertex {
    /* Modify type, INSERT/UPDATE/DELETE statement, listener can identify different sink by the type to determine whether the sink should be added to the lineage. */
    ModifyType modifyType();
}

/* Table lineage edges from source table to sink table. */
@PublicEvolving
public interface TableLineageEdge extends LineageEdge {
    /* Table column lineage edges from source columns to sink columns. */
    List<TableColumnLineageEdge> columnEdges();
}

/* Column lineage from source table columns to each sink table column, one sink column may be aggregated by multiple tables and columns. */
@PublicEvolving
public interface TableColumnLineageEdge {
    /**
     * Columns from one source table of {@link LineageEdge} to the sink column. Each sink column may be computed from multiple columns
     * from source, for example, avg operator from two columns in the source.
     */
    List<String> sourceColumns();

    /* Sink table column. */
    String sinkColumn();
}

/** The existing `ModifyType` should be marked as `PublicEvolving` and users can get it from table sink lineage vertex. */
@PublicEvolving
public enum ModifyType {
}

...