Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Output

Input2

Global

Keyed

NonKeyed

Broadcast

Input1

Global

Global

Keyed

Non-Keyed / Keyed

Non-Keyed / Keyed

NonKeyed

Non-Keyed

Non-Keyed

Broadcast

Non-Keyed / Keyed

Non-Keyed

...

Code Block
languagejava
titleTwoInputStreamProcessFunction.java
/** This interface contains all logic related to processing records from two inputs. */
public interface TwoInputStreamProcessFunction<IN1, IN2, OUT> extends ProcessFunction {
    /**
     * Processes a record from the first input and emits data through the {@link Collector}.
     *
     * @param record the record to process.
     * @param output the collector used to emit processed records.
     * @param ctx the runtime context in which this function is executed.
     * @throws Exception if processing the record fails.
     */
    void processRecordFromFirstInput(IN1 record, Collector<OUT> output, RuntimeContext ctx)
            throws Exception;

    /**
     * Processes a record from the second input and emits data through the {@link Collector}.
     *
     * @param record the record to process.
     * @param output the collector used to emit processed records.
     * @param ctx the runtime context in which this function is executed.
     * @throws Exception if processing the record fails.
     */
    void processRecordFromSecondInput(IN2 record, Collector<OUT> output, RuntimeContext ctx)
            throws Exception;

    /**
     * Life-cycle method indicating that this function will no longer receive any data from the
     * first input. This allows the ProcessFunction to emit results all at once rather than upon
     * each record. The default implementation does nothing.
     *
     * @param output the collector used to emit records.
     * @param ctx the runtime context in which this function is executed.
     */
    default void endFirstInput(Collector<OUT> output, RuntimeContext ctx) {}

    /**
     * Life-cycle method indicating that this function will no longer receive any data from the
     * second input. This allows the ProcessFunction to emit results all at once rather than upon
     * each record. The default implementation does nothing.
     *
     * @param output the collector used to emit records.
     * @param ctx the runtime context in which this function is executed.
     */
    default void endSecondInput(Collector<OUT> output, RuntimeContext ctx) {}
}

BroadcastTwoInputStreamProcessFunction


Code Block
languagejava
titleBroadcastTwoInputStreamProcessFunction.java
/**
 * This interface contains all logic related to processing records from a broadcast stream and a
 * non-broadcast stream.
 */
public interface BroadcastTwoInputStreamProcessFunction<IN1, IN2, OUT> extends ProcessFunction {
    /**
     * Processes a record from the non-broadcast input and emits data through the {@link
     * Collector}.
     *
     * @param record the record to process.
     * @param output the collector used to emit processed records.
     * @param ctx the runtime context in which this function is executed.
     * @throws Exception if processing the record fails.
     */
    void processRecordFromNonBroadcastInput(IN1 record, Collector<OUT> output, RuntimeContext ctx)
            throws Exception;

    /**
     * Processes a record from the broadcast input. In general, the broadcast side is not allowed
     * to manipulate state or output data, because it corresponds to all partitions instead of a
     * single partition. The broadcast context can, however, be used to process all the partitions
     * at once.
     *
     * @param record the record to process.
     * @param ctx the broadcast context in which this function is executed.
     * @throws Exception if processing the record fails.
     */
    void processRecordFromBroadcastInput(IN2 record, BroadcastContext<OUT> ctx) throws Exception;

    /**
     * Life-cycle method indicating that this function will no longer receive any data from the
     * non-broadcast input. This allows the ProcessFunction to emit results all at once rather
     * than upon each record. The default implementation does nothing.
     *
     * @param output the collector used to emit records.
     * @param ctx the runtime context in which this function is executed.
     */
    default void endNonBroadcastInput(Collector<OUT> output, RuntimeContext ctx) {}

    /**
     * Life-cycle method indicating that this function will no longer receive any data from the
     * broadcast input. The default implementation does nothing.
     *
     * @param ctx the broadcast context in which this function is executed.
     */
    default void endBroadcastInput(BroadcastContext<OUT> ctx) {}
}


/** This interface represents the context associated with broadcasting. */
public interface BroadcastContext<OUT> {
    /**
     * Applies a function to all partitions. For a keyed stream, it is applied to all keys. For a
     * non-keyed stream, it is applied to the single partition.
     *
     * @param applyPartitionFunction the function to apply to each partition.
     */
    void applyToAllPartitions(ApplyPartitionFunction<OUT> applyPartitionFunction);
}

/** A function to be applied to all partitions . */
@FunctionalInterface
public interface ApplyPartitionFunction<OUT> {
    /**
     * The actual method to be applied to each partition.
     *
     * @param outputcollector to emitoutput recorddata.
     * @param ctx, runtime context in which this function is executed.
     */
    default void endSecondInputapply(Collector<OUT> outputcollector, RuntimeContext ctx) throws {}Exception;
}

TwoOutputStreamProcessFunction

...

Code Block
languagejava
titleBroadcastStream.java
/** This class represents a stream that each parallel task processes the same data. */
public interface BroadcastStream<T> {
    /**
     * Apply a two input operation to this and other {@link KeyedPartitionStream}.
     *
     * @param other {@link KeyedPartitionStream} to perform operation with two input.
     * @param processFunction to perform operation.
     * @return new stream with this operation.
     */
    <K, T_OTHER, OUT> NonKeyedPartitionStream<OUT> connectAndProcess(
            KeyedPartitionStream<K, T_OTHER> other,
            TwoInputStreamProcessFunction<T, T_OTHER, OUT> processFunction);

    /**
     * Apply a two input operation to this and other {@link NonKeyedPartitionStream}.
     *
     * @param other {@link NonKeyedPartitionStream} to perform operation with two input.
     * @param processFunction to perform operation.
     * @return new stream with this operation.
     */
    <T_OTHER, OUT> NonKeyedPartitionStream<OUT> connectAndProcess(
            NonKeyedPartitionStream<T_OTHER> other,
            TwoInputStreamProcessFunction<TBroadcastTwoInputStreamProcessFunction<T, T_OTHER, OUT> processFunction);
}

...