...
Current state: "Under Discussion"
Discussion thread: TBD here
JIRA: TBD
Please keep the discussion on the mailing list rather than commenting on the wiki (wiki discussions get unwieldy fast).
Motivation
Rich functions are one of the essential parts of stream processing. There are several use cases where users cannot express their business logic with the current non-rich methods, especially when init(Some params) and close() methods are needed.
Public Interfaces
Proposed Changes
There are 2 main issues to consider while introducing rich functions: (1) limiting the ProcessorContext for the init(Some param) method and (2) introducing rich functions in a backwards-compatible way.
Jira | ||||||
---|---|---|---|---|---|---|
|
Jira | ||||||||
---|---|---|---|---|---|---|---|---|
|
Jira | ||||||||
---|---|---|---|---|---|---|---|---|
|
Please keep the discussion on the mailing list rather than commenting on the wiki (wiki discussions get unwieldy fast).
Motivation
This KIP incorporates KIP-149 and provides a hybrid solution for rich functions in Streams and for accessing read-only keys within the ValueJoiner, ValueTransformer and ValueMapper interfaces.
Rich functions are one of the essential parts of stream processing. There are several use cases where users cannot express their business logic with the current non-rich methods. For example:
- having access to RecordContext within an operator
- having access to a read-only key for ValueJoiner, ValueTransformer, ValueMapper interfaces
Rich Interfaces
Code Block | ||
---|---|---|
| ||
/**
 * Initializer variant whose {@code apply} method is handed the key and returns a value of type {@code V}.
 * Note that, unlike the other rich interfaces in this listing, {@code apply} does not take a RecordContext.
 *
 * @param <V> type of the value returned by {@code apply}
 * @param <K> type of the key passed to {@code apply}
 */
public interface RichInitializer<V, K> {
V apply(K key);
}
/**
 * Value mapper that receives, besides the value, the record's key and its RecordContext.
 * Per the motivation section, the key is meant to be read-only.
 *
 * @param <V>  input value type
 * @param <VR> result type
 * @param <K>  key type
 */
public interface RichValueMapper<V, VR, K> {
VR apply(final V value, final K key, final RecordContext recordContext);
}
/**
 * Value joiner that receives the two values to join plus the key and the RecordContext.
 * Per the motivation section, the key is meant to be read-only.
 * Type-parameter order: first value, second value, result, key (the key comes last).
 *
 * @param <V1> type of the first value
 * @param <V2> type of the second value
 * @param <VR> result type
 * @param <K>  key type
 */
public interface RichValueJoiner<V1, V2, VR, K> {
VR apply(final V1 value1, final V2 value2, final K key, final RecordContext recordContext);
}
/**
 * Key-value mapper whose {@code apply} method additionally receives the RecordContext.
 *
 * @param <K>  key type
 * @param <V>  value type
 * @param <VR> result type
 */
public interface RichKeyValueMapper<K, V, VR> {
VR apply(final K key, final V value, final RecordContext recordContext);
}
/**
 * Reducer that takes two values of type {@code V} and returns one value of the same type,
 * with the key and the RecordContext passed in as extra arguments.
 *
 * @param <V> value type (inputs and result)
 * @param <K> key type
 */
public interface RichReducer<V, K> {
V apply(final V value1, final V value2, final K key, final RecordContext recordContext);
}
/**
 * Aggregator that receives the key, a value, the current aggregate and the RecordContext,
 * and returns an aggregate of type {@code VA}.
 *
 * @param <K>  key type
 * @param <V>  value type
 * @param <VA> aggregate type
 */
public interface RichAggregator<K, V, VA> {
VA apply(final K key, final V value, final VA aggregate, final RecordContext recordContext);
}
/**
 * Action invoked with a key, a value and the RecordContext; it returns nothing.
 *
 * @param <K> key type
 * @param <V> value type
 */
public interface RichForeachAction<K, V> {
void apply(final K key, final V value, final RecordContext recordContext);
}
/**
 * Predicate evaluated against a key, a value and the RecordContext.
 *
 * @param <K> key type
 * @param <V> value type
 */
public interface RichPredicate<K, V> {
boolean test(final K key, final V value, final RecordContext recordContext);
}
/**
 * Merger that combines two aggregates of type {@code V} for a given aggregate key,
 * with the RecordContext passed in as an extra argument.
 * In this document it appears as the session merger argument of the session-windowed aggregate methods.
 *
 * @param <K> aggregate key type
 * @param <V> aggregate type (inputs and result)
 */
public interface RichMerger<K, V> {
V apply(final K aggKey, final V aggOne, final V aggTwo, final RecordContext recordContext);
}
/**
 * Value transformer with an init/transform/close life cycle whose {@code transform} method
 * also receives the key. Per the motivation section, the key is meant to be read-only.
 * Note that {@code init} is given the full ProcessorContext and {@code transform} takes no RecordContext.
 *
 * @param <V>  input value type
 * @param <VR> result type
 * @param <K>  key type
 */
public interface RichValueTransformer<V, VR, K> {
void init(final ProcessorContext context);
VR transform(final V value, final K key);
void close();
}
/**
 * Supplier of {@code RichValueTransformer} instances; it is the argument type of the rich
 * {@code transformValues} overload listed under KStream.
 *
 * @param <V>  input value type of the supplied transformer
 * @param <VR> result type of the supplied transformer
 * @param <K>  key type of the supplied transformer
 */
public interface RichValueTransformerSupplier<V, VR, K> {
RichValueTransformer<V, VR, K> get();
}
|
Public Interfaces
KStream
Code Block | ||
---|---|---|
| ||
// filter overload taking a RichPredicate, whose test method also receives the RecordContext.
KStream<K, V> filter(RichPredicate<? super K, ? super V> predicate);
// filterNot overload taking a RichPredicate, whose test method also receives the RecordContext.
KStream<K, V> filterNot(RichPredicate<? super K, ? super V> predicate);
// selectKey overload: the RichKeyValueMapper's result type (KR) becomes the key type of the returned stream.
<KR> KStream<KR, V> selectKey(RichKeyValueMapper<? super K, ? super V, ? extends KR> mapper);
// map overload: the RichKeyValueMapper returns a KeyValue whose key/value types (KR, VR) are those of the returned stream.
<KR, VR> KStream<KR, VR> map(RichKeyValueMapper<? super K, ? super V, ? extends KeyValue<? extends KR, ? extends VR>> mapper);
// mapValues overload taking a RichValueMapper (type arguments in value, result, key order); the key type K is unchanged.
<VR> KStream<K, VR> mapValues(RichValueMapper<? super V, ? extends VR, ? super K> mapper);
// flatMap overload: the RichKeyValueMapper returns an Iterable of KeyValue pairs per input record.
<KR, VR> KStream<KR, VR> flatMap(final RichKeyValueMapper<? super K, ? super V, ? extends Iterable<? extends KeyValue<? extends KR, ? extends VR>>> mapper);
// flatMapValues overload: the RichValueMapper returns an Iterable of result values; the key type K is unchanged.
<VR> KStream<K, VR> flatMapValues(final RichValueMapper<? super V, ? extends Iterable<? extends VR>, ? super K> mapper);
// foreach overload taking a RichForeachAction, whose apply method also receives the RecordContext; returns nothing.
void foreach(final RichForeachAction<? super K, ? super V> action);
// peek overload taking a RichForeachAction; returns a stream with the same key and value types.
KStream<K, V> peek(final RichForeachAction<? super K, ? super V> action);
// branch overload taking a varargs list of RichPredicates and returning an array of streams.
KStream<K, V>[] branch(final RichPredicate<? super K, ? super V>... predicates);
// transformValues overload taking a RichValueTransformerSupplier (type arguments in value, result, key order)
// and an optional list of state store names.
<VR> KStream<K, VR> transformValues(final RichValueTransformerSupplier<? super V, ? extends VR, ? super K> valueTransformerSupplier,
final String... stateStoreNames);
// groupBy overloads: the RichKeyValueMapper selects the new key (type KR) of the returned KGroupedStream.
<KR> KGroupedStream<KR, V> groupBy(final RichKeyValueMapper<? super K, ? super V, KR> selector);
// Same as above, with an additional Serialized<KR, V> argument.
<KR> KGroupedStream<KR, V> groupBy(final RichKeyValueMapper<? super K, ? super V, KR> selector,
final Serialized<KR, V> serialized);
// Stream-stream join overloads taking a RichValueJoiner.
// Joiner type arguments are in (this value, other value, result, key) order, i.e. the key comes last.
<VO, VR> KStream<K, VR> join(final KStream<K, VO> otherStream,
final RichValueJoiner<? super V, ? super VO, ? extends VR, ? super K> joiner,
final JoinWindows windows);
// Same as above, with an additional Joined<K, V, VO> argument.
<VO, VR> KStream<K, VR> join(final KStream<K, VO> otherStream,
final RichValueJoiner<? super V, ? super VO, ? extends VR, ? super K> joiner,
final JoinWindows windows,
final Joined<K, V, VO> joined);
// Stream-stream leftJoin overloads taking a RichValueJoiner.
// Joiner type arguments are in (this value, other value, result, key) order, i.e. the key comes last.
<VO, VR> KStream<K, VR> leftJoin(final KStream<K, VO> otherStream,
final RichValueJoiner<? super V, ? super VO, ? extends VR, ? super K> joiner,
final JoinWindows windows);
// Same as above, with an additional Joined<K, V, VO> argument.
<VO, VR> KStream<K, VR> leftJoin(final KStream<K, VO> otherStream,
final RichValueJoiner<? super V, ? super VO, ? extends VR, ? super K> joiner,
final JoinWindows windows,
final Joined<K, V, VO> joined);
// Stream-stream outerJoin overloads taking a RichValueJoiner.
// Joiner type arguments are in (this value, other value, result, key) order, i.e. the key comes last.
<VO, VR> KStream<K, VR> outerJoin(final KStream<K, VO> otherStream,
final RichValueJoiner<? super V, ? super VO, ? extends VR, ? super K> joiner,
final JoinWindows windows);
// Same as above, with an additional Joined<K, V, VO> argument.
<VO, VR> KStream<K, VR> outerJoin(final KStream<K, VO> otherStream,
final RichValueJoiner<? super V, ? super VO, ? extends VR, ? super K> joiner,
final JoinWindows windows,
final Joined<K, V, VO> joined);
// Stream-table join overloads taking a RichValueJoiner.
// The joiner's type arguments follow the RichValueJoiner<V1, V2, VR, K> declaration in the
// "Rich Interfaces" listing: stream value, table value, result, then the key (last). This is the
// same order the stream-stream join overloads use.
<VT, VR> KStream<K, VR> join(final KTable<K, VT> table,
final RichValueJoiner<? super V, ? super VT, ? extends VR, ? super K> joiner);
// Same as above, with an additional Joined<K, V, VT> argument.
<VT, VR> KStream<K, VR> join(final KTable<K, VT> table,
final RichValueJoiner<? super V, ? super VT, ? extends VR, ? super K> joiner,
final Joined<K, V, VT> joined);
// Stream-table leftJoin counterpart of the first overload.
<VT, VR> KStream<K, VR> leftJoin(final KTable<K, VT> table,
final RichValueJoiner<? super V, ? super VT, ? extends VR, ? super K> joiner);
<VT | ||
Code Block | ||
| ||
KStream<K, V> filter(Predicate<? super K, ? super V> predicate, final RecordContext recordContext); KStream<K, V> filterNot(Predicate<? super K, ? super V> predicate, final RecordContext recordContext); <KR, VR> KStream<KR, VR> map(KeyValueMapper<? super K, ? super V, ? extends KeyValue<? extends KR, ? extends VR>> mapper, final RecordContext recordContext); <VR> KStream<K, VR> mapValues(ValueMapper<? super V, ? extends VR> mapper, final RecordContext recordContext); <KR, VR> KStream<KR, VR> flatMap(final KeyValueMapper<? super K, ? super V, ? extends Iterable<? extends KeyValue<? extends KR, ? extends VR>>> mapper, final RecordContext recordContext); <VR> KStream<K, VR> flatMapValues(final ValueMapper<? super V, ? extends Iterable<? extends VR>> processor);<VO, VR> KStream<K, VR> joinleftJoin(final KStream<KKTable<K, VO>VT> otherStreamtable, final ValueJoiner< final RichValueJoiner<? super K, ? super V, ? super VOVT, ? extends VR> joiner, final JoinWindowsJoined<K, windowsV, finalVT> RecordContext recordContextjoined); <VO<GK, GV, VR>RV> KStream<K, VR>RV> join(final KStream<KGlobalKTable<GK, VO>GV> otherStreamglobalKTable, final ValueJoiner<? super V, ? super VO, ? extends VR> joiner, final RichKeyValueMapper<? super K, ? super V, ? extends GK> keyValueMapper, final JoinWindows windows, final RichValueJoiner<? super K, final? Serde<K>super keySerdeV, ? super GV, ? extends RV> joiner); <GK, GV, RV> KStream<K, RV> leftJoin(final GlobalKTable<GK, GV> globalKTable, final Serde<V> thisValueSerde, final RichKeyValueMapper<? super K, ? super V, ? finalextends Serde<VO>GK> otherValueSerdekeyValueMapper, final RecordContext recordContext) <VO, VR> KStream<K, VR> leftJoin(final KStream<K,RichValueJoiner<? VO>super otherStreamK, ? super V, ? super GV, ? extends final ValueJoiner<? super VRV> valueJoiner); |
KTable
Code Block | ||
---|---|---|
| ||
KTable<K, V> filter(final RichPredicate<? super K, ? super V> predicate); KTable<K, V> filter(final RichPredicate<? super K, ? super VO, ? extends VR> joinerV> predicate, final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized); KTable<K, V> final JoinWindows windows, final RecordContext recordContext); <VO, VR> KStream<K, VR> leftJoin(final KStream<K, VO> otherStreamfilterNot(final RichPredicate<? super K, ? super V> predicate); KTable<K, V> filterNot(final RichPredicate<? super K, ? super V> predicate, final Materialized<K, V, KeyValueStore<Bytes, byte[]>> final ValueJoiner<materialized); <VR> KTable<K, VR> mapValues(final RichValueMapper<? super V, ? superextends VOVR, ? extendssuper VR> joiner, K> mapper); <VR> KTable<K, VR> mapValues(final RichValueMapper<? super V, ? extends VR, ? super K> mapper, final JoinWindows windows, final Materialized<K, VR, KeyValueStore<Bytes, final Serde<K> keySerde, final Serde<V> thisValSerde, byte[]>> materialized); <KR> KStream<KR, V> toStream(final RichKeyValueMapper<? super K, ? super V, ? extends KR> mapper); <KR, VR> KGroupedTable<KR, VR> groupBy(final RichKeyValueMapper<? super K, ? super V, KeyValue<KR, VR>> selector); <KR, VR> KGroupedTable<KR, VR> groupBy(final RichKeyValueMapper<? super K, ? super V, KeyValue<KR, VR>> selector, final Serde<VO> otherValueSerde, final RecordContext recordContext);Serialized<KR, VR> serialized); <VO, VR> KStream<KKTable<K, VR> outerJoinjoin(final KStream<KKTable<K, VO> otherStreamother, final ValueJoiner<RichValueJoiner<? super V, ? super VO, ? extends VR> joinerVR, ? super K> joiner); <VO, VR> KTable<K, VR> join(final KTable<K, VO> other, final JoinWindows windows, final RecordContext recordContext); <VO, VR> KStream<K, VR> outerJoin(final KStream<K,RichValueJoiner<? VO>super otherStreamV, ? super VO, ? extends VR, ? super K> joiner, final ValueJoiner<? super V, ? super VO, ? 
extends VR>final joinerMaterialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized); <VO, VR> KTable<K, VR> leftJoin(final KTable<K, VO> other, final JoinWindows windows, final RichValueJoiner<? super V, ? super VO, ? extends VR, ? super K> joiner); <VO, VR> KTable<K, VR> leftJoin(final KTable<K, VO> other, final Serde<K> keySerde, final ValueJoiner<? super K, ? super V, ? super VO, ? finalextends Serde<V>VR> thisValueSerdejoiner, final Serde<VO>Materialized<K, otherValueSerde, final RecordContext recordContextVR, KeyValueStore<Bytes, byte[]>> materialized); <VT <VO, VR> KStream<KKTable<K, VR> joinouterJoin(final KTable<K, VT>VO> tableother, final ValueJoiner<RichValueJoiner<? super V, ? super VTVO, ? extends VR>VR, joiner,? finalsuper RecordContextK> recordContextjoiner); <VT<VO, VR> KStream<KKTable<K, VR> joinouterJoin(final KTable<K, VT>VO> tableother, final ValueJoiner<? super V,final RichValueJoiner<? super VT, ? extends VR> joiner, V, ? super VO, ? extends VR, ? super K> joiner, final Serde<K> keySerde, final Materialized<K, VR, final Serde<V> valSerde, final RecordContext recordContextKeyValueStore<Bytes, byte[]>> materialized); |
KGroupedStream
Code Block | ||
---|---|---|
| ||
KTable<K, V> reduce(final RichReducer<V, K> reducer); <VTKTable<K, VR> KStream<K, VR> leftJoinV> reduce(final KTable<KRichReducer<V, VT>K> tablereducer, final ValueJoiner<?Materialized<K, super V, ? super VT, ? extends VR> joiner, final RecordContext recordContextKeyValueStore<Bytes, byte[]>> materialized); <VT, VR> KStream<K<VR> KTable<K, VR> leftJoinaggregate(final KTable<KRichInitializer<VR, VT>K> tableinitializer, final ValueJoiner<RichAggregator<? super VK, ? super VTV, ? extends VR> joineraggregator, final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized); <VR> KTable<K, VR> aggregate(final Serde<K> keySerdeRichInitializer<VR, K> initializer, final RichAggregator<? super K, ? super V, VR> aggregator); |
SessionWindowedKStream
The aggregate() methods take 3 rich interfaces (initializer, aggregator and session merger), so converting every possible combination to its rich counterpart would cause a lot of overloads. Therefore, I propose adding a single overload in which all of the interfaces are rich.
Code Block | ||
---|---|---|
| ||
<T> KTable<Windowed<K>, T> aggregate(final RichInitializer<T, Windowed<K>> initializer, final Serde<V> valSerde, final RecordContext recordContext); <GK, GV, RV> KStream<K, RV> join(final GlobalKTable<GK, GV> globalKTable, final KeyValueMapper<RichAggregator<? super K, ? super V, ? extends GK> keyValueMapperT> aggregator, final ValueJoiner<? super V, ? super GV,final RichMerger<? extendssuper RV> joiner, final RecordContext recordContextK, T> sessionMerger); <GK,<VR> GVKTable<Windowed<K>, RV> KStream<K, RV> leftJoinVR> aggregate(final GlobalKTable<GKRichInitializer<VR, GV>Windowed<K>> globalKTableinitializer, final KeyValueMapper<RichAggregator<? super K, ? super V, ? extends GK> keyValueMapper, VR> aggregator, final ValueJoiner<? super V, ? super GV, ? extends RV> valueJoiner, final RecordContext recordContext); |
Limiting the ProcessorContext - RecordContext interface
We extract a subset of the features of ProcessorContext and put them into the RecordContext interface:
Code Block | ||
---|---|---|
| ||
/**
 * Subset of ProcessorContext's features, split out into its own interface.
 * ProcessorContext extends this interface (see the declaration that follows).
 */
public interface RecordContext {
String applicationId();
TaskId taskId();
StreamsMetrics metrics();
// Record metadata accessors.
String topic();
int partition();
long offset();
long timestamp();
// Application configuration accessors.
Map<String, Object> appConfigs();
Map<String, Object> appConfigsWithPrefix(String prefix);
}
// ProcessorContext inherits the RecordContext methods and keeps only its remaining methods;
// because of this subtype relation, a ProcessorContext can be used wherever a RecordContext is expected.
public interface ProcessorContext extends RecordContext {
// all methods but the ones in RecordContext
} |
Whenever we need to convert a ProcessorContext to a RecordContext, we just cast:
final RichMerger<? super K, VR> sessionMerger,
final Materialized<K, VR, SessionStore<Bytes, byte[]>> materialized);
// Session-windowed reduce overloads taking a RichReducer; the result is keyed by Windowed<K>.
KTable<Windowed<K>, V> reduce(final RichReducer<V, K> reducer);
// Same as above, with an additional Materialized argument backed by a SessionStore.
KTable<Windowed<K>, V> reduce(final RichReducer<V, K> reducer,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materializedAs);
, |
TimeWindowedKStream
Code Block | ||
---|---|---|
| ||
<VR> KTable<Windowed<K>, VR> aggregate(final RichInitializer<VR, K> initializer, | ||
Code Block | ||
| ||
private class KStreamMapProcessor extends AbstractProcessor<K, V> { @Override public void init(ProcessorContext processorContext) { super.init(processorContext); richMapper.init((RecordContext) processorContext); // here make a cast } @Override public void process(final K key, final V value) { final RichAggregator<? super V1K, newValue? = mapper.apply(keysuper V, valueVR> aggregator); <VR> KTable<Windowed<K>, VR> aggregate(final RichInitializer<VR, K> initializer, context().forward(key, newValue); } @Override public void close() { super.close(); mapper.close(); final RichAggregator<? } } |
Rich Interfaces
If the interface is value-only (like ValueJoiner or ValueMapper), we extend its rich interface from its withKey'ed counterpart (e.g. ValueMapperWithKey).
If the interface is key-value (like KeyValueMapper), we extend its rich interface from the interface itself.
Code Block | ||
---|---|---|
| ||
/**
 * Life-cycle interface shared by the rich interfaces in this listing:
 * {@code init} receives a RecordContext and {@code close} takes no arguments.
 */
public interface RichFunction {
void init(RecordContext recordContext);
void close();
}
/**
 * Value mapper whose {@code apply} method also receives the key.
 *
 * @param <K>  key type
 * @param <V>  input value type
 * @param <VR> result type
 */
public interface ValueMapperWithKey<K, V, VR> {
VR apply(final K key, final V value);
}
/**
 * Rich value mapper: combines ValueMapperWithKey (the withKey'ed counterpart of a value-only
 * interface) with the RichFunction init/close life cycle. It declares no methods of its own.
 */
public interface RichValueMapper<K, V, VR> extends ValueMapperWithKey<K, V, VR>, RichFunction {
}
/**
 * Rich key-value mapper: combines KeyValueMapper itself (it is already a key-value interface)
 * with the RichFunction init/close life cycle. It declares no methods of its own.
 */
public interface RichKeyValueMapper<K, V, VR> extends KeyValueMapper<K, V, VR>, RichFunction {
}
|
The same semantics apply to other interfaces as well.
So we don't need to add any overloaded methods for public APIs. Internally we perform 2 changes:
- Change the constructor type of all related Processors to accept rich interfaces
- Create converters from non-rich to rich interfaces
super K, ? super V, VR> aggregator,
final Materialized<K, VR, WindowStore<Bytes, byte[]>> materialized);
// Time-windowed reduce overloads taking a RichReducer; the result is keyed by Windowed<K>.
KTable<Windowed<K>, V> reduce(final RichReducer<V, K> reducer);
// Same as above, with an additional Materialized argument backed by a WindowStore.
KTable<Windowed<K>, V> reduce(final RichReducer<V, K> reducer,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
|
KGroupedTable
Code Block | ||
---|---|---|
| ||
// KGroupedTable reduce overload taking two RichReducers (adder and subtractor)
// and a Materialized argument backed by a KeyValueStore.
KTable<K, V> reduce(final RichReducer<V, K> adder,
final RichReducer<V, K> subtractor,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
KTable<K, V> reduce(final RichReducer<V, K> adder,
| ||
Code Block | ||
| ||
class KStreamMapValues<K, V, V1> implements ProcessorSupplier<K, V> { private final RichValueMapper<K, V, V1> mapper; public KStreamMapValues(RichValueMapper<K, V, V1> mapper) { final this.mapper = mapper; } RichReducer<V, K> subtractor); <VR> KTable<K, VR> aggregate(final RichInitializer<VR> initializer, @Override public Processor<K, V> get() { return new KStreamMapProcessor(); } final RichAggregator<? privatesuper classK, KStreamMapProcessor? extendssuper AbstractProcessor<KV, V>VR> {adder, @Override public void init(ProcessorContext processorContext) { final RichAggregator<? super.init(processorContext); K, ? super V, VR> subtractor, mapper.init((RecordContext) processorContext); } @Override final Materialized<K, publicVR, void process(final K key, final V value) {KeyValueStore<Bytes, byte[]>> materialized); <VR> KTable<K, VR> aggregate(final RichInitializer<VR> initializer, V1 newValue = mapper.apply(key, value); context().forward(key, newValue); } final RichAggregator<? super K, ? super V, VR> adder, @Override public void close() { final RichAggregator<? super.close(); mapper.close(); } } } static <K, T1, T2, R> RichValueJoiner<K, T1, T2, R> convertToRichValueJoiner(final ValueJoinerWithKey<K, T1, T2, R> valueJoinerWithKey K, ? super V, VR> subtractor); |
Proposed changes
Move RecordContext from the .processor.internals package to the .processor package
Make the record context open to the public
StreamTask.updateProcessorContext()
) :Code Block | ||
---|---|---|
| ||
// the below code snippet already exists, this is just for background. private void updateProcessorContext(final StampedRecord record, final ProcessorNode currNode) { Objects.requireNonNull(valueJoinerWithKey, "valueJoiner can't be null"processorContext.setRecordContext(new ProcessorRecordContext(record.timestamp, record.offset(), record.partition(), record.topic())); processorContext.setCurrentNode(currNode); } |
Sample processor should look like this:
Code Block | ||
---|---|---|
| ||
class KStreamKTableJoinProcessor<K1, K2, V1, V2, R> extends AbstractProcessor<K1, V1> { ...if (valueJoinerWithKey instanceof RichValueJoiner) { return (RichValueJoiner<K, T1, T2, R>) valueJoinerWithKey; }private elseRecordContext { recordContext; // returnthis newline RichValueJoiner<K,is T1,added T2, R>() { in this KIP ... @Override public void process(final K1 key, final V1 value) { recordContext public= voidnew initRecordContext() {} @Override // recordContext initialization is added in public void close() {}this KIP @Override public long offset() @Override{ return public R apply(K key, T1 value1, T2 value2) {context().recordContext().offset(); } @Override public long timestamp() { return valueJoinerWithKey.apply(key, value1, value2);context().recordContext().timestamp(); } @Override public String topic() { } } return context().recordContext().topic(); } } static <K, T1, T2, R> ValueJoinerWithKey<K, T1, T2, R> convertToValueJoinerWithKey(final ValueJoiner<T1, T2, R> valueJoiner) { Objects.requireNonNull(valueJoiner, "valueJoiner can't be null"); return new ValueJoinerWithKey<K, T1, T2, R>( @Override public int partition() { return context().recordContext().partition(); } }; if (key != null && value != null) { @Override final V2 value2 = valueGetter.get(keyMapper.apply(key, value)); public R apply(K key, T1if value1, T2(leftJoin || value2 != null) { return valueJoinercontext().forward(key, joiner.apply(value1value, value2, recordContext)); } }; } } |
Rejected Alternatives
Not yet.