Class PslSparkUtils (0.3.4)

public class PslSparkUtils

Inheritance

java.lang.Object > PslSparkUtils

Constructors

PslSparkUtils()

public PslSparkUtils()

Methods

convertAttributesToSparkMap(ListMultimap<String,ByteString> attributeMap)

public static ArrayBasedMapData convertAttributesToSparkMap(ListMultimap<String,ByteString> attributeMap)
Parameter
Name | Description
attributeMap | com.google.common.collect.ListMultimap<String,ByteString>
Returns
Type | Description
org.apache.spark.sql.catalyst.util.ArrayBasedMapData

getSparkEndOffset(SparkSourceOffset headOffset, SparkSourceOffset startOffset, long maxMessagesPerBatch, long topicPartitionCount)

public static SparkSourceOffset getSparkEndOffset(SparkSourceOffset headOffset, SparkSourceOffset startOffset, long maxMessagesPerBatch, long topicPartitionCount)
Parameters
Name | Description
headOffset | SparkSourceOffset
startOffset | SparkSourceOffset
maxMessagesPerBatch | long
topicPartitionCount | long
Returns
Type | Description
SparkSourceOffset

getSparkStartOffset(CursorClient cursorClient, SubscriptionPath subscriptionPath, long topicPartitionCount)

public static SparkSourceOffset getSparkStartOffset(CursorClient cursorClient, SubscriptionPath subscriptionPath, long topicPartitionCount)
Parameters
Name | Description
cursorClient | com.google.cloud.pubsublite.internal.CursorClient
subscriptionPath | com.google.cloud.pubsublite.SubscriptionPath
topicPartitionCount | long
Returns
Type | Description
SparkSourceOffset

toInternalRow(SequencedMessage msg, SubscriptionPath subscription, Partition partition)

public static InternalRow toInternalRow(SequencedMessage msg, SubscriptionPath subscription, Partition partition)
Parameters
Name | Description
msg | com.google.cloud.pubsublite.SequencedMessage
subscription | com.google.cloud.pubsublite.SubscriptionPath
partition | com.google.cloud.pubsublite.Partition
Returns
Type | Description
org.apache.spark.sql.catalyst.InternalRow

toPslPartitionOffset(SparkPartitionOffset sparkPartitionOffset)

public static PslPartitionOffset toPslPartitionOffset(SparkPartitionOffset sparkPartitionOffset)
Parameter
Name | Description
sparkPartitionOffset | com.google.cloud.pubsublite.spark.SparkPartitionOffset
Returns
Type | Description
PslPartitionOffset

toPslSourceOffset(SparkSourceOffset sparkSourceOffset)

public static PslSourceOffset toPslSourceOffset(SparkSourceOffset sparkSourceOffset)
Parameter
Name | Description
sparkSourceOffset | SparkSourceOffset
Returns
Type | Description
PslSourceOffset

toPubSubMessage(StructType inputSchema, InternalRow row)

public static Message toPubSubMessage(StructType inputSchema, InternalRow row)
Parameters
Name | Description
inputSchema | org.apache.spark.sql.types.StructType
row | org.apache.spark.sql.catalyst.InternalRow
Returns
Type | Description
com.google.cloud.pubsublite.Message

toSparkSourceOffset(PslSourceOffset pslSourceOffset)

public static SparkSourceOffset toSparkSourceOffset(PslSourceOffset pslSourceOffset)
Parameter
Name | Description
pslSourceOffset | PslSourceOffset
Returns
Type | Description
SparkSourceOffset

verifyWriteInputSchema(StructType inputSchema)

public static void verifyWriteInputSchema(StructType inputSchema)

Ensures that the data fields used for publishing, if present in the schema, have the expected Spark DataType.

Parameter
Name | Description
inputSchema | org.apache.spark.sql.types.StructType

The input table schema to write to Pub/Sub Lite.