Interface ProcessOptions.LayoutConfig.ChunkingConfigOrBuilder (2.47.0)

public static interface ProcessOptions.LayoutConfig.ChunkingConfigOrBuilder extends MessageOrBuilder

Implements

MessageOrBuilder

Methods

getBreakpointPercentileThreshold()

public abstract int getBreakpointPercentileThreshold()

Optional. The percentile of cosine dissimilarity that must be exceeded between a group of tokens and the next. The smaller this number is, the more chunks will be generated.

int32 breakpoint_percentile_threshold = 4 [(.google.api.field_behavior) = OPTIONAL];

Returns

| Type | Description |
|------|-------------|
| int | The breakpointPercentileThreshold. |

getChunkSize()

public abstract int getChunkSize()

Optional. The chunk sizes to use when splitting documents, in order of level.

int32 chunk_size = 1 [(.google.api.field_behavior) = OPTIONAL];

Returns

| Type | Description |
|------|-------------|
| int | The chunkSize. |

getIncludeAncestorHeadings()

public abstract boolean getIncludeAncestorHeadings()

Optional. Whether or not to include ancestor headings when splitting.

bool include_ancestor_headings = 2 [(.google.api.field_behavior) = OPTIONAL];

Returns

| Type | Description |
|------|-------------|
| boolean | The includeAncestorHeadings. |

getSemanticChunkingGroupSize()

public abstract boolean getSemanticChunkingGroupSize()

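Optional. The number of tokens to group together when evaluating semantic similarity. Note: although this description refers to a token count, the field is declared as bool in the proto definition, so this accessor returns a boolean rather than an int.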

bool semantic_chunking_group_size = 3 [(.google.api.field_behavior) = OPTIONAL];

Returns

| Type | Description |
|------|-------------|
| boolean | The semanticChunkingGroupSize. |