Class Document (2.36.0)

public final class Document extends GeneratedMessageV3 implements DocumentOrBuilder

Document represents the canonical document resource in Document AI. It is an interchange format that provides insights into documents and allows for collaboration between users and Document AI to iterate and optimize for quality.

Protobuf type google.cloud.documentai.v1.Document

Implements

DocumentOrBuilder

Static Fields

CONTENT_FIELD_NUMBER

public static final int CONTENT_FIELD_NUMBER
Field Value
TypeDescription
int

ENTITIES_FIELD_NUMBER

public static final int ENTITIES_FIELD_NUMBER
Field Value
TypeDescription
int

ENTITY_RELATIONS_FIELD_NUMBER

public static final int ENTITY_RELATIONS_FIELD_NUMBER
Field Value
TypeDescription
int

ERROR_FIELD_NUMBER

public static final int ERROR_FIELD_NUMBER
Field Value
TypeDescription
int

MIME_TYPE_FIELD_NUMBER

public static final int MIME_TYPE_FIELD_NUMBER
Field Value
TypeDescription
int

PAGES_FIELD_NUMBER

public static final int PAGES_FIELD_NUMBER
Field Value
TypeDescription
int

REVISIONS_FIELD_NUMBER

public static final int REVISIONS_FIELD_NUMBER
Field Value
TypeDescription
int

SHARD_INFO_FIELD_NUMBER

public static final int SHARD_INFO_FIELD_NUMBER
Field Value
TypeDescription
int

TEXT_CHANGES_FIELD_NUMBER

public static final int TEXT_CHANGES_FIELD_NUMBER
Field Value
TypeDescription
int

TEXT_FIELD_NUMBER

public static final int TEXT_FIELD_NUMBER
Field Value
TypeDescription
int

TEXT_STYLES_FIELD_NUMBER

public static final int TEXT_STYLES_FIELD_NUMBER
Field Value
TypeDescription
int

URI_FIELD_NUMBER

public static final int URI_FIELD_NUMBER
Field Value
TypeDescription
int

Static Methods

getDefaultInstance()

public static Document getDefaultInstance()
Returns
TypeDescription
Document

getDescriptor()

public static final Descriptors.Descriptor getDescriptor()
Returns
TypeDescription
Descriptor

newBuilder()

public static Document.Builder newBuilder()
Returns
TypeDescription
Document.Builder

newBuilder(Document prototype)

public static Document.Builder newBuilder(Document prototype)
Parameter
NameDescription
prototypeDocument
Returns
TypeDescription
Document.Builder

parseDelimitedFrom(InputStream input)

public static Document parseDelimitedFrom(InputStream input)
Parameter
NameDescription
inputInputStream
Returns
TypeDescription
Document
Exceptions
TypeDescription
IOException

parseDelimitedFrom(InputStream input, ExtensionRegistryLite extensionRegistry)

public static Document parseDelimitedFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
NameDescription
inputInputStream
extensionRegistryExtensionRegistryLite
Returns
TypeDescription
Document
Exceptions
TypeDescription
IOException

parseFrom(byte[] data)

public static Document parseFrom(byte[] data)
Parameter
NameDescription
databyte[]
Returns
TypeDescription
Document
Exceptions
TypeDescription
InvalidProtocolBufferException

parseFrom(byte[] data, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(byte[] data, ExtensionRegistryLite extensionRegistry)
Parameters
NameDescription
databyte[]
extensionRegistryExtensionRegistryLite
Returns
TypeDescription
Document
Exceptions
TypeDescription
InvalidProtocolBufferException

parseFrom(ByteString data)

public static Document parseFrom(ByteString data)
Parameter
NameDescription
dataByteString
Returns
TypeDescription
Document
Exceptions
TypeDescription
InvalidProtocolBufferException

parseFrom(ByteString data, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(ByteString data, ExtensionRegistryLite extensionRegistry)
Parameters
NameDescription
dataByteString
extensionRegistryExtensionRegistryLite
Returns
TypeDescription
Document
Exceptions
TypeDescription
InvalidProtocolBufferException

parseFrom(CodedInputStream input)

public static Document parseFrom(CodedInputStream input)
Parameter
NameDescription
inputCodedInputStream
Returns
TypeDescription
Document
Exceptions
TypeDescription
IOException

parseFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
NameDescription
inputCodedInputStream
extensionRegistryExtensionRegistryLite
Returns
TypeDescription
Document
Exceptions
TypeDescription
IOException

parseFrom(InputStream input)

public static Document parseFrom(InputStream input)
Parameter
NameDescription
inputInputStream
Returns
TypeDescription
Document
Exceptions
TypeDescription
IOException

parseFrom(InputStream input, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
NameDescription
inputInputStream
extensionRegistryExtensionRegistryLite
Returns
TypeDescription
Document
Exceptions
TypeDescription
IOException

parseFrom(ByteBuffer data)

public static Document parseFrom(ByteBuffer data)
Parameter
NameDescription
dataByteBuffer
Returns
TypeDescription
Document
Exceptions
TypeDescription
InvalidProtocolBufferException

parseFrom(ByteBuffer data, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(ByteBuffer data, ExtensionRegistryLite extensionRegistry)
Parameters
NameDescription
dataByteBuffer
extensionRegistryExtensionRegistryLite
Returns
TypeDescription
Document
Exceptions
TypeDescription
InvalidProtocolBufferException

parser()

public static Parser<Document> parser()
Returns
TypeDescription
Parser<Document>

Methods

equals(Object obj)

public boolean equals(Object obj)
Parameter
NameDescription
objObject
Returns
TypeDescription
boolean
Overrides

getContent()

public ByteString getContent()

Optional. Inline document content, represented as a stream of bytes. Note: As with all bytes fields, Protocol Buffers use a pure binary representation, whereas JSON representations use base64.

bytes content = 2 [(.google.api.field_behavior) = OPTIONAL];

Returns
TypeDescription
ByteString

The content.

getDefaultInstanceForType()

public Document getDefaultInstanceForType()
Returns
TypeDescription
Document

getEntities(int index)

public Document.Entity getEntities(int index)

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1.Document.Entity entities = 7;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.Entity

getEntitiesCount()

public int getEntitiesCount()

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1.Document.Entity entities = 7;

Returns
TypeDescription
int

getEntitiesList()

public List<Document.Entity> getEntitiesList()

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1.Document.Entity entities = 7;

Returns
TypeDescription
List<Entity>

getEntitiesOrBuilder(int index)

public Document.EntityOrBuilder getEntitiesOrBuilder(int index)

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1.Document.Entity entities = 7;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.EntityOrBuilder

getEntitiesOrBuilderList()

public List<? extends Document.EntityOrBuilder> getEntitiesOrBuilderList()

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1.Document.Entity entities = 7;

Returns
TypeDescription
List<? extends com.google.cloud.documentai.v1.Document.EntityOrBuilder>

getEntityRelations(int index)

public Document.EntityRelation getEntityRelations(int index)

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.EntityRelation

getEntityRelationsCount()

public int getEntityRelationsCount()

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;

Returns
TypeDescription
int

getEntityRelationsList()

public List<Document.EntityRelation> getEntityRelationsList()

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;

Returns
TypeDescription
List<EntityRelation>

getEntityRelationsOrBuilder(int index)

public Document.EntityRelationOrBuilder getEntityRelationsOrBuilder(int index)

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.EntityRelationOrBuilder

getEntityRelationsOrBuilderList()

public List<? extends Document.EntityRelationOrBuilder> getEntityRelationsOrBuilderList()

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;

Returns
TypeDescription
List<? extends com.google.cloud.documentai.v1.Document.EntityRelationOrBuilder>

getError()

public Status getError()

Any error that occurred while processing this document.

.google.rpc.Status error = 10;

Returns
TypeDescription
com.google.rpc.Status

The error.

getErrorOrBuilder()

public StatusOrBuilder getErrorOrBuilder()

Any error that occurred while processing this document.

.google.rpc.Status error = 10;

Returns
TypeDescription
com.google.rpc.StatusOrBuilder

getMimeType()

public String getMimeType()

An IANA published media type (MIME type).

string mime_type = 3;

Returns
TypeDescription
String

The mimeType.

getMimeTypeBytes()

public ByteString getMimeTypeBytes()

An IANA published media type (MIME type).

string mime_type = 3;

Returns
TypeDescription
ByteString

The bytes for mimeType.

getPages(int index)

public Document.Page getPages(int index)

Visual page layout for the Document.

repeated .google.cloud.documentai.v1.Document.Page pages = 6;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.Page

getPagesCount()

public int getPagesCount()

Visual page layout for the Document.

repeated .google.cloud.documentai.v1.Document.Page pages = 6;

Returns
TypeDescription
int

getPagesList()

public List<Document.Page> getPagesList()

Visual page layout for the Document.

repeated .google.cloud.documentai.v1.Document.Page pages = 6;

Returns
TypeDescription
List<Page>

getPagesOrBuilder(int index)

public Document.PageOrBuilder getPagesOrBuilder(int index)

Visual page layout for the Document.

repeated .google.cloud.documentai.v1.Document.Page pages = 6;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.PageOrBuilder

getPagesOrBuilderList()

public List<? extends Document.PageOrBuilder> getPagesOrBuilderList()

Visual page layout for the Document.

repeated .google.cloud.documentai.v1.Document.Page pages = 6;

Returns
TypeDescription
List<? extends com.google.cloud.documentai.v1.Document.PageOrBuilder>

getParserForType()

public Parser<Document> getParserForType()
Returns
TypeDescription
Parser<Document>
Overrides

getRevisions(int index)

public Document.Revision getRevisions(int index)

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.Revision

getRevisionsCount()

public int getRevisionsCount()

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;

Returns
TypeDescription
int

getRevisionsList()

public List<Document.Revision> getRevisionsList()

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;

Returns
TypeDescription
List<Revision>

getRevisionsOrBuilder(int index)

public Document.RevisionOrBuilder getRevisionsOrBuilder(int index)

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.RevisionOrBuilder

getRevisionsOrBuilderList()

public List<? extends Document.RevisionOrBuilder> getRevisionsOrBuilderList()

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;

Returns
TypeDescription
List<? extends com.google.cloud.documentai.v1.Document.RevisionOrBuilder>

getSerializedSize()

public int getSerializedSize()
Returns
TypeDescription
int
Overrides

getShardInfo()

public Document.ShardInfo getShardInfo()

Information about the sharding if this document is a sharded part of a larger document. If the document is not sharded, this message is not specified.

.google.cloud.documentai.v1.Document.ShardInfo shard_info = 9;

Returns
TypeDescription
Document.ShardInfo

The shardInfo.

getShardInfoOrBuilder()

public Document.ShardInfoOrBuilder getShardInfoOrBuilder()

Information about the sharding if this document is a sharded part of a larger document. If the document is not sharded, this message is not specified.

.google.cloud.documentai.v1.Document.ShardInfo shard_info = 9;

Returns
TypeDescription
Document.ShardInfoOrBuilder

getSourceCase()

public Document.SourceCase getSourceCase()
Returns
TypeDescription
Document.SourceCase

getText()

public String getText()

Optional. UTF-8 encoded text in reading order from the document.

string text = 4 [(.google.api.field_behavior) = OPTIONAL];

Returns
TypeDescription
String

The text.

getTextBytes()

public ByteString getTextBytes()

Optional. UTF-8 encoded text in reading order from the document.

string text = 4 [(.google.api.field_behavior) = OPTIONAL];

Returns
TypeDescription
ByteString

The bytes for text.

getTextChanges(int index)

public Document.TextChange getTextChanges(int index)

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.TextChange

getTextChangesCount()

public int getTextChangesCount()

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;

Returns
TypeDescription
int

getTextChangesList()

public List<Document.TextChange> getTextChangesList()

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;

Returns
TypeDescription
List<TextChange>

getTextChangesOrBuilder(int index)

public Document.TextChangeOrBuilder getTextChangesOrBuilder(int index)

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.TextChangeOrBuilder

getTextChangesOrBuilderList()

public List<? extends Document.TextChangeOrBuilder> getTextChangesOrBuilderList()

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;

Returns
TypeDescription
List<? extends com.google.cloud.documentai.v1.Document.TextChangeOrBuilder>

getTextStyles(int index) (deprecated)

public Document.Style getTextStyles(int index)

Styles for the Document.text.

repeated .google.cloud.documentai.v1.Document.Style text_styles = 5 [deprecated = true];

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.Style

getTextStylesCount() (deprecated)

public int getTextStylesCount()

Styles for the Document.text.

repeated .google.cloud.documentai.v1.Document.Style text_styles = 5 [deprecated = true];

Returns
TypeDescription
int

getTextStylesList() (deprecated)

public List<Document.Style> getTextStylesList()

Styles for the Document.text.

repeated .google.cloud.documentai.v1.Document.Style text_styles = 5 [deprecated = true];

Returns
TypeDescription
List<Style>

getTextStylesOrBuilder(int index) (deprecated)

public Document.StyleOrBuilder getTextStylesOrBuilder(int index)

Styles for the Document.text.

repeated .google.cloud.documentai.v1.Document.Style text_styles = 5 [deprecated = true];

Parameter
NameDescription
indexint
Returns
TypeDescription
Document.StyleOrBuilder

getTextStylesOrBuilderList() (deprecated)

public List<? extends Document.StyleOrBuilder> getTextStylesOrBuilderList()

Styles for the Document.text.

repeated .google.cloud.documentai.v1.Document.Style text_styles = 5 [deprecated = true];

Returns
TypeDescription
List<? extends com.google.cloud.documentai.v1.Document.StyleOrBuilder>

getUri()

public String getUri()

Optional. Currently supports Google Cloud Storage URIs of the form gs://bucket_name/object_name. Object versioning is not supported. For more information, refer to Google Cloud Storage Request URIs.

string uri = 1 [(.google.api.field_behavior) = OPTIONAL];

Returns
TypeDescription
String

The uri.

getUriBytes()

public ByteString getUriBytes()

Optional. Currently supports Google Cloud Storage URIs of the form gs://bucket_name/object_name. Object versioning is not supported. For more information, refer to Google Cloud Storage Request URIs.

string uri = 1 [(.google.api.field_behavior) = OPTIONAL];

Returns
TypeDescription
ByteString

The bytes for uri.

hasContent()

public boolean hasContent()

Optional. Inline document content, represented as a stream of bytes. Note: As with all bytes fields, Protocol Buffers use a pure binary representation, whereas JSON representations use base64.

bytes content = 2 [(.google.api.field_behavior) = OPTIONAL];

Returns
TypeDescription
boolean

Whether the content field is set.

hasError()

public boolean hasError()

Any error that occurred while processing this document.

.google.rpc.Status error = 10;

Returns
TypeDescription
boolean

Whether the error field is set.

hasShardInfo()

public boolean hasShardInfo()

Information about the sharding if this document is a sharded part of a larger document. If the document is not sharded, this message is not specified.

.google.cloud.documentai.v1.Document.ShardInfo shard_info = 9;

Returns
TypeDescription
boolean

Whether the shardInfo field is set.

hasUri()

public boolean hasUri()

Optional. Currently supports Google Cloud Storage URIs of the form gs://bucket_name/object_name. Object versioning is not supported. For more information, refer to Google Cloud Storage Request URIs.

string uri = 1 [(.google.api.field_behavior) = OPTIONAL];

Returns
TypeDescription
boolean

Whether the uri field is set.

hashCode()

public int hashCode()
Returns
TypeDescription
int
Overrides

internalGetFieldAccessorTable()

protected GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable()
Returns
TypeDescription
FieldAccessorTable
Overrides

isInitialized()

public final boolean isInitialized()
Returns
TypeDescription
boolean
Overrides

newBuilderForType()

public Document.Builder newBuilderForType()
Returns
TypeDescription
Document.Builder

newBuilderForType(GeneratedMessageV3.BuilderParent parent)

protected Document.Builder newBuilderForType(GeneratedMessageV3.BuilderParent parent)
Parameter
NameDescription
parentBuilderParent
Returns
TypeDescription
Document.Builder
Overrides

newInstance(GeneratedMessageV3.UnusedPrivateParameter unused)

protected Object newInstance(GeneratedMessageV3.UnusedPrivateParameter unused)
Parameter
NameDescription
unusedUnusedPrivateParameter
Returns
TypeDescription
Object
Overrides

toBuilder()

public Document.Builder toBuilder()
Returns
TypeDescription
Document.Builder

writeTo(CodedOutputStream output)

public void writeTo(CodedOutputStream output)
Parameter
NameDescription
outputCodedOutputStream
Overrides
Exceptions
TypeDescription
IOException