public final class Document extends GeneratedMessageV3 implements DocumentOrBuilder
Document represents the canonical document resource in Document Understanding
AI.
It is an interchange format that provides insights into documents and allows
for collaboration between users and Document Understanding AI to iterate and
optimize for quality.
Protobuf type google.cloud.documentai.v1.Document
Fields
CONTENT_FIELD_NUMBER
public static final int CONTENT_FIELD_NUMBER
Field Value
ENTITIES_FIELD_NUMBER
public static final int ENTITIES_FIELD_NUMBER
Field Value
ENTITY_RELATIONS_FIELD_NUMBER
public static final int ENTITY_RELATIONS_FIELD_NUMBER
Field Value
ERROR_FIELD_NUMBER
public static final int ERROR_FIELD_NUMBER
Field Value
MIME_TYPE_FIELD_NUMBER
public static final int MIME_TYPE_FIELD_NUMBER
Field Value
PAGES_FIELD_NUMBER
public static final int PAGES_FIELD_NUMBER
Field Value
REVISIONS_FIELD_NUMBER
public static final int REVISIONS_FIELD_NUMBER
Field Value
SHARD_INFO_FIELD_NUMBER
public static final int SHARD_INFO_FIELD_NUMBER
Field Value
TEXT_CHANGES_FIELD_NUMBER
public static final int TEXT_CHANGES_FIELD_NUMBER
Field Value
TEXT_FIELD_NUMBER
public static final int TEXT_FIELD_NUMBER
Field Value
TEXT_STYLES_FIELD_NUMBER
public static final int TEXT_STYLES_FIELD_NUMBER
Field Value
URI_FIELD_NUMBER
public static final int URI_FIELD_NUMBER
Field Value
Methods
equals(Object obj)
public boolean equals(Object obj)
Parameter
Returns
Overrides
getContent()
public ByteString getContent()
Optional. Inline document content, represented as a stream of bytes.
Note: As with all `bytes` fields, protobuffers use a pure binary
representation, whereas JSON representations use base64.
bytes content = 2 [(.google.api.field_behavior) = OPTIONAL];
Returns
getDefaultInstance()
public static Document getDefaultInstance()
Returns
getDefaultInstanceForType()
public Document getDefaultInstanceForType()
Returns
getDescriptor()
public static final Descriptors.Descriptor getDescriptor()
Returns
getEntities(int index)
public Document.Entity getEntities(int index)
A list of entities detected on Document.text. For document shards,
entities in this list may cross shard boundaries.
repeated .google.cloud.documentai.v1.Document.Entity entities = 7;
Parameter
Returns
getEntitiesCount()
public int getEntitiesCount()
A list of entities detected on Document.text. For document shards,
entities in this list may cross shard boundaries.
repeated .google.cloud.documentai.v1.Document.Entity entities = 7;
Returns
getEntitiesList()
public List<Document.Entity> getEntitiesList()
A list of entities detected on Document.text. For document shards,
entities in this list may cross shard boundaries.
repeated .google.cloud.documentai.v1.Document.Entity entities = 7;
Returns
getEntitiesOrBuilder(int index)
public Document.EntityOrBuilder getEntitiesOrBuilder(int index)
A list of entities detected on Document.text. For document shards,
entities in this list may cross shard boundaries.
repeated .google.cloud.documentai.v1.Document.Entity entities = 7;
Parameter
Returns
getEntitiesOrBuilderList()
public List<? extends Document.EntityOrBuilder> getEntitiesOrBuilderList()
A list of entities detected on Document.text. For document shards,
entities in this list may cross shard boundaries.
repeated .google.cloud.documentai.v1.Document.Entity entities = 7;
Returns
Type | Description |
List<? extends com.google.cloud.documentai.v1.Document.EntityOrBuilder> | |
getEntityRelations(int index)
public Document.EntityRelation getEntityRelations(int index)
Relationship among Document.entities.
repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;
Parameter
Returns
getEntityRelationsCount()
public int getEntityRelationsCount()
Relationship among Document.entities.
repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;
Returns
getEntityRelationsList()
public List<Document.EntityRelation> getEntityRelationsList()
Relationship among Document.entities.
repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;
Returns
getEntityRelationsOrBuilder(int index)
public Document.EntityRelationOrBuilder getEntityRelationsOrBuilder(int index)
Relationship among Document.entities.
repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;
Parameter
Returns
getEntityRelationsOrBuilderList()
public List<? extends Document.EntityRelationOrBuilder> getEntityRelationsOrBuilderList()
Relationship among Document.entities.
repeated .google.cloud.documentai.v1.Document.EntityRelation entity_relations = 8;
Returns
Type | Description |
List<? extends com.google.cloud.documentai.v1.Document.EntityRelationOrBuilder> | |
getError()
public Status getError()
Any error that occurred while processing this document.
.google.rpc.Status error = 10;
Returns
Type | Description |
com.google.rpc.Status | The error.
|
getErrorOrBuilder()
public StatusOrBuilder getErrorOrBuilder()
Any error that occurred while processing this document.
.google.rpc.Status error = 10;
Returns
Type | Description |
com.google.rpc.StatusOrBuilder | |
getMimeType()
public String getMimeType()
Returns
Type | Description |
String | The mimeType.
|
getMimeTypeBytes()
public ByteString getMimeTypeBytes()
Returns
getPages(int index)
public Document.Page getPages(int index)
Visual page layout for the Document.
repeated .google.cloud.documentai.v1.Document.Page pages = 6;
Parameter
Returns
getPagesCount()
public int getPagesCount()
Visual page layout for the Document.
repeated .google.cloud.documentai.v1.Document.Page pages = 6;
Returns
getPagesList()
public List<Document.Page> getPagesList()
Visual page layout for the Document.
repeated .google.cloud.documentai.v1.Document.Page pages = 6;
Returns
getPagesOrBuilder(int index)
public Document.PageOrBuilder getPagesOrBuilder(int index)
Visual page layout for the Document.
repeated .google.cloud.documentai.v1.Document.Page pages = 6;
Parameter
Returns
getPagesOrBuilderList()
public List<? extends Document.PageOrBuilder> getPagesOrBuilderList()
Visual page layout for the Document.
repeated .google.cloud.documentai.v1.Document.Page pages = 6;
Returns
Type | Description |
List<? extends com.google.cloud.documentai.v1.Document.PageOrBuilder> | |
getParserForType()
public Parser<Document> getParserForType()
Returns
Overrides
getRevisions(int index)
public Document.Revision getRevisions(int index)
Revision history of this document.
repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;
Parameter
Returns
getRevisionsCount()
public int getRevisionsCount()
Revision history of this document.
repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;
Returns
getRevisionsList()
public List<Document.Revision> getRevisionsList()
Revision history of this document.
repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;
Returns
getRevisionsOrBuilder(int index)
public Document.RevisionOrBuilder getRevisionsOrBuilder(int index)
Revision history of this document.
repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;
Parameter
Returns
getRevisionsOrBuilderList()
public List<? extends Document.RevisionOrBuilder> getRevisionsOrBuilderList()
Revision history of this document.
repeated .google.cloud.documentai.v1.Document.Revision revisions = 13;
Returns
Type | Description |
List<? extends com.google.cloud.documentai.v1.Document.RevisionOrBuilder> | |
getSerializedSize()
public int getSerializedSize()
Returns
Overrides
getShardInfo()
public Document.ShardInfo getShardInfo()
Information about the sharding if this document is a sharded part of a larger
document. If the document is not sharded, this message is not specified.
.google.cloud.documentai.v1.Document.ShardInfo shard_info = 9;
Returns
getShardInfoOrBuilder()
public Document.ShardInfoOrBuilder getShardInfoOrBuilder()
Information about the sharding if this document is a sharded part of a larger
document. If the document is not sharded, this message is not specified.
.google.cloud.documentai.v1.Document.ShardInfo shard_info = 9;
Returns
getSourceCase()
public Document.SourceCase getSourceCase()
Returns
getText()
public String getText()
Optional. UTF-8 encoded text in reading order from the document.
string text = 4 [(.google.api.field_behavior) = OPTIONAL];
Returns
Type | Description |
String | The text.
|
getTextBytes()
public ByteString getTextBytes()
Optional. UTF-8 encoded text in reading order from the document.
string text = 4 [(.google.api.field_behavior) = OPTIONAL];
Returns
getTextChanges(int index)
public Document.TextChange getTextChanges(int index)
A list of text corrections made to Document.text. This is usually
used for annotating corrections to OCR mistakes. Text changes for a given
revision may not overlap with each other.
repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;
Parameter
Returns
getTextChangesCount()
public int getTextChangesCount()
A list of text corrections made to Document.text. This is usually
used for annotating corrections to OCR mistakes. Text changes for a given
revision may not overlap with each other.
repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;
Returns
getTextChangesList()
public List<Document.TextChange> getTextChangesList()
A list of text corrections made to Document.text. This is usually
used for annotating corrections to OCR mistakes. Text changes for a given
revision may not overlap with each other.
repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;
Returns
getTextChangesOrBuilder(int index)
public Document.TextChangeOrBuilder getTextChangesOrBuilder(int index)
A list of text corrections made to Document.text. This is usually
used for annotating corrections to OCR mistakes. Text changes for a given
revision may not overlap with each other.
repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;
Parameter
Returns
getTextChangesOrBuilderList()
public List<? extends Document.TextChangeOrBuilder> getTextChangesOrBuilderList()
A list of text corrections made to Document.text. This is usually
used for annotating corrections to OCR mistakes. Text changes for a given
revision may not overlap with each other.
repeated .google.cloud.documentai.v1.Document.TextChange text_changes = 14;
Returns
Type | Description |
List<? extends com.google.cloud.documentai.v1.Document.TextChangeOrBuilder> | |
getTextStyles(int index)
public Document.Style getTextStyles(int index)
Styles for the Document.text.
repeated .google.cloud.documentai.v1.Document.Style text_styles = 5;
Parameter
Returns
getTextStylesCount()
public int getTextStylesCount()
Styles for the Document.text.
repeated .google.cloud.documentai.v1.Document.Style text_styles = 5;
Returns
getTextStylesList()
public List<Document.Style> getTextStylesList()
Styles for the Document.text.
repeated .google.cloud.documentai.v1.Document.Style text_styles = 5;
Returns
getTextStylesOrBuilder(int index)
public Document.StyleOrBuilder getTextStylesOrBuilder(int index)
Styles for the Document.text.
repeated .google.cloud.documentai.v1.Document.Style text_styles = 5;
Parameter
Returns
getTextStylesOrBuilderList()
public List<? extends Document.StyleOrBuilder> getTextStylesOrBuilderList()
Styles for the Document.text.
repeated .google.cloud.documentai.v1.Document.Style text_styles = 5;
Returns
Type | Description |
List<? extends com.google.cloud.documentai.v1.Document.StyleOrBuilder> | |
getUnknownFields()
public final UnknownFieldSet getUnknownFields()
Returns
Overrides
getUri()
public String getUri()
Optional. Currently supports Google Cloud Storage URI of the form
gs://bucket_name/object_name. Object versioning is not supported.
See Google Cloud Storage Request URIs for more info.
string uri = 1 [(.google.api.field_behavior) = OPTIONAL];
Returns
getUriBytes()
public ByteString getUriBytes()
Optional. Currently supports Google Cloud Storage URI of the form
gs://bucket_name/object_name. Object versioning is not supported.
See Google Cloud Storage Request URIs for more info.
string uri = 1 [(.google.api.field_behavior) = OPTIONAL];
Returns
hasContent()
public boolean hasContent()
Optional. Inline document content, represented as a stream of bytes.
Note: As with all `bytes` fields, protobuffers use a pure binary
representation, whereas JSON representations use base64.
bytes content = 2 [(.google.api.field_behavior) = OPTIONAL];
Returns
Type | Description |
boolean | Whether the content field is set.
|
hasError()
public boolean hasError()
Any error that occurred while processing this document.
.google.rpc.Status error = 10;
Returns
Type | Description |
boolean | Whether the error field is set.
|
hasShardInfo()
public boolean hasShardInfo()
Information about the sharding if this document is a sharded part of a larger
document. If the document is not sharded, this message is not specified.
.google.cloud.documentai.v1.Document.ShardInfo shard_info = 9;
Returns
Type | Description |
boolean | Whether the shardInfo field is set.
|
hasUri()
public boolean hasUri()
Optional. Currently supports Google Cloud Storage URI of the form
gs://bucket_name/object_name. Object versioning is not supported.
See Google Cloud Storage Request URIs for more info.
string uri = 1 [(.google.api.field_behavior) = OPTIONAL];
Returns
Type | Description |
boolean | Whether the uri field is set.
|
hashCode()
public int hashCode()
Returns
Overrides
internalGetFieldAccessorTable()
protected GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable()
Returns
Overrides
isInitialized()
public final boolean isInitialized()
Returns
Overrides
newBuilder()
public static Document.Builder newBuilder()
Returns
newBuilder(Document prototype)
public static Document.Builder newBuilder(Document prototype)
Parameter
Returns
newBuilderForType()
public Document.Builder newBuilderForType()
Returns
newBuilderForType(GeneratedMessageV3.BuilderParent parent)
protected Document.Builder newBuilderForType(GeneratedMessageV3.BuilderParent parent)
Parameter
Returns
Overrides
newInstance(GeneratedMessageV3.UnusedPrivateParameter unused)
protected Object newInstance(GeneratedMessageV3.UnusedPrivateParameter unused)
Parameter
Returns
Overrides
parseDelimitedFrom(InputStream input)
public static Document parseDelimitedFrom(InputStream input)
Parameter
Returns
Exceptions
parseDelimitedFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
public static Document parseDelimitedFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
Returns
Exceptions
parseFrom(byte[] data)
public static Document parseFrom(byte[] data)
Parameter
Name | Description |
data | byte[]
|
Returns
Exceptions
parseFrom(byte[] data, ExtensionRegistryLite extensionRegistry)
public static Document parseFrom(byte[] data, ExtensionRegistryLite extensionRegistry)
Parameters
Returns
Exceptions
parseFrom(ByteString data)
public static Document parseFrom(ByteString data)
Parameter
Returns
Exceptions
parseFrom(ByteString data, ExtensionRegistryLite extensionRegistry)
public static Document parseFrom(ByteString data, ExtensionRegistryLite extensionRegistry)
Parameters
Returns
Exceptions
parseFrom(CodedInputStream input)
public static Document parseFrom(CodedInputStream input)
Parameter
Returns
Exceptions
parseFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
public static Document parseFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
Returns
Exceptions
parseFrom(InputStream input)
public static Document parseFrom(InputStream input)
Parameter
Returns
Exceptions
parseFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
public static Document parseFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
Returns
Exceptions
parseFrom(ByteBuffer data)
public static Document parseFrom(ByteBuffer data)
Parameter
Returns
Exceptions
parseFrom(ByteBuffer data, ExtensionRegistryLite extensionRegistry)
public static Document parseFrom(ByteBuffer data, ExtensionRegistryLite extensionRegistry)
Parameters
Returns
Exceptions
parser()
public static Parser<Document> parser()
Returns
toBuilder()
public Document.Builder toBuilder()
Returns
writeTo(CodedOutputStream output)
public void writeTo(CodedOutputStream output)
Parameter
Overrides
Exceptions