使用过滤条件

Cloud Bigtable 提供以下类型的过滤条件:

本页面详细介绍每种 Bigtable 过滤条件,并展示如何通过 Cloud 客户端库使用每种类型的过滤条件。阅读本页面内容之前,请先阅读 Bigtable 过滤条件概览

有关如何使用过滤条件读取多行数据的其他示例,请参阅读取数据

示例数据

本页面中的示例假定您要存储智能手机和平板电脑的时间序列数据,并且以下数据已写入表格。该表格中有两个列族:stats_summarycell_plan。 每个列族都有三列。

stats_summary cell_plan
行键 connected_cell connected_wifi os_build data_plan_01gb data_plan_05gb data_plan_10gb
phone#4c410523#20190501 1 1 PQ2A.190405.003 true@time minus one hour

False @time
true
phone#4c410523#20190502 1 1 PQ2A.190405.004 true
phone#4c410523#20190505 0 1 PQ2A.190406.000 true
phone#5c10102#20190501 1 1 PQ2A.190401.002 true
phone#5c10102#20190502 1 0 PQ2A.190406.000 true

限制性过滤条件

以下部分介绍每种限制性过滤条件。限制性过滤条件可根据响应是否符合特定条件来控制响应中包含哪些行或单元。

当您使用来组合多个限制性过滤条件时,请注意链中每个过滤条件的输入行都是上一个过滤条件的输出行。例如,如果您链接两个过滤条件,而第一个过滤条件仅输出原始行中的两个单元,则第二个过滤条件仅看到这两个单元。

行选择过滤条件

行样本

此过滤条件允许您检索给定范围内的随机行样本。 该过滤条件会根据您指定的概率,随机选择输出行是应与输入行相同还是应从结果中省略。

Go

func filterLimitRowSample(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.RowSampleFilter(.75)
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitRowSample() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitRowSample(projectId, instanceId, tableId);
}

public static void filterLimitRowSample(String projectId, String instanceId, String tableId) {
  // A filter that matches cells from a row with probability .75
  Filter filter = new RandomRowFilter(.75f);
  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitRowSample() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitRowSample(projectId, instanceId, tableId);
}

public static void filterLimitRowSample(String projectId, String instanceId, String tableId) {
  // A filter that matches cells from a row with probability .75
  Filter filter = FILTERS.key().sample(.75);
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_row_sample(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.RowSampleFilter(.75))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a row sample filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitRowSample(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that matches cells from a row with probability .75
    RowFilter filter = RowFilters.RowSample(.75);
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  row: {
    sample: 0.75,
  },
};
readWithFilter(filter);

PHP

$filter = Filter::key()->sample(.75);
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.sample 0.75
read_with_filter instance_id, table_id, filter

行键正则表达式

此过滤条件会检查输入行的行键是否与正则表达式匹配。 如果行键匹配,则输出行与输入行相同。如果行键不匹配,则输出行为空。

正则表达式必须使用 RE2 语法。由于行键可以包含任意字节(包括换行符),因此在大多数情况下,您应该使用 \C 作为通配符表达式。. 表达式与换行符不匹配。

Go

func filterLimitRowRegex(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.RowKeyFilter(".*#20190501$")
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitRowRegex() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitRowRegex(projectId, instanceId, tableId);
}

public static void filterLimitRowRegex(String projectId, String instanceId, String tableId) {
  // A filter that matches cells from rows whose keys satisfy the given regex
  Filter filter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".*#20190501$"));
  Scan scan = new Scan().setFilter(filter).setMaxVersions();
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitRowRegex() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitRowRegex(projectId, instanceId, tableId);
}

public static void filterLimitRowRegex(String projectId, String instanceId, String tableId) {
  // A filter that matches cells from rows whose keys satisfy the given regex
  Filter filter = FILTERS.key().regex(".*#20190501$");
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_row_regex(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.RowKeyRegexFilter(".*#20190501$".encode("utf-8")))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a row regex filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitRowRegex(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that matches cells from rows whose keys satisfy the given regex
    RowFilter filter = RowFilters.RowKeyRegex(".*#20190501$");
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  row: /.*#20190501$/,
};
readWithFilter(filter);

PHP

$filter = Filter::key()->regex(".*#20190501$");
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.key ".*#20190501$"
read_with_filter instance_id, table_id, filter

单元选择过滤条件

每列的单元数上限

此过滤条件会限制输出行中包含的每列单元数量。应用此过滤条件时,每个输出行都包含来自每列中的 N 个最新单元,并省略该列中的其他所有单元。

如果您还使用交错式过滤条件,且交错式过滤条件会生成单元的重复副本,则每个副本都会计入该上限。

Go

func filterLimitCellsPerCol(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.LatestNFilter(2)
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitCellsPerCol() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitCellsPerCol(projectId, instanceId, tableId);
}

public static void filterLimitCellsPerCol(String projectId, String instanceId, String tableId) {
  // A filter that matches only the most recent 2 cells within each column
  Scan scan = new Scan().setMaxVersions(2);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitCellsPerCol() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitCellsPerCol(projectId, instanceId, tableId);
}

public static void filterLimitCellsPerCol(String projectId, String instanceId, String tableId) {
  // A filter that matches only the most recent 2 cells within each column
  Filter filter = FILTERS.limit().cellsPerColumn(2);
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_cells_per_col(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.CellsColumnLimitFilter(2))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a cells per column filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitCellsPerCol(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that matches only the most recent 2 cells within each column
    RowFilter filter = RowFilters.CellsPerColumnLimit(2);
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  column: {
    cellLimit: 2,
  },
};
readWithFilter(filter);

PHP

$filter = Filter::limit()->cellsPerColumn(2);
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.cells_per_column 2
read_with_filter instance_id, table_id, filter

每行的单元数上限

此过滤条件会限制各输出行中的单元数量。应用此过滤条件时,每个输出行都包含输入行的前 N 个单元,并省略该行后面的所有单元。系统会按存储顺序读取前 N 个单元,无论它们位于哪一列中。

一行中的一列可以包含多个单元。每个单元都包含列值和唯一时间戳。因此,将一行限制为 N 个单元可能与从该行检索前 N 列不同。举例来说,如果您使用的过滤条件每行的单元数上限为 20,且读取行有 30 列,每列有 10 个带有时间戳的单元,则输出行仅返回该行前两列中的值 (2 * 10 = 20)。

如果您需要读取大型行,请综合使用此过滤条件和偏移量过滤条件来进行分页。

Go

func filterLimitCellsPerRow(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.CellsPerRowLimitFilter(2)
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitCellsPerRow() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitCellsPerRow(projectId, instanceId, tableId);
}

public static void filterLimitCellsPerRow(String projectId, String instanceId, String tableId) {
  // A filter that matches the first 2 cells of each row
  //    Filter filter = new ColumnCountGetFilter(2);
  Filter filter = new ColumnPaginationFilter(2, 0);

  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitCellsPerRow() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitCellsPerRow(projectId, instanceId, tableId);
}

public static void filterLimitCellsPerRow(String projectId, String instanceId, String tableId) {
  // A filter that matches the first 2 cells of each row
  Filter filter = FILTERS.limit().cellsPerRow(2);
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_cells_per_row(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.CellsRowLimitFilter(2))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a cells per row filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitCellsPerRow(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that matches the first 2 cells of each row
    RowFilter filter = RowFilters.CellsPerRowLimit(2);
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  row: {
    cellLimit: 2,
  },
};
readWithFilter(filter);

PHP

$filter = Filter::limit()->cellsPerRow(2);
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.cells_per_row 2
read_with_filter instance_id, table_id, filter

每行的单元偏移量

此过滤条件省略了每个输出行的前 N 个单元。输出行中包含所有剩余单元。系统会跳过前 N 个单元,无论它们位于哪一列中。

一行中的一列可以包含多个单元。每个单元都包含列值和唯一时间戳。因此,跳过某一行的前 N 个单元可能与跳过该行的前 N 列不同。举例来说,如果您使用的过滤条件每行的单元偏移量为 20,且读取行有 30 列,每列有 10 个带有时间戳的单元,则输出行返回该行中所有单元的值(前两列中的值除外)(2 * 10 = 20)。

如果您还使用交错式过滤条件,且交错式过滤条件会生成单元的重复副本,则每个副本都会计入该偏移量。

Go

func filterLimitCellsPerRowOffset(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.CellsPerRowOffsetFilter(2)
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitCellsPerRowOffset() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitCellsPerRowOffset(projectId, instanceId, tableId);
}

public static void filterLimitCellsPerRowOffset(
    String projectId, String instanceId, String tableId) {
  // A filter that skips the first 2 cells per row
  Filter filter = new ColumnPaginationFilter(Integer.MAX_VALUE, 2);
  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitCellsPerRowOffset() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitCellsPerRowOffset(projectId, instanceId, tableId);
}

public static void filterLimitCellsPerRowOffset(
    String projectId, String instanceId, String tableId) {
  // A filter that skips the first 2 cells per row
  Filter filter = FILTERS.offset().cellsPerRow(2);
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_cells_per_row_offset(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.CellsRowOffsetFilter(2))
    for row in rows:
        print_row(row)

C#

    /// <summary>
    /// /// Read using a cells per row offset filter from an existing table.
    ///</summary>
    /// <param name="projectId">Your Google Cloud Project ID.</param>
    /// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
    /// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

    public string filterLimitCellsPerRowOffset(
String projectId, String instanceId, String tableId)
    {
        // A filter that skips the first 2 cells per row
        RowFilter filter = RowFilters.CellsPerRowOffset(2);
        return readFilter(projectId, instanceId, tableId, filter);
    }

C++

此代码示例即将推出。

Node.js

const filter = {
  row: {
    cellOffset: 2,
  },
};
readWithFilter(filter);

PHP

$filter = Filter::offset()->cellsPerRow(2);
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.cells_per_row_offset 2
read_with_filter instance_id, table_id, filter

列族正则表达式

仅当单元的列族与正则表达式匹配时,此过滤条件才会在输出行中包含单元。

正则表达式必须使用 RE2 语法。正则表达式不得包含 : 字符,即使该字符并未用作字面量也是如此。由于列族不能包含换行符,所以您可以使用 .\C 作为通配符表达式。

Go

func filterLimitColFamilyRegex(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.FamilyFilter("stats_.*$")
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitColFamilyRegex() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitColFamilyRegex(projectId, instanceId, tableId);
}

public static void filterLimitColFamilyRegex(
    String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose column family satisfies the given regex
  Filter filter = new FamilyFilter(CompareOp.EQUAL, new RegexStringComparator("stats_.*$"));
  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitColFamilyRegex() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitColFamilyRegex(projectId, instanceId, tableId);
}

public static void filterLimitColFamilyRegex(
    String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose column family satisfies the given regex
  Filter filter = FILTERS.family().regex("stats_.*$");
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_col_family_regex(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.FamilyNameRegexFilter("stats_.*$".encode("utf-8")))
    for row in rows:
        print_row(row)

C#

    /// <summary>
    /// /// Read using a family regex filter from an existing table.
    ///</summary>
    /// <param name="projectId">Your Google Cloud Project ID.</param>
    /// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
    /// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

    public string filterLimitColFamilyRegex(
String projectId, String instanceId, String tableId)
    {
        // A filter that matches cells whose column family satisfies the given regex
        RowFilter filter = RowFilters.FamilyNameRegex("stats_.*$");
        return readFilter(projectId, instanceId, tableId, filter);
    }

C++

此代码示例即将推出。

Node.js

const filter = {
  family: /stats_.*$/,
};
readWithFilter(filter);

PHP

$filter = Filter::family()->regex("stats_.*$");
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.family "stats_.*$"
read_with_filter instance_id, table_id, filter

列限定符正则表达式

仅当单元的列限定符与正则表达式匹配时,此过滤条件才会在输出行中包含单元。

正则表达式必须使用 RE2 语法。由于列限定符可以包含任意字节(包括换行符),所以在大多数情况下,您应该使用 \C 作为通配符表达式。. 表达式与换行符不匹配。

Go

func filterLimitColQualifierRegex(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.ColumnFilter("connected_.*$")
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitColQualifierRegex() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitColQualifierRegex(projectId, instanceId, tableId);
}

public static void filterLimitColQualifierRegex(
    String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose column qualifier satisfies the given regex
  Filter filter =
      new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator("connected_.*$"));
  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitColQualifierRegex() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitColQualifierRegex(projectId, instanceId, tableId);
}

public static void filterLimitColQualifierRegex(
    String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose column qualifier satisfies the given regex
  Filter filter = FILTERS.qualifier().regex("connected_.*$");
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_col_qualifier_regex(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.ColumnQualifierRegexFilter(
            "connected_.*$".encode("utf-8")))
    for row in rows:
        print_row(row)

C#

    /// <summary>
    /// /// Read using a qualifier regex filter from an existing table.
    ///</summary>
    /// <param name="projectId">Your Google Cloud Project ID.</param>
    /// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
    /// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

    public string filterLimitColQualifierRegex(
String projectId, String instanceId, String tableId)
    {
        // A filter that matches cells whose column qualifier satisfies the given regex
        RowFilter filter = RowFilters.ColumnQualifierRegex("connected_.*$");
        return readFilter(projectId, instanceId, tableId, filter);
    }

C++

此代码示例即将推出。

Node.js

const filter = {
  column: /connected_.*$/,
};
readWithFilter(filter);

PHP

$filter = Filter::qualifier()->regex("connected_.*$");
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.qualifier "connected_.*$"
read_with_filter instance_id, table_id, filter

列范围

仅在单元属于特定列族且其列限定符在特定范围内时,此过滤条件才在输出行中包含单元。您可以通过提供开始限定符和结束限定符来指定范围。

Go

func filterLimitColRange(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.ColumnRangeFilter("cell_plan", "data_plan_01gb", "data_plan_10gb")
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitColRange() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitColRange(projectId, instanceId, tableId);
}

public static void filterLimitColRange(String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose column qualifiers are between data_plan_01gb and
  // data_plan_10gb in the column family cell_plan
  Filter filter =
      new ColumnRangeFilter(
          Bytes.toBytes("data_plan_01gb"), true, Bytes.toBytes("data_plan_10gb"), false);
  Scan scan = new Scan().addFamily(Bytes.toBytes("cell_plan")).setFilter(filter).setMaxVersions();
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitColRange() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitColRange(projectId, instanceId, tableId);
}

public static void filterLimitColRange(String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose column qualifiers are between data_plan_01gb and
  // data_plan_10gb in the column family cell_plan
  Filter filter =
      FILTERS
          .qualifier()
          .rangeWithinFamily("cell_plan")
          .startClosed("data_plan_01gb")
          .endOpen("data_plan_10gb");
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_col_range(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.ColumnRangeFilter("cell_plan",
                                              b"data_plan_01gb",
                                              b"data_plan_10gb",
                                              inclusive_end=False))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a qualifer range filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitColRange(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that matches cells whose column qualifiers are between data_plan_01gb and
    // data_plan_10gb in the column family cell_plan
    RowFilter filter = RowFilters.ColumnRange(ColumnRange.ClosedOpen("cell_plan", "data_plan_01gb", "data_plan_10gb"));
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  column: {
    family: 'cell_plan',
    start: 'data_plan_01gb',
    end: {
      value: 'data_plan_10gb',
      inclusive: false,
    },
  },
};
readWithFilter(filter);

PHP

$filter = Filter::qualifier()
    ->rangeWithinFamily("cell_plan")
    ->startClosed("data_plan_01gb")
    ->endOpen("data_plan_10gb");
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
range = Google::Cloud::Bigtable::ColumnRange.new("cell_plan").from("data_plan_01gb").to("data_plan_10gb")
filter = Google::Cloud::Bigtable::RowFilter.column_range range
read_with_filter instance_id, table_id, filter

值范围

仅在单元的值在特定范围内时,此过滤条件才在输出行包含单元。您可以通过提供起始值和结束值来指定范围。

  • 如需获取值在特定值之前的单元,请将该值指定为不包含边界值的结束值,并省略起始值。
  • 如需获取值等于特定值或在特定值之后的单元,请将该值指定为包含边界值的起始值,并省略结束值。

Go

func filterLimitValueRange(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.ValueRangeFilter([]byte("PQ2A.190405"), []byte("PQ2A.190406"))
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitValueRange() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitValueRange(projectId, instanceId, tableId);
}

public static void filterLimitValueRange(String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose values are between the given values
  ValueFilter valueGreaterFilter =
      new ValueFilter(
          CompareFilter.CompareOp.GREATER_OR_EQUAL,
          new BinaryComparator(Bytes.toBytes("PQ2A.190405")));
  ValueFilter valueLesserFilter =
      new ValueFilter(
          CompareFilter.CompareOp.LESS_OR_EQUAL,
          new BinaryComparator(Bytes.toBytes("PQ2A.190406")));

  FilterList filter = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  filter.addFilter(valueGreaterFilter);
  filter.addFilter(valueLesserFilter);

  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitValueRange() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitValueRange(projectId, instanceId, tableId);
}

public static void filterLimitValueRange(String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose values are between the given values
  Filter filter = FILTERS.value().range().startClosed("PQ2A.190405").endClosed("PQ2A.190406");
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_value_range(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.ValueRangeFilter(b"PQ2A.190405", b"PQ2A.190406"))

    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a value range filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitValueRange(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that matches cells whose values are between the given values
    RowFilter filter = RowFilters.ValueRange(ValueRange.Closed("PQ2A.190405", "PQ2A.190406"));
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  value: {
    start: 'PQ2A.190405',
    end: 'PQ2A.190406',
  },
};
readWithFilter(filter);

PHP

$filter = Filter::value()
    ->range()
    ->startClosed("PQ2A.190405")
    ->endOpen("PQ2A.190406");
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
range = Google::Cloud::Bigtable::ValueRange.new.from("PQ2A.190405").to("PQ2A.190406")
filter = Google::Cloud::Bigtable::RowFilter.value_range range
read_with_filter instance_id, table_id, filter

值正则表达式

只有当单元的值与正则表达式匹配时,此过滤条件才会在输出行中包含单元。

正则表达式必须使用 RE2 语法。由于值可以包含任意字节(包括换行符),因此在大多数情况下,您应该使用 \C 作为通配符表达式。. 表达式与换行符不匹配。

Go

func filterLimitValueRegex(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.ValueFilter("PQ2A.*$")
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitValueRegex() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitValueRegex(projectId, instanceId, tableId);
}

public static void filterLimitValueRegex(String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose value satisfies the given regex
  Filter filter = new ValueFilter(CompareOp.EQUAL, new RegexStringComparator("PQ2A.*$"));

  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitValueRegex() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitValueRegex(projectId, instanceId, tableId);
}

public static void filterLimitValueRegex(String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose value satisfies the given regex
  Filter filter = FILTERS.value().regex("PQ2A.*$");
  readFilter(projectId, instanceId, tableId, filter);
}

Python



def filter_limit_value_regex(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.ValueRegexFilter("PQ2A.*$".encode("utf-8")))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a value regex filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitValueRegex(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that matches cells whose value satisfies the given regex
    RowFilter filter = RowFilters.ValueRegex("PQ2A.*$");
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  value: /PQ2A.*$/,
};
readWithFilter(filter);

PHP

$filter = Filter::value()->regex("PQ2A.*$");
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.value "PQ2A.*$"
read_with_filter instance_id, table_id, filter

时间戳范围

仅单元的时间戳在特定范围内时,此过滤条件才在输出行中包含单元。您可以通过提供开始时间(包含边界值)和结束时间(不包含边界值)来指定该范围。默认单位为“微秒”,时间戳必须是 1000 的倍数。

  • 如需获取时间戳早于特定时间的单元,请指定该时间作为结束时间,并省略开始时间以使其无边界。
  • 如需获取时间戳等于或晚于特定时间的单元,请指定该时间作为开始时间,并省略结束时间以使其无边界。

Go

func filterLimitTimestampRange(w io.Writer, projectID, instanceID string, tableName string) error {
	startTime := time.Unix(0, 0)
	endTime := time.Now().Add(-1 * time.Hour)
	filter := bigtable.TimestampRangeFilter(startTime, endTime)

	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitTimestampRange() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitTimestampRange(projectId, instanceId, tableId);
}

public static void filterLimitTimestampRange(
    String projectId, String instanceId, String tableId) {
  // A filter that matches cells whose timestamp is from an hour ago or earlier
  // Get a time representing one hour ago
  long timestamp = Instant.now().minus(1, ChronoUnit.HOURS).toEpochMilli();
  try {
    Scan scan = new Scan().setTimeRange(0, timestamp).setMaxVersions();
    readWithFilter(projectId, instanceId, tableId, scan);
  } catch (IOException e) {
    System.out.println("There was an issue with your timestamp \n" + e.toString());
  }
}

Java

注意:此客户端库中的时间戳范围过滤条件目前不支持 startOpen()endClosed()方法。

public static void filterLimitTimestampRange() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitTimestampRange(projectId, instanceId, tableId);
}

public static void filterLimitTimestampRange(
    String projectId, String instanceId, String tableId) {
  // Get a time representing one hour ago
  long timestamp = Instant.now().minus(1, ChronoUnit.HOURS).toEpochMilli() * 1000;

  // A filter that matches cells whose timestamp is from an hour ago or earlier
  Filter filter = FILTERS.timestamp().range().startClosed(0L).endOpen(timestamp);
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_timestamp_range(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    end = datetime.datetime(2019, 5, 1)

    rows = table.read_rows(
        filter_=row_filters.TimestampRangeFilter(
            row_filters.TimestampRange(end=end)))
    for row in rows:
        print_row(row)

C#

    /// <summary>
    /// /// Read using a timestamp range filter from an existing table.
    ///</summary>
    /// <param name="projectId">Your Google Cloud Project ID.</param>
    /// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
    /// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

    public string filterLimitTimestampRange(
String projectId, String instanceId, String tableId)
    {
        BigtableVersion timestamp_minus_hr = new BigtableVersion(new DateTime(2020, 1, 10, 13, 0, 0, DateTimeKind.Utc));

        // A filter that matches cells whose timestamp is from an hour ago or earlier
        RowFilter filter = RowFilters.TimestampRange(new DateTime(0), timestamp_minus_hr.ToDateTime());
        return readFilter(projectId, instanceId, tableId, filter);
    }

C++

此代码示例即将推出。

Node.js

const start = 0;
const end = new Date(2019, 5, 1);
end.setUTCHours(0);
const filter = {
  time: {
    start,
    end,
  },
};
readWithFilter(filter);

PHP

$start = 0;
$end = (time() - 60 * 60) * 1000 * 1000;
$filter = Filter::timestamp()
    ->range()
    ->startClosed($start)
    ->endOpen($end);
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
timestamp_minus_hr = (Time.now.to_f * 1_000_000).round(-3) - 60 * 60 * 1000 * 1000
puts timestamp_minus_hr
filter = Google::Cloud::Bigtable::RowFilter.timestamp_range from: 0, to: timestamp_minus_hr

read_with_filter instance_id, table_id, filter

高级单一过滤条件

以下过滤条件的使用可能有一定难度。

全部屏蔽

此过滤条件会移除输出行中的所有单元。

如果您使用的是交错式过滤条件,则可以将“全部屏蔽”过滤条件与链式过滤条件相结合,以暂时停用部分交错。

Go

func filterLimitBlockAll(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.BlockAllFilter()
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterLimitBlockAll() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitBlockAll(projectId, instanceId, tableId);
}

public static void filterLimitBlockAll(String projectId, String instanceId, String tableId) {
  // A filter that does not match any cells
  Filter filter = new SkipFilter(new RandomRowFilter(1));
  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterLimitBlockAll() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitBlockAll(projectId, instanceId, tableId);
}

public static void filterLimitBlockAll(String projectId, String instanceId, String tableId) {
  // A filter that does not match any cells
  Filter filter = FILTERS.block();
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_block_all(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.BlockAllFilter(True))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a block all filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitBlockAll(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that does not match any cells
    RowFilter filter = RowFilters.BlockAllFilter();
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  all: false,
};
readWithFilter(filter);

PHP

$filter = Filter::block();
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.block
read_with_filter instance_id, table_id, filter

全部通过

此过滤条件使输出行中包含输入行中的所有单元。它相当于没有过滤地进行读取。

如果您要组合多个过滤条件,并且在某些情况下需要原样输出单元,则“全部通过”过滤条件可能很有用。

Go

func filterLimitPassAll(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.PassAllFilter()
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

此客户端库不支持此过滤条件。

Java

public static void filterLimitPassAll() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterLimitPassAll(projectId, instanceId, tableId);
}

public static void filterLimitPassAll(String projectId, String instanceId, String tableId) {
  // A filter that matches all cells
  Filter filter = FILTERS.pass();
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_limit_pass_all(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.PassAllFilter(True))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a pass all filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterLimitPassAll(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that matches all cells
    RowFilter filter = RowFilters.PassAllFilter();
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  all: true,
};
readWithFilter(filter);

PHP

$filter = Filter::pass();
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.pass
read_with_filter instance_id, table_id, filter

接收器

此过滤条件会将输入行的所有单元复制到最终输出行中,即使其他过滤条件通常会移除或更改这些单元也是如此。

使用链式过滤条件时,其他限制性过滤条件只能影响自己的输出行,这些输出行将成为下一个过滤条件的输入行。接收器过滤条件不同,它会将单元直接插入最终输出行中(显示在读取结果中)。

例如,假设您创建了由两个过滤条件组成的链:

  1. 接收器过滤条件
  2. 阻止所有过滤条件,从该行中删除所有单元

“全部阻止”过滤条件会自行生成一个空行,该行不包含在读取结果中。但是,接收器过滤条件会强制将输入行中的所有单元复制到最终输出行,无论链条中的其他过滤条件会执行什么操作都是如此。因此,组合使用接收器过滤条件和“阻止所有”过滤条件的效果与完全不使用过滤条件的效果相同,原始输入行及其所有单元都会显示在读取结果中。

修改性过滤条件

以下部分介绍各种修改性过滤条件。修改性过滤条件会影响单个单元的数据或元数据。

应用标签

此过滤条件会向行中的所有单元添加标签。在交错式中使用此过滤条件,以指示哪个过滤条件导致单元包含在输出行中。您的应用可以使用每个单元的标签来执行其他客户端处理。

每个标签不得超过 15 个字符。此外,每个标签必须与 RE2 正则表达式 [a-z0-9\\-]+ 匹配。

每个单元只能有一个标签。因此,一个过滤条件中只能包含一次“应用标签”过滤条件。

Go

此客户端库不支持此过滤条件。

HBase

此客户端库不支持此过滤条件。

Java

public static void filterModifyApplyLabel() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterModifyApplyLabel(projectId, instanceId, tableId);
}

public static void filterModifyApplyLabel(String projectId, String instanceId, String tableId) {
  // A filter that applies the given label to the outputted cell
  Filter filter = FILTERS.label("labelled");
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_modify_apply_label(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.ApplyLabelFilter(label="labelled"))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a strip value filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterModifyApplyLabel(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that applies the given label to the outputted cell
    RowFilter filter = new RowFilter { ApplyLabelTransformer = "labelled" };
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  label: 'labelled',
};
readWithFilter(filter);

PHP

$filter = Filter::label("labelled");
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.label "labelled"
read_with_filter instance_id, table_id, filter

删除值

此过滤条件会将每个单元的值替换为空字符串。如果您只需要计算符合条件的行数或单元数,而不是检索这些行或单元中的所有数据,请使用此过滤条件。

Go

func filterModifyStripValue(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.StripValueFilter()
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

此客户端库不支持此过滤条件。

Java

public static void filterModifyStripValue() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterModifyStripValue(projectId, instanceId, tableId);
}

public static void filterModifyStripValue(String projectId, String instanceId, String tableId) {
  // A filter that replaces the outputted cell value with the empty string
  Filter filter = FILTERS.value().strip();
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_modify_strip_value(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.StripValueTransformerFilter(True))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a strip value filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterModifyStripValue(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that replaces the outputted cell value with the empty string
    RowFilter filter = RowFilters.StripValueTransformer();
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  value: {
    strip: true,
  },
};
readWithFilter(filter);

PHP

$filter = Filter::value()->strip();
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.strip_value
read_with_filter instance_id, table_id, filter

组合式过滤条件

以下各部分介绍各种组合式过滤条件。组合式过滤条件可让您将多个过滤条件合并为一个,从而可以将多个过滤条件应用于一个读取请求。

链式

此过滤条件将一系列过滤条件按顺序应用于每个输出行。链式过滤条件类似于使用逻辑 AND。

链中的每个过滤条件都只能看到前一个过滤条件的输出。例如,如果您链接两个过滤条件,而第一个过滤条件从输出行中移除了一半的单元,则第二个过滤条件无权访问已移除的单元。

换句话说,该过滤条件的顺序非常重要。如果更改链式过滤条件的顺序,则可能会在输出行中获得不同的数据。

Go

func filterComposingChain(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.ChainFilters(bigtable.LatestNFilter(1), bigtable.FamilyFilter("cell_plan"))
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterComposingChain() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterComposingChain(projectId, instanceId, tableId);
}

public static void filterComposingChain(String projectId, String instanceId, String tableId) {
  // A filter that selects one cell per row AND within the column family cell_plan
  Filter familyFilter =
      new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("cell_plan")));
  Filter columnCountGetFilter = new ColumnCountGetFilter(3);

  FilterList filter = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  filter.addFilter(columnCountGetFilter);
  filter.addFilter(familyFilter);
  Scan scan = new Scan().setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterComposingChain() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterComposingChain(projectId, instanceId, tableId);
}

public static void filterComposingChain(String projectId, String instanceId, String tableId) {
  // A filter that selects one cell per column AND within the column family cell_plan
  Filter filter =
      FILTERS
          .chain()
          .filter(FILTERS.limit().cellsPerColumn(1))
          .filter(FILTERS.family().exactMatch("cell_plan"));
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_composing_chain(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.RowFilterChain(
        filters=[row_filters.CellsColumnLimitFilter(1),
                 row_filters.FamilyNameRegexFilter("cell_plan")]))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a chain filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterComposingChain(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that selects one cell per column AND within the column family cell_plan
    RowFilter filter = RowFilters.Chain(RowFilters.CellsPerColumnLimit(1), RowFilters.FamilyNameExact("cell_plan"));
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = [
  {
    column: {
      cellLimit: 1,
    },
  },
  {
    family: 'cell_plan',
  },
];
readWithFilter(filter);

PHP

$filter = Filter::chain()
    ->addFilter(Filter::limit()->cellsPerColumn(1))
    ->addFilter(Filter::family()->exactMatch("cell_plan"));
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.chain.cells_per_column(1).family("cell_plan")
read_with_filter instance_id, table_id, filter

交错式

此过滤条件通过多个组件过滤条件发送输入行,根据每个组件过滤条件生成临时输出行。然后,将临时输出行中的所有单元合并到最终输出行中。交错式过滤条件类似于使用逻辑 OR。

交错式会导致单元在输出行中重复。例如,如果您的交错式中包含两个过滤条件,并且两个过滤条件都在其临时输出行中包含特定单元,则最终输出行将包含两份该单元。

如果组件过滤条件输出多个单元,且这些单元的列族、列限定符和时间戳相同,则最终输出行会将这些单元按非指定的顺序组合在一起。

Go

func filterComposingInterleave(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.InterleaveFilters(bigtable.ValueFilter("true"), bigtable.ColumnFilter("os_build"))
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

public static void filterComposingInterleave() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterComposingInterleave(projectId, instanceId, tableId);
}

public static void filterComposingInterleave(
    String projectId, String instanceId, String tableId) {
  // A filter that matches cells with the value true OR with the column qualifier os_build
  Filter qualifierFilter =
      new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("os_build")));
  Filter valueFilter =
      new ValueFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("true")));

  FilterList filter = new FilterList(Operator.MUST_PASS_ONE);
  filter.addFilter(qualifierFilter);
  filter.addFilter(valueFilter);

  Scan scan = new Scan().setFilter(filter).setMaxVersions();
  readWithFilter(projectId, instanceId, tableId, scan);
}

Java

public static void filterComposingInterleave() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterComposingInterleave(projectId, instanceId, tableId);
}

public static void filterComposingInterleave(
    String projectId, String instanceId, String tableId) {
  // A filter that matches cells with the value true OR with the column qualifier os_build
  Filter filter =
      FILTERS
          .interleave()
          .filter(FILTERS.value().exactMatch("true"))
          .filter(FILTERS.qualifier().exactMatch("os_build"));
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_composing_interleave(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.RowFilterUnion(
        filters=[row_filters.ValueRegexFilter("true"),
                 row_filters.ColumnQualifierRegexFilter("os_build")]))
    for row in rows:
        print_row(row)

C#

    /// <summary>
    /// /// Read using an interleave filter from an existing table.
    ///</summary>
    /// <param name="projectId">Your Google Cloud Project ID.</param>
    /// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
    /// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

    public string filterComposingInterleave(
String projectId, String instanceId, String tableId)
    {
        // A filter that matches cells with the value true OR with the column qualifier os_build
        RowFilter filter = RowFilters.Interleave(RowFilters.ValueExact("true"), RowFilters.ColumnQualifierExact("os_build"));
        return readFilter(projectId, instanceId, tableId, filter);
    }

C++

此代码示例即将推出。

Node.js

const filter = {
  interleave: [
    {
      value: 'true',
    },
    {column: 'os_build'},
  ],
};
readWithFilter(filter);

PHP

$filter = Filter::interleave()
    ->addFilter(Filter::value()->exactMatch(unpack('C*', 1)))
    ->addFilter(Filter::qualifier()->exactMatch("os_build"));
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.interleave.value("true").qualifier("os_build")
read_with_filter instance_id, table_id, filter

条件性

此过滤条件将 true 过滤条件或 false 过滤条件应用于输入行。

如需在 true 过滤条件和 false 过滤条件之间进行选择,请对输入行应用谓词过滤条件。如果谓词过滤条件的输出中包含至少一个单元,则应用 true 过滤条件。如果谓词过滤条件的输出为空,则应用 false 过滤条件。

谓词过滤条件的输出行仅用于在 true 过滤条件和 false 过滤条件之间进行选择。它不会出现在对读取请求的响应中。

使用 true 和 false 过滤条件时,不会以原子方式执行谓词过滤条件。 换句话说,输入行中的数据会在执行谓词过滤条件与执行 true 或 false 过滤条件之间发生变化。此行为会导致结果不一致或发生意外。

使用条件性过滤条件时,您可以省略 true 或 false 过滤条件。 忽略过滤条件与指定全部屏蔽过滤条件相同。 如果谓词过滤条件选择了您省略的条件,则输出行将为空。

Go

func filterComposingCondition(w io.Writer, projectID, instanceID string, tableName string) error {
	filter := bigtable.ConditionFilter(
		bigtable.ChainFilters(bigtable.ColumnFilter("data_plan_10gb"), bigtable.ValueFilter("true")),
		bigtable.StripValueFilter(),
		bigtable.PassAllFilter())
	return readWithFilter(w, projectID, instanceID, tableName, filter)
}

HBase

此客户端库不支持此过滤条件。

Java

public static void filterComposingCondition() {
  // TODO(developer): Replace these variables before running the sample.
  String projectId = "my-project-id";
  String instanceId = "my-instance-id";
  String tableId = "mobile-time-series";
  filterComposingCondition(projectId, instanceId, tableId);
}

public static void filterComposingCondition(String projectId, String instanceId, String tableId) {
  // A filter that applies the label passed-filter IF the cell has the column qualifier
  // data_plan_10gb AND the value true, OTHERWISE applies the label filtered-out
  Filter filter =
      FILTERS
          .condition(
              FILTERS
                  .chain()
                  .filter(FILTERS.qualifier().exactMatch("data_plan_10gb"))
                  .filter(FILTERS.value().exactMatch("true")))
          .then(FILTERS.label("passed-filter"))
          .otherwise(FILTERS.label("filtered-out"));
  readFilter(projectId, instanceId, tableId, filter);
}

Python

def filter_composing_condition(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.ConditionalRowFilter(
        base_filter=row_filters.RowFilterChain(filters=[
            row_filters.ColumnQualifierRegexFilter(
                "data_plan_10gb"),
            row_filters.ValueRegexFilter(
                "true")]),
        true_filter=row_filters.ApplyLabelFilter(label="passed-filter"),
        false_filter=row_filters.ApplyLabelFilter(label="filtered-out")

    ))
    for row in rows:
        print_row(row)

C#

/// <summary>
/// /// Read using a conditional filter from an existing table.
///</summary>
/// <param name="projectId">Your Google Cloud Project ID.</param>
/// <param name="instanceId">Your Google Cloud Bigtable Instance ID.</param>
/// <param name="tableId">Your Google Cloud Bigtable table ID.</param>

public string filterComposingCondition(string projectId = "YOUR-PROJECT-ID", string instanceId = "YOUR-INSTANCE-ID", string tableId = "YOUR-TABLE-ID")
{
    // A filter that applies the label passed-filter IF the cell has the column qualifier
    // data_plan_10gb AND the value true, OTHERWISE applies the label filtered-out
    RowFilter filter = RowFilters.Condition(
        RowFilters.Chain(RowFilters.ColumnQualifierExact("data_plan_10gb"), RowFilters.ValueExact("true")),
        new RowFilter { ApplyLabelTransformer = "passed-filter" },
        new RowFilter { ApplyLabelTransformer = "filtered-out" }
        );
    return readFilter(projectId, instanceId, tableId, filter);
}

C++

此代码示例即将推出。

Node.js

const filter = {
  condition: {
    test: [
      {column: 'data_plan_10gb'},
      {
        value: 'true',
      },
    ],
    pass: {
      label: 'passed-filter',
    },
    fail: {
      label: 'filtered-out',
    },
  },
};
readWithFilter(filter);

PHP

$filter = Filter::condition(
    Filter::chain()
        ->addFilter(Filter::value()->exactMatch(unpack('C*', 1)))
        ->addFilter(Filter::qualifier()->exactMatch("data_plan_10gb"))
)
    ->then(Filter::label("passed-filter"))
    ->otherwise(Filter::label("filtered-out"));
read_filter($table, $filter);

Ruby

# instance_id = "my-instance"
# table_id    = "my-table"
filter = Google::Cloud::Bigtable::RowFilter.condition(
  Google::Cloud::Bigtable::RowFilter.chain.qualifier("data_plan_10gb").value("true")
)
                                           .on_match(Google::Cloud::Bigtable::RowFilter.label("passed-filter"))
                                           .otherwise(Google::Cloud::Bigtable::RowFilter.label("filtered-out"))
read_with_filter instance_id, table_id, filter

后续步骤