This is an automated email from the ASF dual-hosted git repository.
lixueclaire pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new aca9f5de feat(C++): filter property and return VerticesCollection
(#658)
aca9f5de is described below
commit aca9f5de62b3db99eab1e094ce7797c825a06c08
Author: Elssky <[email protected]>
AuthorDate: Tue Nov 19 10:33:57 2024 +0800
feat(C++): filter property and return VerticesCollection (#658)
---
...l_filtering_example.cc => filtering_example.cc} | 57 ++++++++++-
cpp/src/graphar/high-level/graph_reader.cc | 108 ++++++++++++++++++++-
cpp/src/graphar/high-level/graph_reader.h | 12 +++
3 files changed, 174 insertions(+), 3 deletions(-)
diff --git a/cpp/examples/label_filtering_example.cc
b/cpp/examples/filtering_example.cc
similarity index 62%
rename from cpp/examples/label_filtering_example.cc
rename to cpp/examples/filtering_example.cc
index e519bdda..643028e7 100644
--- a/cpp/examples/label_filtering_example.cc
+++ b/cpp/examples/filtering_example.cc
@@ -80,8 +80,63 @@ void vertices_collection(
std::cout << property << " ";
std::cout << std::endl;
}
-}
+ std::cout << std::endl;
+
+ std::cout << "Test vertices with property in a filtered vertices set"
+ << std::endl;
+ std::cout << "--------------------------------------" << std::endl;
+ auto filter = graphar::_Equal(graphar::_Property("name"),
+ graphar::_Literal("Safi_Airways"));
+ auto maybe_filter_vertices_collection_4 =
+ graphar::VerticesCollection::verticesWithProperty(
+ std::string("name"), filter, graph_info, type);
+ ASSERT(!maybe_filter_vertices_collection_4.has_error());
+ auto filter_vertices_4 = maybe_filter_vertices_collection_4.value();
+ std::cout << "valid vertices num: " << filter_vertices_4->size() <<
std::endl;
+ for (auto it = filter_vertices_4->begin(); it != filter_vertices_4->end();
+ ++it) {
+ // get a node's all labels
+ auto label_result = it.label();
+ std::cout << "id: " << it.id() << " ";
+ if (!label_result.has_error()) {
+ for (auto label : label_result.value()) {
+ std::cout << label << " ";
+ }
+ }
+ std::cout << "name: ";
+ auto property = it.property<std::string>("name").value();
+ std::cout << property << " ";
+ std::cout << std::endl;
+ }
+
+ std::cout << "Test vertices with property" << std::endl;
+ std::cout << "--------------------------------------" << std::endl;
+ auto filter_2 =
+ graphar::_Equal(graphar::_Property("name"),
graphar::_Literal("Kam_Air"));
+ auto maybe_filter_vertices_collection_5 =
+ graphar::VerticesCollection::verticesWithProperty(
+ std::string("name"), filter_2, filter_vertices_3);
+ ASSERT(!maybe_filter_vertices_collection_5.has_error());
+ auto filter_vertices_5 = maybe_filter_vertices_collection_5.value();
+ std::cout << "valid vertices num: " << filter_vertices_5->size() <<
std::endl;
+
+ for (auto it = filter_vertices_5->begin(); it != filter_vertices_5->end();
+ ++it) {
+ // get a node's all labels
+ auto label_result = it.label();
+ std::cout << "id: " << it.id() << " ";
+ if (!label_result.has_error()) {
+ for (auto label : label_result.value()) {
+ std::cout << label << " ";
+ }
+ }
+ std::cout << "name: ";
+ auto property = it.property<std::string>("name").value();
+ std::cout << property << " ";
+ std::cout << std::endl;
+ }
+}
int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path = GetTestingResourceRoot() + "/ldbc/parquet/ldbc.graph.yml";
diff --git a/cpp/src/graphar/high-level/graph_reader.cc
b/cpp/src/graphar/high-level/graph_reader.cc
index 2cfe5b36..66438af2 100644
--- a/cpp/src/graphar/high-level/graph_reader.cc
+++ b/cpp/src/graphar/high-level/graph_reader.cc
@@ -17,13 +17,12 @@
* under the License.
*/
+#include "graphar/high-level/graph_reader.h"
#include <algorithm>
#include <unordered_set>
-
#include "arrow/array.h"
#include "graphar/api/arrow_reader.h"
#include "graphar/convert_to_arrow_type.h"
-#include "graphar/high-level/graph_reader.h"
#include "graphar/label.h"
#include "graphar/types.h"
@@ -264,6 +263,69 @@ Result<std::vector<IdType>>
VerticesCollection::filter_by_acero(
return indices64;
}
+/**
+ * Scan the property chunks of this collection with a filter pushed down to
+ * the chunk reader and gather the vertex ids of the rows that pass it.
+ *
+ * When the collection is already filtered, only the chunks listed in
+ * valid_chunk_ are read; otherwise every chunk covering [0, vertex_num_) is
+ * read. The returned ids are NOT intersected with filtered_ids_; a caller
+ * that refines an already filtered collection has to do that itself.
+ *
+ * @param property_name Property used to look up the property group to read.
+ * @param filter_expression Expression handed to the chunk reader.
+ * @param new_valid_chunk Optional sink; the indices of chunks holding at
+ *        least one match are appended to it. When it is null and the
+ *        collection is unfiltered, valid_chunk_ is replaced with those
+ *        indices instead.
+ * @return The matching vertex ids in scan order, or the first error reported
+ *         while creating or driving the reader.
+ */
+Result<std::vector<IdType>> VerticesCollection::filter(
+    std::string property_name, std::shared_ptr<Expression> filter_expression,
+    std::vector<IdType>* new_valid_chunk) {
+  // Keep the arithmetic in IdType: vertex counts may not fit in an int.
+  const IdType chunk_size = vertex_info_->GetChunkSize();
+  if (chunk_size <= 0) {
+    return Status::Invalid("The vertex chunk size must be positive.");
+  }
+  auto property_group = vertex_info_->GetPropertyGroup(property_name);
+  if (property_group == nullptr) {
+    return Status::Invalid("No property group contains the property: " +
+                           property_name);
+  }
+  GAR_ASSIGN_OR_RAISE(auto filter_reader,
+                      graphar::VertexPropertyArrowChunkReader::Make(
+                          vertex_info_, property_group, prefix_, {}));
+  filter_reader->Filter(filter_expression);
+
+  // Snapshot the chunks to visit up front so that writing the result into
+  // valid_chunk_ (or an aliasing out-parameter) cannot disturb the scan.
+  std::vector<IdType> chunks_to_scan;
+  if (is_filtered_) {
+    chunks_to_scan.assign(valid_chunk_.begin(), valid_chunk_.end());
+  } else {
+    for (IdType chunk_idx = 0; chunk_idx * chunk_size < vertex_num_;
+         ++chunk_idx) {
+      chunks_to_scan.push_back(chunk_idx);
+    }
+  }
+
+  std::vector<IdType> matched_ids;
+  std::vector<IdType> matched_chunks;
+  for (const IdType chunk_idx : chunks_to_scan) {
+    // Position the reader explicitly on the first row of the chunk; this
+    // also avoids stepping past the last chunk with next_chunk().
+    GAR_RETURN_NOT_OK(filter_reader->seek(chunk_idx * chunk_size));
+    GAR_ASSIGN_OR_RAISE(auto filter_table, filter_reader->GetChunk());
+    if (filter_table == nullptr || filter_table->num_rows() == 0) {
+      continue;  // nothing in this chunk passed the filter
+    }
+    const int index_col =
+        filter_table->schema()->GetFieldIndex(GeneralParams::kVertexIndexCol);
+    if (index_col < 0) {
+      return Status::Invalid(
+          "The filtered table does not contain the vertex index column.");
+    }
+    matched_chunks.push_back(chunk_idx);
+    // A table column may be split into several arrays; read all of them.
+    for (const auto& piece : filter_table->column(index_col)->chunks()) {
+      auto id_array = std::static_pointer_cast<arrow::Int64Array>(piece);
+      for (int64_t i = 0; i < id_array->length(); ++i) {
+        if (!id_array->IsNull(i)) {
+          matched_ids.push_back(id_array->Value(i));
+        }
+      }
+    }
+  }
+
+  if (new_valid_chunk != nullptr) {
+    new_valid_chunk->insert(new_valid_chunk->end(), matched_chunks.begin(),
+                            matched_chunks.end());
+  } else if (!is_filtered_) {
+    // Replace rather than append so repeated calls do not pile up duplicates.
+    valid_chunk_.swap(matched_chunks);
+  }
+  return matched_ids;
+}
+
Result<std::shared_ptr<VerticesCollection>>
VerticesCollection::verticesWithLabel(
const std::string& filter_label,
@@ -384,6 +446,48 @@ VerticesCollection::verticesWithMultipleLabels(
return new_vertices_collection;
}
+/**
+ * Build a collection holding the vertices of the given type whose property
+ * satisfies the filter.
+ *
+ * @param property_name Property the filter is evaluated on.
+ * @param filter Filter expression to apply.
+ * @param graph_info Graph info providing the prefix and the vertex info.
+ * @param type Vertex type to query.
+ * @return The filtered collection, or an error if the graph info is null, the
+ *         vertex type is unknown, or filtering fails.
+ */
+Result<std::shared_ptr<VerticesCollection>>
+VerticesCollection::verticesWithProperty(
+    const std::string property_name, const graphar::util::Filter filter,
+    const std::shared_ptr<GraphInfo>& graph_info, const std::string& type) {
+  if (graph_info == nullptr) {
+    return Status::Invalid("The graph info must not be null.");
+  }
+  auto vertex_info = graph_info->GetVertexInfo(type);
+  if (vertex_info == nullptr) {
+    return Status::Invalid("The vertex type does not exist: " + type);
+  }
+  auto vertices_collection = std::make_shared<VerticesCollection>(
+      vertex_info, graph_info->GetPrefix());
+  // Propagate a filtering failure instead of unwrapping the result blindly.
+  GAR_ASSIGN_OR_RAISE(auto matched_ids,
+                      vertices_collection->filter(property_name, filter));
+  vertices_collection->filtered_ids_.swap(matched_ids);
+  vertices_collection->is_filtered_ = true;
+  return vertices_collection;
+}
+
+/**
+ * Build a collection holding the vertices of an existing collection whose
+ * property satisfies the filter.
+ *
+ * The result is always marked as filtered, including when the input
+ * collection was not; otherwise the computed ids would be ignored. The ids
+ * keep the order in which the chunks were scanned.
+ *
+ * @param property_name Property the filter is evaluated on.
+ * @param filter Filter expression to apply.
+ * @param vertices_collection Collection to refine; it is not narrowed itself.
+ * @return The refined collection, or an error if the input is null or
+ *         filtering fails.
+ */
+Result<std::shared_ptr<VerticesCollection>>
+VerticesCollection::verticesWithProperty(
+    const std::string property_name, const graphar::util::Filter filter,
+    const std::shared_ptr<VerticesCollection>& vertices_collection) {
+  if (vertices_collection == nullptr) {
+    return Status::Invalid("The vertices collection must not be null.");
+  }
+  auto new_vertices_collection = std::make_shared<VerticesCollection>(
+      vertices_collection->vertex_info_, vertices_collection->prefix_);
+  GAR_ASSIGN_OR_RAISE(
+      auto filtered_ids,
+      vertices_collection->filter(property_name, filter,
+                                  &new_vertices_collection->valid_chunk_));
+  if (vertices_collection->is_filtered_) {
+    // The scan works on whole chunks, so drop ids that were not part of the
+    // input collection. Ids stay IdType throughout (no narrowing to int).
+    std::unordered_set<IdType> origin_set(
+        vertices_collection->filtered_ids_.begin(),
+        vertices_collection->filtered_ids_.end());
+    std::vector<IdType> intersection;
+    intersection.reserve(filtered_ids.size());
+    for (const IdType id : filtered_ids) {
+      // erase() doubles as the membership test and keeps the result unique.
+      if (origin_set.erase(id) != 0) {
+        intersection.push_back(id);
+      }
+    }
+    filtered_ids.swap(intersection);
+  }
+  new_vertices_collection->filtered_ids_.swap(filtered_ids);
+  new_vertices_collection->is_filtered_ = true;
+  return new_vertices_collection;
+}
+
template <typename T>
Result<T> Vertex::property(const std::string& property) const {
if constexpr (std::is_final<T>::value) {
diff --git a/cpp/src/graphar/high-level/graph_reader.h
b/cpp/src/graphar/high-level/graph_reader.h
index 19c8f716..31a64ff0 100644
--- a/cpp/src/graphar/high-level/graph_reader.h
+++ b/cpp/src/graphar/high-level/graph_reader.h
@@ -382,6 +382,10 @@ class VerticesCollection {
Result<std::vector<IdType>> filter_by_acero(
std::vector<std::string> filter_labels) const;
+ /**
+ * @brief Collect the ids of the vertices that pass a property filter
+ *
+ * @param property_name The property used to locate the property group to
+ * read.
+ * @param filter_expression The filter expression applied while reading the
+ * property chunks.
+ * @param new_valid_chunk Optional output vector that can receive the indices
+ * of the chunks containing at least one match; defaults to nullptr.
+ * @return The vertex ids (values of the vertex index column) of the rows
+ * that pass the filter.
+ */
+ Result<std::vector<IdType>> filter(
+ std::string property_name, std::shared_ptr<Expression> filter_expression,
+ std::vector<IdType>* new_valid_chunk = nullptr);
+
/**
* @brief Query vertices with a specific label
*
@@ -431,6 +435,14 @@ class VerticesCollection {
const std::vector<std::string>& filter_labels,
const std::shared_ptr<GraphInfo>& graph_info, const std::string& type);
+ /**
+ * @brief Query vertices whose property satisfies a filter
+ *
+ * @param property_name The name of the property to filter on.
+ * @param filter The filter expression to apply.
+ * @param graph_info The graph info providing the prefix and vertex info.
+ * @param type The vertex type to query.
+ * @return A new VerticesCollection built from the filter result.
+ */
+ static Result<std::shared_ptr<VerticesCollection>> verticesWithProperty(
+ const std::string property_name, const graphar::util::Filter filter,
+ const std::shared_ptr<GraphInfo>& graph_info, const std::string& type);
+
+ /**
+ * @brief Query vertices whose property satisfies a filter within a given
+ * collection
+ *
+ * @param property_name The name of the property to filter on.
+ * @param filter The filter expression to apply.
+ * @param vertices_collection The collection to filter; a new collection is
+ * returned.
+ * @return A new VerticesCollection built from the filter result.
+ */
+ static Result<std::shared_ptr<VerticesCollection>> verticesWithProperty(
+ const std::string property_name, const graphar::util::Filter filter,
+ const std::shared_ptr<VerticesCollection>& vertices_collection);
+
/**
* @brief Query vertices with multiple labels within a given collection
*
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]