This is an automated email from the ASF dual-hosted git repository. maedhroz pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
commit 0bbddf610bb7681c69a0fc778f0cf17d232e627f Merge: 4454ab8699 23ec1c8a3f Author: Caleb Rackliffe <[email protected]> AuthorDate: Tue Mar 17 15:28:56 2026 -0500 Merge branch 'cassandra-5.0' into trunk * cassandra-5.0: Refactor SAI ANN query execution to use score ordered iterators for correctness and speed CHANGES.txt | 1 + .../config/CassandraRelevantProperties.java | 18 +- .../CellSourceIdentifier.java} | 21 +- .../cassandra/db/SinglePartitionReadCommand.java | 54 ++- .../org/apache/cassandra/db/lifecycle/Tracker.java | 6 +- .../org/apache/cassandra/db/memtable/Memtable.java | 3 +- .../apache/cassandra/index/sai/QueryContext.java | 8 +- .../index/sai/StorageAttachedIndexGroup.java | 5 + .../cassandra/index/sai/VectorQueryContext.java | 196 --------- .../cassandra/index/sai/disk/EmptyIndex.java | 121 ++++++ .../cassandra/index/sai/disk/PrimaryKeyMap.java | 7 + .../cassandra/index/sai/disk/SSTableIndex.java | 9 +- .../index/sai/disk/format/IndexDescriptor.java | 5 +- .../index/sai/disk/v1/SkinnyPrimaryKeyMap.java | 17 +- .../index/sai/disk/v1/V1SSTableIndex.java | 24 +- .../index/sai/disk/v1/WidePrimaryKeyMap.java | 9 +- .../sai/disk/v1/postings/VectorPostingList.java | 76 ---- .../sai/disk/v1/segment/IndexSegmentSearcher.java | 21 +- .../index/sai/disk/v1/segment/Segment.java | 26 +- .../index/sai/disk/v1/segment/SegmentBuilder.java | 2 +- .../index/sai/disk/v1/segment/SegmentOrdering.java | 22 +- .../v1/segment/VectorIndexSegmentSearcher.java | 291 +++++++------ .../v1/vector/AutoResumingNodeScoreIterator.java | 162 ++++++++ .../disk/v1/vector/BruteForceRowIdIterator.java | 124 ++++++ .../index/sai/disk/v1/vector/DiskAnn.java | 123 +++--- .../v1/vector/NeighborQueueRowIdIterator.java} | 28 +- .../vector/NodeScoreToRowIdWithScoreIterator.java | 82 ++++ .../index/sai/disk/v1/vector/OnHeapGraph.java | 81 ++-- .../index/sai/disk/v1/vector/OptimizeFor.java | 2 +- .../sai/disk/v1/vector/PrimaryKeyWithScore.java | 81 ++++ 
.../vector/RowIdToPrimaryKeyWithScoreIterator.java | 69 ++++ .../index/sai/disk/v1/vector/RowIdWithScore.java | 48 +++ .../disk/v1/vector/SegmentRowIdOrdinalPairs.java | 131 ++++++ .../index/sai/iterators/KeyRangeListIterator.java | 67 --- .../sai/iterators/KeyRangeOrderingIterator.java | 94 ----- .../sai/iterators/PriorityQueueIterator.java} | 29 +- .../cassandra/index/sai/memory/MemtableIndex.java | 27 +- .../index/sai/memory/MemtableIndexManager.java | 22 +- .../index/sai/memory/MemtableOrdering.java | 31 +- .../index/sai/memory/TrieMemoryIndex.java | 15 + .../index/sai/memory/VectorMemoryIndex.java | 191 +++++---- .../apache/cassandra/index/sai/plan/Operation.java | 28 +- .../cassandra/index/sai/plan/QueryController.java | 226 +++++----- ...ryMaterializesTooManyPrimaryKeysException.java} | 13 +- .../cassandra/index/sai/plan/QueryViewBuilder.java | 19 +- .../sai/plan/StorageAttachedIndexQueryPlan.java | 2 +- .../sai/plan/StorageAttachedIndexSearcher.java | 437 +++++++++++++++----- .../index/sai/plan/VectorTopKProcessor.java | 49 ++- .../cassandra/index/sai/utils/CellWithSource.java | 273 ++++++++++++ .../utils/MergePrimaryKeyWithScoreIterator.java | 70 ++++ .../cassandra/index/sai/utils/RowWithSource.java | 400 ++++++++++++++++++ .../cassandra/index/sai/view/IndexViewManager.java | 21 +- .../org/apache/cassandra/index/sai/view/View.java | 4 +- .../sai/virtual/SSTableIndexesSystemView.java | 6 + .../org/apache/cassandra/index/sasi/SASIIndex.java | 2 +- .../org/apache/cassandra/io/sstable/SSTable.java | 5 + .../org/apache/cassandra/io/sstable/SSTableId.java | 3 +- .../MemtableSwitchedNotification.java | 8 +- test/unit/org/apache/cassandra/cql3/CQLTester.java | 36 ++ .../apache/cassandra/db/lifecycle/TrackerTest.java | 6 +- .../org/apache/cassandra/index/sai/SAITester.java | 10 +- .../index/sai/cql/StorageAttachedIndexDDLTest.java | 8 +- .../index/sai/cql/VectorSiftSmallTest.java | 143 ++++++- .../cassandra/index/sai/cql/VectorTester.java | 39 +- 
.../cassandra/index/sai/cql/VectorTypeTest.java | 45 ++ .../index/sai/cql/VectorUpdateDeleteTest.java | 456 +++++++++++++++++++-- .../sai/disk/v1/InvertedIndexSearcherTest.java | 8 + .../v1/bbtree/BlockBalancedTreeIndexBuilder.java | 9 +- .../index/sai/functional/FlushingTest.java | 4 +- .../index/sai/functional/GroupComponentsTest.java | 3 +- .../index/sai/memory/VectorMemoryIndexTest.java | 56 ++- 71 files changed, 3577 insertions(+), 1161 deletions(-) diff --cc CHANGES.txt index f9c6004d36,14d1a075bd..c1c386495f --- a/CHANGES.txt +++ b/CHANGES.txt @@@ -1,316 -1,11 +1,317 @@@ -5.0.7 +5.1 + * Improve performance when calculating settled placements during range movements (CASSANDRA-21144) + * Make shadow gossip round parameters configurable for testing (CASSANDRA-21149) + * Avoid potential gossip thread deadlock during decommission (CASSANDRA-21143) + * Improve construction of consensus groups for range movements (CASSANDRA-21142) + * Support compaction_read_disk_access_mode for cursor-based compaction (CASSANDRA-21147) + * Allow value/element indexing on frozen collections in SAI (CASSANDRA-18492) + * Add tool to offline dump cluster metadata and the log (CASSANDRA-21129) + * Send client warnings when writing to a large partition (CASSANDRA-17258) + * Harden the possible range of values for max dictionary size and max total sample size for dictionary training (CASSANDRA-21194) + * Implement a guardrail ensuring that minimum training frequency parameter is provided in ZstdDictionaryCompressor (CASSANDRA-21192) + * Replace manual referencing with ColumnFamilyStore.selectAndReference when training a dictionary (CASSANDRA-21188) + * Forbid nodes upgrading to a version which cannot read existing log entries (CASSANDRA-21174) + * Introduce a check for minimum time to pass to train or import a compression dictionary from the last one (CASSANDRA-21179) + * Allow overriding compaction strategy parameters during startup (CASSANDRA-21169) + * Introduce created_at column to 
system_distributed.compression_dictionaries (CASSANDRA-21178) + * Be able to detect and remove orphaned compression dictionaries (CASSANDRA-21157) + * Fix BigTableVerifier to only read a data file during extended verification (CASSANDRA-21150) + * Reduce memory allocation during transformation of BatchStatement to Mutation (CASSANDRA-21141) + * Direct I/O support for compaction reads (CASSANDRA-19987) + * Support custom StartupCheck implementations via SPI (CASSANDRA-21093) + * Make sstableexpiredblockers support human-readable output with SSTable sizes (CASSANDRA-20448) + * Enhance nodetool compactionhistory to report more compaction properties (CASSANDRA-20081) + * Fix initial auto-repairs skipped by too soon check (CASSANDRA-21115) + * Add configuration to disk usage guardrails to stop writes across all replicas of a keyspace when any node replicating that keyspace exceeds the disk usage failure threshold. (CASSANDRA-21024) + * BETWEEN where token(Y) > token(Z) returns wrong answer (CASSANDRA-20154) + * Optimize memtable flush logic (CASSANDRA-21083) + * No need to evict already prepared statements, as it creates a race condition between multiple threads (CASSANDRA-17401) + * Include Level information for UnifiedCompactionStrategy in nodetool tablestats output (CASSANDRA-20820) + * Support low-overhead async profiling (CASSANDRA-20854) + * Minor perf optimizations around memtable put logic (CASSANDRA-21088) + * When level compaction validates its table properties, it used the wrong default value for sstable_size_in_mb which allowed properties that would later be rejected at runtime (CASSANDRA-20570) + * Fix off-by-one bug in exponential backoff for repair retry config (CASSANDRA-21102) + * Move training parameters for Zstd dictionary compression to CQL (CASSANDRA-21078) + * Add configuration for sorted imports in source files (CASSANDRA-17925) + * Change the eager reference counting of compression dictionaries to lazy (CASSANDRA-21074) + * Add cursor based 
optimized compaction path (CASSANDRA-20918) + * Ensure peers with LEFT status are expired from gossip state (CASSANDRA-21035) + * Optimize UTF8Validator.validate for ASCII prefixed Strings (CASSANDRA-21075) + * Switch LatencyMetrics to use ThreadLocalTimer/ThreadLocalCounter (CASSANDRA-21080) + * Accord: write rejections would be returned to users as server errors rather than INVALID and TxnReferenceOperation didn't handle all collections properly (CASSANDRA-21061) + * Use byte[] directly in QueryOptions instead of ByteBuffer and convert them to ArrayCell instead of BufferCell to reduce allocations (CASSANDRA-20166) + * Log queries scanning too many SSTables per read (CASSANDRA-21048) + * Extend nodetool verify to (optionally) validate SAI files (CASSANDRA-20949) + * Fix CompressionDictionary being closed while still in use (CASSANDRA-21047) + * When updating a multi cell collection element, if the update is rejected then the shared Row.Builder is not freed causing all future mutations to be rejected (CASSANDRA-21055) + * Schema annotations escape validation on CREATE and ALTER DDL statements (CASSANDRA-21046) + * Calculate once and cache the result of ModificationStatement#requiresRead as a perf optimization (CASSANDRA-21040) + * Update system schema tables with new distributed keyspace on upgrade (CASSANDRA-20872) + * Fix issue when running cms reconfiguration with paxos repair disabled (CASSANDRA-20869) + * Added additional parameter to JVM shutdown to allow for logs to be properly shutdown (CASSANDRA-20978) + * Improve isGossipOnlyMember and location lookup performance (CASSANDRA-21039) + * Refactor the way we check if a transformation is allowed to be committed during upgrades (CASSANDRA-21043) + * Improve debug around paused and disabled compaction (CASSANDRA-20131,CASSANDRA-19728) + * DiskUsageBroadcaster does not update usageInfo on node replacement (CASSANDRA-21033) + * Reject PrepareJoin if tokens are already assigned (CASSANDRA-21006) + * Don't update 
registration status if node state for decommissioned peer is found with the same address (CASSANDRA-21005) + * Avoid NPE when meta keyspace placements are empty before CMS is initialized (CASSANDRA-21004) + * Gossip entries for hibernating non-members don't block truncate (CASSANDRA-21003) + * Retry without time limit calculates wait time incorrectly (CASSANDRA-21002) + * Don't submit AlterSchemaStatements which produce no effect locally to the CMS (CASSANDRA-21001) + * Avoid iterating all prepared statements when getting PreparedStatementsCacheSize metric (CASSANDRA-21038) + * Reduce performance impact of TableMetadataRef.get and KeyspaceMetadataRef.get (CASSANDRA-20465) + * Improve CMS initialization (CASSANDRA-21036) + * Introducing comments and security labels for schema elements (CASSANDRA-20943) + * Extend nodetool tablestats for dictionary memory usage (CASSANDRA-20940) + * Introduce separate GCInspector thresholds for concurrent GC events (CASSANDRA-20980) + * Reduce contention in MemtableAllocator.allocate (CASSANDRA-20226) + * Add export, list, import sub-commands for nodetool compressiondictionary (CASSANDRA-20941) + * Add support in the binary protocol to allow transactions to have multiple conditions (CASSANDRA-20883) + * Enable CQLSSTableWriter to create SSTables compressed with a dictionary (CASSANDRA-20938) + * Support ZSTD dictionary compression (CASSANDRA-17021) + * Fix ExceptionsTable when stacktrace has zero elements (CASSANDRA-20992) + * Replace blocking wait with non-blocking delay in paxos repair (CASSANDRA-20983) + * Implementation of CEP-55 - Generation of role names (CASSANDRA-20897) + * Add cqlsh autocompletion for the identity mapping feature (CASSANDRA-20021) + * Add DDL Guardrail enabling administrators to disallow creation/modification of keyspaces with durable_writes = false (CASSANDRA-20913) + * Optimize Counter, Meter and Histogram metrics using thread local counters (CASSANDRA-20250) + * Update snakeyaml to 2.4 (CASSANDRA-20928) + 
* Update Netty to 4.1.125.Final (CASSANDRA-20925) + * Expose uncaught exceptions in system_views.uncaught_exceptions table (CASSANDRA-20858) + * Improved observability in AutoRepair to report both expected vs. actual repair bytes and expected vs. actual keyspaces (CASSANDRA-20581) + * Execution of CreateTriggerStatement should not rely on external state (CASSANDRA-20287) + * Support LIKE expressions in filtering queries (CASSANDRA-17198) + * Make legacy index rebuilds safe on Gossip -> TCM upgrades (CASSANDRA-20887) + * Minor improvements and hardening for IndexHints (CASSANDRA-20888) + * Stop repair scheduler if two major versions are detected (CASSANDRA-20048) + * Optimize audit logic for batch operations especially when audit is not enabled for DML (CASSANDRA-20885) + * Implement nodetool history (CASSANDRA-20851) + * Expose StorageService.dropPreparedStatements via JMX (CASSANDRA-20870) + * Expose Metric for Prepared Statement Cache Size (in bytes) (CASSANDRA-20864) + * Add support for BEGIN TRANSACTION to allow mutations that touch multiple partitions (CASSANDRA-20857) + * AutoRepair: Safeguard Full repair against disk protection (CASSANDRA-20045) + * BEGIN TRANSACTION crashes if a mutation touches multiple rows (CASSANDRA-20844) + * Fix version range check in MessagingService.getVersionOrdinal (CASSANDRA-20842) + * Allow custom constraints to be loaded via SPI (CASSANDRA-20824) + * Optimize DataPlacement lookup by ReplicationParams (CASSANDRA-20804) + * Fix ShortPaxosSimulationTest and AccordSimulationRunner do not execute from the cli (CASSANDRA-20805) + * Allow overriding arbitrary settings via environment variables (CASSANDRA-20749) + * Optimize MessagingService.getVersionOrdinal (CASSANDRA-20816) + * Optimize TrieMemtable#getFlushSet (CASSANDRA-20760) + * Support manual secondary index selection at the CQL level (CASSANDRA-18112) + * When regular CQL mutations run on Accord use the txn timestamp rather than server timestamp (CASSANDRA-20744) + * When 
using BEGIN TRANSACTION if a complex mutation exists in the same statement as one that uses a reference, then the complex delete is dropped (CASSANDRA-20788) + * Migrate all nodetool commands from airline to picocli (CASSANDRA-17445) + * Journal.TopologyUpdate should not store the local topology as it can be inferred from the global one (CASSANDRA-20785) + * Accord: Topology serializer has a lot of repeated data, can dedup to shrink the cost (CASSANDRA-20715) + * Stream individual files in their own transactions and hand over ownership to a parent transaction on completion (CASSANDRA-20728) + * Limit the number of held heap dumps to not consume disk space excessively (CASSANDRA-20457) + * Accord: BEGIN TRANSACTIONs IF condition logic does not properly support meaningless emptiness and null values (CASSANDRA-20667) + * Accord: startup race condition where accord journal tries to access the 2i index before it's ready (CASSANDRA-20686) + * Adopt Unsafe::invokeCleaner for Direct ByteBuffer cleaning (CASSANDRA-20677) + * Add additional metrics around hints (CASSANDRA-20499) + * Support for add and replace in IntervalTree (CASSANDRA-20513) + * Enable single_sstable_uplevel by default for LCS (CASSANDRA-18509) + * Introduce NativeAccessor to avoid new ByteBuffer allocation on flush for each NativeCell (CASSANDRA-20173) + * Migrate sstableloader code to its own tools directory and artifact (CASSANDRA-20328) + * Stop AutoRepair monitoring thread upon Cassandra shutdown (CASSANDRA-20623) + * Avoid duplicate hardlink error upon forceful taking of ephemeral snapshots during repair (CASSANDRA-20490) + * When a custom disk error handler fails to initiate, fail the startup of a node instead of using the no-op handler (CASSANDRA-20614) + * Rewrite constraint framework to remove column specification from constraint definition, introduce SQL-like NOT NULL (CASSANDRA-20563) + * Fix a bug in AutoRepair duration metric calculation if schedule finishes quickly (CASSANDRA-20622) + * Fix 
AutoRepair flaky InJvm dtest (CASSANDRA-20620) + * Increasing default for auto_repair.sstable_upper_threshold considering large Cassandra tables & revert three lines removed from CHANGES.txt due to a merge mistake (CASSANDRA-20586) + * Fix token restrictions with MIN_TOKEN (CASSANDRA-20557) + * Upgrade logback version to 1.5.18 and slf4j dependencies to 2.0.17 (CASSANDRA-20429) + * Switch memtable-related off-heap objects to Native Endian and Memory to Little Endian (CASSANDRA-20190) + * Change SSTableSimpleScanner to use SSTableReader#openDataReaderForScan (CASSANDRA-20538) + * Automated Repair Inside Cassandra [CEP-37] (CASSANDRA-19918) + * Implement appender of slow queries to system_views.slow_queries table (CASSANDRA-13001) + * Add autocompletion in CQLSH for built-in functions (CASSANDRA-19631) + * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) + * General Purpose Transactions (Accord) [CEP-15] (CASSANDRA-17092) + * Improve performance when getting writePlacementsAllSettled from ClusterMetadata (CASSANDRA-20526) + * Add nodetool command to dump the contents of the system_views.{cluster_metadata_log, cluster_metadata_directory} tables (CASSANDRA-20525) + * Fix TreeMap race in CollectionVirtualTableAdapter causing us to lose rows in the virtual table (CASSANDRA-20524) + * Improve metadata log catch up with inter-DC mutation forwarding (CASSANDRA-20523) + * Support topology-safe changes to Datacenter & Rack for live nodes (CASSANDRA-20528) + * Add SSTableIntervalTree latency metric (CASSANDRA-20502) + * Ignore repetitions of semicolon in CQLSH (CASSANDRA-19956) + * Avoid NPE during cms initialization abort (CASSANDRA-20527) + * Avoid failing queries when epoch changes and replica goes up/down (CASSANDRA-20489) + * Split out truncation record lock (CASSANDRA-20480) + * Throw new IndexBuildInProgressException when queries fail during index build, instead of IndexNotAvailableException (CASSANDRA-20402) + * Fix 
Paxos repair interrupts running transactions (CASSANDRA-20469) + * Various fixes in constraint framework (CASSANDRA-20481) + * Add support in CAS for -= on numeric types, and fixed improper handling of empty bytes which lead to NPE (CASSANDRA-20477) + * Do not fail to start a node with materialized views after they are turned off in config (CASSANDRA-20452) + * Fix nodetool gcstats output, support human-readable units and more output formats (CASSANDRA-19022) + * Various gossip to TCM upgrade fixes (CASSANDRA-20483) + * Add nodetool command to abort failed nodetool cms initialize (CASSANDRA-20482) + * Repair Paxos for the distributed metadata log when CMS membership changes (CASSANDRA-20467) + * Reintroduce CASSANDRA-17411 in trunk (CASSANDRA-19346) + * Add min/max/mean/percentiles to timer metrics vtable (CASSANDRA-20466) + * Add support for time, date, timestamp types in scalar constraint (CASSANDRA-20274) + * Add regular expression constraint (CASSANDRA-20275) + * Improve constraints autocompletion (CASSANDRA-20341) + * Add JVM version and Cassandra build date to nodetool version -v (CASSANDRA-19721) + * Move all disk error logic to DiskErrorsHandler to enable pluggability (CASSANDRA-20363) + * Fix marking an SSTable as suspected and BufferPool leakage in case of a corrupted SSTable read during a compaction (CASSANDRA-20396) + * Add missed documentation for CREATE TABLE LIKE (CASSANDRA-20401) + * Add OCTET_LENGTH constraint (CASSANDRA-20340) + * Reduce memory allocations in miscellaneous places along the hot write path (CASSANDRA-20167) + * Provide keystore_password_file and truststore_password_file options to read credentials from a file (CASSANDRA-13428) + * Unregistering a node should also remove it from tokenMap if it is there and recalculate the placements (CASSANDRA-20346) + * Fix PartitionUpdate.isEmpty deserialization issue to avoid potential EOFException (CASSANDRA-20345) + * Avoid adding LEFT nodes to tokenMap on upgrade from gossip (CASSANDRA-20344) + 
* Allow empty placements when deserializing cluster metadata (CASSANDRA-20343) + * Reduce heap pressure when initializing CMS (CASSANDRA-20267) + * Paxos Repair: NoSuchElementException on DistributedSchema.getKeyspaceMetadata (CASSANDRA-20320) + * Improve performance of DistributedSchema.validate for large schemas (CASSANDRA-20360) + * Add JSON constraint (CASSANDRA-20273) + * Prevent invalid constraint combinations (CASSANDRA-20330) + * Support CREATE TABLE LIKE WITH INDEXES (CASSANDRA-19965) + * Invalidate relevant prepared statements on every change to TableMetadata (CASSANDRA-20318) + * Add per type max size guardrails (CASSANDRA-19677) + * Make it possible to abort all kinds of multi step operations (CASSANDRA-20217) + * Do not leak non-Java exceptions when calling snapshot operations via JMX (CASSANDRA-20335) + * Implement NOT_NULL constraint (CASSANDRA-20276) + * Improve error messages for constraints (CASSANDRA-20266) + * Add system_views.partition_key_statistics for querying SSTable metadata (CASSANDRA-20161) + * CEP-42 - Add Constraints Framework (CASSANDRA-19947) + * Add table metric PurgeableTombstoneScannedHistogram and a tracing event for scanned purgeable tombstones (CASSANDRA-20132) + * Make sure we can parse the expanded CQL before writing it to the log or sending it to replicas (CASSANDRA-20218) + * Add format_bytes and format_time functions (CASSANDRA-19546) + * Fix error when trying to assign a tuple to target type not being a tuple (CASSANDRA-20237) + * Fail CREATE TABLE LIKE statement if UDTs in target keyspace do not exist or they have different structure from ones in source keyspace (CASSANDRA-19966) + * Support octet_length and length functions (CASSANDRA-20102) + * Make JsonUtils serialize Instant always with the same format (CASSANDRA-20209) + * Port Harry v2 to trunk (CASSANDRA-20200) + * Enable filtering of snapshots on keyspace, table and snapshot name in nodetool listsnapshots (CASSANDRA-20151) + * Create manifest upon loading where 
it does not exist or enrich it (CASSANDRA-20150) + * Propagate true size of snapshot in SnapshotDetailsTabularData to not call JMX twice in nodetool listsnapshots (CASSANDRA-20149) + * Implementation of CEP-43 - copying a table via CQL by CREATE TABLE LIKE (CASSANDRA-19964) + * Periodically disconnect roles that are revoked or have LOGIN=FALSE set (CASSANDRA-19385) + * AST library for CQL-based fuzz tests (CASSANDRA-20198) + * Support audit logging for JMX operations (CASSANDRA-20128) + * Enable sorting of nodetool status output (CASSANDRA-20104) + * Support downgrading after CMS is initialized (CASSANDRA-20145) + * Deprecate IEndpointSnitch (CASSANDRA-19488) + * Check presence of a snapshot in a case-insensitive manner on macOS platform to prevent hardlinking failures (CASSANDRA-20146) + * Enable JMX server configuration to be in cassandra.yaml (CASSANDRA-11695) + * Parallelized UCS compactions (CASSANDRA-18802) + * Avoid prepared statement invalidation race when committing schema changes (CASSANDRA-20116) + * Restore optimization in MultiCBuilder around building one clustering (CASSANDRA-20129) + * Consolidate all snapshot management to SnapshotManager and introduce SnapshotManagerMBean (CASSANDRA-18111) + * Fix RequestFailureReason constants codes (CASSANDRA-20126) + * Introduce SSTableSimpleScanner for compaction (CASSANDRA-20092) + * Include column drop timestamp in alter table transformation (CASSANDRA-18961) + * Make JMX SSL configurable in cassandra.yaml (CASSANDRA-18508) + * Fix cqlsh CAPTURE command to save query results without trace details when TRACING is ON (CASSANDRA-19105) + * Optionally prevent tombstone purging during repair (CASSANDRA-20071) + * Add post-filtering support for the IN operator in SAI queries (CASSANDRA-20025) + * Don’t finish ongoing decommission and move operations during startup (CASSANDRA-20040) + * Nodetool reconfigure cms has correct return code when streaming fails (CASSANDRA-19972) + * Reintroduce RestrictionSet#iterator() 
optimization around multi-column restrictions (CASSANDRA-20034) + * Explicitly localize strings to Locale.US for internal implementation (CASSANDRA-19953) + * Add -H option for human-friendly output in nodetool compactionhistory (CASSANDRA-20015) + * Fix type check for referenced duration type for nested types (CASSANDRA-19890) + * In simulation tests, correctly set the tokens of replacement nodes (CASSANDRA-19997) + * During TCM upgrade, retain all properties of existing system tables (CASSANDRA-19992) + * Properly cancel in-flight futures and reject requests in EpochAwareDebounce during shutdown (CASSANDRA-19848) + * Provide clearer exception message on failing commitlog_disk_access_mode combinations (CASSANDRA-19812) + * Add total space used for a keyspace to nodetool tablestats (CASSANDRA-19671) + * Ensure Relation#toRestriction() handles ReversedType properly (CASSANDRA-19950) + * Add JSON and YAML output option to nodetool gcstats (CASSANDRA-19771) + * Introduce metadata serialization version V4 (CASSANDRA-19970) + * Allow CMS reconfiguration to work around DOWN nodes (CASSANDRA-19943) + * Make TableParams.Serializer set allowAutoSnapshots and incrementalBackups (CASSANDRA-19954) + * Make sstabledump possible to show tombstones only (CASSANDRA-19939) + * Ensure that RFP queries potentially stale replicas even with only key columns in the row filter (CASSANDRA-19938) + * Allow nodes to change IP address while upgrading to TCM (CASSANDRA-19921) + * Retain existing keyspace params on system tables after upgrade (CASSANDRA-19916) + * Deprecate use of gossip state for paxos electorate verification (CASSANDRA-19904) + * Update dtest-api to 0.0.17 to fix jvm17 crash in jvm-dtests (CASSANDRA-19239) + * Add resource leak test and Update Netty to 4.1.113.Final to fix leak (CASSANDRA-19783) + * Fix incorrect nodetool suggestion when gossip mode is running (CASSANDRA-19905) + * SAI support for BETWEEN operator (CASSANDRA-19688) + * Fix BETWEEN filtering for reversed 
clustering columns (CASSANDRA-19878) + * Retry if node leaves CMS while committing a transformation (CASSANDRA-19872) + * Add support for NOT operators in WHERE clauses. Fixed Three Valued Logic (CASSANDRA-18584) + * Allow getendpoints for system tables and make sure getNaturalReplicas work for MetaStrategy (CASSANDRA-19846) + * On upgrade, handle pre-existing tables with unexpected table ids (CASSANDRA-19845) + * Reconfigure CMS before assassinate (CASSANDRA-19768) + * Warn about unqualified prepared statement only if it is select or modification statement (CASSANDRA-18322) + * Update legacy peers tables during node replacement (CASSANDRA-19782) + * Refactor ColumnCondition (CASSANDRA-19620) + * Allow configuring log format for Audit Logs (CASSANDRA-19792) + * Support for noboolean rpm (centos7 compatible) packages removed (CASSANDRA-19787) + * Allow threads waiting for the metadata log follower to be interrupted (CASSANDRA-19761) + * Support dictionary lookup for CassandraPasswordValidator (CASSANDRA-19762) + * Disallow denylisting keys in system_cluster_metadata (CASSANDRA-19713) + * Fix gossip status after replacement (CASSANDRA-19712) + * Ignore repair requests for system_cluster_metadata (CASSANDRA-19711) + * Avoid ClassCastException when verifying tables with reversed partitioner (CASSANDRA-19710) + * Always repair the full range when repairing system_cluster_metadata (CASSANDRA-19709) + * Use table-specific partitioners during Paxos repair (CASSANDRA-19714) + * Expose current compaction throughput in nodetool (CASSANDRA-13890) + * CEP-24 Password validation / generation (CASSANDRA-17457) + * Reconfigure CMS after replacement, bootstrap and move operations (CASSANDRA-19705) + * Support querying LocalStrategy tables with any partitioner (CASSANDRA-19692) + * Relax slow_query_log_timeout for MultiNodeSAITest (CASSANDRA-19693) + * Audit Log entries are missing identity for mTLS connections (CASSANDRA-19669) + * Add support for the BETWEEN operator in WHERE 
clauses (CASSANDRA-19604) + * Replace Stream iteration with for-loop for SimpleRestriction::bindAndGetClusteringElements (CASSANDRA-19679) + * Consolidate logging on trace level (CASSANDRA-19632) + * Expand DDL statements on coordinator before submission to the CMS (CASSANDRA-19592) + * Fix number of arguments of String.format() in various classes (CASSANDRA-19645) + * Remove unused fields from config (CASSANDRA-19599) + * Refactor Relation and Restriction hierarchies (CASSANDRA-19341) + * Raise priority of TCM internode messages during critical operations (CASSANDRA-19517) + * Add nodetool command to unregister LEFT nodes (CASSANDRA-19581) + * Add cluster metadata id to gossip syn messages (CASSANDRA-19613) + * Reduce heap usage occupied by the metrics (CASSANDRA-19567) + * Improve handling of transient replicas during range movements (CASSANDRA-19344) + * Enable debounced internode log requests to be cancelled at shutdown (CASSANDRA-19514) + * Correctly set last modified epoch when combining multistep operations into a single step (CASSANDRA-19538) + * Add new TriggersPolicy configuration to allow operators to disable triggers (CASSANDRA-19532) + * Use Transformation.Kind.id in local and distributed log tables (CASSANDRA-19516) + * Remove period field from ClusterMetadata and metadata log tables (CASSANDRA-19482) + * Enrich system_views.pending_hints vtable with hints sizes (CASSANDRA-19486) + * Expose all dropwizard metrics in virtual tables (CASSANDRA-14572) + * Ensured that PropertyFileSnitchTest does not overwrite cassandra-topology.properties (CASSANDRA-19502) + * Add option for MutualTlsAuthenticator to restrict the certificate validity period (CASSANDRA-18951) + * Fix StorageService::constructRangeToEndpointMap for non-distributed keyspaces (CASSANDRA-19255) + * Group nodetool cms commands into single command group (CASSANDRA-19393) + * Register the measurements of the bootstrap process as Dropwizard metrics (CASSANDRA-19447) + * Add LIST SUPERUSERS CQL 
statement (CASSANDRA-19417) + * Modernize CQLSH datetime conversions (CASSANDRA-18879) + * Harry model and in-JVM tests for partition-restricted 2i queries (CASSANDRA-18275) + * Refactor cqlshmain global constants (CASSANDRA-19201) + * Remove native_transport_port_ssl (CASSANDRA-19397) + * Make nodetool reconfigurecms sync by default and add --cancel to be able to cancel ongoing reconfigurations (CASSANDRA-19216) + * Expose auth mode in system_views.clients, nodetool clientstats, metrics (CASSANDRA-19366) + * Remove sealed_periods and last_sealed_period tables (CASSANDRA-19189) + * Improve setup and initialisation of LocalLog/LogSpec (CASSANDRA-19271) + * Refactor structure of caching metrics and expose auth cache metrics via JMX (CASSANDRA-17062) + * Allow CQL client certificate authentication to work without sending an AUTHENTICATE request (CASSANDRA-18857) + * Extend nodetool tpstats and system_views.thread_pools with detailed pool parameters (CASSANDRA-19289) + * Remove dependency on Sigar in favor of OSHI (CASSANDRA-16565) + * Simplify the bind marker and Term logic (CASSANDRA-18813) + * Limit cassandra startup to supported JDKs, allow higher JDKs by setting CASSANDRA_JDK_UNSUPPORTED (CASSANDRA-18688) + * Standardize nodetool tablestats formatting of data units (CASSANDRA-19104) + * Make nodetool tablestats use number of significant digits for time and average values consistently (CASSANDRA-19015) + * Upgrade jackson to 2.15.3 and snakeyaml to 2.1 (CASSANDRA-18875) + * Transactional Cluster Metadata [CEP-21] (CASSANDRA-18330) + * Add ELAPSED command to cqlsh (CASSANDRA-18861) + * Add the ability to disable bulk loading of SSTables (CASSANDRA-18781) + * Clean up obsolete functions and simplify cql_version handling in cqlsh (CASSANDRA-18787) +Merged from 5.0: + * Refactor SAI ANN query execution to use score ordered iterators for correctness and speed (CASSANDRA-20086) - * Disallow binding an identity to a superuser when the user is a regular user 
(CASSANDRA-21219) * Fix ConcurrentModificationException in compaction garbagecollect (CASSANDRA-21065) * Dynamically skip sharding L0 when SAI Vector index present (CASSANDRA-19661) - * Optionally force IndexStatusManager to use the optimized index status format (CASSANDRA-21132) - * No need to evict already prepared statements, as it creates a race condition between multiple threads (CASSANDRA-17401) - * Upgrade logback version to 1.5.18 and slf4j dependencies to 2.0.17 (CASSANDRA-21137) + * Optionally force IndexStatusManager to use the optimized index status format (CASSANDRA-21132) * Automatically disable zero-copy streaming for legacy sstables with old bloom filter format (CASSANDRA-21092) * Fix CQLSSTableWriter serialization of vector of date and time (CASSANDRA-20979) * Correctly calculate default for FailureDetector max interval (CASSANDRA-21025) diff --cc src/java/org/apache/cassandra/index/sai/disk/format/IndexDescriptor.java index 4d94084cc3,52725ea843..1d9ee774de --- a/src/java/org/apache/cassandra/index/sai/disk/format/IndexDescriptor.java +++ b/src/java/org/apache/cassandra/index/sai/disk/format/IndexDescriptor.java @@@ -33,11 -30,12 +33,12 @@@ import org.slf4j.Logger import org.slf4j.LoggerFactory; import org.apache.cassandra.db.ClusteringComparator; -import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; +import org.apache.cassandra.db.lifecycle.ILifecycleTransaction; import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.index.sai.StorageAttachedIndex; import org.apache.cassandra.index.sai.IndexValidation; import org.apache.cassandra.index.sai.SSTableContext; +import org.apache.cassandra.index.sai.StorageAttachedIndex; + import org.apache.cassandra.index.sai.disk.EmptyIndex; import org.apache.cassandra.index.sai.disk.PerColumnIndexWriter; import org.apache.cassandra.index.sai.disk.PerSSTableIndexWriter; import org.apache.cassandra.index.sai.disk.PrimaryKeyMap; diff --cc 
src/java/org/apache/cassandra/index/sai/disk/v1/SkinnyPrimaryKeyMap.java index ab02c1c3cd,ffba96d259..844bfe0c73 --- a/src/java/org/apache/cassandra/index/sai/disk/v1/SkinnyPrimaryKeyMap.java +++ b/src/java/org/apache/cassandra/index/sai/disk/v1/SkinnyPrimaryKeyMap.java @@@ -32,9 -31,10 +32,10 @@@ import org.apache.cassandra.index.sai.d import org.apache.cassandra.index.sai.disk.v1.bitpack.BlockPackedReader; import org.apache.cassandra.index.sai.disk.v1.bitpack.MonotonicBlockPackedReader; import org.apache.cassandra.index.sai.disk.v1.bitpack.NumericValuesMeta; -import org.apache.cassandra.index.sai.disk.v1.keystore.KeyLookupMeta; import org.apache.cassandra.index.sai.disk.v1.keystore.KeyLookup; +import org.apache.cassandra.index.sai.disk.v1.keystore.KeyLookupMeta; import org.apache.cassandra.index.sai.utils.PrimaryKey; + import org.apache.cassandra.io.sstable.SSTableId; import org.apache.cassandra.io.util.FileHandle; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.utils.Throwables; diff --cc src/java/org/apache/cassandra/index/sai/disk/v1/WidePrimaryKeyMap.java index c6e7737cf0,28033e1547..8f4a3097dd --- a/src/java/org/apache/cassandra/index/sai/disk/v1/WidePrimaryKeyMap.java +++ b/src/java/org/apache/cassandra/index/sai/disk/v1/WidePrimaryKeyMap.java @@@ -32,9 -31,10 +32,10 @@@ import org.apache.cassandra.index.sai.d import org.apache.cassandra.index.sai.disk.format.IndexDescriptor; import org.apache.cassandra.index.sai.disk.v1.bitpack.BlockPackedReader; import org.apache.cassandra.index.sai.disk.v1.bitpack.NumericValuesMeta; -import org.apache.cassandra.index.sai.disk.v1.keystore.KeyLookupMeta; import org.apache.cassandra.index.sai.disk.v1.keystore.KeyLookup; +import org.apache.cassandra.index.sai.disk.v1.keystore.KeyLookupMeta; import org.apache.cassandra.index.sai.utils.PrimaryKey; + import org.apache.cassandra.io.sstable.SSTableId; import org.apache.cassandra.io.sstable.format.SSTableReader; import 
org.apache.cassandra.io.util.FileHandle; import org.apache.cassandra.io.util.FileUtils; diff --cc src/java/org/apache/cassandra/index/sai/disk/v1/segment/VectorIndexSegmentSearcher.java index 1875ec7a8b,82e76de7ae..32f95a1cce --- a/src/java/org/apache/cassandra/index/sai/disk/v1/segment/VectorIndexSegmentSearcher.java +++ b/src/java/org/apache/cassandra/index/sai/disk/v1/segment/VectorIndexSegmentSearcher.java @@@ -20,14 -20,11 +20,12 @@@ package org.apache.cassandra.index.sai. import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.List; + import java.util.function.IntConsumer; import java.util.stream.Collectors; - import javax.annotation.Nullable; - + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; - import com.google.common.base.Preconditions; + - import org.agrona.collections.IntArrayList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@@ -140,25 -151,28 +152,29 @@@ public class VectorIndexSegmentSearche minSSTableRowId = Math.max(minSSTableRowId, metadata.minSSTableRowId); maxSSTableRowId = min(maxSSTableRowId, metadata.maxSSTableRowId); - // If num of matches are not bigger than limit, skip ANN. - // (nRows should not include shadowed rows, but context doesn't break those out by segment, - // so we will live with the inaccuracy.) + // If num of matches are not bigger than limit, skip graph search and lazily sort by brute force. 
int nRows = Math.toIntExact(maxSSTableRowId - minSSTableRowId + 1); - int maxBruteForceRows = min(globalBruteForceRows, maxBruteForceRows(limit, nRows, graph.size())); + int maxBruteForceRows = maxBruteForceRows(limit, nRows, graph.size()); - logger.trace("Search range covers {} rows; max brute force rows is {} for sstable index with {} nodes, LIMIT {}", - nRows, maxBruteForceRows, graph.size(), limit); + if (logger.isTraceEnabled()) + logger.trace("Search range covers {} rows; max brute force rows is {} for sstable index with {} nodes, LIMIT {}", + nRows, maxBruteForceRows, graph.size(), limit); Tracing.trace("Search range covers {} rows; max brute force rows is {} for sstable index with {} nodes, LIMIT {}", nRows, maxBruteForceRows, graph.size(), limit); - if (nRows <= maxBruteForceRows) + boolean shouldBruteForce = FORCE_BRUTE_FORCE_ANN == null ? nRows <= maxBruteForceRows : FORCE_BRUTE_FORCE_ANN; + if (shouldBruteForce) { - IntArrayList postings = new IntArrayList(Math.toIntExact(nRows), -1); - for (long sstableRowId = minSSTableRowId; sstableRowId <= maxSSTableRowId; sstableRowId++) + SegmentRowIdOrdinalPairs segmentOrdinalPairs = new SegmentRowIdOrdinalPairs(Math.toIntExact(nRows)); + try (OnDiskOrdinalsMap.OrdinalsView ordinalsView = graph.getOrdinalsView()) { - if (context.shouldInclude(sstableRowId, primaryKeyMap)) - postings.addInt(metadata.toSegmentRowId(sstableRowId)); + for (long sstableRowId = minSSTableRowId; sstableRowId <= maxSSTableRowId; sstableRowId++) + { + int segmentRowId = metadata.toSegmentRowId(sstableRowId); + int ordinal = ordinalsView.getOrdinalForRowId(segmentRowId); + if (ordinal >= 0) + segmentOrdinalPairs.add(segmentRowId, ordinal); + } } - return new BitsOrPostingList(new IntArrayPostingList(postings.toIntArray())); + return orderByBruteForce(queryVector, segmentOrdinalPairs, limit, topK); } // create a bitset of ordinals corresponding to the rows in the given key range @@@ -272,13 -341,13 +343,14 @@@ private boolean 
shouldUseBruteForce(int topK, int limit, int numRows) { // if we have a small number of results then let TopK processor do exact NN computation - int maxBruteForceRows = min(globalBruteForceRows, maxBruteForceRows(topK, numRows, graph.size())); + int maxBruteForceRows = maxBruteForceRows(topK, numRows, graph.size()); - logger.trace("SAI materialized {} rows; max brute force rows is {} for sstable index with {} nodes, LIMIT {}", - numRows, maxBruteForceRows, graph.size(), limit); + if (logger.isTraceEnabled()) + logger.trace("SAI materialized {} rows; max brute force rows is {} for sstable index with {} nodes, LIMIT {}", + numRows, maxBruteForceRows, graph.size(), limit); Tracing.trace("SAI materialized {} rows; max brute force rows is {} for sstable index with {} nodes, LIMIT {}", numRows, maxBruteForceRows, graph.size(), limit); - return numRows <= maxBruteForceRows; + return FORCE_BRUTE_FORCE_ANN == null ? numRows <= maxBruteForceRows + : FORCE_BRUTE_FORCE_ANN; } private int maxBruteForceRows(int limit, int nPermittedOrdinals, int graphSize) diff --cc src/java/org/apache/cassandra/index/sai/disk/v1/vector/DiskAnn.java index 196802df43,4170371553..a0f3c9c6ab --- a/src/java/org/apache/cassandra/index/sai/disk/v1/vector/DiskAnn.java +++ b/src/java/org/apache/cassandra/index/sai/disk/v1/vector/DiskAnn.java @@@ -19,31 -19,26 +19,27 @@@ package org.apache.cassandra.index.sai.disk.v1.vector; import java.io.IOException; - import java.util.Arrays; - import java.util.Iterator; - import java.util.NoSuchElementException; - import java.util.PrimitiveIterator; - import java.util.stream.IntStream; + import java.util.function.IntConsumer; -import io.github.jbellis.jvector.disk.CachingGraphIndex; -import io.github.jbellis.jvector.disk.OnDiskGraphIndex; -import io.github.jbellis.jvector.graph.GraphIndex; -import io.github.jbellis.jvector.graph.GraphSearcher; -import io.github.jbellis.jvector.graph.NeighborSimilarity; -import io.github.jbellis.jvector.pq.CompressedVectors; -import 
io.github.jbellis.jvector.util.Bits; -import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import org.apache.cassandra.index.sai.disk.format.IndexComponent; import org.apache.cassandra.index.sai.disk.v1.IndexWriterConfig; import org.apache.cassandra.index.sai.disk.v1.PerColumnIndexFiles; - import org.apache.cassandra.index.sai.disk.v1.postings.VectorPostingList; import org.apache.cassandra.index.sai.disk.v1.segment.SegmentMetadata; + import org.apache.cassandra.io.sstable.SSTableId; import org.apache.cassandra.io.util.FileHandle; - import org.apache.cassandra.tracing.Tracing; + import org.apache.cassandra.io.util.FileUtils; + import org.apache.cassandra.utils.CloseableIterator; + import org.apache.cassandra.utils.Throwables; +import io.github.jbellis.jvector.disk.CachingGraphIndex; +import io.github.jbellis.jvector.disk.OnDiskGraphIndex; +import io.github.jbellis.jvector.graph.GraphIndex; +import io.github.jbellis.jvector.graph.GraphSearcher; +import io.github.jbellis.jvector.graph.NeighborSimilarity; - import io.github.jbellis.jvector.graph.SearchResult; - import io.github.jbellis.jvector.graph.SearchResult.NodeScore; +import io.github.jbellis.jvector.pq.CompressedVectors; +import io.github.jbellis.jvector.util.Bits; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; + public class DiskAnn implements AutoCloseable { private final FileHandle graphHandle; diff --cc src/java/org/apache/cassandra/index/sai/disk/v1/vector/OnHeapGraph.java index 369aac2fde,a823943a4e..108752bb44 --- a/src/java/org/apache/cassandra/index/sai/disk/v1/vector/OnHeapGraph.java +++ b/src/java/org/apache/cassandra/index/sai/disk/v1/vector/OnHeapGraph.java @@@ -32,13 -31,27 +31,15 @@@ import java.util.concurrent.atomic.Atom import java.util.function.Function; import java.util.stream.IntStream; +import org.apache.lucene.util.StringHelper; + import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.cliffc.high_scale_lib.NonBlockingHashMapLong; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; -import io.github.jbellis.jvector.disk.OnDiskGraphIndex; -import io.github.jbellis.jvector.graph.GraphIndex; -import io.github.jbellis.jvector.graph.GraphIndexBuilder; -import io.github.jbellis.jvector.graph.GraphSearcher; -import io.github.jbellis.jvector.graph.NeighborSimilarity; -import io.github.jbellis.jvector.graph.RandomAccessVectorValues; -import io.github.jbellis.jvector.graph.SearchResult; -import io.github.jbellis.jvector.pq.CompressedVectors; -import io.github.jbellis.jvector.pq.ProductQuantization; -import io.github.jbellis.jvector.util.Bits; -import io.github.jbellis.jvector.util.RamUsageEstimator; -import io.github.jbellis.jvector.vector.VectorEncoding; -import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.VectorType; + import org.apache.cassandra.db.memtable.Memtable; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.index.sai.disk.format.IndexComponent; import org.apache.cassandra.index.sai.disk.format.IndexDescriptor; @@@ -46,23 -60,10 +47,24 @@@ import org.apache.cassandra.index.sai.d import org.apache.cassandra.index.sai.disk.v1.IndexWriterConfig; import org.apache.cassandra.index.sai.disk.v1.SAICodecUtils; import org.apache.cassandra.index.sai.disk.v1.segment.SegmentMetadata; +import org.apache.cassandra.index.sai.utils.IndexIdentifier; import org.apache.cassandra.io.util.SequentialWriter; - import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.ByteBufferUtil; + import org.apache.cassandra.utils.CloseableIterator; -import org.apache.lucene.util.StringHelper; + +import io.github.jbellis.jvector.disk.OnDiskGraphIndex; +import io.github.jbellis.jvector.graph.GraphIndex; +import io.github.jbellis.jvector.graph.GraphIndexBuilder; +import io.github.jbellis.jvector.graph.GraphSearcher; +import 
io.github.jbellis.jvector.graph.NeighborSimilarity; +import io.github.jbellis.jvector.graph.RandomAccessVectorValues; +import io.github.jbellis.jvector.graph.SearchResult; +import io.github.jbellis.jvector.pq.CompressedVectors; +import io.github.jbellis.jvector.pq.ProductQuantization; +import io.github.jbellis.jvector.util.Bits; ++import io.github.jbellis.jvector.util.RamUsageEstimator; +import io.github.jbellis.jvector.vector.VectorEncoding; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; public class OnHeapGraph<T> { diff --cc src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java index d592c94209,4ebee9318b..ab24fff087 --- a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java +++ b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java @@@ -33,12 -33,14 +33,14 @@@ import org.apache.cassandra.dht.Abstrac import org.apache.cassandra.index.sai.QueryContext; import org.apache.cassandra.index.sai.StorageAttachedIndex; import org.apache.cassandra.index.sai.disk.format.IndexDescriptor; -import org.apache.cassandra.index.sai.utils.IndexIdentifier; import org.apache.cassandra.index.sai.disk.v1.segment.SegmentMetadata; + import org.apache.cassandra.index.sai.disk.v1.vector.PrimaryKeyWithScore; import org.apache.cassandra.index.sai.iterators.KeyRangeIterator; import org.apache.cassandra.index.sai.plan.Expression; +import org.apache.cassandra.index.sai.utils.IndexIdentifier; import org.apache.cassandra.index.sai.utils.PrimaryKey; import org.apache.cassandra.index.sai.utils.PrimaryKeys; + import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.bytecomparable.ByteComparable; diff --cc src/java/org/apache/cassandra/index/sai/memory/VectorMemoryIndex.java index 976c81d205,c9d5aa2889..b976b2038a --- a/src/java/org/apache/cassandra/index/sai/memory/VectorMemoryIndex.java +++ b/src/java/org/apache/cassandra/index/sai/memory/VectorMemoryIndex.java @@@ -39,14 
-44,14 +42,14 @@@ import org.apache.cassandra.db.memtable import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.index.sai.QueryContext; import org.apache.cassandra.index.sai.StorageAttachedIndex; - import org.apache.cassandra.index.sai.VectorQueryContext; import org.apache.cassandra.index.sai.disk.format.IndexDescriptor; -import org.apache.cassandra.index.sai.utils.IndexIdentifier; import org.apache.cassandra.index.sai.disk.v1.segment.SegmentMetadata; import org.apache.cassandra.index.sai.disk.v1.vector.OnHeapGraph; + import org.apache.cassandra.index.sai.disk.v1.vector.PrimaryKeyWithScore; import org.apache.cassandra.index.sai.iterators.KeyRangeIterator; - import org.apache.cassandra.index.sai.iterators.KeyRangeListIterator; + import org.apache.cassandra.index.sai.iterators.PriorityQueueIterator; import org.apache.cassandra.index.sai.plan.Expression; +import org.apache.cassandra.index.sai.utils.IndexIdentifier; import org.apache.cassandra.index.sai.utils.PrimaryKey; import org.apache.cassandra.index.sai.utils.PrimaryKeys; import org.apache.cassandra.index.sai.utils.RangeUtil; @@@ -54,8 -62,6 +60,10 @@@ import org.apache.cassandra.utils.Close import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.bytecomparable.ByteComparable; ++import io.github.jbellis.jvector.graph.SearchResult; +import io.github.jbellis.jvector.util.Bits; ++import io.github.jbellis.jvector.vector.VectorSimilarityFunction; + import static java.lang.Math.log; import static java.lang.Math.max; import static java.lang.Math.min; diff --cc src/java/org/apache/cassandra/index/sai/plan/QueryController.java index 49e2aeab6d,89682713ad..00147a54bc --- a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java +++ b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java @@@ -25,7 -23,7 +23,8 @@@ import java.util.Collection import java.util.Collections; import java.util.List; import java.util.NavigableSet; +import java.util.Set; + import 
java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nullable; @@@ -172,22 -198,31 +199,47 @@@ public class QueryControlle return partition.queryMemtableAndDisk(cfs, executionController); } + private static Runnable getIndexReleaser(Set<SSTableIndex> referencedIndexes) + { + return new Runnable() + { + boolean closed; + @Override + public void run() + { + if (closed) + return; + closed = true; + referencedIndexes.forEach(SSTableIndex::releaseQuietly); + } + }; + } + + /** + * Get an iterator over the row(s) for this primary key. Restrict the search to the specified view. Apply the + * {@link #SOURCE_TABLE_ROW_TRANSFORMER} so that resulting cells have the source memtable/sstable. Expect one row + * for a fully qualified primary key or all rows within a partition for a static primary key. + * + * @param key primary key to fetch from storage. + * @param executionController the executionController to use when querying storage + * @return an iterator of rows matching the query + */ + public UnfilteredRowIterator queryStorage(PrimaryKey key, ColumnFamilyStore.ViewFragment view, ReadExecutionController executionController) + { + if (key == null) + throw new IllegalArgumentException("non-null key required"); + + SinglePartitionReadCommand partition = SinglePartitionReadCommand.create(cfs.metadata(), + command.nowInSec(), + command.columnFilter(), + RowFilter.none(), + DataLimits.NONE, + key.partitionKey(), + makeFilter(List.of(key))); + + return partition.queryMemtableAndDisk(cfs, view, SOURCE_TABLE_ROW_TRANSFORMER, executionController); + } + /** * Build a {@link KeyRangeIterator.Builder} from the given list of {@link Expression}s. 
* <p> diff --cc src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java index cf5beb129b,7d16f33990..2e8b7d8f71 --- a/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java +++ b/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java @@@ -32,6 -37,8 +37,7 @@@ import java.util.stream.Collectors import javax.annotation.Nonnull; import javax.annotation.Nullable; -import io.netty.util.concurrent.FastThreadLocal; + import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.ClusteringBound; import org.apache.cassandra.db.ClusteringComparator; @@@ -59,16 -66,19 +65,21 @@@ import org.apache.cassandra.dht.Token import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.index.Index; import org.apache.cassandra.index.sai.QueryContext; + import org.apache.cassandra.index.sai.StorageAttachedIndex; + import org.apache.cassandra.index.sai.disk.v1.vector.PrimaryKeyWithScore; -import org.apache.cassandra.index.sai.metrics.TableQueryMetrics; import org.apache.cassandra.index.sai.iterators.KeyRangeIterator; +import org.apache.cassandra.index.sai.metrics.TableQueryMetrics; import org.apache.cassandra.index.sai.utils.PrimaryKey; + import org.apache.cassandra.index.sai.utils.RangeUtil; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.AbstractIterator; import org.apache.cassandra.utils.Clock; + import org.apache.cassandra.utils.CloseableIterator; + import org.apache.cassandra.utils.FBUtilities; +import io.netty.util.concurrent.FastThreadLocal; + public class StorageAttachedIndexSearcher implements Index.Searcher { private static final int PARTITION_ROW_BATCH_SIZE = 100; diff --cc src/java/org/apache/cassandra/index/sai/utils/CellWithSource.java index 0000000000,858e541e22..ce697f361a mode 000000,100644..100644 --- 
a/src/java/org/apache/cassandra/index/sai/utils/CellWithSource.java +++ b/src/java/org/apache/cassandra/index/sai/utils/CellWithSource.java @@@ -1,0 -1,246 +1,273 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.cassandra.index.sai.utils; + + import java.nio.ByteBuffer; + ++import javax.annotation.Nonnull; ++ ++import com.google.common.base.Function; ++ + import org.apache.cassandra.db.CellSourceIdentifier; + import org.apache.cassandra.db.DeletionPurger; + import org.apache.cassandra.db.Digest; + import org.apache.cassandra.db.marshal.ValueAccessor; + import org.apache.cassandra.db.memtable.Memtable; + import org.apache.cassandra.db.rows.Cell; + import org.apache.cassandra.db.rows.CellPath; + import org.apache.cassandra.db.rows.ColumnData; + import org.apache.cassandra.db.rows.ComplexColumnData; + import org.apache.cassandra.io.sstable.SSTableId; + import org.apache.cassandra.schema.ColumnMetadata; + import org.apache.cassandra.utils.ObjectSizes; + import org.apache.cassandra.utils.memory.ByteBufferCloner; + + /** + * A wrapped {@link Cell} that includes a reference to the cell's source table via {@link CellSourceIdentifier} + * @param <T> the type of the cell's value + */ + public 
class CellWithSource<T> extends Cell<T> + { + private static final long EMPTY_SIZE = ObjectSizes.measure(new CellWithSource<>(null, null, null)); + + private final Cell<T> cell; + private final CellSourceIdentifier source; + + public CellWithSource(Cell<T> cell, CellSourceIdentifier source) + { + this(cell.column(), cell, source); + assert source instanceof Memtable || source instanceof SSTableId : "Source has unexpected type: " + (source == null ? "null" : source.getClass()); + } + + private CellWithSource(ColumnMetadata column, Cell<T> cell, CellSourceIdentifier source) + { + super(column); + this.cell = cell; + this.source = source; + } + + public CellSourceIdentifier sourceTable() + { + return source; + } + + @Override + public boolean isCounterCell() + { + return cell.isCounterCell(); + } + + @Override + public T value() + { + return cell.value(); + } + + @Override + public ValueAccessor<T> accessor() + { + return cell.accessor(); + } + + @Override + public long timestamp() + { + return cell.timestamp(); + } + + @Override + public int ttl() + { + return cell.ttl(); + } + + @Override + public long localDeletionTime() + { + return cell.localDeletionTime(); + } + + @Override + public boolean isTombstone() + { + return cell.isTombstone(); + } + + @Override + public boolean isExpiring() + { + return cell.isExpiring(); + } + + @Override + public boolean isLive(long nowInSec) + { + return cell.isLive(nowInSec); + } + + @Override + public CellPath path() + { + return cell.path(); + } + + @Override + public Cell<?> withUpdatedColumn(ColumnMetadata newColumn) + { + return wrapIfNew(cell.withUpdatedColumn(newColumn)); + } + + @Override + public Cell<?> withUpdatedValue(ByteBuffer newValue) + { + return wrapIfNew(cell.withUpdatedValue(newValue)); + } + ++ @Override ++ public Cell<?> withUpdatedTimestamp(long newTimestamp) ++ { ++ return wrapIfNew(cell.withUpdatedTimestamp(newTimestamp)); ++ } ++ + @Override + public Cell<?> withUpdatedTimestampAndLocalDeletionTime(long 
newTimestamp, long newLocalDeletionTime) + { + return wrapIfNew(cell.withUpdatedTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); + } + + @Override + public Cell<?> withSkippedValue() + { + return wrapIfNew(cell.withSkippedValue()); + } + + @Override + public Cell<?> clone(ByteBufferCloner cloner) + { + return wrapIfNew(cell.clone(cloner)); + } + + @Override + public int dataSize() + { + return cell.dataSize(); + } + + @Override + public long unsharedHeapSizeExcludingData() + { + return cell.unsharedHeapSizeExcludingData(); + } + + @Override + public long unsharedHeapSize() + { + return cell.unsharedHeapSize() + EMPTY_SIZE; + } + + @Override + public void validate() + { + cell.validate(); + } + + @Override + public boolean hasInvalidDeletions() + { + return cell.hasInvalidDeletions(); + } + + @Override + public void digest(Digest digest) + { + cell.digest(digest); + } + + @Override + public ColumnData updateAllTimestamp(long newTimestamp) + { - ColumnData maybeNewCell = cell.updateAllTimestamp(newTimestamp); - if (maybeNewCell instanceof Cell) - return wrapIfNew((Cell<?>) maybeNewCell); - if (maybeNewCell instanceof ComplexColumnData) - return ((ComplexColumnData) maybeNewCell).transform(this::wrapIfNew); - // It's not clear when we would hit this code path, but it seems we should not - // hit this from SAI. 
- throw new IllegalStateException("Expected a Cell instance, but got " + maybeNewCell); ++ return wrapIfNew(cell.updateAllTimestamp(newTimestamp)); ++ } ++ ++ @Override ++ public ColumnData updateTimesAndPathsForAccord(@Nonnull Function<Cell, CellPath> cellToMaybeNewListPath, long newTimestamp, long newLocalDeletionTime) ++ { ++ return wrapIfNew(cell.updateTimesAndPathsForAccord(cellToMaybeNewListPath, newTimestamp, newLocalDeletionTime)); ++ } ++ ++ @Override ++ public ColumnData updateAllTimesWithNewCellPathForComplexColumnData(@Nonnull CellPath maybeNewPath, long newTimestamp, long newLocalDeletionTime) ++ { ++ return wrapIfNew(cell.updateAllTimesWithNewCellPathForComplexColumnData(maybeNewPath, newTimestamp, newLocalDeletionTime)); + } + + @Override + public Cell<?> markCounterLocalToBeCleared() + { + return wrapIfNew(cell.markCounterLocalToBeCleared()); + } + + @Override + public Cell<?> purge(DeletionPurger purger, long nowInSec) + { + return wrapIfNew(cell.purge(purger, nowInSec)); + } + + @Override + public Cell<?> purgeDataOlderThan(long timestamp) + { + return wrapIfNew(cell.purgeDataOlderThan(timestamp)); + } + + @Override + protected int localDeletionTimeAsUnsignedInt() + { + // Cannot call cell's localDeletionTimeAsUnsignedInt() because it's protected. + throw new UnsupportedOperationException(); + } + + @Override + public long maxTimestamp() + { + return cell.maxTimestamp(); + } + ++ private ColumnData wrapIfNew(ColumnData maybeNewColumnData) ++ { ++ if (maybeNewColumnData instanceof Cell) ++ return wrapIfNew((Cell<?>) maybeNewColumnData); ++ if (maybeNewColumnData instanceof ComplexColumnData) ++ return ((ComplexColumnData) maybeNewColumnData).transform(this::wrapIfNew); ++ ++ // It's not clear when we would hit this code path, but it seems we should not ++ // hit this from SAI. 
++ throw new IllegalStateException("Expected a Cell or ComplexColumnData instance, but got " + maybeNewColumnData); ++ } ++ + private Cell<?> wrapIfNew(Cell<?> maybeNewCell) + { + if (maybeNewCell == null) + return null; + // If the cell's method returned a reference to the same cell, then + // we can skip creating a new wrapper. + if (maybeNewCell == this.cell) + return this; + return new CellWithSource<>(maybeNewCell, source); + } + } diff --cc src/java/org/apache/cassandra/index/sai/utils/RowWithSource.java index 0000000000,a883abb99b..33f5f07deb mode 000000,100644..100644 --- a/src/java/org/apache/cassandra/index/sai/utils/RowWithSource.java +++ b/src/java/org/apache/cassandra/index/sai/utils/RowWithSource.java @@@ -1,0 -1,392 +1,400 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + package org.apache.cassandra.index.sai.utils; + + import java.util.Collection; + import java.util.Comparator; + import java.util.Iterator; + import java.util.function.BiConsumer; + import java.util.function.Consumer; -import java.util.function.Function; + ++import javax.annotation.Nonnull; ++ ++import com.google.common.base.Function; + import com.google.common.collect.Collections2; + import com.google.common.collect.Iterables; + import com.google.common.collect.Iterators; + + import org.apache.cassandra.db.CellSourceIdentifier; + import org.apache.cassandra.db.Clustering; + import org.apache.cassandra.db.DeletionPurger; + import org.apache.cassandra.db.DeletionTime; + import org.apache.cassandra.db.Digest; + import org.apache.cassandra.db.LivenessInfo; + import org.apache.cassandra.db.filter.ColumnFilter; + import org.apache.cassandra.db.memtable.Memtable; + import org.apache.cassandra.db.rows.Cell; + import org.apache.cassandra.db.rows.CellPath; + import org.apache.cassandra.db.rows.ColumnData; + import org.apache.cassandra.db.rows.ComplexColumnData; + import org.apache.cassandra.db.rows.Row; + import org.apache.cassandra.io.sstable.SSTableId; + import org.apache.cassandra.schema.ColumnMetadata; + import org.apache.cassandra.schema.TableMetadata; + import org.apache.cassandra.utils.BiLongAccumulator; + import org.apache.cassandra.utils.LongAccumulator; + import org.apache.cassandra.utils.ObjectSizes; + import org.apache.cassandra.utils.SearchIterator; + import org.apache.cassandra.utils.memory.Cloner; + + /** + * A Row wrapper that has a {@link CellSourceIdentifier} that gets added to each cell as part of the + * {@link #getCell(ColumnMetadata)} and {@link #getCell(ColumnMetadata, CellPath)} calls. This class + * can only be validly initialized when all the cells share a common {@link CellSourceIdentifier}.
+  */
+ public class RowWithSource implements Row
+ {
+     /**
+      * Size reported by {@link ObjectSizes#measure} for a wrapper holding no row and no source;
+      * added on top of the wrapped row's size in the heap-size methods below.
+      */
+     private static final long EMPTY_SIZE = ObjectSizes.measure(new RowWithSource(null, null));
+
+     /** The wrapped row that every call is forwarded to. */
+     private final Row row;
+     /** The source attached to every cell handed out by this wrapper. */
+     private final CellSourceIdentifier source;
+
+     /**
+      * Wraps {@code row} so that the cells it exposes are {@link CellWithSource} instances carrying {@code source}.
+      *
+      * @param row    the row to wrap
+      * @param source a {@link Memtable} or an {@link SSTableId}; {@code null} is only accepted when
+      *               {@code row} is {@code null} as well (checked by assertion only)
+      */
+     public RowWithSource(Row row, CellSourceIdentifier source)
+     {
+         assert source instanceof Memtable || source instanceof SSTableId || (source == null && row == null) : "Expected Memtable or SSTableId, got " + source;
+         this.row = row;
+         this.source = source;
+     }
+
+     // ---- Plain delegation: answered by the wrapped row, nothing is wrapped ----
+
+     @Override
+     public Kind kind()
+     {
+         return row.kind();
+     }
+
+     @Override
+     public Clustering<?> clustering()
+     {
+         return row.clustering();
+     }
+
+     @Override
+     public void digest(Digest digest)
+     {
+         row.digest(digest);
+     }
+
+     @Override
+     public void validateData(TableMetadata metadata)
+     {
+         row.validateData(metadata);
+     }
+
+     @Override
+     public boolean hasInvalidDeletions()
+     {
+         return row.hasInvalidDeletions();
+     }
+
+     @Override
+     public Collection<ColumnMetadata> columns()
+     {
+         return row.columns();
+     }
+
+     @Override
+     public int columnCount()
+     {
+         return row.columnCount();
+     }
+
+     @Override
+     public Deletion deletion()
+     {
+         return row.deletion();
+     }
+
+     @Override
+     public LivenessInfo primaryKeyLivenessInfo()
+     {
+         return row.primaryKeyLivenessInfo();
+     }
+
+     @Override
+     public boolean isStatic()
+     {
+         return row.isStatic();
+     }
+
+     @Override
+     public boolean isEmpty()
+     {
+         return row.isEmpty();
+     }
+
+     @Override
+     public String toString(TableMetadata metadata)
+     {
+         return row.toString(metadata);
+     }
+
+     @Override
+     public boolean hasLiveData(long nowInSec, boolean enforceStrictLiveness)
+     {
+         return row.hasLiveData(nowInSec, enforceStrictLiveness);
+     }
+
+     // ---- Cell / column accessors: results are wrapped so they carry the source ----
+
+     /**
+      * Looks up the cell for {@code c} in the wrapped row.
+      *
+      * @return the cell wrapped in a {@link CellWithSource}, or {@code null} if the wrapped row returns {@code null}
+      */
+     @Override
+     public Cell<?> getCell(ColumnMetadata c)
+     {
+         // Same null-safe wrapping as the (column, path) overload below.
+         return wrapCell(row.getCell(c));
+     }
+
+     /**
+      * Looks up the cell for {@code c} at {@code path} in the wrapped row.
+      *
+      * @return the cell wrapped in a {@link CellWithSource}, or {@code null} if the wrapped row returns {@code null}
+      */
+     @Override
+     public Cell<?> getCell(ColumnMetadata c, CellPath path)
+     {
+         return wrapCell(row.getCell(c, path));
+     }
+
+     /** Returns the wrapped row's complex column data for {@code c} with every cell wrapped, or {@code null}. */
+     @Override
+     public ComplexColumnData getComplexColumnData(ColumnMetadata c)
+     {
+         return (ComplexColumnData) wrapColumnData(row.getComplexColumnData(c));
+     }
+
+     /** Returns the wrapped row's column data for {@code c} passed through {@link #wrapColumnData}. */
+     @Override
+     public ColumnData getColumnData(ColumnMetadata c)
+     {
+         return wrapColumnData(row.getColumnData(c));
+     }
+
+     /** Returns a transforming view over the wrapped row's cells; each cell is wrapped as it is read. */
+     @Override
+     public Iterable<Cell<?>> cells()
+     {
+         return Iterables.transform(row.cells(), this::wrapCell);
+     }
+
+     /** Returns a transforming view over the wrapped row's column data; each element is wrapped as it is read. */
+     @Override
+     public Collection<ColumnData> columnData()
+     {
+         return Collections2.transform(row.columnData(), this::wrapColumnData);
+     }
+
+     /** Same as {@link #cells()}, but over the wrapped row's legacy cell ordering. */
+     @Override
+     public Iterable<Cell<?>> cellsInLegacyOrder(TableMetadata metadata, boolean reversed)
+     {
+         return Iterables.transform(row.cellsInLegacyOrder(metadata, reversed), this::wrapCell);
+     }
+
+     @Override
+     public boolean hasComplexDeletion()
+     {
+         return row.hasComplexDeletion();
+     }
+
+     @Override
+     public boolean hasComplex()
+     {
+         return row.hasComplex();
+     }
+
+     @Override
+     public boolean hasDeletion(long nowInSec)
+     {
+         return row.hasDeletion(nowInSec);
+     }
+
+     /** Returns a search iterator that forwards each lookup to the wrapped row's iterator and wraps the result. */
+     @Override
+     public SearchIterator<ColumnMetadata, ColumnData> searchIterator()
+     {
+         SearchIterator<ColumnMetadata, ColumnData> iterator = row.searchIterator();
+         return key -> wrapColumnData(iterator.next(key));
+     }
+
+     // ---- Row-producing operations: the resulting row is re-wrapped with the same source ----
+
+     @Override
+     public Row filter(ColumnFilter filter, TableMetadata metadata)
+     {
+         return maybeWrapRow(row.filter(filter, metadata));
+     }
+
+     @Override
+     public Row filter(ColumnFilter filter, DeletionTime activeDeletion, boolean setActiveDeletionToRow, TableMetadata metadata)
+     {
+         return maybeWrapRow(row.filter(filter, activeDeletion, setActiveDeletionToRow, metadata));
+     }
+
+     @Override
+     public Row transformAndFilter(LivenessInfo info, Deletion deletion, Function<ColumnData, ColumnData> function)
+     {
+         return maybeWrapRow(row.transformAndFilter(info, deletion, function));
+     }
+
+     @Override
+     public Row transformAndFilter(Function<ColumnData, ColumnData> function)
+     {
+         return maybeWrapRow(row.transformAndFilter(function));
+     }
+
+     @Override
+     public Row clone(Cloner cloner)
+     {
+         return maybeWrapRow(row.clone(cloner));
+     }
+
+     @Override
+     public Row purgeDataOlderThan(long timestamp, boolean enforceStrictLiveness)
+     {
+         return maybeWrapRow(row.purgeDataOlderThan(timestamp, enforceStrictLiveness));
+     }
+
+     @Override
+     public Row purge(DeletionPurger purger, long nowInSec, boolean enforceStrictLiveness)
+     {
+         return maybeWrapRow(row.purge(purger, nowInSec, enforceStrictLiveness));
+     }
+
+     @Override
+     public Row withOnlyQueriedData(ColumnFilter filter)
+     {
+         return maybeWrapRow(row.withOnlyQueriedData(filter));
+     }
+
+     @Override
+     public Row markCounterLocalToBeCleared()
+     {
+         return maybeWrapRow(row.markCounterLocalToBeCleared());
+     }
+
+     @Override
+     public Row updateAllTimestamp(long newTimestamp)
+     {
+         return maybeWrapRow(row.updateAllTimestamp(newTimestamp));
+     }
+
++    @Override
++    public Row updateTimesAndPathsForAccord(@Nonnull Function<Cell, CellPath> cellToMaybeNewListPath, long newTimestamp, long newLocalDeletionTime)
++    {
++        return maybeWrapRow(row.updateTimesAndPathsForAccord(cellToMaybeNewListPath, newTimestamp, newLocalDeletionTime));
++    }
++
+     @Override
+     public Row withRowDeletion(DeletionTime deletion)
+     {
+         return maybeWrapRow(row.withRowDeletion(deletion));
+     }
+
+     // ---- Sizing ----
+
+     @Override
+     public int dataSize()
+     {
+         return row.dataSize();
+     }
+
+     /** Returns the wrapped row's unshared heap size plus {@link #EMPTY_SIZE} for this wrapper. */
+     @Override
+     public long unsharedHeapSize()
+     {
+         return row.unsharedHeapSize() + EMPTY_SIZE;
+     }
+
+     /** Returns the wrapped row's unshared heap size excluding data plus {@link #EMPTY_SIZE} for this wrapper. */
+     @Override
+     public long unsharedHeapSizeExcludingData()
+     {
+         return row.unsharedHeapSizeExcludingData() + EMPTY_SIZE;
+     }
+
+     @Override
+     public String toString(TableMetadata metadata, boolean fullDetails)
+     {
+         return row.toString(metadata, fullDetails);
+     }
+
+     @Override
+     public String toString(TableMetadata metadata, boolean includeClusterKeys, boolean fullDetails)
+     {
+         return row.toString(metadata, includeClusterKeys, fullDetails);
+     }
+
+     // ---- Bulk visitors: forwarded as-is, so callbacks receive the wrapped row's own column data ----
+
+     @Override
+     public void apply(Consumer<ColumnData> function)
+     {
+         row.apply(function);
+     }
+
+     @Override
+     public <A> void apply(BiConsumer<A, ColumnData> function, A arg)
+     {
+         row.apply(function, arg);
+     }
+
+     @Override
+     public long accumulate(LongAccumulator<ColumnData> accumulator, long initialValue)
+     {
+         return row.accumulate(accumulator, initialValue);
+     }
+
+     @Override
+     public long accumulate(LongAccumulator<ColumnData> accumulator, Comparator<ColumnData> comparator, ColumnData from, long initialValue)
+     {
+         return row.accumulate(accumulator, comparator, from, initialValue);
+     }
+
+     @Override
+     public <A> long accumulate(BiLongAccumulator<A, ColumnData> accumulator, A arg, long initialValue)
+     {
+         return row.accumulate(accumulator, arg, initialValue);
+     }
+
+     @Override
+     public <A> long accumulate(BiLongAccumulator<A, ColumnData> accumulator, A arg, Comparator<ColumnData> comparator, ColumnData from, long initialValue)
+     {
+         return row.accumulate(accumulator, arg, comparator, from, initialValue);
+     }
+
+     /** Returns an iterator over the wrapped row's column data; each element is wrapped as it is read. */
+     @Override
+     public Iterator<ColumnData> iterator()
+     {
+         return Iterators.transform(row.iterator(), this::wrapColumnData);
+     }
+
+     // ---- Wrapping helpers ----
+
+     /**
+      * Attaches the source to a piece of column data.
+      *
+      * @param c the column data to wrap, may be {@code null}
+      * @return {@code null} for {@code null}; a {@link CellWithSource} for a {@link Cell}; for a
+      *         {@link ComplexColumnData}, the result of transforming each of its cells into a {@link CellWithSource}
+      * @throws IllegalStateException if {@code c} is neither a {@link Cell} nor a {@link ComplexColumnData}
+      */
+     private ColumnData wrapColumnData(ColumnData c)
+     {
+         if (c == null)
+             return null;
+         if (c instanceof Cell<?>)
+             return new CellWithSource<>((Cell<?>) c, source);
+         if (c instanceof ComplexColumnData)
+             return ((ComplexColumnData) c).transform(c1 -> new CellWithSource<>(c1, source));
+         throw new IllegalStateException("Unexpected ColumnData type: " + c.getClass().getName());
+     }
+
+     /** Wraps {@code c} in a {@link CellWithSource} carrying the source; {@code null} stays {@code null}. */
+     private Cell<?> wrapCell(Cell<?> c)
+     {
+         return c != null ? new CellWithSource<>(c, source) : null;
+     }
+
+     /**
+      * Re-wraps the result of a row-producing operation.
+      *
+      * @param r the row returned by the wrapped row, may be {@code null}
+      * @return {@code null} for {@code null}; {@code this} when the operation handed back the very same
+      *         wrapped instance (so no new wrapper is allocated); otherwise a new wrapper around {@code r}
+      *         with the same source
+      */
+     private Row maybeWrapRow(Row r)
+     {
+         if (r == null)
+             return null;
+         if (r == this.row)
+             return this;
+         return new RowWithSource(r, source);
+     }
+
+     /** Debug representation showing the wrapped row and the attached source. */
+     @Override
+     public String toString()
+     {
+         // Label matches the actual class name (previously printed the stale "RowWithSourceTable").
+         return "RowWithSource{row=" +
+                row +
+                ", source=" + source +
+                '}';
+     }
+ }
diff --cc test/unit/org/apache/cassandra/cql3/CQLTester.java index 1dad1a5ae7,352f70f110..e4eaec08ea --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@@ -43,6 -44,8 +43,7 @@@ import java.util.Map import java.util.Optional; import java.util.Set; import java.util.UUID; -import java.util.concurrent.CountDownLatch; + import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; diff --cc test/unit/org/apache/cassandra/index/sai/cql/VectorTester.java index ab72aabccf,478727dc00..511831fd96 --- a/test/unit/org/apache/cassandra/index/sai/cql/VectorTester.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/VectorTester.java @@@ -24,21 -24,22 +24,24 @@@ import java.util.Arrays import java.util.Collection; import java.util.List; -import io.github.jbellis.jvector.graph.GraphIndexBuilder; -import io.github.jbellis.jvector.graph.GraphSearcher; -import io.github.jbellis.jvector.vector.VectorEncoding; -import io.github.jbellis.jvector.vector.VectorSimilarityFunction; -import org.apache.cassandra.index.sai.SAITester; -import org.apache.cassandra.index.sai.disk.v1.segment.VectorIndexSegmentSearcher; -import org.apache.cassandra.index.sai.disk.v1.vector.ConcurrentVectorValues; -import org.apache.cassandra.index.sai.utils.Glove; import org.junit.Before; import org.junit.BeforeClass; + import org.junit.Ignore; + import org.junit.runner.RunWith; + import org.junit.runners.Parameterized; +import org.apache.cassandra.index.sai.SAITester; ++import
org.apache.cassandra.index.sai.disk.v1.segment.VectorIndexSegmentSearcher; +import org.apache.cassandra.index.sai.disk.v1.vector.ConcurrentVectorValues; +import org.apache.cassandra.index.sai.utils.Glove; - import org.apache.cassandra.inject.ActionBuilder; - import org.apache.cassandra.inject.Injections; - import org.apache.cassandra.inject.InvokePointBuilder; + +import io.github.jbellis.jvector.graph.GraphIndexBuilder; +import io.github.jbellis.jvector.graph.GraphSearcher; +import io.github.jbellis.jvector.vector.VectorEncoding; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; + + @Ignore + @RunWith(Parameterized.class) public class VectorTester extends SAITester { protected static Glove.WordVector word2vec; diff --cc test/unit/org/apache/cassandra/index/sai/cql/VectorUpdateDeleteTest.java index 31bf3dfb7c,7fe71ec09f..66de0c39a5 --- a/test/unit/org/apache/cassandra/index/sai/cql/VectorUpdateDeleteTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/VectorUpdateDeleteTest.java @@@ -18,12 -18,13 +18,13 @@@ package org.apache.cassandra.index.sai.cql; +import org.junit.Test; + import org.apache.cassandra.cql3.UntypedResultSet; + import org.apache.cassandra.index.sai.utils.IndexIdentifier; - import static org.apache.cassandra.config.CassandraRelevantProperties.SAI_VECTOR_SEARCH_ORDER_CHUNK_SIZE; -import org.junit.Test; - import static org.apache.cassandra.index.sai.cql.VectorTypeTest.assertContainsInt; + import static org.apache.cassandra.index.sai.disk.v1.vector.OnHeapGraph.MIN_PQ_ROWS; import static org.assertj.core.api.Assertions.assertThat; public class VectorUpdateDeleteTest extends VectorTester diff --cc test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java index 06309e42a5,01b6f4d963..d7928aaeb5 --- a/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java +++ b/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java @@@ -45,6 -45,8 +45,7 @@@ import 
org.apache.cassandra.index.sai.m import org.apache.cassandra.index.sai.plan.Expression; import org.apache.cassandra.index.sai.utils.PrimaryKey; import org.apache.cassandra.index.sai.utils.SAIRandomizedTester; + import org.apache.cassandra.io.sstable.SSTableId; -import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import org.apache.cassandra.utils.bytecomparable.ByteSource; diff --cc test/unit/org/apache/cassandra/index/sai/memory/VectorMemoryIndexTest.java index e1be7b276e,a1ef40cc95..1c87978a63 --- a/test/unit/org/apache/cassandra/index/sai/memory/VectorMemoryIndexTest.java +++ b/test/unit/org/apache/cassandra/index/sai/memory/VectorMemoryIndexTest.java @@@ -32,10 -32,8 +32,9 @@@ import java.util.stream.Collectors import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; + import org.mockito.Mockito; +import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.Clustering; @@@ -61,20 -60,17 +61,18 @@@ import org.apache.cassandra.dht.Range import org.apache.cassandra.index.sai.QueryContext; import org.apache.cassandra.index.sai.SAITester; import org.apache.cassandra.index.sai.StorageAttachedIndex; +import org.apache.cassandra.index.sai.disk.format.Version; - import org.apache.cassandra.index.sai.iterators.KeyRangeIterator; + import org.apache.cassandra.index.sai.disk.v1.vector.PrimaryKeyWithScore; import org.apache.cassandra.index.sai.plan.Expression; - import org.apache.cassandra.index.sai.utils.PrimaryKey; - import org.apache.cassandra.index.sai.utils.RangeUtil; import org.apache.cassandra.inject.Injections; import org.apache.cassandra.inject.InvokePointBuilder; -import org.apache.cassandra.locator.TokenMetadata; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.StorageService; +import 
org.apache.cassandra.tcm.ClusterMetadata; + import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.FBUtilities; import static org.apache.cassandra.config.CassandraRelevantProperties.MEMTABLE_SHARD_COUNT; +import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
