Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ Optimizations

* GITHUB#15597, GITHUB#15777: Reduce memory usage of NeighborArray (Viliam Durina)

* GITHUB#15606: Utilize bulk scoring for NeighborArray#isWorstNonDiverse (Luis Negrin)

Bug Fixes
---------------------
* GITHUB#14049: Randomize KNN codec params in RandomCodec. Fixes scalar quantization div-by-zero
Expand Down
40 changes: 19 additions & 21 deletions lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java
Original file line number Diff line number Diff line change
Expand Up @@ -292,13 +292,16 @@ private int findWorstNonDiverse(UpdateableRandomVectorScorer scorer) throws IOEx
int[] uncheckedIndexes = sort(scorer);
assert uncheckedIndexes != null : "We will always have something unchecked";
int uncheckedCursor = uncheckedIndexes.length - 1;
int[] bulkScoreNodes = new int[size];
float[] bulkScores = new float[size];
for (int i = size - 1; i > 0; i--) {
if (uncheckedCursor < 0) {
// no unchecked node left
break;
}
scorer.setScoringOrdinal(nodes[i]);
if (isWorstNonDiverse(i, uncheckedIndexes, uncheckedCursor, scorer)) {
if (isWorstNonDiverse(
i, uncheckedIndexes, uncheckedCursor, scorer, bulkScoreNodes, bulkScores)) {
return i;
}
if (i == uncheckedIndexes[uncheckedCursor]) {
Expand All @@ -309,31 +312,26 @@ private int findWorstNonDiverse(UpdateableRandomVectorScorer scorer) throws IOEx
}

private boolean isWorstNonDiverse(
int candidateIndex, int[] uncheckedIndexes, int uncheckedCursor, RandomVectorScorer scorer)
int candidateIndex,
int[] uncheckedIndexes,
int uncheckedCursor,
RandomVectorScorer scorer,
int[] bulkScoreNodes,
float[] bulkScores)
throws IOException {
float minAcceptedSimilarity = scores[candidateIndex];
if (candidateIndex == uncheckedIndexes[uncheckedCursor]) {
// the candidate itself is unchecked
for (int i = candidateIndex - 1; i >= 0; i--) {
float neighborSimilarity = scorer.score(nodes[i]);
// candidate node is too similar to node i given its score relative to the base node
if (neighborSimilarity >= minAcceptedSimilarity) {
return true;
}
}
} else {
// else we just need to make sure candidate does not violate diversity with the (newly
// inserted) unchecked nodes
assert candidateIndex > uncheckedIndexes[uncheckedCursor];
for (int i = uncheckedCursor; i >= 0; i--) {
float neighborSimilarity = scorer.score(nodes[uncheckedIndexes[i]]);
// candidate node is too similar to node i given its score relative to the base node
if (neighborSimilarity >= minAcceptedSimilarity) {
return true;
}
}
return scorer.bulkScore(nodes, bulkScores, candidateIndex) >= minAcceptedSimilarity;
}
// else we just need to make sure candidate does not violate diversity with the (newly
// inserted) unchecked nodes
assert candidateIndex > uncheckedIndexes[uncheckedCursor];
for (int i = uncheckedCursor; i >= 0; i--) {
bulkScoreNodes[i] = nodes[uncheckedIndexes[i]];
}
return false;
return scorer.bulkScore(bulkScoreNodes, bulkScores, uncheckedCursor + 1)
>= minAcceptedSimilarity;
}

public int maxSize() {
Expand Down
Loading