mirror of
https://review.haiku-os.org/haiku
synced 2025-01-18 12:38:51 +01:00
file_systems: Invert query equation scoring values.
Previously, lower was better, and higher was worse. But really we want the scores to be based primarily around the index sizes, which can grow to be very large, so a maximum score is hard to determine. Instead, start with the index size, and then divide to make it smaller based on how "useful" the equation terms will be in searching it. Improves the performance of queries like those in #19080; according to humdinger's testing, the query with the most expensive term first went from ~2.0s execution time down to ~0.7s, same as the query with the least expensive term first. Change-Id: Id71fa21c95cfe3d8d0019ff356bdf4935446411f Reviewed-on: https://review.haiku-os.org/c/haiku/+/8593 Reviewed-by: waddlesplash <waddlesplash@gmail.com>
This commit is contained in:
parent
8fbddf13a1
commit
bb1f240594
@ -368,7 +368,8 @@ Equation<QueryPolicy>::Equation(const char** expr)
|
||||
fAttribute(NULL),
|
||||
fString(NULL),
|
||||
fType(0),
|
||||
fIsPattern(false)
|
||||
fIsPattern(false),
|
||||
fScore(INT32_MAX)
|
||||
{
|
||||
const char* string = *expr;
|
||||
const char* start = string;
|
||||
@ -760,34 +761,35 @@ Equation<QueryPolicy>::CalculateScore(Index &index)
|
||||
// And the code could also need some real world testing :-)
|
||||
|
||||
// do we have to operate on a "foreign" index?
|
||||
if (Term<QueryPolicy>::fOp == OP_UNEQUAL
|
||||
|| QueryPolicy::IndexSetTo(index, fAttribute) < B_OK) {
|
||||
fScore = 0;
|
||||
if (QueryPolicy::IndexSetTo(index, fAttribute) < B_OK) {
|
||||
fScore = INT32_MAX;
|
||||
return;
|
||||
}
|
||||
|
||||
fScore = QueryPolicy::IndexGetSize(index);
|
||||
|
||||
if (Term<QueryPolicy>::fOp == OP_UNEQUAL) {
|
||||
// we'll need to scan the whole index
|
||||
return;
|
||||
}
|
||||
|
||||
// if we have a pattern, how much does it help our search?
|
||||
if (fIsPattern) {
|
||||
fScore = getFirstPatternSymbol(fString) << 3;
|
||||
const int32 firstSymbolIndex = getFirstPatternSymbol(fString);
|
||||
|
||||
// Even if the first pattern symbol is at position 0,
|
||||
// there's still an index, so don't let our score revert to zero.
|
||||
if (fScore == 0)
|
||||
fScore = 1;
|
||||
// Guess how much of the index we will be able to skip.
|
||||
const int32 divisor = (firstSymbolIndex > 3) ? 4 : (firstSymbolIndex + 1);
|
||||
fScore /= divisor;
|
||||
} else {
|
||||
// Score by operator
|
||||
if (Term<QueryPolicy>::fOp == OP_EQUAL) {
|
||||
// higher than pattern="255 chars+*"
|
||||
fScore = 2048;
|
||||
// higher than most patterns
|
||||
fScore /= (fSize > 8) ? 8 : fSize;
|
||||
} else {
|
||||
// the pattern search is regarded cheaper when you have at
|
||||
// least one character to set your index to
|
||||
fScore = 5;
|
||||
// better than nothing, anyway
|
||||
fScore /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
// take index size into account
|
||||
fScore = QueryPolicy::IndexGetWeightedScore(index, fScore);
|
||||
}
|
||||
|
||||
|
||||
@ -1035,7 +1037,7 @@ Operator<QueryPolicy>::Match(Entry* entry, Node* node, const char* attribute,
|
||||
// choose the term with the better score for OP_OR
|
||||
Term<QueryPolicy>* first;
|
||||
Term<QueryPolicy>* second;
|
||||
if (fRight->Score() > fLeft->Score()) {
|
||||
if (fRight->Score() < fLeft->Score()) {
|
||||
first = fLeft;
|
||||
second = fRight;
|
||||
} else {
|
||||
@ -1082,16 +1084,14 @@ Operator<QueryPolicy>::Score() const
|
||||
{
|
||||
if (Term<QueryPolicy>::fOp == OP_AND) {
|
||||
// return the one with the better score
|
||||
if (fRight->Score() > fLeft->Score())
|
||||
if (fRight->Score() < fLeft->Score())
|
||||
return fRight->Score();
|
||||
|
||||
return fLeft->Score();
|
||||
}
|
||||
|
||||
// for OP_OR, be honest, and return the one with the worse score
|
||||
if (fRight->Score() < fLeft->Score())
|
||||
if (fRight->Score() > fLeft->Score())
|
||||
return fRight->Score();
|
||||
|
||||
return fLeft->Score();
|
||||
}
|
||||
|
||||
@ -1496,13 +1496,13 @@ Query<QueryPolicy>::Rewind()
|
||||
} else {
|
||||
// For OP_AND, we can use the scoring system to decide which
|
||||
// path to add
|
||||
if (op->Right()->Score() > op->Left()->Score())
|
||||
if (op->Right()->Score() < op->Left()->Score())
|
||||
stack.Push(op->Right());
|
||||
else
|
||||
stack.Push(op->Left());
|
||||
}
|
||||
} else if (term->Op() == OP_EQUATION
|
||||
|| fStack.Push((Equation<QueryPolicy>*)term) != B_OK)
|
||||
|| fStack.Push((Equation<QueryPolicy>*)term) != B_OK)
|
||||
QUERY_FATAL("Unknown term on stack or stack error\n");
|
||||
}
|
||||
|
||||
|
@ -122,13 +122,12 @@ struct Query::QueryPolicy {
|
||||
index.Unset();
|
||||
}
|
||||
|
||||
static int32 IndexGetWeightedScore(Index& index, int32 score)
|
||||
static int32 IndexGetSize(Index& index)
|
||||
{
|
||||
// take index size into account (1024 is the current node size
|
||||
// in our B+trees)
|
||||
// 2048 * 2048 == 4194304 is the maximum score (for an empty
|
||||
// tree, since the header + 1 node are already 2048 bytes)
|
||||
return score * ((2048 * 1024LL) / index.Node()->Size());
|
||||
off_t size = index.Node()->Size() / index.Node()->GetVolume()->BlockSize();
|
||||
if (size > INT32_MAX)
|
||||
return INT32_MAX;
|
||||
return size;
|
||||
}
|
||||
|
||||
static type_code IndexGetType(Index& index)
|
||||
|
@ -94,13 +94,9 @@ struct Query::QueryPolicy {
|
||||
index.index = NULL;
|
||||
}
|
||||
|
||||
static int32 IndexGetWeightedScore(Index& index, int32 score)
|
||||
static int32 IndexGetSize(Index& index)
|
||||
{
|
||||
// should be inversely proportional to the index size; max input score
|
||||
// is 2048
|
||||
static const int32 maxFactor = (1024 * 1024) - 1;
|
||||
return score * (maxFactor /
|
||||
std::min(maxFactor, std::max((int32)1, index.index->CountEntries())));
|
||||
return index.index->CountEntries();
|
||||
}
|
||||
|
||||
static type_code IndexGetType(Index& index)
|
||||
|
@ -175,13 +175,9 @@ struct Query::QueryPolicy {
|
||||
index.index = NULL;
|
||||
}
|
||||
|
||||
static int32 IndexGetWeightedScore(Index& index, int32 score)
|
||||
static int32 IndexGetSize(Index& index)
|
||||
{
|
||||
// should be inversely proportional to the index size; max input score
|
||||
// is 2048
|
||||
static const int32 maxFactor = (1024 * 1024) - 1;
|
||||
return score * (maxFactor /
|
||||
std::min(maxFactor, std::max((int32)1, index.index->CountEntries())));
|
||||
return index.index->CountEntries();
|
||||
}
|
||||
|
||||
static type_code IndexGetType(Index& index)
|
||||
|
@ -99,7 +99,7 @@ struct Query::QueryPolicy {
|
||||
{
|
||||
}
|
||||
|
||||
static int32 IndexGetWeightedScore(Index& index, int32 score)
|
||||
static int32 IndexGetSize(Index& index)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user