Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: skip bytecode evaluation for some rules without string matches #1927

Merged
merged 3 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions libyara/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1167,11 +1167,12 @@ int yr_execute_code(YR_SCAN_CONTEXT* context)
current_rule = &context->rules->rules_table[current_rule_idx];

// If the rule is disabled let's skip its code.
ip = jmp_if(RULE_IS_DISABLED(current_rule), ip);
bool disabled = RULE_IS_DISABLED(current_rule) || yr_bitmask_is_not_set(context->rule_evaluate_condition_flags, current_rule_idx);
ip = jmp_if(disabled, ip);

// Skip the bytes corresponding to the rule's index, but only if not
// taking the jump.
if (!RULE_IS_DISABLED(current_rule))
if (!disabled)
ip += sizeof(uint32_t);

break;
Expand Down
91 changes: 91 additions & 0 deletions libyara/grammar.y
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%type <expression> regexp
%type <expression> for_expression
%type <expression> for_quantifier
%type <expression> condition


%type <c_string> arguments
Expand Down Expand Up @@ -405,6 +406,10 @@ rule
}
condition '}'
{
YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(
compiler->arena, &$<rule>4);
rule->required_strings = $10.required_strings.count;

int result = yr_parser_reduce_rule_declaration_phase_2(
yyscanner, &$<rule>4); // rule created in phase 1

Expand Down Expand Up @@ -455,6 +460,9 @@ strings

// Condition section of a rule: the _CONDITION_ token, a colon and a single
// boolean expression.
condition
: _CONDITION_ ':' boolean_expression
{
// Forward the semantic value of the boolean expression as the value of
// the whole condition, so that data attached to it (such as
// required_strings.count) remains available to the enclosing production.
$$ = $3;
}
;


Expand Down Expand Up @@ -992,6 +1000,7 @@ identifier
$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.value.integer = YR_UNDEFINED;
$$.identifier.ptr = NULL;
$$.required_strings.count = 0;
}
else
{
Expand Down Expand Up @@ -1311,6 +1320,14 @@ boolean_expression
fail_if_error(yr_parser_emit(
yyscanner, OP_STR_TO_BOOL, NULL));
}
if ($1.type != EXPRESSION_TYPE_BOOLEAN)
{
$$.required_strings.count = 0;
}
else
{
$$.required_strings.count = $1.required_strings.count;
}

$$.type = EXPRESSION_TYPE_BOOLEAN;
}
Expand All @@ -1322,12 +1339,14 @@ expression
fail_if_error(yr_parser_emit_push_const(yyscanner, 1));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| _FALSE_
{
fail_if_error(yr_parser_emit_push_const(yyscanner, 0));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _MATCHES_ regexp
{
Expand All @@ -1340,6 +1359,7 @@ expression
NULL));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _CONTAINS_ primary_expression
{
Expand All @@ -1350,6 +1370,7 @@ expression
yyscanner, OP_CONTAINS, NULL));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _ICONTAINS_ primary_expression
{
Expand All @@ -1360,6 +1381,7 @@ expression
yyscanner, OP_ICONTAINS, NULL));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _STARTSWITH_ primary_expression
{
Expand All @@ -1370,6 +1392,7 @@ expression
yyscanner, OP_STARTSWITH, NULL));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _ISTARTSWITH_ primary_expression
{
Expand All @@ -1380,6 +1403,7 @@ expression
yyscanner, OP_ISTARTSWITH, NULL));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _ENDSWITH_ primary_expression
{
Expand All @@ -1390,6 +1414,7 @@ expression
yyscanner, OP_ENDSWITH, NULL));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _IENDSWITH_ primary_expression
{
Expand All @@ -1400,6 +1425,7 @@ expression
yyscanner, OP_IENDSWITH, NULL));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _IEQUALS_ primary_expression
{
Expand All @@ -1410,6 +1436,7 @@ expression
yyscanner, OP_IEQUALS, NULL));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| _STRING_IDENTIFIER_
{
Expand All @@ -1424,6 +1451,7 @@ expression
fail_if_error(result);

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 1;
}
| _STRING_IDENTIFIER_ _AT_ primary_expression
{
Expand All @@ -1438,6 +1466,7 @@ expression

fail_if_error(result);

$$.required_strings.count = 1;
$$.type = EXPRESSION_TYPE_BOOLEAN;
}
| _STRING_IDENTIFIER_ _IN_ range
Expand All @@ -1449,6 +1478,7 @@ expression

fail_if_error(result);

$$.required_strings.count = 1;
$$.type = EXPRESSION_TYPE_BOOLEAN;
}
| _FOR_ for_expression error
Expand Down Expand Up @@ -1691,6 +1721,7 @@ expression
compiler->loop_index--;

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| for_expression _OF_ string_set
{
Expand All @@ -1699,6 +1730,18 @@ expression
yywarning(yyscanner,
"expression always false - requesting %" PRId64 " of %" PRId64 ".", $1.value.integer, $3);
}

if (($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > 0) ||
($1.type == EXPRESSION_TYPE_QUANTIFIER &&
($1.value.integer == FOR_EXPRESSION_ALL || $1.value.integer == FOR_EXPRESSION_ANY)))
{
$$.required_strings.count = 1;
}
else
{
$$.required_strings.count = 0;
}

yr_parser_emit_with_arg(yyscanner, OP_OF, OF_STRING_SET, NULL, NULL);

$$.type = EXPRESSION_TYPE_BOOLEAN;
Expand All @@ -1713,6 +1756,7 @@ expression
yr_parser_emit_with_arg(yyscanner, OP_OF, OF_RULE_SET, NULL, NULL);

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression '%' _OF_ string_set
{
Expand All @@ -1731,6 +1775,15 @@ expression
fail_with_error(ERROR_INVALID_PERCENTAGE);
}

if (!IS_UNDEFINED($1.value.integer))
{
$$.required_strings.count = 1;
}
else
{
$$.required_strings.count = 0;
}

yr_parser_emit_with_arg(yyscanner, OP_OF_PERCENT, OF_STRING_SET, NULL, NULL);
}
| primary_expression '%' _OF_ rule_set
Expand Down Expand Up @@ -1760,6 +1813,17 @@ expression
"expression always false - requesting %" PRId64 " of %" PRId64 ".", $1.value.integer, $3);
}

if (($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > 0) ||
($1.type == EXPRESSION_TYPE_QUANTIFIER &&
($1.value.integer == FOR_EXPRESSION_ALL || $1.value.integer == FOR_EXPRESSION_ANY)))
{
$$.required_strings.count = 1;
}
else
{
$$.required_strings.count = 0;
}

yr_parser_emit(yyscanner, OP_OF_FOUND_IN, NULL);

$$.type = EXPRESSION_TYPE_BOOLEAN;
Expand Down Expand Up @@ -1797,6 +1861,17 @@ expression
"multiple strings at an offset is usually false.");
}

if (($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > 0) ||
($1.type == EXPRESSION_TYPE_QUANTIFIER &&
($1.value.integer == FOR_EXPRESSION_ALL || $1.value.integer == FOR_EXPRESSION_ANY)))
{
$$.required_strings.count = 1;
}
else
{
$$.required_strings.count = 0;
}

yr_parser_emit(yyscanner, OP_OF_FOUND_AT, NULL);

$$.type = EXPRESSION_TYPE_BOOLEAN;
Expand All @@ -1806,11 +1881,13 @@ expression
yr_parser_emit(yyscanner, OP_NOT, NULL);

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| _DEFINED_ boolean_expression
{
yr_parser_emit(yyscanner, OP_DEFINED, NULL);
$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| boolean_expression _AND_
{
Expand Down Expand Up @@ -1856,6 +1933,7 @@ expression
yr_free(fixup);

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = $4.required_strings.count + $1.required_strings.count;
}
| boolean_expression _OR_
{
Expand Down Expand Up @@ -1900,48 +1978,61 @@ expression
yr_free(fixup);

$$.type = EXPRESSION_TYPE_BOOLEAN;

// Set required string count to minimum from both parts
if ($1.required_strings.count > $4.required_strings.count) {
$$.required_strings.count = $4.required_strings.count;
} else {
$$.required_strings.count = $1.required_strings.count;
}
}
| primary_expression _LT_ primary_expression
{
fail_if_error(yr_parser_reduce_operation(
yyscanner, "<", $1, $3));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _GT_ primary_expression
{
fail_if_error(yr_parser_reduce_operation(
yyscanner, ">", $1, $3));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _LE_ primary_expression
{
fail_if_error(yr_parser_reduce_operation(
yyscanner, "<=", $1, $3));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _GE_ primary_expression
{
fail_if_error(yr_parser_reduce_operation(
yyscanner, ">=", $1, $3));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _EQ_ primary_expression
{
fail_if_error(yr_parser_reduce_operation(
yyscanner, "==", $1, $3));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression _NEQ_ primary_expression
{
fail_if_error(yr_parser_reduce_operation(
yyscanner, "!=", $1, $3));

$$.type = EXPRESSION_TYPE_BOOLEAN;
$$.required_strings.count = 0;
}
| primary_expression
{
Expand Down
6 changes: 6 additions & 0 deletions libyara/include/yara/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ typedef struct _YR_EXPRESSION
YR_ARENA_REF sized_string_ref;
} value;

// Boolean expressions can hold a required string count. If the count is non-zero, the condition
// can only be fulfilled if at least that many strings match.
struct {
int count;
} required_strings;

// An expression can have an associated identifier, if "ptr" is not NULL it
// points to the identifier name, if it is NULL, then "ref" holds a reference
// to the identifier within YR_SZ_POOL. When the identifier is in YR_SZ_POOL
Expand Down
10 changes: 10 additions & 0 deletions libyara/include/yara/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ struct YR_RULE
// Number of atoms generated for this rule.
int32_t num_atoms;

uint32_t required_strings;

DECLARE_REFERENCE(const char*, identifier);
DECLARE_REFERENCE(const char*, tags);
DECLARE_REFERENCE(YR_META*, metas);
Expand Down Expand Up @@ -611,6 +613,10 @@ struct YR_RULES
// the instructions are defined by the OP_X macros in exec.h.
const uint8_t* code_start;

// A bitmap with one bit per rule; bit N is set when the condition for the
// rule with index N might evaluate to true even without any string matches.
YR_BITMASK* rule_evaluate_condition_flags;

// Total number of rules.
uint32_t num_rules;

Expand Down Expand Up @@ -815,6 +821,10 @@ struct YR_SCAN_CONTEXT
// until they can be confirmed or discarded.
YR_MATCHES* unconfirmed_matches;

// A bitmap with one bit per rule, bit N is unset when the condition for rule
// with index N is guaranteed to evaluate to false.
YR_BITMASK* rule_evaluate_condition_flags;

// profiling_info is a pointer to an array of YR_PROFILING_INFO structures,
// one per rule. Entry N has the profiling information for rule with index N.
YR_PROFILING_INFO* profiling_info;
Expand Down
Loading