Files
scummvm-cursorfix/engines/glk/adrift/sxglob.cpp
2026-02-02 04:50:13 +01:00

293 lines
9.1 KiB
C++

/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "glk/adrift/scare.h"
#include "glk/adrift/sxprotos.h"
namespace Glk {
namespace Adrift {
/*
* Module notes:
*
* The glob matching functions in this module are derived from an original
* (and somewhat hairy) glob.c posted by Arjan Kenter from the University
* of Twente, NL, in an assortment of minor variations between 1993 and 1997.
* The major modifications are:
*
* o Added checks to ensure that invalid range patterns such as "[a-" or
* "[-" don't cause the loops to walk off the end of the pattern string
* and (usually) result in SIGSEGV.
* o Moved from plain char to unsigned char to avoid signedness problems
* with range comparisons.
* o Skipped the leading '[' in the range checker; the original was treating
* it as a possible first value of 'r'.
* o Moved the range checker while() from the bottom of the loop to the top,
* to avoid problems with invalid ranges.
* o Gave 'l' in the range checker an initial value that ensures that it
* can never match until it's been re-assigned to 'r'.
* o Used a single return value rather than multiple returns in the matcher,
* for better debuggability.
* o Applied some const-correctness, and replaced some pointers by indexing.
* o Added scanf-like special cases, making ']' a valid part of a range if
* first, and '-' if last.
*
* This glob accepts * and ? wild cards, and [] ranges. It does not check
* whether the range string is valid (for example, terminates with ']'), but
* simply returns the best it can under those circumstances.
*
* Example call:
* glob_match ("a*b?c[A-Za-z_0-9]d*", some_string)
*/
/*
* glob_inrange_unsigned()
* glob_match_unsigned()
*
* Match a "[...]" character range, and match general glob wildcards. See
* above for notes on where these functions came from originally.
*/
/*
 * Test ch against the "[...]" range that *pattern points at.
 *
 * On entry *pattern must address the opening '['.  On return *pattern has
 * been advanced to the character that stopped the scan: the closing ']', or
 * the terminating NUL if the range was never closed.  The caller is
 * responsible for stepping over the ']'.
 *
 * A ']' directly after the '[' and a '-' directly before the end of the
 * range are both taken literally.  Returns non-zero if ch is in the range,
 * zero otherwise.
 */
static int glob_inrange_unsigned(const unsigned char **const pattern, unsigned char ch) {
	const unsigned char *const pattern_ = *pattern;
	int in_range = 0;
	/*
	 * l starts above every unsigned char value, so "l <= ch" cannot hold
	 * until l has been assigned from a real range character.
	 */
	unsigned int l = 256, r = 0, index_;

	/* Skip the leading '[' on entry to a range check. */
	index_ = 1;

	/* Special-case a range that has ']' as its first character. */
	if (pattern_[index_] == ']') {
		r = pattern_[index_++];
		in_range |= (ch == r);
	}

	/*
	 * Check at the loop top, rather than the bottom, to avoid problems with
	 * invalid or uncompleted ranges.
	 */
	while (pattern_[index_] && pattern_[index_] != ']') {
		r = pattern_[index_++];
		if (r == '-') {
			/*
			 * A '-' that is the last character of the range, or the last
			 * character of an unterminated pattern, is a literal '-'.
			 * Stop here without consuming the ']' or NUL.
			 */
			if (pattern_[index_] == ']' || !pattern_[index_]) {
				in_range |= (ch == r);
				break;
			}

			/* Otherwise the next character is the upper bound of l-r. */
			r = pattern_[index_++];
			in_range |= (l <= ch && ch <= r);
		} else {
			/* Plain character: match it, and remember it as a lower bound. */
			l = r;
			in_range |= (ch == r);
		}
	}

	/* Update pattern with characters consumed, return result. */
	*pattern += index_;
	return in_range;
}
/*
 * Match the whole of string against pattern, recursing one pattern element
 * at a time.  '*' matches any run of characters (including none), '?'
 * matches exactly one character, and "[...]" is delegated to
 * glob_inrange_unsigned().  Returns non-zero on a match, zero otherwise.
 */
static int glob_match_unsigned(const unsigned char *pattern, const unsigned char *string) {
	int is_match = FALSE;

	if (!*string) {
		/*
		 * String exhausted: only an empty pattern, or one consisting of
		 * nothing but '*'s, can still match.
		 */
		if (*pattern == '*')
			is_match = glob_match_unsigned(pattern + 1, string);
		else
			is_match = !*pattern;
	} else {
		switch (*pattern) {
		case '\0':
			/* Pattern exhausted while string characters remain. */
			is_match = FALSE;
			break;

		case '*':
			/* Try '*' as empty first, then let it absorb one character. */
			if (glob_match_unsigned(pattern + 1, string))
				is_match = TRUE;
			else
				is_match = glob_match_unsigned(pattern, string + 1);
			break;

		case '?':
			is_match = glob_match_unsigned(pattern + 1, string + 1);
			break;

		case '[':
			if (glob_inrange_unsigned(&pattern, *string)) {
				/*
				 * The range check leaves pattern on the closing ']', or on
				 * the NUL of an unterminated range.  Step over the ']' only
				 * if it is there, so we never read past the end of the
				 * pattern, then match the remainder normally.  With the
				 * pattern exhausted this succeeds only if the string is
				 * exhausted too, so "[a" no longer matches "abc".
				 */
				if (*pattern)
					pattern++;
				is_match = glob_match_unsigned(pattern, string + 1);
			}
			break;

		default:
			is_match = *pattern == *string
			           && glob_match_unsigned(pattern + 1, string + 1);
			break;
		}
	}

	return is_match;
}
/*
 * Structures and data for the self test function.  Each entry pairs a glob
 * pattern with a candidate string; a table of entries is terminated by an
 * entry whose pattern is null.
 */
struct sx_test_data_t {
const sc_char *const pattern;  /* Glob pattern handed to glob_match(). */
const sc_char *const string;   /* String to be matched against pattern. */
};
/*
 * Pattern/string pairs for which glob_self_test() expects glob_match() to
 * report a match.  The final rows exercise reversed and unterminated ranges.
 * The table ends with a {nullptr, nullptr} sentinel.
 */
static const sx_test_data_t SHOULD_MATCH[] = {
{"a", "a"}, {"abc", "abc"}, {"", ""},
{"*", ""}, {"*", "abc"}, {"*", "cba"},
{"*c", "c"}, {"*c", "abc"}, {"*c", "cbac"},
{"a*", "a"}, {"a*", "abc"}, {"a*", "abca"},
{"a*c", "ac"}, {"a*c", "abc"}, {"a*c", "abcbcbc"},
{"a**c", "ac"}, {"a**c", "abc"}, {"a**c", "abcbcbc"},
{"*b*", "b"}, {"*b*", "abc"}, {"*b*", "ab"}, {"*b*", "bc"},
{"?", "a"}, {"?", "z"}, {"?", "?"}, {"[?]", "?"},
{"a?", "aa"}, {"a?", "az"}, {"a?", "a?"},
{"?c", "ac"}, {"?c", "zc"}, {"?c", "?c"},
{"[abz]", "a"}, {"[abz]", "b"}, {"[abz]", "z"},
{"[a-c]", "a"}, {"[a-c]", "b"}, {"[a-c]", "c"},
{"[ac]b[ac]", "abc"}, {"[ac]b[ac]", "cba"},
{"[]]", "]"}, {"[]a-c]", "a"}, {"[]a-c]", "b"}, {"[]a-c]", "c"},
{"[?]", "?" }, {"[-]", "-"}, {"[z-]", "z"}, {"[z-]", "-"},
{"[][-]", "]"}, {"[][-]", "["}, {"[][-]", "-"},
{"[a-c-]", "a"}, {"[a-c-]", "b"}, {"[a-c-]", "c"}, {"[a-c-]", "-"},
{"*[a-z]*abc?xyz", "a<star>abcQxyz"}, {"*[a-z]*abc?xyz", "<star>aabcQxyz"},
{"*[a-z]*abc?xyz", "aabcQxyz"}, {"*[a-z]*abc?xyz", "<star>a<star>abcQxyz"},
{"???]", "abc]"}, {"[z-a]", "z"},
{"[a-z", "a"}, {"[a-", "a"}, {"[a", "a"}, {"[[", "["},
{nullptr, nullptr}
};
/*
 * Pattern/string pairs for which glob_self_test() expects glob_match() to
 * report no match.  The table ends with a {nullptr, nullptr} sentinel.
 */
static const sx_test_data_t SHOULD_NOT_MATCH[] = {
{"a", "b"}, {"abc", "abd"}, {"a", ""}, {"", "a"},
{"*c", "a"}, {"*c", "ab"}, {"*c", "abca"},
{"a*", "c"}, {"a*", "cba"}, {"a*", "cbac"},
{"a*c", "ca"}, {"a*c", "cba"}, {"a*c", "cbababa"},
{"a**c", "ca"}, {"a**c", "cba"}, {"a**c", "cbababa"},
{"*b*", ""}, {"*b*", "z"}, {"*b*", "ac"}, {"*b*", "azc"},
{"?", ""}, {"?", "ab"}, {"?", "abc"}, {"[?]", "a"},
{"a?", "ca"}, {"a?", "cz"}, {"a?", "??"},
{"?c", "ab"}, {"?c", "zb"}, {"?c", "??"},
{"[bcy]", "a"}, {"[bcy]", "d"}, {"[bcy]", "z"},
{"[b-d]", "a"}, {"[b-d]", "e"}, {"[b-d]", ""}, {"[b-d]", "bc"},
{"[ac]b[ac]", "aaa"}, {"[ac]b[ac]", "bbb"}, {"[ac]b[ac]", "ccc"},
{"[]]", "["}, {"[]]", "a"}, {"[]a-c]", "z"},
{"[?]", "a" }, {"[-]", "a"}, {"[z-]", "a"},
{"[][-]", "a"}, {"[][-]", "z"},
{"[a-c-]", "z"},
{"*[a-z]*abc?xyz", "A<STAR>abcQxyz"}, {"*[a-z]*abc?xyz", "<STAR>AabcQxyz"},
{"*[a-z]*abc?xyz", "AabcQxyz"}, {"*[a-z]*abc?xyz", "aabcxyz"},
{"[z-a]", "a"}, {"[z-a]", "b"}, {"[", "a"}, {"[[", "a"},
{nullptr, nullptr}
};
/*
* glob_self_test()
*
* Sed quis custodiet ipsos custodes?
*/
static void glob_self_test(void) {
const sx_test_data_t *test;
sc_int errors;
/*
* Run each test case and compare against expected result. To avoid a lot
* of ugly casting, we use the main public glob_match() function.
*/
errors = 0;
for (test = SHOULD_MATCH; test->pattern; test++) {
if (!glob_match(test->pattern, test->string)) {
sx_error("glob_self_test: \"%s\", \"%s\""
" did not match, and should have matched\n",
test->pattern, test->string);
errors++;
}
}
for (test = SHOULD_NOT_MATCH; test->pattern; test++) {
if (glob_match(test->pattern, test->string)) {
sx_error("glob_self_test: \"%s\", \"%s\""
" matched, and should not have matched\n",
test->pattern, test->string);
errors++;
}
}
/*
* Abort if any error. As befits our distrustful nature, we won't even
* trust that sx_fatal() calls abort() (though it should).
*/
if (errors > 0) {
sx_fatal("glob_self_test: %ld self-test error%s found, aborting\n",
errors, (errors == 1) ? "" : "s");
}
}
/*
 * glob_match()
 *
 * Public, plain-char front end to the unsigned char matcher; the pointer
 * casts are confined to this function.  The first call also triggers the
 * one-off self-test.  Both arguments must be non-null.
 */
sc_bool glob_match(const sc_char *pattern, const sc_char *string) {
	static sc_bool self_tested = FALSE;

	assert(pattern && string);

	if (!self_tested) {
		/*
		 * The self-test calls back into glob_match(), so the flag has to
		 * be raised before it runs or the recursion would never end.
		 */
		self_tested = TRUE;
		glob_self_test();
	}

	return glob_match_unsigned((const unsigned char *) pattern,
	                           (const unsigned char *) string) != 0;
}
} // End of namespace Adrift
} // End of namespace Glk