Skip to content

String matching module

Wang Renxin edited this page May 31, 2022 · 11 revisions

This is an implementation of a string matching module.

Add implementation functions. A strict matching algorithm as follow:

static int _find(struct mb_interpreter_t* s, void** l) {
	int result = MB_FUNC_OK;
	char* haystack = 0;
	char* needle = 0;
	int hlen = 0, nlen = 0, found = 0;
	int i = 0, j = 0;
	mb_value_t arg;

	mb_assert(s && l);

	mb_check(mb_attempt_open_bracket(s, l));

	mb_check(mb_pop_string(s, l, &haystack));
	mb_check(mb_pop_string(s, l, &needle));
	if(mb_has_arg(s, l)) {
		mb_check(mb_pop_int(s, l, &i));
	}

	mb_check(mb_attempt_close_bracket(s, l));

	found = -1;
	hlen = (int)strlen(haystack);
	nlen = (int)strlen(needle);
	for( ; i < hlen; i++) {
		for(j = 0; j < nlen; j++) {
			if(haystack[i + j] != needle[j])
				break;

			if(j == nlen - 1) {
				found = i;

				goto _exit;
			}
		}
	}
 
_exit:
	arg.type = MB_DT_INT;
	arg.value.integer = found;

	mb_check(mb_push_int(s, l, arg.value.integer));

	return result;
}

And a wildcard matching algorithm:

static int _wildcard_find(struct mb_interpreter_t* s, void** l) {
	int result = MB_FUNC_OK;
	char* tame_text = 0;
	char* wild_text = 0;
	char* tame_bookmark = 0;
	char* wild_bookmark = 0;
	mb_value_t arg;

	mb_assert(s && l);

	mb_check(mb_attempt_open_bracket(s, l));

	mb_check(mb_pop_string(s, l, &tame_text));
	mb_check(mb_pop_string(s, l, &wild_text));

	mb_check(mb_attempt_close_bracket(s, l));
 
	arg.type = MB_DT_INT;
	do {
		if(*wild_text == '*') {
			while(*(++wild_text) == '*') {
			}
 
			if(!*wild_text) {
				arg.value.integer = true;

				goto _exit;
			}
 
			if(*wild_text != '?') {
				while(*tame_text != *wild_text) {
					if(!(*(++tame_text))) {
						arg.value.integer = false;

						goto _exit;
					}
				}
			}
 
			wild_bookmark = wild_text;
			tame_bookmark = tame_text;
		} else if(*tame_text != *wild_text && *wild_text != '?') {
			if(wild_bookmark) {
				if(wild_text != wild_bookmark) {
					wild_text = wild_bookmark;
 
					if(*tame_text != *wild_text) {
						tame_text = ++tame_bookmark;

						continue;
					} else {
						wild_text++;
					}
				}
 
				if(*tame_text) {
					tame_text++;

					continue;
				}
			}
 
			arg.value.integer = false;

			goto _exit;
		}
 
		tame_text++;
		wild_text++;
 
		if(!*tame_text) {
			while(*wild_text == '*')
				wild_text++;
 
			if(!*wild_text) {
				arg.value.integer = true;

				goto _exit;
			}
 
			arg.value.integer = false;

			goto _exit;
		}
	} while(1);

_exit:
	mb_check(mb_push_int(s, l, arg.value.integer));

	return result;
}

Register them:

mb_register_func(bas, "FIND", _find);
mb_register_func(bas, "WILDCARD_FIND", _wildcard_find);

The FIND function accepts arguments as "(text, pattern, [begin index])". The WILDCARD_FIND function accepts "(text, pattern)", and the pattern allows both ? for single character placeholder and * for none or multiple character placeholder. See the BASIC usage as follow:

print find("aabbccddaabb", "aabb");             ' Search from the beginning, result 0
print find("aabbccddaabb", "aabb", 1);          ' Search from index 1, result 8
print find("aabbccddaabb", "bbaa");             ' Result -1
print wildcard_find("aabbccddaabb", "?a?b?c*"); ' Result 1
print wildcard_find("aabbccddaabb", "a*b*a*b"); ' Result 1
print wildcard_find("aabbccddaabb", "abcd");    ' Result 0

References

Clone this wiki locally