Friday, July 15, 2011

Binary strstr

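strstr is a Standard C function, but it cannot handle data containing '\0' because it treats '\0' as the end of the string. Here is a modified version that handles binary data by taking explicit lengths. bstrstr conforms to strstr's conventions and returns char *.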

bstrstr Select all

/*
 * bstrstr - find the first occurrence of one byte sequence inside another.
 *
 * s1, l1: haystack and its length in bytes.
 * s2, l2: needle and its length in bytes.
 * Both are length-delimited; '\0' bytes are ordinary data, not terminators.
 *
 * Returns a pointer to the first position in s1 where the l2 bytes of s2
 * occur entirely within s1[0..l1-1], or NULL when there is no such position.
 * An empty needle (l2 == 0) matches at s1, even when the haystack is empty
 * too, which is what strstr does for empty strings.
 */
char *(bstrstr)(const char *s1, size_t l1, const char *s2, size_t l2) {
    const char *last;   /* last position at which a full match still fits */
    const char *p;

    /* handle special cases */
    if (l2 == 0)
        return ((char *)s1);
    if (l2 > l1)
        return (NULL);

    /* Limiting candidate starts to s1..last keeps every p[i] read below
       inside the haystack, so no byte past s1[l1-1] is ever examined. */
    last = s1 + (l1 - l2);
    for (p = s1; p <= last; ++p) {
        size_t i;

        /* match first byte */
        if (*p != *s2)
            continue;

        /* match rest of needle */
        for (i = 1; i < l2 && p[i] == s2[i]; ++i)
            ;
        if (i == l2)
            return ((char *)p);
    }
    return (NULL);
}


It needs a bstrchr function as well.
bstrchr Select all

/*
 * bstrchr - find the first occurrence of a byte in a length-delimited buffer.
 *
 * s: start of the buffer; '\0' bytes are ordinary data, not terminators.
 * c: byte to look for; it is converted to char before comparing.
 * l: number of bytes of s that may be examined.
 *
 * Returns a pointer to the first matching byte within s[0..l-1], or NULL
 * when l is 0 or no byte in that range matches.
 */
char *(bstrchr) (const char *s, int c, size_t l) {
    const char ch = (char)c;

    /* The remaining count is tested before every read, so s[l] is never
       touched and a match lying just past the buffer is never reported. */
    for (; l != 0; ++s, --l) {
        if (*s == ch)
            return ((char *)s);
    }
    return (NULL);
}


Here is the test program:
bstrstr_test.c Select all

/* bstrstr test */
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/*
 * bstrchr - find the first occurrence of a byte in a length-delimited buffer.
 *
 * s: start of the buffer; '\0' bytes are ordinary data, not terminators.
 * c: byte to look for; it is converted to char before comparing.
 * l: number of bytes of s that may be examined.
 *
 * Returns a pointer to the first matching byte within s[0..l-1], or NULL
 * when l is 0 or no byte in that range matches.
 */
char *(bstrchr) (const char *s, int c, size_t l) {
    const char ch = (char)c;

    /* The remaining count is tested before every read, so s[l] is never
       touched and a match lying just past the buffer is never reported. */
    for (; l != 0; ++s, --l) {
        if (*s == ch)
            return ((char *)s);
    }
    return (NULL);
}

/*
 * bstrstr - find the first occurrence of one byte sequence inside another.
 *
 * s1, l1: haystack and its length in bytes.
 * s2, l2: needle and its length in bytes.
 * Both are length-delimited; '\0' bytes are ordinary data, not terminators.
 *
 * Returns a pointer to the first position in s1 where the l2 bytes of s2
 * occur entirely within s1[0..l1-1], or NULL when there is no such position.
 * An empty needle (l2 == 0) matches at s1, even when the haystack is empty
 * too, which is what strstr does for empty strings.
 */
char *(bstrstr)(const char *s1, size_t l1, const char *s2, size_t l2) {
    const char *last;   /* last position at which a full match still fits */
    const char *p;

    /* handle special cases */
    if (l2 == 0)
        return ((char *)s1);
    if (l2 > l1)
        return (NULL);

    /* Limiting candidate starts to s1..last keeps every p[i] read below
       inside the haystack, so no byte past s1[l1-1] is ever examined. */
    last = s1 + (l1 - l2);
    for (p = s1; p <= last; ++p) {
        size_t i;

        /* match first byte */
        if (*p != *s2)
            continue;

        /* match rest of needle */
        for (i = 1; i < l2 && p[i] == s2[i]; ++i)
            ;
        if (i == l2)
            return ((char *)p);
    }
    return (NULL);
}

/*
 * printbstr - write l bytes of s to stdout on a single line.
 *
 * Bytes for which isprint() is true are written unchanged; every other
 * byte (including '\0') is written as '.'. A newline follows the last byte.
 */
void printbstr(const char *s, size_t l) {
    size_t i;

    for (i = 0; i < l; ++i) {
        /* <ctype.h> functions require an argument representable as
           unsigned char (or EOF); a negative plain char is undefined. */
        const unsigned char byte = (unsigned char)s[i];

        putchar(isprint(byte) ? byte : '.');
    }
    putchar('\n');
}

/*
 * test - run a single bstrstr search and report it on stdout.
 *
 * Prints a blank line, the haystack (s1, l1), the needle (s2, l2) after
 * "locate ", and then either "not found" or "result " followed by the
 * haystack bytes from the match position to the end of the haystack.
 */
void test(const char *s1, size_t l1, const char *s2, size_t l2) {
    const char *hit;

    fputs("\n", stdout);
    printbstr(s1, l1);
    fputs("locate ", stdout);
    printbstr(s2, l2);

    hit = bstrstr(s1, l1, s2, l2);
    if (hit == NULL) {
        fputs("not found\n", stdout);
        return;
    }

    /* the tail length is the haystack length minus the match offset */
    fputs("result ", stdout);
    printbstr(hit, l1 - (size_t)(hit - s1));
}

/* str_or_null - return p, or the text "(null)" when p is NULL, so the
   result can always be passed to printf's %s. */
static const char *str_or_null(const char *p) {
    return p != NULL ? p : "(null)";
}

/*
 * main - exercise bstrchr and bstrstr on a buffer with embedded '\0' bytes.
 *
 * The bstrchr results are printed with %s, so each line shows the bytes
 * from the match up to the next '\0' in the buffer; a failed lookup is
 * shown as "(null)" instead of handing a NULL pointer to printf.
 */
int main(void) {
    /* An array rather than a pointer, so the length can be taken from the
       literal itself; the implicit terminating '\0' is not counted. */
    static const char s1[] = "I\0am a\0manz";
    const size_t l1 = sizeof s1 - 1;

    printf("bstrstr test\n------\n");
    printbstr(s1, l1);
    printf("locate \\%c:%s\n", '0', str_or_null(bstrchr(s1, '\0', l1)));
    printf("locate %c:%s\n", 'I', str_or_null(bstrchr(s1, 'I', l1)));
    printf("locate %c:%s\n", 'a', str_or_null(bstrchr(s1, 'a', l1)));
    printf("locate %c:%s\n", 'n', str_or_null(bstrchr(s1, 'n', l1)));
    printf("locate %c:%s\n", 'i', str_or_null(bstrchr(s1, 'i', l1)));
    printf("locate %c:%s\n", 'z', str_or_null(bstrchr(s1, 'z', l1)));

    test(s1, l1, "a\0m", 3);
    test(s1, l1, "z", 1);
    test(s1, l1, "I", 1);
    test(s1, l1, "am a", 4);
    test(s1, l1, "anz", 3);
    test(s1, l1, "ax", 2);
    test(s1, l1, "\0x", 2);
    test(s1, l1, " x", 2);
    test(s1, l1, "\0a", 2);
    test(s1, l1, "a\0m", 3);
    test(s1, l1, "x", 1);
    test(s1, l1, "z", 1);

    return 0;
}

3 comments:

Anonymous said...

Thanks for the bstrstr snippet and for testing it as well. Hard to believe it's not part of libc.

Anonymous said...

Thanks for the bstrstr snippet and for testing it as well. Hard to believe it's not part of libc.

Anonymous said...

bstrchr is part of libc, under the name memchr. I'd recommend using that instead.

bstrstr is also present in GNU libc (though not standardized - I agree it should be), under the name memmem. I'd recommend using that name.

Finally, I'd expect a zero-length needle to be present in a zero-length haystack.