NO

Author Topic: Regex to DFA  (Read 6500 times)

Offline cosh

  • Member
  • *
  • Posts: 33
Regex to DFA
« on: October 16, 2023, 05:46:49 PM »
Hi, there
Recently I'm working on compilers' theory and automatons. I am reading the famous book Compilers Principles, Techniques and Tools (Second Edition) written by Alfred V. Aho, Monica S. Lam, Ravi Sethi and Jeffrey D Ullman.

My goal is to write my own regular expression library. Through this library, we could convert regular expressions to DFAs and use DFA to recognize wide character strings. And I have achieved some algorithms about turning a regular expression into syntax tree and compute nullable, firstpos, lastpos and followpos table.

Here is my code bellow:

Code: [Select]
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <wchar.h>
#include <limits.h>
#include "StoneValley/src/svstring.h"
#include "StoneValley/src/svstack.h"
#include "StoneValley/src/svqueue.h"
#include "StoneValley/src/svtree.h"
#include "StoneValley/src/svset.h"

#define DEBUG 1

#define TREE_NODE_SPACE_COUNT 10

typedef enum en_Terminator
{
T_LeftBracket = -1,
T_Jumpover,
T_Character,
T_Selection,
T_Concatenate,
T_Closure,
T_RightBracket,
T_Error
} TERMINATOR;

typedef struct st_Lexicon
{
TERMINATOR type;
wchar_t    ch;
BOOL       nullable;
P_SET_T    firstpos;
P_SET_T    lastpos;
} LEXICON, * P_LEXICON;

typedef struct st_DStates
{
P_SET_T pset;
size_t  mark;
size_t  label;
} DSTATES, * P_DSTATES;

typedef struct st_LVFNDTBL
{
wchar_t ch;
size_t  i;
} LVFNDTBL, * P_LVFNDTBL;

STACK_L stkOperand;
STACK_L stkOperator;

LEXICON Splitter(FILE * fp, BOOL * pbt)
{
LEXICON lex = { 0 };

lex.ch = fgetwc(fp);

if (L'\\' == lex.ch)
{
*pbt = !*pbt;
if (FALSE == *pbt)
lex.type = T_Character;
else
lex.type = T_Jumpover;
}
else
{
if (*pbt)
{
switch (lex.ch)
{
case L'e':
lex.ch = L'\0';
lex.type = T_Character;
*pbt = FALSE;
break;
case L'n':
lex.ch = L'\n';
lex.type = T_Character;
*pbt = FALSE;
break;
case L't':
lex.ch = L'\t';
lex.type = T_Character;
*pbt = FALSE;
break;
case L'a':
lex.ch = L'\a';
lex.type = T_Character;
*pbt = FALSE;
break;
case L'r':
lex.ch = L'\r';
lex.type = T_Character;
*pbt = FALSE;
break;
case L'v':
lex.ch = L'\v';
lex.type = T_Character;
*pbt = FALSE;
break;
case L'f':
lex.ch = L'\f';
lex.type = T_Character;
*pbt = FALSE;
break;
case L'b':
lex.ch = L'\b';
lex.type = T_Character;
*pbt = FALSE;
break;
case L'.':
case L'|':
case L'*':
case L'(':
case L')':
lex.type = T_Character;
*pbt = FALSE;
break;
default:
lex.type = T_Character;
*pbt = FALSE;
}
}
else /* if FALSE == bt. */
{
switch (lex.ch)
{
case L'.':
lex.type = T_Concatenate;
break;
case L'|':
lex.type = T_Selection;
break;
case L'*':
lex.type = T_Closure;
break;
case L'(':
lex.type = T_LeftBracket;
break;
case L')':
lex.type = T_RightBracket;
break;
default:
lex.type = T_Character;
}
}
}
return lex;
}

int cbftvsPrintSet(void * pitem, size_t param)
{
wprintf(L"%ld, ", *(size_t *)(P2P_TNODE_BY(pitem)->pdata));
return CBF_CONTINUE;
}

void PrintLexicon(LEXICON lex)
{
P_SET_T p, q;
p = lex.firstpos;
q = lex.lastpos;
switch (lex.type)
{
case T_Character:
if (WEOF == lex.ch)
wprintf(L"CHAR: \'(#)\'  ");
else if (L'\0' == lex.ch)
wprintf(L"CHAR: \'\\e\' ");
else
wprintf(L"CHAR: \'%c\' ", lex.ch);
break;
case T_Selection:
wprintf(L"| ");
break;
case T_Closure:
wprintf(L"* ");
break;
case T_LeftBracket:
wprintf(L"( ");
break;
case T_RightBracket:
wprintf(L") ");
break;
case T_Concatenate:
wprintf(L". ");
break;
case T_Jumpover:
wprintf(L"Jump ");
break;
}
wprintf(L"nulabl:%s {", lex.nullable ? L"TRUE" : L"FALSE");
setTraverseT(p, cbftvsPrintSet, 0, ETM_INORDER);
wprintf(L"} {");
setTraverseT(q, cbftvsPrintSet, 0, ETM_INORDER);
wprintf(L"}\n");
}

void PrintSyntaxTree(P_TNODE_BY pnode, size_t space)
{
size_t i;
if (NULL == pnode)
return;

space += TREE_NODE_SPACE_COUNT;

// Process right child first
PrintSyntaxTree(pnode->ppnode[RIGHT], space);

// Print current node after space
// count
printf("\n");
for (i = TREE_NODE_SPACE_COUNT; i < space; ++i)
printf(" ");

PrintLexicon(*(P_LEXICON)((P_TNODE_BY)pnode)->pdata);
PrintSyntaxTree(pnode->ppnode[LEFT], space);
}

extern int _grpCBFCompareInteger(const void * px, const void * py);

int cbftvsComputeNullableAndPos(void * pitem, size_t param)
{
P_TNODE_BY pnode = P2P_TNODE_BY(pitem);

if (NULL != pnode->ppnode[LEFT]) /* pnode is not a leaf node. */
{
switch (((P_LEXICON)pnode->pdata)->type)
{
case T_Selection:
/* Nullable. */
((P_LEXICON)pnode->pdata)->nullable =
((P_LEXICON)pnode->ppnode[LEFT]->pdata)->nullable ||
((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->nullable;

/* Firstpos. */
((P_LEXICON)pnode->pdata)->firstpos =
setCreateUnionT
(
((P_LEXICON)pnode->ppnode[LEFT]->pdata)->firstpos,
((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->firstpos,
sizeof(size_t), _grpCBFCompareInteger
);

/* Lastpos. */
((P_LEXICON)pnode->pdata)->lastpos =
setCreateUnionT
(
((P_LEXICON)pnode->ppnode[LEFT]->pdata)->lastpos,
((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->lastpos,
sizeof(size_t), _grpCBFCompareInteger
);
break;
case T_Concatenate:
/* Nullable. */
((P_LEXICON)pnode->pdata)->nullable =
((P_LEXICON)pnode->ppnode[LEFT]->pdata)->nullable &&
((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->nullable;

/* Firstpos. */
if (((P_LEXICON)pnode->ppnode[LEFT]->pdata)->nullable)
((P_LEXICON)pnode->pdata)->firstpos =
setCreateUnionT
(
((P_LEXICON)pnode->ppnode[LEFT]->pdata)->firstpos,
((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->firstpos,
sizeof(size_t), _grpCBFCompareInteger
);
else
((P_LEXICON)pnode->pdata)->firstpos =
NULL != ((P_LEXICON)pnode->ppnode[LEFT]->pdata)->firstpos ?
setCopyT(((P_LEXICON)pnode->ppnode[LEFT]->pdata)->firstpos, sizeof(size_t)) :
NULL;

/* Lastpos. */
if (((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->nullable)
((P_LEXICON)pnode->pdata)->lastpos =
setCreateUnionT
(
((P_LEXICON)pnode->ppnode[LEFT]->pdata)->lastpos,
((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->lastpos,
sizeof(size_t), _grpCBFCompareInteger
);
else
((P_LEXICON)pnode->pdata)->lastpos =
NULL != ((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->lastpos ?
setCopyT(((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->lastpos, sizeof(size_t)) :
NULL;
break;
case T_Closure:
/* Nullable. */
((P_LEXICON)pnode->pdata)->nullable = TRUE;

/* Firstpos. */
((P_LEXICON)pnode->pdata)->firstpos =
NULL != ((P_LEXICON)pnode->ppnode[LEFT]->pdata)->firstpos ?
setCopyT(((P_LEXICON)pnode->ppnode[LEFT]->pdata)->firstpos, sizeof(size_t)) :
NULL;

/* Lastpos. */
((P_LEXICON)pnode->pdata)->lastpos =
NULL != ((P_LEXICON)pnode->ppnode[LEFT]->pdata)->lastpos ?
setCopyT(((P_LEXICON)pnode->ppnode[LEFT]->pdata)->lastpos, sizeof(size_t)) :
NULL;
break;
}
}
return CBF_CONTINUE;
}

int cbftvsCleanStruct(void * pitem, size_t param)
{
P_LEXICON plex = (P_LEXICON)(P2P_TNODE_BY(pitem)->pdata);

if (NULL != plex->firstpos)
setDeleteT(plex->firstpos);
if (NULL != plex->lastpos)
setDeleteT(plex->lastpos);

return CBF_CONTINUE;
}

void DestroySyntaxTree(P_TNODE_BY pnode)
{
treTraverseBYPost(pnode, cbftvsCleanStruct, 0);
treFreeBY(&pnode);
}

P_TNODE_BY Parse(FILE * fp, size_t * pleaves)
{
BOOL bt = FALSE;
size_t posCtr = 1;
LEXICON lex = { 0 }, tl = { 0 }, ttl = { 0 };
P_TNODE_BY pnode, pnode1, pnode2, pnode3;
size_t i = 1, j;

stkInitL(&stkOperator);
stkInitL(&stkOperand);

do
{
i = 1;
tl = lex;
lex = Splitter(fp, &bt);

if (T_Jumpover == lex.type)
lex = tl;
else
{
/* We need to insert concatenation in the following circumstances:
* a & b
* a & (
* ) & a
* ) & (
* * & a
* * & (
*/
switch (tl.type)
{
case T_Character:
if (T_Character == lex.type || T_LeftBracket == lex.type)
i = 2;
break;
case T_RightBracket:
if (T_Character == lex.type || T_LeftBracket == lex.type)
i = 2;
break;
case T_Closure:
if (T_Character == lex.type || T_LeftBracket == lex.type)
i = 2;
break;
}

for (j = 0; j < i; ++j)
{
if (2 == i && 0 == j)
{
ttl = lex;
lex.type = T_Concatenate;
}
else if (2 == i && 1 == j)
lex = ttl;
#if DEBUG
PrintLexicon(lex);
#endif
switch (lex.type)
{
case T_Character:
if (L'\0' != lex.ch)
{
lex.firstpos = setCreateT();
setInsertT(lex.firstpos, &posCtr, sizeof(size_t), _grpCBFCompareInteger);
lex.lastpos = setCreateT();
setInsertT(lex.lastpos, &posCtr, sizeof(size_t), _grpCBFCompareInteger);
++posCtr;
}
else
lex.firstpos = NULL;

lex.nullable = L'\0' == lex.ch ? TRUE : FALSE;

pnode = strCreateNodeD(&lex, sizeof(LEXICON));
stkPushL(&stkOperand, &pnode, sizeof(P_TNODE_BY));
break;
case T_Selection:
case T_Concatenate:
case T_Closure:
if (!stkIsEmptyL(&stkOperator))
{
stkPeepL(&pnode1, sizeof(P_TNODE_BY), &stkOperator);
if (((P_LEXICON)pnode1->pdata)->type >= lex.type)
{
stkPopL(&pnode1, sizeof(P_TNODE_BY), &stkOperator);
switch (((P_LEXICON)pnode1->pdata)->type)
{
case T_Selection:
case T_Concatenate:
stkPopL(&pnode2, sizeof(P_TNODE_BY), &stkOperand);
stkPopL(&pnode3, sizeof(P_TNODE_BY), &stkOperand);

pnode1->ppnode[RIGHT] = pnode2;
pnode1->ppnode[LEFT] = pnode3;
break;
case T_Closure:
stkPopL(&pnode2, sizeof(P_TNODE_BY), &stkOperand);
pnode1->ppnode[LEFT] = pnode2;

break;
}
stkPushL(&stkOperand, &pnode1, sizeof(P_TNODE_BY));
}
}
lex.lastpos = lex.firstpos = NULL;
pnode1 = strCreateNodeD(&lex, sizeof(LEXICON));
stkPushL(&stkOperator, &pnode1, sizeof(P_TNODE_BY));
break;
case T_LeftBracket:
pnode = strCreateNodeD(&lex, sizeof(LEXICON));
stkPushL(&stkOperator, &pnode, sizeof(P_TNODE_BY));
break;
case T_RightBracket:
for (;;)
{
if (!stkIsEmptyL(&stkOperator))
{
stkPeepL(&pnode1, sizeof(P_TNODE_BY), &stkOperator);
if (T_LeftBracket != ((P_LEXICON)pnode1->pdata)->type)
{
stkPopL(&pnode1, sizeof(P_TNODE_BY), &stkOperator);
switch (((P_LEXICON)pnode1->pdata)->type)
{
case T_Selection:
case T_Concatenate:
stkPopL(&pnode2, sizeof(P_TNODE_BY), &stkOperand);
stkPopL(&pnode3, sizeof(P_TNODE_BY), &stkOperand);

pnode1->ppnode[RIGHT] = pnode2;
pnode1->ppnode[LEFT] = pnode3;
break;
case T_Closure:
stkPopL(&pnode2, sizeof(P_TNODE_BY), &stkOperand);
pnode1->ppnode[LEFT] = pnode2;
break;
}
stkPushL(&stkOperand, &pnode1, sizeof(P_TNODE_BY));
}
else
break;
}
else
break;
}
stkPopL(&pnode1, sizeof(P_TNODE_BY), &stkOperator);
strDeleteNodeD(pnode1);
break;
}
}
}
}
while (WEOF != lex.ch);

for (;;)
{
if (!stkIsEmptyL(&stkOperator))
{
stkPopL(&pnode1, sizeof(P_TNODE_BY), &stkOperator);
switch (((P_LEXICON)pnode1->pdata)->type)
{
case T_Selection:
case T_Concatenate:
stkPopL(&pnode2, sizeof(P_TNODE_BY), &stkOperand);
stkPopL(&pnode3, sizeof(P_TNODE_BY), &stkOperand);

pnode1->ppnode[RIGHT] = pnode2;
pnode1->ppnode[LEFT] = pnode3;
break;
case T_Closure:
stkPopL(&pnode2, sizeof(P_TNODE_BY), &stkOperand);
pnode1->ppnode[LEFT] = pnode2;
break;
}
stkPushL(&stkOperand, &pnode1, sizeof(P_TNODE_BY));
}
else
break;
}

/* Return a syntax tree. */
if (1 == strLevelLinkedListSC(stkOperand))
stkPeepL(&pnode, sizeof(P_TNODE_BY), &stkOperand);
else
{
#if DEBUG
printf("Error! Invalid regular expression.\n");
#endif
while (!stkIsEmptyL(&stkOperand))
{
stkPopL(&pnode, sizeof(P_TNODE_BY), &stkOperand);
DestroySyntaxTree(pnode);
}
*pleaves = posCtr - 1;
pnode = NULL;
}

stkFreeL(&stkOperand);
stkFreeL(&stkOperator);

*pleaves = posCtr - 1;
return pnode;
}

int cbftvsStarTravFirstpos(void * pitem, size_t param)
{
P_ARRAY_Z parr = (P_ARRAY_Z)0[(size_t *)param];

P_SET_T * ppset = (P_SET_T *)strLocateItemArrayZ(parr, sizeof(P_SET_T), 1[(size_t *)param] - 1);

if (NULL == *ppset)
*ppset = setCreateT();

setInsertT(*ppset, (size_t *)P2P_TNODE_BY(pitem)->pdata, sizeof(size_t), _grpCBFCompareInteger);

return CBF_CONTINUE;
}

int cbftvsStarTravLastpos(void * pitem, size_t param)
{
size_t a[2];

P_SET_T firstpos = (P_SET_T)1[(size_t *)param];

a[0] = 0[(size_t *)param];
a[1] = *(size_t *)P2P_TNODE_BY(pitem)->pdata;

setTraverseT(firstpos, cbftvsStarTravFirstpos, (size_t)a, ETM_INORDER);
return CBF_CONTINUE;
}

int cbftvsComputeFollowpos(void * pitem, size_t param)
{
size_t a[2];
P_TNODE_BY pnode = P2P_TNODE_BY(pitem);

a[0] = param;

if (T_Closure == ((P_LEXICON)pnode->pdata)->type)
{
a[1] = (size_t)((P_LEXICON)pnode->pdata)->firstpos;
setTraverseT(((P_LEXICON)pnode->pdata)->lastpos, cbftvsStarTravLastpos, (size_t)a, ETM_INORDER);
}
else if (T_Concatenate == ((P_LEXICON)pnode->pdata)->type)
{
a[1] = (size_t)((P_LEXICON)pnode->ppnode[RIGHT]->pdata)->firstpos;
setTraverseT(((P_LEXICON)pnode->ppnode[LEFT]->pdata)->lastpos, cbftvsStarTravLastpos, (size_t)a, ETM_INORDER);
}

return CBF_CONTINUE;
}

P_ARRAY_Z CreateFollowPosArray(P_TNODE_BY pnode, size_t inodes)
{
P_SET_T nil = NULL;

P_ARRAY_Z parr = strCreateArrayZ(inodes, sizeof(P_SET_T));

strSetArrayZ(parr, &nil, sizeof(P_SET_T));

treTraverseBYPost(pnode, cbftvsComputeFollowpos, (size_t)parr);

return parr;
}

int cbftvsPrintFollowposArray(void * pitem, size_t param)
{
P_SET_T pset = *(P_SET_T *)pitem;
wprintf(L"%ld\t{", ++0[(size_t *)param]);
setTraverseT(pset, cbftvsPrintSet, 0, ETM_INORDER);
wprintf(L"}\n");
return CBF_CONTINUE;
}

int cbftvsClearSetT(void * pitem, size_t param)
{
P_SET_T pset = *(P_SET_T *)pitem;
if (NULL != pset)
setDeleteT(pset);
return CBF_CONTINUE;
}

void DestroyFollowposArray(P_ARRAY_Z parr)
{
strTraverseArrayZ(parr, sizeof(P_SET_T), cbftvsClearSetT, 0, FALSE);
strDeleteArrayZ(parr);
}

int cbftvsConstructLeafNodeTable(void * pitem, size_t param)
{
P_TNODE_BY pnode = P2P_TNODE_BY(pitem);

if
(
NULL == pnode->ppnode[LEFT] &&
NULL == pnode->ppnode[RIGHT] &&
'\0' != ((P_LEXICON)pnode->pdata)->ch
&& WEOF != ((P_LEXICON)pnode->pdata)->ch
)
{
(*(P_LVFNDTBL *)param)->ch = ((P_LEXICON)pnode->pdata)->ch;
(*(P_LVFNDTBL *)param)->i = *(size_t *)(*((P_SET_T)((P_LEXICON)pnode->pdata)->firstpos))->knot.pdata;
++*(P_LVFNDTBL *)param;
}

return CBF_CONTINUE;
}

P_ARRAY_Z ConstructLeafNodeTable(P_TNODE_BY pnode, size_t inodes)
{
P_ARRAY_Z parr = strCreateArrayZ(inodes - 1, sizeof(LVFNDTBL));
P_LVFNDTBL pl = (P_LVFNDTBL)strLocateItemArrayZ(parr, sizeof(LVFNDTBL), 0);

treTraverseBYPost(pnode, cbftvsConstructLeafNodeTable, (size_t)&pl);

return parr;
}

int cbftvsPrintLeafNodeTable(void * pitem, size_t param)
{
wprintf(L"%c\t%ld\n", ((P_LVFNDTBL)pitem)->ch, ((P_LVFNDTBL)pitem)->i);

return CBF_CONTINUE;
}

int main(int argc, char ** argv)
{
size_t i, j = 0;
P_ARRAY_Z parrfollowpos, parrlvfndtbl;
P_TNODE_BY pnode;
FILE * fp = fopen("C:\\test.txt", "r");
//FILE * fp = stdin;
pnode = Parse(fp, &i);

treTraverseBYPost(pnode, cbftvsComputeNullableAndPos, 0);
PrintSyntaxTree(pnode, 0);

parrfollowpos = CreateFollowPosArray(pnode, i);
strTraverseArrayZ(parrfollowpos, sizeof(P_SET_T), cbftvsPrintFollowposArray, (size_t)&j, FALSE);

printf("\n");

parrlvfndtbl = ConstructLeafNodeTable(pnode, i);
strTraverseArrayZ(parrlvfndtbl, sizeof(LVFNDTBL), cbftvsPrintLeafNodeTable, 0, FALSE);

DestroySyntaxTree(pnode);
DestroyFollowposArray(parrfollowpos);
strDeleteArrayZ(parrlvfndtbl);

fclose(fp);

return 0;
}

Does anyone have the interest to join me and developing this small project?

If you do, please leave the comments bellow or send me an Email.

Regards.
« Last Edit: October 16, 2023, 05:51:08 PM by cosh »

Offline frankie

  • Global Moderator
  • Member
  • *****
  • Posts: 2113
Re: Regex to DFA
« Reply #1 on: October 16, 2023, 09:11:05 PM »
Dear cosh this isn't in my direct interests, and in the last years I don't have much time (I am supposed to update my fSDK but don't have time to do it), but I would strongly encourage new projects and collaboration.
I couldn't get directly involved, but available to help in case you think I could...  ;)
"It is better to be hated for what you are than to be loved for what you are not." - Andre Gide

Offline Pelle

  • Administrator
  • Member
  • *****
  • Posts: 2266
    • http://www.smorgasbordet.com
Re: Regex to DFA
« Reply #2 on: October 16, 2023, 09:56:10 PM »
I can only offer this link, but I have found the info to be very useful:
https://swtch.com/~rsc/regexp/regexp1.html
/Pelle

Offline cosh

  • Member
  • *
  • Posts: 33
Re: Regex to DFA
« Reply #3 on: October 17, 2023, 11:41:23 AM »
Dear cosh this isn't in my direct interests, and in the last years I don't have much time (I am supposed to update my fSDK but don't have time to do it), but I would strongly encourage new projects and collaboration.
I couldn't get directly involved, but available to help in case you think I could...  ;)
Hi Frankie,
Thank you for your encouragement.

And you encouragement for me is precious.

If you need to convert regular expression to DFA one day, please check the code in this post for calculating nullable, firstpos, lastpos and followpos. I hope this code would give you some ideas.

Offline cosh

  • Member
  • *
  • Posts: 33
Re: Regex to DFA
« Reply #4 on: October 17, 2023, 11:50:24 AM »
I can only offer this link, but I have found the info to be very useful:
https://swtch.com/~rsc/regexp/regexp1.html

Thank you Pelle. I think you are an expert to building compilers. And your tip is very important to me.

I checked the link you've mentioned above and I really learned some thing about building automatons.

I've also read a book named The Art of Compiler Design Theory And Practices by Tomas Pittman and James Peters.

I reviewed the method for building NFAs and the method in your link.

They both told me to convert regular expression into NFAs, and then convert NFAs to DFAs by subset constructing algorithm.

However I found a way to convert regular expressions directly into DFAs. You may review the code I've mentioned above in this post.

In a word, Thank you for your link and kind guide. :)

Offline MrBcx

  • Global Moderator
  • Member
  • *****
  • Posts: 189
    • Bcx Basic to C/C++ Translator
Re: Regex to DFA
« Reply #5 on: October 17, 2023, 06:36:21 PM »

For those of us who are not / were not Computer Science majors and are scratching our heads
wondering what a DFA is, start here:  https://cs.uwaterloo.ca/~eblais/cs365/docs/automata/dfa/
Bcx Basic to C/C++ Translator
https://www.BcxBasicCoders.com

Offline cosh

  • Member
  • *
  • Posts: 33
Re: Regex to DFA
« Reply #6 on: October 18, 2023, 05:50:35 AM »

For those of us who are not / were not Computer Science majors and are scratching our heads
wondering what a DFA is, start here:  https://cs.uwaterloo.ca/~eblais/cs365/docs/automata/dfa/

Nice references, thanks!

Offline cosh

  • Member
  • *
  • Posts: 33
Re: Regex to DFA
« Reply #7 on: October 21, 2023, 02:08:35 PM »
I finished this small tool that is used to convert regular expressions to DFAs.

https://github.com/coshcage/test/blob/master/regex2dfa.c
Currently, I used MSVC to compile and test. But I haven't successfully test it by Pelles C.
I will continue to work to let it fit Pelles C.


In this program you may input a regular expression such as "(a|b)abb" into a text file.
This program uses fopen to open regexp file and match strings that you inputted.

« Last Edit: October 22, 2023, 10:33:06 AM by cosh »

Offline cosh

  • Member
  • *
  • Posts: 33
Re: Regex to DFA
« Reply #8 on: October 22, 2023, 11:18:30 AM »
Good news.
I tested regex2dfa through Pelles C, MSVC, gcc and clang thoroughly.
This program now can be used.

Next I will try to combine it to a simple regex library.
 :)

Offline WiiLF23

  • Member
  • *
  • Posts: 89
Re: Regex to DFA
« Reply #9 on: January 06, 2024, 05:30:27 AM »
Do take care on filtering input!

Microsoft Excel has been roasted with nasty vulnerabilities as of recent due to their use of the Perl Regex library. I am unsure of the scale of use in terms of your efforts toward a custom library, but it is worth taking note.

https://www.bleepingcomputer.com/news/security/cisa-warns-of-actively-exploited-bugs-in-chrome-and-excel-parsing-library/

Offline cosh

  • Member
  • *
  • Posts: 33
Re: Regex to DFA
« Reply #10 on: January 14, 2024, 05:35:45 AM »
Do take care on filtering input!

Microsoft Excel has been roasted with nasty vulnerabilities as of recent due to their use of the Perl Regex library. I am unsure of the scale of use in terms of your efforts toward a custom library, but it is worth taking note.

https://www.bleepingcomputer.com/news/security/cisa-warns-of-actively-exploited-bugs-in-chrome-and-excel-parsing-library/

Thanks for sharing this note. Here's my regex to DFA library:https://github.com/coshcage/svregex please check it if you want to have a look at this library.
I also combined this library to my dictionary here: https://forum.pellesc.de/index.php?topic=10705.0
« Last Edit: January 14, 2024, 05:37:18 AM by cosh »