NO

Author Topic: Offline English to English Dictionary  (Read 223 times)

Offline cosh

  • Member
  • *
  • Posts: 11
Offline English to English Dictionary
« on: January 31, 2023, 04:46:19 PM »
Hello,
Please enjoy this offline English to English dictionary that I wrote recently.
I grabbed a raw-text dictionary file from GitHub and altered its format so that it could serve as the dictionary's input.
Then I processed the raw file and used a trie and an AA binary search tree to index all the words.
The result is a console-based dictionary with maximized search speed and low memory usage.

$ ./dict
Dict file: ./dict.txt
36447 words loaded.
? Apple
Searching: "Apple"...
        1316 Apple      n. 1 roundish firm fruit with crisp flesh. 2 tree bearing this. apple of one's eye cherished person or thing. [old english]
? .?
Searching: ".?"...
Type [WORD] or [NUMBER] to search.
        For example ? Apple ? 10536
Type .h to show history.
Type .l[A] to show alphabet.
        For example ? .l Z.
Type .? to show this notice.
? 1
Searching: "1"...
        1 A-    prefix (also an- before a vowel sound) not, without (amoral). [greek]
?

The text above is the console output you get when you use this dictionary.
Below is the main source file that this dictionary consists of.

Code: [Select]
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "StoneValley/src/svset.h"
#include "StoneValley/src/svtree.h"

/* One dictionary entry as stored in the id-ordered set built by main(). */
typedef struct st_WORD
{
ptrdiff_t id; /* Sequence number of the entry; this is the field cbfcmpid compares. */
char name[64]; /* Headword text, NUL-terminated. */
size_t times; /* Number of times this entry has been looked up during the current run. */
long ltip; /* Offset in the dictionary file (ftell/fseek units) where the explanation text is read from. */
} WORD, * P_WORD;

/* Comparison callback that orders dictionary entries by id.
 *
 * px, py: each points either at a WORD or at a bare integer search key.
 *         Only the leading id is read: WORD keeps id as its first member
 *         and main() passes the address of a plain integer when searching.
 *
 * Returns a negative value, zero, or a positive value when the first id is
 * less than, equal to, or greater than the second one.
 *
 * A three-way comparison is used instead of a subtraction, because the
 * ptrdiff_t difference does not always fit in an int: truncating it could
 * flip the sign or report two different ids as equal.
 */
int cbfcmpid(const void * px, const void * py)
{
	const ptrdiff_t lhs = *(const ptrdiff_t *)px;
	const ptrdiff_t rhs = *(const ptrdiff_t *)py;

	return (lhs > rhs) - (lhs < rhs);
}

/* Comparison callback for single characters.
 *
 * px, py: pointers to the two char values to compare.
 *
 * Returns the first character minus the second one, i.e. a negative value,
 * zero, or a positive value.
 */
int cbfcmpchar(const void * px, const void * py)
{
	const char * lhs = px;
	const char * rhs = py;

	return lhs[0] - rhs[0];
}

int cbftvs_history(void * pitem, size_t param)
{
if (((P_WORD)P2P_TNODE_B(pitem)->pdata)->times)
printf("%s\t%lld\n", ((P_WORD)P2P_TNODE_B(pitem)->pdata)->name, ((P_WORD)P2P_TNODE_B(pitem)->pdata)->times);
return CBF_CONTINUE;
}

int cbftvs_alphabet(void * pitem, size_t param)
{
if (toupper(((P_WORD)P2P_TNODE_B(pitem)->pdata)->name[0]) == param)
printf("%s\n", ((P_WORD)P2P_TNODE_B(pitem)->pdata)->name);
return CBF_CONTINUE;
}

/* Scratch buffer used by main() for text read from the dictionary file. */
static char sWord[BUFSIZ * 2] = { 0 };
/* Cursor into sWord. */
static char * p = sWord;

/* Path of the dictionary file that main() opens. */
static char sFileName[BUFSIZ] = { 0 };
/* The line most recently read from stdin at the "? " prompt. */
static char sPattern[BUFSIZ] = { 0 };

int main(int argc, char ** argv)
{
size_t i = 1, j;
WORD w = { 0 };
FILE * fp;
P_SET_T set = setCreateT();
P_TRIE_A trie = treCreateTrieA();
size_t * result = NULL;

strcat(sFileName, ".\\dict.txt");
fp = fopen(sFileName, "rb");

printf("Dict file: %s\n", sFileName);
if (NULL != fp)
{
while (!feof(fp))
{
*p = fgetc(fp);
++p;
if ('#' == *(p - 1))
{
P_BSTNODE pnode = NULL;
*(p - 1) = '\0';
p = sWord;

strcpy(w.name, p);
w.id = i;
w.times = 0;
w.ltip = ftell(fp) + 1;

setInsertT(set, &w, sizeof w, cbfcmpid);
pnode = treBSTFindData_A(*set, &i, cbfcmpid);
treInsertTrieA(trie, p, strlen(p), sizeof(char), (size_t)(pnode->knot.pdata), cbfcmpchar);

++i;
}
if ('\n' == *(p - 1))
{
p = sWord;
}
}
printf("%lld words loaded.\n", i);
do
{
printf("? ");
fgets(sPattern, 100, stdin);
sPattern[strlen(sPattern) - 1] = '\0';
if ('\0' == *sPattern)
break;
printf("Searching: \"%s\"...\n", sPattern);
result = treSearchTrieA(trie, sPattern, strlen(sPattern), sizeof(char), cbfcmpchar);
if (result)
{
printf("\t%lld %s  ", ((P_WORD)*result)->id, ((P_WORD)*result)->name);
++((P_WORD)*result)->times;

/* Redirect to the word on the disk. */
fseek(fp, ((P_WORD)*result)->ltip, SEEK_SET);
/* Read explanations. */
p = sWord;
while ('\n' != (*p = fgetc(fp)))
{
++p;
}
*p = '\0';
p = sWord;
printf("\t%s\n", p);
}
else if ('.' == sPattern[0] && '?' == sPattern[1])
{
printf("Type [WORD] or [NUMBER] to search.\n");
printf("\tFor example ? Apple ? 10536\n");
printf("Type .h to show history.\n");
printf("Type .l[A] to show alphabet.\n");
printf("\tFor example ? .l Z.\n");
printf("Type .? to show this notice.\n");
}
else if ('.' == sPattern[0] && 'h' == sPattern[1])
{
printf("History:\n");
treTraverseBIn(*set, cbftvs_history, 0);
}
else if ('.' == sPattern[0] && 'l' == sPattern[1] && ' ' == sPattern[2])
{
sPattern[3] = toupper(sPattern[3]);
printf("Alphabet:\n");
treTraverseBIn(*set, cbftvs_alphabet, toupper(sPattern[3]));
}
else if (0 != (j = atoi(sPattern)))
{
P_BSTNODE pnode = treBSTFindData_A(*set, &j, cbfcmpid);
if (pnode)
{
printf("\t%lld %s  ", ((P_WORD)(pnode->knot.pdata))->id, ((P_WORD)(pnode->knot.pdata))->name);
++((P_WORD)(pnode->knot.pdata))->times;

/* Redirect to the word on the disk. */
fseek(fp, ((P_WORD)(pnode->knot.pdata))->ltip, SEEK_SET);
/* Read explanations. */
p = sWord;
while ('\n' != (*p = fgetc(fp)))
{
++p;
}
*p = '\0';
p = sWord;
printf("\t%s\n", p);
}
}
else
{
printf("Can not find \"%s\".\n", sPattern);
}
} while ('\0' != sPattern[0]);
fclose(fp);
}
else
printf("Can not open file.\n");

printf("History:\n");
treTraverseBIn(*set, cbftvs_history, 0);
setDeleteT(set);
treDeleteTrieA(trie, sizeof(char));
return 0;
}


Finally, I hope there are no license or usage-rights concerns about using the raw dictionary text file. :o
You can download the dictionary from here: https://github.com/coshcage/dict
Please note that this project requires the StoneValley library.
 :)

Offline HellOfMice

  • Member
  • *
  • Posts: 46
  • Never be pleased, always improve
Re: Offline English to English Dictionary
« Reply #1 on: February 17, 2023, 08:32:33 AM »
Very clean code.
I downloaded all the files and will try them.
No Pelle's project file.
Why have you made it in a console?
Adding clipboard support would be a good idea.


Thank you for sharing it.
Kenavo