Relatate con illo e illo. Vide anque sci.lang.
/*
 * Convert a text in Interlingua from Latin script to Hebrew script.
 *
 * The text is read from stdin and written to stdout.  HTML tags (<...>)
 * and character references (&...;) are copied through unchanged; every
 * ASCII letter outside of them is replaced by one or two hexadecimal
 * numeric character references (&#x05..;) in the Unicode Hebrew block.
 *
 * Giving "-2" as the first command-line argument selects the second
 * conversion table, which stays closer to Yiddish orthography.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

enum {
    ALPHABET_SIZE = 26,  /* rows 'a'..'z'; row 26 is the safety stop */
    QOF = 0xe7           /* low byte of qof, used for hard c in variant 2 */
};

/*
 * Variant 1.
 * Each row: low byte of the base letter first, low byte of an optional
 * diacritical mark second (-1 = no mark).  Both are offsets from 0x0500.
 * See:
 *   https://rudhar.com/lingtics/uniclnkl.htm
 *   https://unicode.org/charts/PDF/U0590.pdf
 *   https://en.wikipedia.org/wiki/Yiddish_orthography
 */
int conv_table1[][2] =
{
    /* a */ { 0xd0, -1  }, /* alef */
    /* b */ { 0xd1, 0xbc}, /* bet with dagesh */
    /* c */ { 0xe6, -1  }, /* tsadi (even when pronounced [k]) */
    /* d */ { 0xd3, -1  }, /* dalet */
    /* e */ { 0xe2, -1  }, /* ayin */
    /* f */ { 0xe4, -1  }, /* pe without dagesh */
    /* g */ { 0xd2, -1  }, /* gimel */
    /* h */ { 0xd4, -1  }, /* he */
    /* i */ { 0xd9, -1  }, /* yod */
    /* j */ { 0xe9, -1  }, /* shin (although pronounced [Z]) */
    /* k */ { 0xdb, -1  }, /* kaf */
    /* l */ { 0xdc, -1  }, /* lamed */
    /* m */ { 0xde, -1  }, /* mem */
    /* n */ { 0xe0, -1  }, /* nun */
    /* o */ { 0xd5, 0xb9}, /* vav with holam (d5 b9); alternative: qamats alef (d0 b8) */
    /* p */ { 0xe4, 0xbc}, /* pe with dagesh */
    /* q */ { 0xe7, -1  }, /* qof */
    /* r */ { 0xe8, -1  }, /* resh */
    /* s */ { 0xe1, -1  }, /* samekh */
    /* t */ { 0xd8, -1  }, /* tet (d8); or should this be tav (ea)? */
    /* u */ { 0xd5, -1  }, /* vav */
    /* v */ { 0xd1, -1  }, /* bet without dagesh */
    /* w */ { 0xf0, -1  }, /* tsvey vovn (double vav) */
    /* x */ { 0xd7, -1  }, /* het */
    /* y */ { 0xf2, -1  }, /* tsvey yudn (double yod) */
    /* z */ { 0xd6, -1  }, /* zayin */
    /* safety stop */ {-1, -1},
};

/*
 * Variant 2: stays closer to Yiddish.
 * Same row layout as conv_table1.
 * See:
 *   https://rudhar.com/lingtics/uniclnkl.htm
 *   https://unicode.org/charts/PDF/U0590.pdf
 *   https://en.wikipedia.org/wiki/Yiddish_orthography
 *   https://de.wikipedia.org/wiki/Jiddisch#Lesebeispiel
 */
int conv_table2[][2] =
{
    /* a */ { 0xd0, 0xb7}, /* pasekh (patah) alef */
    /* b */ { 0xd1, -1  }, /* bet without dagesh, because v is now tsvey vovn */
    /* c */ { 0xe6, -1  }, /* tsadi (but qof when pronounced [k], see convert) */
    /* d */ { 0xd3, -1  }, /* dalet */
    /* e */ { 0xe2, -1  }, /* ayin */
    /* f */ { 0xe4, 0xbf}, /* pe without dagesh, with rafe */
    /* g */ { 0xd2, -1  }, /* gimel */
    /* h */ { 0xd4, -1  }, /* he */
    /* i */ { 0xd9, -1  }, /* yod */
    /* j */ { 0xe9, -1  }, /* shin (although pronounced [Z]) */
    /* k */ { 0xdb, 0xbc}, /* kaf with dagesh */
    /* l */ { 0xdc, -1  }, /* lamed */
    /* m */ { 0xde, -1  }, /* mem */
    /* n */ { 0xe0, -1  }, /* nun */
    /* o */ { 0xd0, 0xb8}, /* qamats alef (d0 b8) */
    /* p */ { 0xe4, 0xbc}, /* pe with dagesh */
    /* q */ { 0xe7, -1  }, /* qof */
    /* r */ { 0xe8, -1  }, /* resh */
    /* s */ { 0xe1, -1  }, /* samekh */
    /* t */ { 0xd8, -1  }, /* tet (d8); or should this be tav (ea)? */
    /* u */ { 0xd5, -1  }, /* vav */
    /* v */ { 0xf0, -1  }, /* tsvey vovn (double vav) */
    /* w */ { 0xd5, 0xbc}, /* vav with dagesh = shuruk */
    /* x */ { 0xd7, -1  }, /* het */
    /* y */ { 0xf2, -1  }, /* tsvey yudn (double yod) */
    /* z */ { 0xd6, -1  }, /* zayin */
    /* safety stop */ {-1, -1},
};

/* Added to every table value to obtain the full Unicode code point. */
static int baseval = 0x0500;

/* Non-zero when "-2" was given: use conv_table2 and its special rules. */
static int variant2 = 0;

static int convert (int c, FILE *fpi, FILE *fpo);

/*
 * Copy stdin to stdout, converting letters that are outside HTML tags and
 * character references.
 *
 * Returns 0 on success, EXIT_FAILURE if reading or writing failed.
 */
int main (int argc, char **argv)
{
    FILE *fpi = stdin, *fpo = stdout;
    int c;
    int intag = 0, inentity = 0;

    if (argc > 1 && strcmp(argv[1], "-2") == 0)
    {
        variant2 = 1;
    }

    while ((c = getc(fpi)) != EOF)
    {
        if (intag)
        {
            /* Everything up to and including '>' is copied verbatim.  An
               '&' in here (e.g. in a URL) does not start a reference. */
            if (c == '>')
                intag = 0;
            putc(c, fpo);
            continue;
        }

        if (inentity)
        {
            if (c == ';')
            {
                inentity = 0;
                putc(c, fpo);
                continue;
            }
            if (isalnum(c) || c == '#')
            {
                putc(c, fpo);
                continue;
            }
            /* This character cannot be part of a character reference, so
               the '&' was a bare ampersand.  Leave reference mode and
               treat the character as ordinary text below. */
            inentity = 0;
        }

        if (c == '<')
        {
            intag = 1;
            putc(c, fpo);
        }
        else if (c == '&')
        {
            inentity = 1;
            putc(c, fpo);
        }
        else
        {
            convert(c, fpi, fpo);
        }
    }

    if (ferror(fpi) || fflush(fpo) == EOF || ferror(fpo))
        return EXIT_FAILURE;

    return 0;
}

/* Return the next character of fp without consuming it (EOF at end). */
static int peek_char (FILE *fp)
{
    int next = getc(fp);

    if (next != EOF)
        ungetc(next, fp);
    return next;
}

/* True if ch is one of the vowels before which c is not pronounced [k]. */
static int softens_c (int ch)
{
    /* strchr would also match the terminating NUL, so exclude it. */
    return ch != EOF && ch != '\0' && strchr("eiyEIY", ch) != NULL;
}

/*
 * Write the Hebrew equivalent of the single character c to fpo.
 *
 * c    the character just read from fpi
 * fpi  input stream; looked ahead by one character (which is pushed back)
 *      to decide on hard/soft c and on final letterforms
 * fpo  output stream
 *
 * Characters that are not ASCII letters are written unchanged.  Letters
 * are written as "&#xNNNN;" for the base letter, followed by a second
 * reference for the diacritical mark if the table gives one.
 *
 * Always returns 0.
 */
static int convert (int c, FILE *fpi, FILE *fpo)
{
    int (*table)[2] = variant2 ? conv_table2 : conv_table1;
    /* Letters that have a final form; variant 2 has none for p. */
    const char *finals = variant2 ? "cnmfk" : "cnmpfk";
    int letter, index, tabval, diacrt;

    /* Range test instead of isascii(), which is not part of ISO C. */
    if (c < 0 || c > 0x7f || !isalpha(c))
    {
        putc(c, fpo);
        return 0;
    }

    /* Hebrew has no uppercase/lowercase distinction */
    letter = tolower(c);
    index = letter - 'a';

    /* Safety first: anything out of range hits the stop row. */
    if (index < 0 || index >= ALPHABET_SIZE)
        index = ALPHABET_SIZE;

    tabval = table[index][0];
    diacrt = table[index][1];

    if (tabval < 0)
    {
        /* No mapping: pass the character through and emit nothing else. */
        putc(c, fpo);
        return 0;
    }

    if (variant2 && letter == 'c' && !softens_c(peek_char(fpi)))
    {
        /* A letter c which sounds as [k] in Interlingua is, for
           variant 2, encoded as qof, as is the letter q of Interlingua.
           This is unambiguous because in Interlingua q is always
           followed by 'u' and a vowel, while k is rare and hardly ever
           followed by 'u', and if so there is no further vowel
           (kuo- is an exception).  Qof has no final form. */
        tabval = QOF;
    }
    else if (strchr(finals, letter) != NULL && !isalpha(peek_char(fpi)))
    {
        /* Last letter of a word: use the special final letterform.
           These are one position lower in Hebrew Unicode. */
        tabval--;
    }

    fprintf(fpo, "&#x%04x;", baseval + tabval);
    if (diacrt >= 0)
        fprintf(fpo, "&#x%04x;", baseval + diacrt);

    return 0;
}