/* Relatate con illo e illo. Vide anque sci.lang. */
/* Le 26 e .
Converter un texto in interlingua
del scriptura latin al hebraic.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Conversion table, variant 1.

   One row per Latin letter in alphabetical order, so the row index is
   (letter - 'a').  Column 0 is the base Hebrew letter, column 1 an
   optional diacritic mark; -1 in column 1 means "no mark".  Each value is
   the low byte of the code point, relative to the base value 0x0500.
   The last row is a safety stop whose -1 letter means "no conversion".

   See:
     https://rudhar.com/lingtics/uniclnkl.htm
     https://unicode.org/charts/PDF/U0590.pdf
     https://en.wikipedia.org/wiki/Yiddish_orthography
*/
int conv_table1[][2] =
{
   { 0xd0,   -1 },   /* a: alef */
   { 0xd1, 0xbc },   /* b: bet with dagesh */
   { 0xe6,   -1 },   /* c: tsadi (even when pronounced [k]) */
   { 0xd3,   -1 },   /* d: dalet */
   { 0xe2,   -1 },   /* e: ayin */
   { 0xe4,   -1 },   /* f: pe without dagesh */
   { 0xd2,   -1 },   /* g: gimel */
   { 0xd4,   -1 },   /* h: he */
   { 0xd9,   -1 },   /* i: yod */
   { 0xe9,   -1 },   /* j: shin (although pronounced [Z]) */
   { 0xdb,   -1 },   /* k: kaf */
   { 0xdc,   -1 },   /* l: lamed */
   { 0xde,   -1 },   /* m: mem */
   { 0xe0,   -1 },   /* n: nun */
   { 0xd5, 0xb9 },   /* o: vav with holam (d5-b9);
                           alternative: qamats alef (d0-b8) */
   { 0xe4, 0xbc },   /* p: pe with dagesh */
   { 0xe7,   -1 },   /* q: qof */
   { 0xe8,   -1 },   /* r: resh */
   { 0xe1,   -1 },   /* s: samekh */
   { 0xd8,   -1 },   /* t: tet (d8); the alternative considered was tav (ea) */
   { 0xd5,   -1 },   /* u: vav */
   { 0xd1,   -1 },   /* v: bet without dagesh */
   { 0xf0,   -1 },   /* w: tsvey vovn (double vav) */
   { 0xd7,   -1 },   /* x: het */
   { 0xf2,   -1 },   /* y: tsvey yudn (double yod) */
   { 0xd6,   -1 },   /* z: zayin */
   {   -1,   -1 },   /* safety stop */
};
/* Conversion table, variant 2: a variant that stays closer to Yiddish.

   One row per Latin letter in alphabetical order, so the row index is
   (letter - 'a').  Column 0 is the base Hebrew letter, column 1 an
   optional diacritic mark; -1 in column 1 means "no mark".  Each value is
   the low byte of the code point, relative to the base value 0x0500.
   The last row is a safety stop whose -1 letter means "no conversion".

   See:
     https://rudhar.com/lingtics/uniclnkl.htm
     https://unicode.org/charts/PDF/U0590.pdf
     https://en.wikipedia.org/wiki/Yiddish_orthography
     https://de.wikipedia.org/wiki/Jiddisch#Lesebeispiel
*/
int conv_table2[][2] =
{
   { 0xd0, 0xb7 },   /* a: pasekh (patah) alef */
   { 0xd1,   -1 },   /* b: bet without dagesh, because v is now tsvey vovn */
   { 0xe6,   -1 },   /* c: tsadi (but qof when pronounced [k]) */
   { 0xd3,   -1 },   /* d: dalet */
   { 0xe2,   -1 },   /* e: ayin */
   { 0xe4, 0xbf },   /* f: pe without dagesh, with rafe */
   { 0xd2,   -1 },   /* g: gimel */
   { 0xd4,   -1 },   /* h: he */
   { 0xd9,   -1 },   /* i: yod */
   { 0xe9,   -1 },   /* j: shin (although pronounced [Z]) */
   { 0xdb, 0xbc },   /* k: kaf with dagesh */
   { 0xdc,   -1 },   /* l: lamed */
   { 0xde,   -1 },   /* m: mem */
   { 0xe0,   -1 },   /* n: nun */
   { 0xd0, 0xb8 },   /* o: qamats alef (d0-b8) */
   { 0xe4, 0xbc },   /* p: pe with dagesh */
   { 0xe7,   -1 },   /* q: qof */
   { 0xe8,   -1 },   /* r: resh */
   { 0xe1,   -1 },   /* s: samekh */
   { 0xd8,   -1 },   /* t: tet (d8); the alternative considered was tav (ea) */
   { 0xd5,   -1 },   /* u: vav */
   { 0xf0,   -1 },   /* v: tsvey vovn (double vav) */
   { 0xd5, 0xbc },   /* w: vav with dagesh = shuruk */
   { 0xd7,   -1 },   /* x: het */
   { 0xf2,   -1 },   /* y: tsvey yudn (double yod) */
   { 0xd6,   -1 },   /* z: zayin */
   {   -1,   -1 },   /* safety stop */
};
/* Base value noted in the conversion tables ("Base value: 0500"); the table
   entries appear to be the low byte of a code point relative to it. */
static int baseval = 0x0500;
/* Non-zero selects conv_table2 instead of conv_table1; set by main() when
   the first command-line argument is "-2". */
static int variant2 = 0;
/* Converts the single character c, possibly looking ahead in fpi, and
   writes the result to fpo. */
static int convert (int c, FILE *fpi, FILE *fpo);
/* Filter standard input to standard output, passing the running text
   through convert() and copying markup unchanged.

   Markup is:
     - a tag: everything from '<' up to and including the next '>';
     - a character reference: '&' followed by letters, digits or '#', up
       to and including the terminating ';'.

   If the first argument is "-2", the second conversion table is used.

   Returns 0 on success and EXIT_FAILURE if reading or writing failed.
*/
int main (int argc, char **argv)
{
   FILE *fpi = stdin, *fpo = stdout;
   int c;
   int intag = 0, inentity = 0;

   if (argc > 1 && strcmp(argv[1], "-2") == 0)
   {
      variant2 = 1;
   }

   while ((c = getc(fpi)) != EOF)
   {
      if (intag)
      {
         /* Inside a tag only the closing '>' is interpreted.  In particular
            an '&' in an attribute value (e.g. an unescaped URL query
            string) must not open an entity that outlives the tag and
            silently disables conversion of the text that follows. */
         if (c == '>')
            intag = 0;
         putc(c, fpo);
      }
      else if (inentity)
      {
         if (c == ';')
         {
            inentity = 0;
            putc(c, fpo);
         }
         else if (isalnum(c) || c == '#')
         {
            putc(c, fpo);
         }
         else
         {
            /* This character cannot be part of a reference, so the '&'
               was a bare ampersand.  Leave entity state and let the next
               iteration handle the character normally (it may be '<'). */
            inentity = 0;
            ungetc(c, fpi);
         }
      }
      else if (c == '<')
      {
         intag = 1;
         putc(c, fpo);
      }
      else if (c == '&')
      {
         inentity = 1;
         putc(c, fpo);
      }
      else
      {
         convert(c, fpi, fpo);
      }
   }

   /* Report I/O problems through the exit status instead of ignoring them. */
   if (ferror(fpi) || fflush(fpo) == EOF || ferror(fpo))
      return EXIT_FAILURE;
   return 0;
}
static int convert (int c, FILE *fpi, FILE *fpo)
{
if (!isascii(c) || !isalpha(c))
{
putc(c, fpo);
}
else
{
int index, tabval, diacrt;
c = tolower(c); /* Hebrew has no uppercase/lowercase distinction */
index = c - 'a';
/* Safety first */
if (index >= 26)
index = 26;
tabval = variant2 ? conv_table2[index][0] : conv_table1[index][0];
diacrt = variant2 ? conv_table2[index][1] : conv_table1[index][1];
if (tabval < 0)
{
putc(c, fpo);
}
else
{
int n;
if (variant2 && c == 'c')
{
n = getc(fpi);
if (strchr("eiyEIY", n) == NULL)
{
/* A letter c which sounds as [k] in interlingua, for variant 2
is encoded as qof, as is the letter q of interlingua. This
is unambiguous because in interlingua, q is always followed
by 'u' and a vowel, while k is rare and hardly ever followed
by 'u' and if so, there is no further vowel. (kuo- is an
exception). */
tabval = 0xe7;
c = 'q';
}
ungetc(n, fpi);
}
/* no final form for p in variant 2 */
if (strchr(variant2 ? "cnmfk" : "cnmpfk", c) != NULL)
{
n = getc(fpi);
if (!isalpha(n))
{
/* Use a special final letterform. These are one position
higher in Hebrew Unicode. */
tabval--;
}
ungetc(n, fpi);
}
}
fprintf(fpo, "%02x;", tabval);
if (diacrt >= 0)
fprintf(fpo, "%02x;", diacrt);
}
return 0;
}