/* Pro un explication, vide illac.
Converter un texto in elefen
(lingua franca nova) del scriptura latin al cyrillic.
Vide etiam https://rudhar.com/lingtics/intrlnga/scrptura/ia.htm
Le 13 de martio 2018: secunde schema pro converter interlingua
al scriptura grec.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Letter conversion table.  Row 0 is 'a'/'A', row 25 is 'z'/'Z'; row 26 is
 * a safety stop used for out-of-range indexes.  Every entry is an offset
 * that is added to the base value of the selected script; a negative entry
 * means the letter has no counterpart and is copied unchanged.
 *
 *   column 0: Cyrillic, used for Elefen       (base value 0x0410)
 *   column 1: Greek, used for Interlingua     (base value 0x0390)
 *   column 2: an alternative Greek variant    (base value 0x0390)
 *
 * See https://rudhar.com/lingtics/uniclnkl.htm,
 * https://unicode.org/charts/PDF/U0400.pdf and
 * https://unicode.org/charts/PDF/U0370.pdf
 */
int conv_table[][3] =
{
    /*          Cyrillic  Greek  Greek 2 */
    /* a */   { 0x00,     0x01,  0x01 },  /* A    / Alpha                        */
    /* b */   { 0x01,     0x02,  0x02 },  /* BE   / Beta                         */
    /* c */   { 0x0a,     0x0a,  0x69 },  /* KA   / Kappa (theta is substituted
                                             in convert) / lunate sigma          */
    /* d */   { 0x04,     0x04,  0x04 },  /* DE   / Delta                        */
    /* e */   { 0x05,     0x05,  0x05 },  /* IE   / Epsilon                      */
    /* f */   { 0x14,     0x16,  0x16 },  /* EF   / Phi                          */
    /* g */   { 0x03,     0x03,  0x03 },  /* GHE  / Gamma                        */
    /* h */   { 0x15,     0x07,  0x07 },  /* HA   / Eta                          */
    /* i */   { 0x08,     0x09,  0x09 },  /* I    / Iota                         */
    /* j */   { 0x06,     0x17,  0x17 },  /* ZHE  / Chi                          */
    /* k */   { -1,       0x0a,  0x0a },  /* none / Kappa                        */
    /* l */   { 0x0b,     0x0b,  0x0b },  /* EL   / Lambda                       */
    /* m */   { 0x0c,     0x0c,  0x0c },  /* EM   / Mu                           */
    /* n */   { 0x0d,     0x0d,  0x0d },  /* EN   / Nu                           */
    /* o */   { 0x0e,     0x0f,  0x0f },  /* O    / Omicron                      */
    /* p */   { 0x0f,     0x10,  0x10 },  /* PE   / Pi                           */
    /* q */   { -1,       0x18,  0x48 },  /* none / Psi / archaic qoppa          */
    /* r */   { 0x10,     0x11,  0x11 },  /* ER   / Rho                          */
    /* s */   { 0x11,     0x13,  0x13 },  /* ES   / Sigma                        */
    /* t */   { 0x12,     0x14,  0x14 },  /* TE   / Tau                          */
    /* u */   { 0x13,     0x15,  0x15 },  /* U    / Upsilon                      */
    /* v */   { 0x02,     0x15,  0x4c },  /* VE   / Upsilon / digamma            */
    /* w */   { -1,       0x19,  0x19 },  /* none / Omega                        */
    /* x */   { 0x18,     0x0e,  0x0e },  /* SHA  / Xi                           */
    /* y */   { -1,       0x12,  0x18 },  /* none / final-sigma slot (uppercase
                                             is remapped in convert) / Psi       */
    /* z */   { 0x07,     0x06,  0x06 },  /* ZE   / Zeta                         */
    /* stop */{ -1,       -1,    -1   },
};
/* Forward declaration; the definition follows main. */
static int convert (int c, FILE *fpi, FILE *fpo);

/* Target script chosen on the command line.  The numeric value is also
   used as the column index into conv_table. */
static enum
{
    CYRILL = 0,
    GRECO  = 1,
    GRECO2 = 2
} conv_alphabet = CYRILL;

/* Value added to every conv_table offset to obtain the output code;
   main switches it to 0x0390 for both Greek variants. */
static int baseval = 0x0410;
/*
 * Filter: copy stdin to stdout, converting letters outside markup.
 *
 * Alphabet selection:
 *   "-lel",  or any option argument followed by "el"  : Greek (GRECO)
 *   "-lel2", or any option argument followed by "el2" : Greek variant (GRECO2)
 *   anything else                                     : Cyrillic (default)
 *
 * Text between '<' and '>' and character references ("&...;") are copied
 * verbatim; every other character is handed to convert().
 *
 * Returns 0 on success, EXIT_FAILURE if reading the input or flushing the
 * output failed.
 */
int main (int argc, char **argv)
{
    FILE *fpi = stdin, *fpo = stdout;
    int c;
    int intag = 0, inentity = 0;

    if ((argc > 2 && argv[1][0] == '-' && strcmp(argv[2], "el") == 0)
        ||
        (argc > 1 && strcmp(argv[1], "-lel") == 0))
    {
        conv_alphabet = GRECO;
        baseval = 0x0390;
    }
    else if ((argc > 2 && argv[1][0] == '-' && strcmp(argv[2], "el2") == 0)
        ||
        (argc > 1 && strcmp(argv[1], "-lel2") == 0))
    {
        conv_alphabet = GRECO2;
        baseval = 0x0390;
    }

    while ((c = getc(fpi)) != EOF)
    {
        if (intag)
        {
            /* Inside a tag only '>' is significant.  In particular an '&'
               in an attribute value (e.g. a URL query string) must not
               start a character reference, or the text following the tag
               would stay unconverted until the next ';'. */
            if (c == '>')
                intag = 0;
        }
        else if (c == '<')
        {
            intag = 1;
            /* A character reference cannot span a tag. */
            inentity = 0;
        }
        else if (inentity)
        {
            /* A reference consists of letters, digits and '#' only.  It
               ends at ';', and a bare '&' that is not a reference ends at
               the first other character, so it no longer switches off
               conversion for the rest of the text. */
            if (!isalnum(c) && c != '#')
                inentity = 0;
        }
        else if (c == '&')
        {
            inentity = 1;
        }

        /* The closing '>' and the terminating ';' have already cleared
           their flag, so they go through convert(), which copies
           non-letters unchanged. */
        if (intag || inentity)
            putc(c, fpo);
        else
            convert(c, fpi, fpo);
    }

    if (ferror(fpi) || fflush(fpo) == EOF)
        return EXIT_FAILURE;
    return 0;
}
/*
 * Write the converted form of one character to fpo.
 *
 *   c    the character just read from fpi (a getc() value)
 *   fpi  input stream; used only to peek at the next character
 *        (getc followed by ungetc)
 *   fpo  output stream
 *
 * An ASCII letter that has a non-negative entry in conv_table for the
 * selected alphabet is written as a hexadecimal numeric character
 * reference ("&#x....;").  Every other character is copied unchanged.
 * Always returns 0.
 */
static int convert (int c, FILE *fpi, FILE *fpo)
{
    /* Nonzero when the character handled by the previous call was 'c' or
       'C'.  Used so that the second c of "cch" is not turned into theta. */
    static int prev_was_c = 0;
    int tabval, index, extra_for_lowercase;

    /* Range test instead of isascii(), which is POSIX and not ISO C. */
    if (c < 0 || c > 0x7f || !isalpha(c))
    {
        putc(c, fpo);
        /* A non-letter separates words: a 'c' before it must not count as
           the character preceding the next letter. */
        prev_was_c = 0;
        return 0;
    }

    if (isupper(c))
    {
        index = c - 'A';
        extra_for_lowercase = 0;
    }
    else
    {
        index = c - 'a';
        /* Lowercase is normally 0x20 above uppercase in both scripts. */
        extra_for_lowercase = 0x20;
        if (conv_alphabet == GRECO2)
        {
            /* The special third-column letters sit elsewhere relative to
               their capitals. */
            switch (c)
            {
                case 'c':   /* lunate sigma: lowercase is 7 below */
                    extra_for_lowercase = -7;
                    break;
                case 'q':   /* archaic qoppa */
                case 'v':   /* digamma: lowercase directly follows */
                    extra_for_lowercase = 1;
                    break;
                default:
                    break;
            }
        }
    }

    /* Safety first: anything out of range selects the stop row (all -1). */
    if (index < 0 || index >= 26)
        index = 26;

    tabval = conv_table[index][conv_alphabet];
    if (tabval < 0)
    {
        /* No counterpart in the target script. */
        putc(c, fpo);
    }
    else
    {
        tabval += baseval + extra_for_lowercase;
        if (conv_alphabet == GRECO)
        {
            if (tolower(c) == 'c')
            {
                int n = getc(fpi);

                if (n != EOF)
                {
                    /* n != '\0' is required because strchr() also matches
                       the terminating null of the search string. */
                    if (n != '\0' &&
                        (strchr("eiyEIY", n) != NULL ||
                         (strchr("hH", n) != NULL && !prev_was_c)))
                    {
                        tabval = islower(c) ? 0x03b8 : 0x0398; /* theta */
                    }
                    ungetc(n, fpi);
                }
            }
            else if (tabval == 0x03a2)
            {
                /* For Interlingua: final sigma (used for y) has no
                   uppercase form.  Use "upsilon with a hook" instead.
                   See https://unicode.org/charts/PDF/U0370.pdf */
                tabval = 0x03d2;
            }
        }
        else if (conv_alphabet == GRECO2)
        {
            if (c == 's')
            {
                int n = getc(fpi);

                if (n == EOF || !isalpha(n))
                {
                    /* Final lowercase sigma as in real Greek */
                    tabval = 0x03c2;
                }
                if (n != EOF)
                    ungetc(n, fpi);
            }
        }
        fprintf(fpo, "&#x%04x;", tabval);
    }

    prev_was_c = (tolower(c) == 'c');
    return 0;
}