Pro un explication, vide illac.
/*
 * Convert a text in Elefen (Lingua Franca Nova), Interlingua etc. from the
 * Latin script to Devanagari and other scripts of India.
 *
 * The program reads stdin and writes stdout.  Every ASCII letter outside
 * of <...> tags and &...; entities is replaced by an HTML numeric character
 * reference ("&#x...;"); all other bytes are copied through unchanged.
 *
 * Script selection (default: Devanagari):
 *   prog -sDeva | -sBeng | -sGuru | -sGujr | -sTibt
 *   prog -X Deva          (any first argument starting with '-', followed
 *                          by the script name as a separate argument)
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/* Number of letters 'a'..'z'; also the index of the sentinel row below. */
enum { LETTER_COUNT = 26 };

/*
 * Per-letter code point offsets, indexed by (letter - 'a').  Each value is
 * added to `baseval` (the first code point of the selected script block).
 *
 *   [n][0]  consonant, or the dependent (combining) form of a vowel
 *   [n][1]  independent form of a vowel, -1 for consonants
 *
 * See https://rudhar.com/lingtics/uniclnkl.htm and
 *   https://unicode.org/charts/PDF/U0900.pdf  Devanagari
 *   https://unicode.org/charts/PDF/U0980.pdf  Bengali
 *   https://unicode.org/charts/PDF/U0A00.pdf  Gurmukhi
 *   https://unicode.org/charts/PDF/U0A80.pdf  Gujarati
 */
int conv_table[][2] = {
    /* For vowels: dependent first, independent next */
    /* a */ { 0x3e, 0x06 }, /* Long aa = vowel a, no vowel = absence (not
                               short a), to avoid excessive use of virama */
    /* b */ { 0x2c, -1 },
    /* c */ { 0x1a, -1 },   /* Misused palatal consonant for this */
    /* d */ { 0x26, -1 },
    /* e */ { 0x47, 0x0f },
    /* f */ { 0x2b, -1 },   /* Misused ph for this */
    /* g */ { 0x17, -1 },
    /* h */ { 0x39, -1 },
    /* i */ { 0x3f, 0x07 },
    /* j */ { 0x1c, -1 },   /* Misused palatal consonant for this */
    /* k */ { 0x15, -1 },
    /* l */ { 0x32, -1 },
    /* m */ { 0x2e, -1 },
    /* n */ { 0x28, -1 },
    /* o */ { 0x4b, 0x13 },
    /* p */ { 0x2a, -1 },
    /* q */ { 0x18, -1 },   /* Misused gha for this */
    /* r */ { 0x30, -1 },
    /* s */ { 0x38, -1 },
    /* t */ { 0x24, -1 },
    /* u */ { 0x41, 0x09 },
    /* v */ { 0x2d, -1 },   /* Misused bh for this */
    /* w */ { 0x35, -1 },
    /* x */ { 0x16, -1 },   /* Misused kh for this */
    /* y */ { 0x40, 0x08 }, /* Misused long ii for this */
    /* z */ { 0x5b, -1 },
    /* safety stop (kept so the table layout is unchanged) */
    { -1, -1 },
};

static int convert (int c, FILE *fpi, FILE *fpo);

/* First code point of the selected script block; Devanagari by default. */
static int baseval = 0x0900;

/* Non-zero when the previously emitted character was a consonant letter. */
static int prev_was_cons = 0;

/* Script names accepted on the command line and their block bases. */
static const struct script_entry {
    const char *name;
    int base;
} script_table[] = {
    { "Deva", 0x0900 },
    { "Beng", 0x0980 },
    { "Guru", 0x0a00 },
    { "Gujr", 0x0a80 },
    /* NOTE(review): "Tibt" selects the same base as Gujarati.  The Tibetan
       block (U+0F00) does not appear to share the layout conv_table relies
       on, so the value is left as found -- confirm the intent. */
    { "Tibt", 0x0a80 },
};

/*
 * Set `baseval` from the command line.  For each known script, in table
 * order, a match is either
 *   - argv[1] starts with '-' and argv[2] equals the script name, or
 *   - argv[1] equals "-s" immediately followed by the script name.
 * The first matching script wins; with no match `baseval` is left alone.
 */
static void select_script (int argc, char **argv)
{
    size_t i;

    if (argc < 2)
        return;

    for (i = 0; i < sizeof script_table / sizeof script_table[0]; i++) {
        const char *name = script_table[i].name;
        int separate = argc > 2 && argv[1][0] == '-'
                       && strcmp(argv[2], name) == 0;
        int joined = strncmp(argv[1], "-s", 2) == 0
                     && strcmp(argv[1] + 2, name) == 0;

        if (separate || joined) {
            baseval = script_table[i].base;
            return;
        }
    }
}

/*
 * Entry point: pick the script, then filter stdin to stdout.
 * Always returns 0.
 */
int main (int argc, char **argv)
{
    FILE *fpi = stdin, *fpo = stdout;
    int c;
    int intag = 0, inentity = 0;

    select_script(argc, argv);

    while ((c = getc(fpi)) != EOF) {
        if (!intag && c == '<') {
            intag = 1;
        } else if (intag && c == '>') {
            intag = 0;
        } else if (!intag && !inentity && c == '&') {
            /* Entities are only tracked outside tags: a bare '&' inside an
               attribute value must not leave the entity state set after
               the tag has been closed. */
            inentity = 1;
        } else if (inentity && c == ';') {
            inentity = 0;
        }

        if (intag || inentity) {
            /* Markup is copied verbatim and breaks any consonant run. */
            putc(c, fpo);
            prev_was_cons = 0;
        } else {
            convert(c, fpi, fpo);
        }
    }
    return 0;
}

/*
 * Write the converted form of one input character to `fpo`.
 *
 *   c    character as returned by getc() (never EOF here)
 *   fpi  input stream; unused, kept for the existing signature
 *   fpo  output stream
 *
 * Non-letters and non-ASCII bytes are copied unchanged.  ASCII letters are
 * lower-cased and written as "&#x<hex>;".  A vowel (a e i o u y) that does
 * not directly follow a consonant uses its independent form; otherwise the
 * dependent form is used.  Updates `prev_was_cons`.  Always returns 0.
 */
static int convert (int c, FILE *fpi, FILE *fpo)
{
    int lower, index, tabval, is_vowel;

    (void)fpi;

    /* Explicit range test instead of isascii(), which is not ISO C. */
    if (c < 0 || c > 0x7f || !isalpha(c)) {
        putc(c, fpo);
        prev_was_cons = 0;
        return 0;
    }

    lower = tolower(c);
    index = lower - 'a';

    /* Safety first: anything outside a..z is passed through untouched
       rather than indexing outside the letter rows. */
    if (index < 0 || index >= LETTER_COUNT) {
        putc(c, fpo);
        prev_was_cons = 0;
        return 0;
    }

    is_vowel = strchr("aeiouy", lower) != NULL;

    if (is_vowel && !prev_was_cons)
        tabval = conv_table[index][1];
    else
        tabval = conv_table[index][0];
    tabval += baseval;

    fprintf(fpo, "&#x%03x;", (unsigned int)tabval);
    prev_was_cons = !is_vowel;
    return 0;
}