/* Relatate con illo e illo. Vide anque sci.lang. */

/* Le 26 e 28 de junio 2018. Converter un texto in interlingua
   del scriptura latin al hebraic.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/* Variant 1 mapping from the Latin letters a..z to Hebrew script.

   Each row is { base letter, diacritic }.  Both values are the low byte
   of a code point in the U+05xx range ("base value 0500"); -1 in the
   second column means that no diacritic follows the base letter.
   Row 26 is an all -1 sentinel ("safety stop").

   See:
   http://rudhar.com/lingtics/uniclnkl.htm,
   http://unicode.org/charts/PDF/U0590.pdf
   https://en.wikipedia.org/wiki/Yiddish_orthography
 */
int conv_table1[][2] =
{
   ['a' - 'a'] = { 0xd0,   -1 },   /* alef                               */
   ['b' - 'a'] = { 0xd1, 0xbc },   /* bet with dagesh                    */
   ['c' - 'a'] = { 0xe6,   -1 },   /* tsadi (even when pronounced [k])   */
   ['d' - 'a'] = { 0xd3,   -1 },   /* dalet                              */
   ['e' - 'a'] = { 0xe2,   -1 },   /* ayin                               */
   ['f' - 'a'] = { 0xe4,   -1 },   /* pe without dagesh                  */
   ['g' - 'a'] = { 0xd2,   -1 },   /* gimel                              */
   ['h' - 'a'] = { 0xd4,   -1 },   /* he                                 */
   ['i' - 'a'] = { 0xd9,   -1 },   /* yod                                */
   ['j' - 'a'] = { 0xe9,   -1 },   /* shin (although pronounced [Z])     */
   ['k' - 'a'] = { 0xdb,   -1 },   /* kaf                                */
   ['l' - 'a'] = { 0xdc,   -1 },   /* lamed                              */
   ['m' - 'a'] = { 0xde,   -1 },   /* mem                                */
   ['n' - 'a'] = { 0xe0,   -1 },   /* nun                                */
   ['o' - 'a'] = { 0xd5, 0xb9 },   /* vav with holam (d5-b9);
                                      alternative: qamats alef (d0-b8)   */
   ['p' - 'a'] = { 0xe4, 0xbc },   /* pe with dagesh                     */
   ['q' - 'a'] = { 0xe7,   -1 },   /* qof                                */
   ['r' - 'a'] = { 0xe8,   -1 },   /* resh                               */
   ['s' - 'a'] = { 0xe1,   -1 },   /* samekh                             */
   ['t' - 'a'] = { 0xd8,   -1 },   /* tet (d8); the original note leaves
                                      open whether tav (ea) is better    */
   ['u' - 'a'] = { 0xd5,   -1 },   /* vav                                */
   ['v' - 'a'] = { 0xd1,   -1 },   /* bet without dagesh                 */
   ['w' - 'a'] = { 0xf0,   -1 },   /* tsvey vovn                         */
   ['x' - 'a'] = { 0xd7,   -1 },   /* het                                */
   ['y' - 'a'] = { 0xf2,   -1 },   /* tsvey yudn                         */
   ['z' - 'a'] = { 0xd6,   -1 },   /* zayin                              */
   [26]        = {   -1,   -1 },   /* safety stop                        */
};

/* Variant 2 mapping: a variant that stays closer to Yiddish orthography.

   Each row is { base letter, diacritic }.  Both values are the low byte
   of a code point in the U+05xx range ("base value 0500"); -1 in the
   second column means that no diacritic follows the base letter.
   Row 26 is an all -1 sentinel ("safety stop").

   See:
   http://rudhar.com/lingtics/uniclnkl.htm,
   http://unicode.org/charts/PDF/U0590.pdf
   https://en.wikipedia.org/wiki/Yiddish_orthography
   https://de.wikipedia.org/wiki/Jiddisch#Lesebeispiel
 */
int conv_table2[][2] =
{
   ['a' - 'a'] = { 0xd0, 0xb7 },   /* pasekh (patah) alef                */
   ['b' - 'a'] = { 0xd1,   -1 },   /* bet without dagesh, because v is
                                      now tsvey vovn                     */
   ['c' - 'a'] = { 0xe6,   -1 },   /* tsadi (but qof when it is [k])     */
   ['d' - 'a'] = { 0xd3,   -1 },   /* dalet                              */
   ['e' - 'a'] = { 0xe2,   -1 },   /* ayin                               */
   ['f' - 'a'] = { 0xe4, 0xbf },   /* pe without dagesh, with rafe       */
   ['g' - 'a'] = { 0xd2,   -1 },   /* gimel                              */
   ['h' - 'a'] = { 0xd4,   -1 },   /* he                                 */
   ['i' - 'a'] = { 0xd9,   -1 },   /* yod                                */
   ['j' - 'a'] = { 0xe9,   -1 },   /* shin (although pronounced [Z])     */
   ['k' - 'a'] = { 0xdb, 0xbc },   /* kaf with dagesh                    */
   ['l' - 'a'] = { 0xdc,   -1 },   /* lamed                              */
   ['m' - 'a'] = { 0xde,   -1 },   /* mem                                */
   ['n' - 'a'] = { 0xe0,   -1 },   /* nun                                */
   ['o' - 'a'] = { 0xd0, 0xb8 },   /* qamats alef, d0-b8                 */
   ['p' - 'a'] = { 0xe4, 0xbc },   /* pe with dagesh                     */
   ['q' - 'a'] = { 0xe7,   -1 },   /* qof                                */
   ['r' - 'a'] = { 0xe8,   -1 },   /* resh                               */
   ['s' - 'a'] = { 0xe1,   -1 },   /* samekh                             */
   ['t' - 'a'] = { 0xd8,   -1 },   /* tet (d8); the original note leaves
                                      open whether tav (ea) is better    */
   ['u' - 'a'] = { 0xd5,   -1 },   /* vav                                */
   ['v' - 'a'] = { 0xf0,   -1 },   /* tsvey vovn                         */
   ['w' - 'a'] = { 0xd5, 0xbc },   /* vav with dagesh = shuruk           */
   ['x' - 'a'] = { 0xd7,   -1 },   /* het                                */
   ['y' - 'a'] = { 0xf2,   -1 },   /* tsvey yudn                         */
   ['z' - 'a'] = { 0xd6,   -1 },   /* zayin                              */
   [26]        = {   -1,   -1 },   /* safety stop                        */
};

/* Forward declaration; the definition follows main(). */
static int convert (int c, FILE *fpi, FILE *fpo);

/* Nonzero selects conv_table2 instead of conv_table1.  main() sets it
   when the first command-line argument is "-2". */
static int variant2 = 0;

/* High part shared by the emitted code points (U+05xx).
   NOTE(review): not referenced by any code visible in this file; the
   output format string hard-codes the leading 5 instead. */
static int baseval = 0x0500;

/* Filter stdin to stdout, transliterating text while copying HTML
   markup through unchanged.

   Usage: prog [-2]
      -2   (first argument only) select the variant 2 table.

   The input is scanned with three mutually exclusive states:
     - text:   characters are handed to convert();
     - tag:    everything from '<' up to and including '>' is copied;
     - entity: everything from '&' up to and including ';' is copied.

   A '&' inside a tag (for example in a URL query string) does not start
   an entity, and an entity is abandoned as soon as a character appears
   that cannot be part of a reference (anything other than a letter, a
   digit, '#' or ';').  Without those two rules a stray '&' would switch
   transliteration off until the next ';' anywhere in the document.

   Returns 0 on success, EXIT_FAILURE if reading or writing failed. */
int  main (int argc, char **argv)
{
   FILE *fpi = stdin, *fpo = stdout;
   enum { IN_TEXT, IN_TAG, IN_ENTITY } state = IN_TEXT;
   int c;

   if (argc > 1 && strcmp(argv[1], "-2") == 0)
   {
      variant2 = 1;
   }

   while ((c = getc(fpi)) != EOF)
   {
      if (state == IN_TAG)
      {
         putc(c, fpo);
         if (c == '>')
            state = IN_TEXT;
         continue;
      }

      if (state == IN_ENTITY)
      {
         if (c == ';' || c == '#' || isalnum(c))
         {
            putc(c, fpo);
            if (c == ';')
               state = IN_TEXT;
            continue;
         }

         /* Not a well-formed reference after all: drop back to text
            mode and let the code below handle this character. */
         state = IN_TEXT;
      }

      if (c == '<')
      {
         state = IN_TAG;
         putc(c, fpo);
      }
      else if (c == '&')
      {
         state = IN_ENTITY;
         putc(c, fpo);
      }
      else
      {
         convert(c, fpi, fpo);
      }
   }

   /* Report I/O trouble through the exit status instead of silently
      producing truncated output. */
   if (ferror(fpi) || fflush(fpo) == EOF || ferror(fpo))
      return EXIT_FAILURE;

   return 0;
}

/* Return nonzero iff c is an unaccented ASCII letter.  The range test
   replaces isascii(), which is not part of ISO C. */
static int is_ascii_letter (int c)
{
   return c >= 0 && c <= 0x7f && isalpha(c);
}

/* Return the next character of fp without consuming it, or EOF when
   there is none. */
static int peek_char (FILE *fp)
{
   int n = getc(fp);

   if (n != EOF)
      ungetc(n, fp);

   return n;
}

/* Write the transliteration of one input character to fpo.

   c    the character to convert, as returned by getc().
   fpi  the input stream; used only to peek at the character that
        follows c (nothing is consumed).
   fpo  the output stream.

   Anything that is not an ASCII letter is copied unchanged.  A letter
   is folded to lower case, looked up in conv_table1 or (when variant2
   is set) conv_table2, and written as a numeric character reference
   "&#x5NN;", followed by a second reference for the diacritic when the
   table supplies one.

   Always returns 0. */
static int convert (int c, FILE *fpi, FILE *fpo)
{
   int index, tabval, diacrt;

   if (!is_ascii_letter(c))
   {
      putc(c, fpo);
      return 0;
   }

   c = tolower(c); /* Hebrew has no uppercase/lowercase distinction */

   index = c - 'a';
   /* Safety first: anything outside a..z lands on the sentinel row. */
   if (index < 0 || index >= 26)
      index = 26;

   tabval = variant2 ? conv_table2[index][0] : conv_table1[index][0];
   diacrt = variant2 ? conv_table2[index][1] : conv_table1[index][1];

   if (tabval < 0)
   {
      /* Sentinel row: no mapping.  Echo the character and stop here so
         that no character reference is built from the -1. */
      putc(c, fpo);
      return 0;
   }

   if (variant2 && c == 'c')
   {
      int n = peek_char(fpi);

      /* EOF and NUL are tested explicitly: strchr() would otherwise
         match the terminating NUL of the string. */
      if (n == EOF || n == '\0' || strchr("eiyEIY", n) == NULL)
      {
         /* A letter c which sounds as [k] in interlingua, for variant 2
            is encoded as qof, as is the letter q of interlingua. This
            is unambiguous because in interlingua, q is always followed
            by 'u' and a vowel, while k is rare and hardly ever followed
            by 'u' and if so, there is no further vowel. (kuo- is an
            exception). */
         tabval = 0xe7;
         c = 'q';
      }
   }

   /* no final form for p in variant 2 */
   if (strchr(variant2 ? "cnmfk" : "cnmpfk", c) != NULL
       && !is_ascii_letter(peek_char(fpi)))
   {
      /* Last letter of a word: use the special final letterform.  In
         the Hebrew Unicode block each final form sits one position
         below its regular form. */
      tabval--;
   }

   fprintf(fpo, "&#x5%02x;", tabval);
   if (diacrt >= 0)
      fprintf(fpo, "&#x5%02x;", diacrt);

   return 0;
}