/* Pro un explication, vide illac. */
/* Converter un texto in elefen
(lingua franca nova), interlingua etc. del scriptura
latin al devanagari, e altere scripturas de India.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Row builders for conv_table: a vowel has two forms, a consonant only one
   (its second column is -1). */
#define VOWEL(dependent, independent) { (dependent), (independent) }
#define CONSONANT(code) { (code), -1 }

/*
 * Transliteration table, one row per Latin letter 'a'..'z' (row = letter - 'a'),
 * followed by a sentinel row.
 *
 * Column 0 is the offset, within the selected script block, of the consonant
 * or of the dependent form of a vowel; column 1 is the offset of the
 * independent form of a vowel, or -1 where there is none.
 *
 * See https://rudhar.com/lingtics/uniclnkl.htm,
 * https://unicode.org/charts/PDF/U0900.pdf Devanagari
 * https://unicode.org/charts/PDF/U0980.pdf Bengali
 * https://unicode.org/charts/PDF/U0A00.pdf Gurmukhi
 * https://unicode.org/charts/PDF/U0A80.pdf Gujarati
 */
int conv_table[][2] =
{
    /* Long aa = vowel a, no vowel = absence (not short a),
       to avoid excessive use of virama. */
    /* a */ VOWEL(0x3e, 0x06),
    /* b */ CONSONANT(0x2c),
    /* c */ CONSONANT(0x1a),    /* Misused palatal consonant for this */
    /* d */ CONSONANT(0x26),
    /* e */ VOWEL(0x47, 0x0f),
    /* f */ CONSONANT(0x2b),    /* Misused ph for this */
    /* g */ CONSONANT(0x17),
    /* h */ CONSONANT(0x39),
    /* i */ VOWEL(0x3f, 0x07),
    /* j */ CONSONANT(0x1c),    /* Misused palatal consonant for this */
    /* k */ CONSONANT(0x15),
    /* l */ CONSONANT(0x32),
    /* m */ CONSONANT(0x2e),
    /* n */ CONSONANT(0x28),
    /* o */ VOWEL(0x4b, 0x13),
    /* p */ CONSONANT(0x2a),
    /* q */ CONSONANT(0x18),    /* Misused gha for this */
    /* r */ CONSONANT(0x30),
    /* s */ CONSONANT(0x38),
    /* t */ CONSONANT(0x24),
    /* u */ VOWEL(0x41, 0x09),
    /* v */ CONSONANT(0x2d),    /* Misused bh for this */
    /* w */ CONSONANT(0x35),
    /* x */ CONSONANT(0x16),    /* Misused kh for this */
    /* y */ VOWEL(0x40, 0x08),  /* Misused long ii for this */
    /* z */ CONSONANT(0x5b),
    /* safety stop */ { -1, -1 },
};

#undef VOWEL
#undef CONSONANT
/* Converts one input character and writes the result to fpo; defined after main. */
static int convert (int c, FILE *fpi, FILE *fpo);
/* First code point of the target script block; table offsets are added to it.
   Defaults to 0x0900, the value main also selects for "Deva". */
static int baseval = 0x0900;
/* Nonzero when the previous character written by convert was a consonant;
   convert uses it to choose between the two table columns for a vowel. */
static int prev_was_cons = 0;
int main (int argc, char **argv)
{
FILE *fpi = stdin, *fpo = stdout;
int c;
int intag = 0, inentity = 0;
if ((argc > 2 && argv[1][0] == '-' && strcmp(argv[2], "Deva") == 0)
||
(argc > 1 && strcmp(argv[1], "-sDeva") == 0))
{
baseval = 0x0900;
}
else if ((argc > 2 && argv[1][0] == '-' && strcmp(argv[2], "Beng") == 0)
||
(argc > 1 && strcmp(argv[1], "-sBeng") == 0))
{
baseval = 0x0980;
}
else if ((argc > 2 && argv[1][0] == '-' && strcmp(argv[2], "Guru") == 0)
||
(argc > 1 && strcmp(argv[1], "-sGuru") == 0))
{
baseval = 0x0a00;
}
else if ((argc > 2 && argv[1][0] == '-' && strcmp(argv[2], "Gujr") == 0)
||
(argc > 1 && strcmp(argv[1], "-sGujr") == 0))
{
baseval = 0x0a80;
}
else if ((argc > 2 && argv[1][0] == '-' && strcmp(argv[2], "Tibt") == 0)
||
(argc > 1 && strcmp(argv[1], "-sTibt") == 0))
{
baseval = 0x0a80;
}
while ((c = getc(fpi)) != EOF)
{
if (!intag && c == '<')
intag = 1;
else if (intag && c == '>')
intag = 0;
else if (!inentity && c == '&')
inentity = 1;
else if (inentity && c == ';')
inentity = 0;
if (intag || inentity)
{
putc(c, fpo);
prev_was_cons = 0;
}
else
{
convert(c, fpi, fpo);
}
}
return 0;
}
/* Write c to fpo unchanged and record that no consonant precedes what follows. */
static void emit_unconverted (int c, FILE *fpo)
{
    putc(c, fpo);
    prev_was_cons = 0;
}

/*
 * Convert one character and write the result to fpo.
 *
 * ASCII letters are folded to lower case, looked up in conv_table and written
 * as a hexadecimal numeric character reference ("&#x...;") relative to
 * baseval.  A vowel (one of "aeiouy") that does not follow a consonant uses
 * column 1 of the table; every other letter uses column 0.  Any other
 * character is written unchanged.
 *
 * c    character to convert, as returned by getc
 * fpi  input stream; not used here
 * fpo  output stream
 *
 * Updates prev_was_cons.  Always returns 0.
 */
static int convert (int c, FILE *fpi, FILE *fpo)
{
    static const char vowels[] = "aeiouy";
    int lower, tabval, is_vowel;

    (void) fpi;

    /* Explicit 7-bit range check instead of the non-ISO isascii(); it also
       keeps the argument of isalpha() within its defined domain. */
    if (c < 0 || c > 0x7f || !isalpha(c))
    {
        emit_unconverted(c, fpo);
        return 0;
    }

    lower = tolower(c);
    /* Safety first: never index outside the 26 letter rows.  Clamping to the
       sentinel row would emit baseval - 1 as a bogus character reference. */
    if (lower < 'a' || lower > 'z')
    {
        emit_unconverted(c, fpo);
        return 0;
    }

    is_vowel = (strchr(vowels, lower) != NULL);
    tabval = conv_table[lower - 'a'][(is_vowel && !prev_was_cons) ? 1 : 0];
    if (tabval < 0)
    {
        /* No code for this form: pass the character through untouched. */
        emit_unconverted(c, fpo);
        return 0;
    }

    fprintf(fpo, "&#x%03x;", tabval + baseval);
    prev_was_cons = !is_vowel;
    return 0;
}