utf8mixd.c

Publication 6 May 2016, written 15 December 2008 or before

Sources belong to this explanation.

Source code


/* Copyright 2008 by R. Harmsen. But I won't sue anyone who uses or adapts the code. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include "utftools.h" static void Usage (void) { char text[] = "This is a simple, specialised UTF8 to Latin1 converter.\n" "Unlike general purpose converters like uniconv, it can\n" "reliably convert textfiles in languages like German, French\n" "Spanish etc. in which part of the text is in ISO-8859-1\n" "(Latin1) and part in UTF8. It does this by detecting what\n" "is probably UTF8 and what is certainly not." "\n" "Optionally (-w), it also support Windows code page\n" "1252, instead of plain ISO8859-1.\n" "\n" "Usage: utf8mixd [-w] infile outfile\n\n"; fprintf(stderr, text); } enum dowhats { wantISO8859_1 = 1, wantCP1252 = 2, }; static int Convert (int c, FILE *fi, FILE *fo, enum dowhats dowhat); int main (int argc, char **argv) { FILE *fi, *fo; int c; enum dowhats dowhat; dowhat = wantISO8859_1; if (argc > 3) { if (argv[1][0] == '-' && argv[1][1] == 'w') { dowhat = wantCP1252; argc--, argv++; } } if (argc != 3) { Usage(); exit(1); } fi = fopen(argv[1], "rb"); if (!fi) { fprintf(stderr, "Cannot open input file %s\n\n", argv[1] ? argv[1] : "null"); Usage(); exit(2); } fo = fopen(argv[2], "wb"); if (!fo) { fprintf(stderr, "Cannot open output file %s\n\n", argv[2] ? argv[2] : "null"); Usage(); exit(3); } while ((c = getc(fi)) != EOF) { if (!Convert(c, fi, fo, dowhat)) putc(c, fo); } fclose(fi); fclose(fo); return 0; } static int Convert (int c, FILE *fi, FILE *fo, enum dowhats dowhat) { int j, cc, len; int converted = 0; unsigned char buf[10]; long pos; pos = ftell(fi); memset(buf, '\0', sizeof buf); buf[0] = c; len = utf8len(c); for (j = 1; j < len && j < (sizeof buf - 1); j++) { cc = getc(fi); if (cc == EOF) break; buf[j] = cc; } if (len > 1 && utf8valid(buf)) { long scalar; int toput; scalar = utf2scalar(buf); if (dowhat == wantISO8859_1 && isconvertibleISO8859_1(scalar)) { putc(scalar, fo); converted = 1; } else if (dowhat == wantCP1252 && isconvertibleCP1252(scalar, &toput)) { if (isconvertibleISO8859_1(scalar)) putc(scalar, fo); else putc(toput, fo); converted = 1; } } if (!converted) { fseek(fi, pos, SEEK_SET); } return converted; }

Colours: Neutral Weird No preference Reload screen