per page, with , order by , clip by
Results of 1 - 1 of about 6 for ]); (0.000 sec.)
utfcntxt.c
#score: 13088
@digest: ac6e95b6d0e9a8360d7e29021d5b8539
@id: 1537
@lang: ia
@size: 3912
@type: text/html
content-type: text/html; charset=UTF-8
viewport: width=device-width, initial-scale=1
#keywords: lenhandled (96700), maxutflen (63966), posinfile (57707), bytesbuf (56859), contextbuf (52072), fprintf (46891), display (23088), contextpos (22676), getmore (21804), fpi (21192), bytesinbuf (21167), fseek (20030), utf8valid (18278), contextlen (15368), buffering (15368), inputfile (15368), static (14851), fpo (13785), include (12710), utf8len (12185), seek (11604), unsigned (11091), isascii (11037), utftools (10539), void (10181), stdout (9974), file (9176), argv (8401), red (7971), smallish (7815), repositioning (7815), iscntrl (7498)
Source code belongs to this explanation . /* 19/20 April 2020, by Ruud Harmsen. Improved implementation of my 2008 utf8cntx.c, producing the exact same output, minus the bug. Idea: do only minimal buffering yourself, instead rely on the stdio library's built-in buffering, repositioning as needed using functions ftell() and fseek(). As a consequence, the input can no longer come from stdin or a pipe, it has to be a disk file. Also, missing bytes at the end (if the number of bytes in the file is not a multiple of four) are now displayed as "..", not "00", because they are not. The new version is somewhat slower than the old, but as the whole idea only makes sense for smallish files (because the output is intended for human examination), that is no issue. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include "../cgi-src/utftools/utftools.h" /* MAXUTFLEN can only be 4. Or UTF16 would be broken. And no more will ever be needed. */ #define MAXUTFLEN 4 unsigned char bytesbuf[MAXUTFLEN]; size_t bytesinbuf = 0; #define CONTEXTLEN 40 long posinfile = 0; static size_t getmore (FILE *fpi); static void display (FILE *fpi, FILE *fpo); static void display_context (FILE *fpi, FILE *fpo); int main (int argc, char **argv) { FILE *fpi = NULL; FILE *fpo = stdout; if (argc != 2) { fprintf(stderr, "Usage: %s inputfile\n", argv[0]); exit(1); } else { fpi = fopen(argv[1], "rb"); if (!fpi) { fprintf(stderr, "%s: Cannot open inputfile %s\n", argv[0], argv[1]); exit(2); } } while (getmore(fpi)) { display(fpi, fpo); } fclose(fpi); return 0; } static size_t getmore (FILE *fpi) { size_t red; posinfile = ftell(fpi); red = fread(bytesbuf, 1, MAXUTFLEN, fpi); bytesinbuf = red; return red; } static void display (FILE *fpi, FILE *fpo) { int lenhandled; int i; fprintf(fpo, "%06ld 0x", posinfile); for (i = 0; i < MAXUTFLEN; i++) if (bytesinbuf > i) fprintf(fpo, i < MAXUTFLEN -1 ? "%02x-" : "%02x ", bytesbuf[i]); else fprintf(fpo, i < MAXUTFLEN -1 ? "..-" : ".. "); if (isascii(bytesbuf[0]) || utf8valid(bytesbuf)) fprintf(fpo, "0x%06lx: ", utf2scalar(bytesbuf)); else /* Invalid UTF8 input */ fprintf(fpo, "--------: "); lenhandled = utf8valid(bytesbuf) ? utf8len(bytesbuf[0]) : 1; if (lenhandled == 0 || lenhandled > 4) lenhandled = 1; fseek(fpi, posinfile, SEEK_SET); display_context(fpi, fpo); posinfile += lenhandled; fseek(fpi, posinfile, SEEK_SET); } static void display_context (FILE *fpi, FILE *fpo) { int i; unsigned char contextbuf[MAXUTFLEN]; long contextpos = posinfile; size_t red; int lenhandled; for (i = 0; i < CONTEXTLEN && (red = fread(contextbuf, 1, MAXUTFLEN, fpi)) > 0; i++) { if (isascii(contextbuf[0]) && !iscntrl(contextbuf[0])) fprintf(stdout, "%c", contextbuf[0]); else fprintf(stdout, "%c", '.'); lenhandled = utf8valid(contextbuf) ? utf8len(contextbuf[0]) : 1; if (lenhandled == 0 || lenhandled > 4) lenhandled = 1; if (lenhandled > red) lenhandled = red; contextpos += lenhandled; fseek(fpi, contextpos, SEEK_SET); } fprintf(fpo, "\n"); fseek(fpi, posinfile, SEEK_SET); } ...
https://rudhar.com/sfreview/utf8cntx/utfcntxt.c.htm - [detail] - [similar]
PREV NEXT
Powered by Hyper Estraier 1.4.13, with 1747 documents and 81086 words.