/*********************************************************************
This file is part of Siworin. Copyright © 2021 by Ruud Harmsen.
Siworin is a simple word indexer and local search engine. See
https://rudhar.com/sfreview/siworin/#siw04 and
https://rudhar.com/sfreview/siworin/toolsrc/siworin .
Siworin is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
Siworin is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Siworin. If not, see <https://www.gnu.org/licenses/>.
********************************************************************/
/* Include guard: makes repeated inclusion of this header harmless.
   NOTE(review): identifiers that start with an underscore followed by
   an uppercase letter are reserved for the implementation in ISO C;
   a name such as SIWORIN_H would avoid that. Not renamed here, in
   case other files test for this macro. */
#ifndef _SIWORIN_H
#define _SIWORIN_H
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/* SIWORIN_FAIL(OPERATION): report a fatal error and terminate.
   Writes OPERATION (a string), the source file and line of the call
   site, and the numeric errno value to stderr; then perror() prints
   the matching system message, and finally exit(1) is called.
   The body is wrapped in do { ... } while (0), so that the macro
   followed by a semicolon is exactly one statement, also in an
   unbraced if/else.
   errno is captured first and restored just before perror(), because
   fprintf() itself is allowed to modify errno. */
#define SIWORIN_FAIL(OPERATION) do { \
   const int siworin_fail_errno_ = errno; \
   fprintf(stderr, "%s failed, %s: line %d, error %d\n", \
      (OPERATION), __FILE__, __LINE__, siworin_fail_errno_); \
   errno = siworin_fail_errno_; \
   perror(NULL); \
   exit(1); \
} while (0)
/* At first I used the pipe symbol '|' as the separator between
the word and the file number and offset. But that caused
problems, because the sorting between siworin-wordsep and
siworin-makelst is done on the whole line, not just on the word
up to the separator. For example:
aanbiedingen
appeared before
aanbieding
because the pipe symbol has a higher value than alphabetic
ASCII characters. (I sort in the LC_ALL=C locale.) However,
in siworin-srchind I compare only the word. So this made
the binary search fail, because the list was sorted by a
different criterion than the comparison function assumed.
So now I use '!' as the separator, which has a lower value
than any ASCII digit or letter, and also than the bytes of
other Unicode characters encoded in UTF-8.
See also
https://rudhar.com/sfreview/siworin/siworin10.htm#ChoosingFieldSeparator
*/
/* Field separator character placed between the word and the
   file number and offset on a line of the word list. */
#define SIWORIN_SEP '!'
/* NOTE(review): presumably the maximum length of an indexed word;
   whether a terminating NUL is counted is not visible in this
   header - confirm against the code that uses it. */
#define SIWORIN_MAXWORDLEN 100
/* NOTE(review): presumably the maximum length of a file path;
   confirm against the code that uses it. */
#define SIWORIN_MAXPATHLEN 256
/* See siworin-wordsep.c
Rather long pathindex and offset, but the advantage is
that sorting the whole line as bytes (LC_ALL=C) also sorts
the occurrence references. Might come in handy to
implement a proximity match. Although I won't.
This makes the file to be sorted bigger. But siworin-makelst.c
will reduce it again, after sorting has already been done.
See also
https://rudhar.com/sfreview/siworin/siworin10.htm#AscendingLocations
Note that the ! in the below formats should correspond to what
is defined for SIWORIN_SEP
*/
/* Fixed-width location format. As a printf format it writes the
   separator, an unsigned long as at least 6 zero-padded lowercase
   hex digits, the separator again, and an unsigned long as at least
   8 zero-padded lowercase hex digits. */
#define SIWORIN_FORMAT1 "!%06lx!%08lx"
/* The same two unsigned long hex fields, but without a width or
   zero padding.
   NOTE(review): which program uses which of these two formats is
   not visible in this header. */
#define SIWORIN_FORMAT2 "!%lx!%lx"
/* Patterns used in function modify_wrd in siworin-srchind.c.
   NOTE(review): this remark originally spoke of the "next two
   patterns", but three are defined below; confirm which of them
   modify_wrd really uses.
   They look like regular expressions built around the separator
   character, so the '!' in them should correspond to SIWORIN_SEP. */
/* Anchored at the start: a (possibly empty) run of non-'!' characters. */
#define SIWORIN_FORMAT3 "^[^!]*"
/* The separator character by itself. */
#define SIWORIN_FORMAT4 "!"
/* A (possibly empty) run of non-'!' characters followed by one '!'. */
#define SIWORIN_FORMAT5 "[^!]*!"
/* Number of hexadecimal digits in the offset field.
   NOTE(review): equal to the width of the second conversion
   ("%08lx") in SIWORIN_FORMAT1, which presumably is the offset;
   the two should be kept in step - confirm. */
#define SIWORIN_OFFSET_HEXDIGITS 8
#endif /* _SIWORIN_H */