/* Pro un explication del scopo de iste programma, vide https://rudhar.com/lingtics/intrlnga/cgi-grep/fria/intro-ia.htm e specificamente https://rudhar.com/lingtics/intrlnga/cgi-grep/fria/intro-ia.htm#frhtmtxt */ #include #include #include #include #include "repundrl.h" static char line1[4096]; static char lemma[512]; char *DT = "
"; char *SDTDD = "
"; char *SDT = ""; char *DD = "
"; char *DL = "
"; char *SDL = "
"; int main (void) { FILE *fpi = stdin, *fpo = stdout, *flog = stderr; int inprepost = 1; long linecnt = 0, blanks = 0, dts = 0, dtss = 0, dds = 0; long prepost = 0, unrecog = 0, invalid = 0; char *s, *s1, *s2; int lemmalen; while (fgets(line1, sizeof line1, fpi)) { linecnt++; if (s = strstr(line1, DL), s && s == line1) { inprepost = 0; prepost++; continue; } else if (s = strstr(line1, SDL), s && s == line1) { inprepost = 1; } if (inprepost) { prepost++; } else { if (line1[0] == '\n' && line1[1] == '\0') { /* Skip empty lines */ blanks++; } else if (s1 = strstr(line1, DT), s1 && s1 == line1) { if (s2 = strstr(line1, SDTDD), s2) { char *b = s2 + strlen(SDTDD); dts++; lemmalen = s2 - line1 - strlen(DT); if (lemmalen >= (sizeof lemma) - 1) lemmalen = (sizeof lemma) - 1; /* Save the lemma (between
and
) in a buffer for later use. */ sprintf(lemma, "%.*s", lemmalen, s1 + strlen(DT)); RepeatWordsWithUnderlining(b, (sizeof line1) - (b - line1)); /* 2., 3. etc, will be preceded by comma-space, so do that for a numbered sense 1. as well. */ if (isdigit(b[0])) fprintf(fpo, "%s, %s", lemma, b); else fprintf(fpo, "%s %s", lemma, b); } else if (s2 = strstr(line1, SDT), s2) { dts++; /* Sometimes all the beef is in following
lines. No nee to mention those. */ lemmalen = s2 - line1 - strlen(DT); if (lemmalen >= (sizeof lemma) - 1) lemmalen = (sizeof lemma) - 1; sprintf(lemma, "%.*s", lemmalen, s1 + strlen(DT)); /* fprintf(fpo, "%s\n", lemma); */ } else { invalid++; fprintf(flog, "Invalid line\n%s", line1); } } else if (strncmp(line1, DD, strlen(DD)) == 0) { dds++; RepeatWordsWithUnderlining(line1, (sizeof line1)); fprintf(fpo, "%s, %s", lemma, line1 + strlen(DD)); } else { unrecog++; fprintf(flog, "Unrecognised line\n%s", line1); } } } fprintf(flog, "\n"); fprintf(flog, "Lines read : %6ld\n", (long)linecnt); fprintf(flog, "\n"); fprintf(flog, "Pre-post HTML : %6ld\n", (long)prepost); fprintf(flog, "Blanks skipped: %6ld\n", (long)blanks); fprintf(flog, "Dt's handled : %6ld\n", (long)dts); fprintf(flog, "Dt shorts : %6ld\n", (long)dtss); fprintf(flog, "Dd's handled : %6ld\n", (long)dds); fprintf(flog, "Unrecognised : %6ld\n", (long)unrecog); fprintf(flog, "Invalid lines : %6ld\n", (long)invalid); return 0; }