23 #define NKF_VERSION "2.1.4" 24 #define NKF_RELEASE_DATE "2015-12-12" 26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \ 27 "Copyright (C) 1996-2015, The nkf Project." 38 # define INCL_DOSERRORS 168 {
ASCII,
"US-ASCII", &NkfEncodingASCII},
169 {
ISO_8859_1,
"ISO-8859-1", &NkfEncodingASCII},
170 {
ISO_2022_JP,
"ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {
CP50220,
"CP50220", &NkfEncodingISO_2022_JP},
172 {
CP50221,
"CP50221", &NkfEncodingISO_2022_JP},
173 {
CP50222,
"CP50222", &NkfEncodingISO_2022_JP},
177 {
SHIFT_JIS,
"Shift_JIS", &NkfEncodingShift_JIS},
178 {
WINDOWS_31J,
"Windows-31J", &NkfEncodingShift_JIS},
179 {
CP10001,
"CP10001", &NkfEncodingShift_JIS},
180 {
EUC_JP,
"EUC-JP", &NkfEncodingEUC_JP},
181 {
EUCJP_NKF,
"eucJP-nkf", &NkfEncodingEUC_JP},
182 {
CP51932,
"CP51932", &NkfEncodingEUC_JP},
183 {
EUCJP_MS,
"eucJP-MS", &NkfEncodingEUC_JP},
189 {
UTF_8,
"UTF-8", &NkfEncodingUTF_8},
190 {
UTF_8N,
"UTF-8N", &NkfEncodingUTF_8},
191 {
UTF_8_BOM,
"UTF-8-BOM", &NkfEncodingUTF_8},
192 {
UTF8_MAC,
"UTF8-MAC", &NkfEncodingUTF_8},
193 {
UTF_16,
"UTF-16", &NkfEncodingUTF_16},
194 {
UTF_16BE,
"UTF-16BE", &NkfEncodingUTF_16},
196 {
UTF_16LE,
"UTF-16LE", &NkfEncodingUTF_16},
198 {
UTF_32,
"UTF-32", &NkfEncodingUTF_32},
199 {
UTF_32BE,
"UTF-32BE", &NkfEncodingUTF_32},
201 {
UTF_32LE,
"UTF-32LE", &NkfEncodingUTF_32},
203 {
BINARY,
"BINARY", &NkfEncodingASCII},
265 #if defined(DEFAULT_CODE_JIS) 266 #define DEFAULT_ENCIDX ISO_2022_JP 267 #elif defined(DEFAULT_CODE_SJIS) 268 #define DEFAULT_ENCIDX SHIFT_JIS 269 #elif defined(DEFAULT_CODE_WINDOWS_31J) 270 #define DEFAULT_ENCIDX WINDOWS_31J 271 #elif defined(DEFAULT_CODE_EUC) 272 #define DEFAULT_ENCIDX EUC_JP 273 #elif defined(DEFAULT_CODE_UTF8) 274 #define DEFAULT_ENCIDX UTF_8 278 #define is_alnum(c) \ 279 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9')) 282 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c) 283 #define nkf_isoctal(c) ('0'<=c && c<='7') 284 #define nkf_isdigit(c) ('0'<=c && c<='9') 285 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F')) 286 #define nkf_isblank(c) (c == SP || c == TAB) 287 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == LF) 288 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) 289 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c)) 290 #define nkf_isprint(c) (SP<=c && c<='~') 291 #define nkf_isgraph(c) ('!'<=c && c<='~') 292 #define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \ 293 ('A'<=c&&c<='F') ? (c-'A'+10) : \ 294 ('a'<=c&&c<='f') ? (c-'a'+10) : 0) 295 #define bin2hex(c) ("0123456789ABCDEF"[c&15]) 296 #define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3) 297 #define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \ 298 ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \ 299 && (c != '(') && (c != ')') && (c != '.') && (c != 0x22))) 301 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END) 302 #define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c <= 0x5F) 304 #define HOLD_SIZE 1024 305 #if defined(INT_IS_SHORT) 306 #define IOBUF_SIZE 2048 308 #define IOBUF_SIZE 16384 311 #define DEFAULT_J 'B' 312 #define DEFAULT_R 'B' 322 extern POINT _BufferSize;
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 347 #define UCS_MAP_ASCII 0 349 #define UCS_MAP_CP932 2 350 #define UCS_MAP_CP10001 3 353 #ifdef UTF8_INPUT_ENABLE 364 #ifdef UTF8_OUTPUT_ENABLE 382 #if !defined(PERL_XS) && !defined(WIN32DLL) 387 #define NKF_UNSPECIFIED (-TRUE) 406 #ifdef UNICODE_NORMALIZATION 422 #define PREFIX_EUCG3 NKF_INT32_C(0x8F00) 423 #define CLASS_MASK NKF_INT32_C(0xFF000000) 424 #define CLASS_UNICODE NKF_INT32_C(0x01000000) 425 #define VALUE_MASK NKF_INT32_C(0x00FFFFFF) 426 #define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF) 427 #define UNICODE_MAX NKF_INT32_C(0x0010FFFF) 428 #define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3) 429 #define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE) 430 #define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE) 431 #define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX) 432 #define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX) 434 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00)) 436 #ifdef NUMCHAR_OPTION 446 static void debug(
const char *str);
454 static int exec_f = 0;
457 #ifdef SHIFTJIS_CP932 476 {
"EUC-JP", 0, 0, 0, {0, 0, 0},
e_status,
e_iconv, 0},
477 {
"Shift_JIS", 0, 0, 0, {0, 0, 0},
s_status,
s_iconv, 0},
478 #ifdef UTF8_INPUT_ENABLE 479 {
"UTF-8", 0, 0, 0, {0, 0, 0},
w_status,
w_iconv, 0},
480 {
"UTF-16", 0, 0, 0, {0, 0, 0},
NULL,
w_iconv16, 0},
481 {
"UTF-32", 0, 0, 0, {0, 0, 0},
NULL,
w_iconv32, 0},
504 #define FOLD_MARGIN 10 505 #define DEFAULT_FOLD 60 514 fprintf(stderr,
"nkf internal module connection failure.\n");
564 static const unsigned char cv[]= {
565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
586 static const unsigned char dv[]= {
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 static const unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635 0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636 0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637 0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638 0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
650 static const unsigned char fv[] = {
652 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
681 static int end_check;
689 if (size == 0) size = 1;
693 perror(
"can't malloc");
703 if (size == 0) size = 1;
707 perror(
"can't realloc");
714 #define nkf_xfree(ptr) free(ptr) 720 for (i = 0; src[i] && target[i]; i++) {
723 if (src[i] || target[i])
return FALSE;
733 return &nkf_encoding_table[idx];
740 if (name[0] ==
'X' && *(name+1) ==
'-') name += 2;
754 if (idx < 0)
return 0;
758 #define nkf_enc_name(enc) (enc)->name 759 #define nkf_enc_to_index(enc) (enc)->id 760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding 761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv 762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv 763 #define nkf_enc_asciicompat(enc) (\ 764 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\ 765 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP) 766 #define nkf_enc_unicode_p(enc) (\ 767 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\ 768 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\ 769 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32) 770 #define nkf_enc_cp5022x_p(enc) (\ 771 nkf_enc_to_index(enc) == CP50220 ||\ 772 nkf_enc_to_index(enc) == CP50221 ||\ 773 nkf_enc_to_index(enc) == CP50222) 775 #ifdef DEFAULT_CODE_LOCALE 779 #ifdef HAVE_LANGINFO_H 780 return nl_langinfo(CODESET);
781 #elif defined(__WIN32__) 783 sprintf(buf,
"CP%d", GetACP());
785 #elif defined(__OS2__) 786 # if defined(INT_IS_SHORT) 792 ULONG ulCP[1], ulncp;
793 DosQueryCp(
sizeof(ulCP), ulCP, &ulncp);
794 if (ulCP[0] == 932 || ulCP[0] == 943)
795 strcpy(buf,
"Shift_JIS");
797 sprintf(buf,
"CP%lu", ulCP[0]);
818 return &nkf_encoding_table[
UTF_8];
825 #ifdef DEFAULT_CODE_LOCALE 827 #elif defined(DEFAULT_ENCIDX) 859 #define nkf_buf_length(buf) ((buf)->len) 860 #define nkf_buf_empty_p(buf) ((buf)->len == 0) 888 return buf->
ptr[--buf->
len];
894 #define fprintf dllprintf 907 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n" 909 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 910 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n" 914 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 915 " UTF option is -W[8,[16,32][B,L]]\n" 917 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 921 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n" 922 " M[BQ] MIME encode [B:base64 Q:quoted]\n" 923 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n" 926 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n" 927 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n" 928 " 4: JISX0208 Katakana to JISX0201 Katakana\n" 929 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n" 932 " O Output to File (DEFAULT 'nkf.out')\n" 933 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n" 936 " --ic=<encoding> Specify the input encoding\n" 937 " --oc=<encoding> Specify the output encoding\n" 938 " --hiragana --katakana Hiragana/Katakana Conversion\n" 939 " --katakana-hiragana Converts each other\n" 943 " --{cap, url}-input Convert hex after ':' or '%%'\n" 946 " --numchar-input Convert Unicode Character Reference\n" 949 " --fb-{skip, html, xml, perl, java, subchar}\n" 950 " Specify unassigned character's replacement\n" 955 " --in-place[=SUF] Overwrite original files\n" 956 " --overwrite[=SUF] Preserve timestamp of original files\n" 958 " -g --guess Guess the input code\n" 959 " -v --version Print the version\n" 960 " --help/-V Print this help / configuration\n" 970 " Compile-time options:\n" 971 " Compiled at: " __DATE__
" " __TIME__
"\n" 974 " Default output encoding: " 977 #elif defined(DEFAULT_ENCIDX)
984 " Default output end of line: " 993 " Decode MIME encoded string: " 1000 " Convert JIS X 0201 Katakana: " 1007 " --help, --version output: " 1008 #
if HELP_OUTPUT_HELP_OUTPUT
1021 char *backup_filename;
1022 int asterisk_count = 0;
1024 int filename_length =
strlen(filename);
1026 for(i = 0; suffix[i]; i++){
1027 if(suffix[i] ==
'*') asterisk_count++;
1031 backup_filename =
nkf_xmalloc(
strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1032 for(i = 0, j = 0; suffix[i];){
1033 if(suffix[i] ==
'*'){
1034 backup_filename[j] =
'\0';
1035 strncat(backup_filename, filename, filename_length);
1037 j += filename_length;
1039 backup_filename[j++] = suffix[i++];
1042 backup_filename[j] =
'\0';
1044 j = filename_length +
strlen(suffix);
1046 strcpy(backup_filename, filename);
1047 strcat(backup_filename, suffix);
1048 backup_filename[j] =
'\0';
1050 return backup_filename;
1054 #ifdef UTF8_INPUT_ENABLE 1084 (*oconv)(0, 0x30+(c/10000 )%10);
1086 (*oconv)(0, 0x30+(c/1000 )%10);
1088 (*oconv)(0, 0x30+(c/100 )%10);
1090 (*oconv)(0, 0x30+(c/10 )%10);
1092 (*oconv)(0, 0x30+ c %10);
1144 (*oconv)((c>>8)&0xFF, c&0xFF);
1149 static const struct {
1173 {
"katakana-hiragana",
"h3"},
1181 #ifdef UTF8_OUTPUT_ENABLE 1191 {
"fb-subchar=",
""},
1193 #ifdef UTF8_INPUT_ENABLE 1194 {
"utf8-input",
"W"},
1195 {
"utf16-input",
"W16"},
1196 {
"no-cp932ext",
""},
1197 {
"no-best-fit-chars",
""},
1199 #ifdef UNICODE_NORMALIZATION 1200 {
"utf8mac-input",
""},
1212 #ifdef NUMCHAR_OPTION 1213 {
"numchar-input",
""},
1219 #ifdef SHIFTJIS_CP932 1240 #ifdef SHIFTJIS_CP932 1243 #ifdef UTF8_OUTPUT_ENABLE 1262 #ifdef SHIFTJIS_CP932 1265 #ifdef UTF8_OUTPUT_ENABLE 1271 #ifdef SHIFTJIS_CP932 1274 #ifdef UTF8_OUTPUT_ENABLE 1284 #ifdef SHIFTJIS_CP932 1287 #ifdef UTF8_OUTPUT_ENABLE 1293 #ifdef SHIFTJIS_CP932 1296 #ifdef UTF8_OUTPUT_ENABLE 1302 #ifdef SHIFTJIS_CP932 1305 #ifdef UTF8_OUTPUT_ENABLE 1312 #ifdef SHIFTJIS_CP932 1320 #ifdef SHIFTJIS_CP932 1324 #ifdef UTF8_INPUT_ENABLE 1325 #ifdef UNICODE_NORMALIZATION 1357 #ifdef SHIFTJIS_CP932 1360 #ifdef UTF8_OUTPUT_ENABLE 1366 #ifdef SHIFTJIS_CP932 1369 #ifdef UTF8_OUTPUT_ENABLE 1374 #ifdef SHIFTJIS_CP932 1380 #ifdef SHIFTJIS_CP932 1388 #ifdef SHIFTJIS_CP932 1396 #ifdef UTF8_OUTPUT_ENABLE 1401 #ifdef UTF8_OUTPUT_ENABLE 1407 #ifdef SHIFTJIS_CP932 1410 #ifdef UTF8_OUTPUT_ENABLE 1416 #ifdef SHIFTJIS_CP932 1419 #ifdef UTF8_OUTPUT_ENABLE 1425 #ifdef SHIFTJIS_CP932 1428 #ifdef UTF8_OUTPUT_ENABLE 1435 #ifdef UTF8_OUTPUT_ENABLE 1442 #ifdef UTF8_OUTPUT_ENABLE 1449 #ifdef SHIFTJIS_CP932 1457 #ifdef SHIFTJIS_CP932 1461 #ifdef UTF8_OUTPUT_ENABLE 1511 #ifdef INPUT_CODE_FIX 1512 if (f || !input_encoding)
1520 && (f == -
TRUE || !input_encoding)
1544 if (0x75 <= c && c <= 0x7f){
1545 ret = c + (0x109 - 0x75);
1548 if (0x75 <= c && c <= 0x7f){
1549 ret = c + (0x113 - 0x75);
1560 if (0x7f <= c && c <= 0x88){
1561 ret = c + (0x75 - 0x7f);
1562 }
else if (0x89 <= c && c <= 0x92){
1572 static const char x0213_2_table[] =
1573 {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1576 return x0213_2_table[ku];
1577 if (78 <= ku && ku <= 94)
1589 if((0x21 <= ndx && ndx <= 0x2F)){
1590 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1591 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1593 }
else if(0x6E <= ndx && ndx <= 0x7E){
1594 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1595 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1603 const unsigned short *ptr;
1606 val = ptr[(c1 & 0x7f) - 0x21];
1619 if(0x7F < c2)
return 1;
1620 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1621 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1628 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE) 1631 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1632 if (0xFC < c1)
return 1;
1633 #ifdef SHIFTJIS_CP932 1642 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1643 val =
cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1669 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){
1670 c2 =
PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1673 if (0x9E < c1) c2++;
1676 #define SJ0162 0x00e1 1677 #define SJ6394 0x0161 1679 if (0x9E < c1) c2++;
1682 c1 = c1 - ((c1 >
DEL) ?
SP : 0x1F);
1696 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 1706 }
else if (val < 0x800){
1707 *p1 = 0xc0 | (val >> 6);
1708 *p2 = 0x80 | (val & 0x3f);
1712 *p1 = 0xe0 | (val >> 12);
1713 *p2 = 0x80 | ((val >> 6) & 0x3f);
1714 *p3 = 0x80 | ( val & 0x3f);
1717 *p1 = 0xf0 | (val >> 18);
1718 *p2 = 0x80 | ((val >> 12) & 0x3f);
1719 *p3 = 0x80 | ((val >> 6) & 0x3f);
1720 *p4 = 0x80 | ( val & 0x3f);
1737 else if (c1 <= 0xC1) {
1741 else if (c1 <= 0xDF) {
1743 wc = (c1 & 0x1F) << 6;
1746 else if (c1 <= 0xEF) {
1748 wc = (c1 & 0x0F) << 12;
1749 wc |= (c2 & 0x3F) << 6;
1752 else if (c2 <= 0xF4) {
1754 wc = (c1 & 0x0F) << 18;
1755 wc |= (c2 & 0x3F) << 12;
1756 wc |= (c3 & 0x3F) << 6;
1766 #ifdef UTF8_INPUT_ENABLE 1769 const unsigned short *
const *pp,
nkf_char psize,
1773 const unsigned short *p;
1776 if (pp == 0)
return 1;
1779 if (c1 < 0 || psize <= c1)
return 1;
1781 if (p == 0)
return 1;
1784 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0)
return 1;
1786 if (val == 0)
return 1;
1807 const unsigned short *
const *pp;
1808 const unsigned short *
const *
const *ppp;
1809 static const char no_best_fit_chars_table_C2[] =
1810 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1811 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1812 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1813 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1814 static const char no_best_fit_chars_table_C2_ms[] =
1815 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1816 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1817 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1818 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1819 static const char no_best_fit_chars_table_932_C2[] =
1820 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1821 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1822 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1823 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1824 static const char no_best_fit_chars_table_932_C3[] =
1825 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1826 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1827 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1828 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1834 }
else if(c2 < 0xe0){
1839 if(no_best_fit_chars_table_932_C2[c1&0x3F])
return 1;
1842 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1848 if(no_best_fit_chars_table_C2[c1&0x3F])
return 1;
1851 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1855 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F])
return 1;
1880 }
else if(c0 < 0xF0){
1883 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
return 1;
1889 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE)
return 1;
1892 if(c0 == 0x92)
return 1;
1897 if(c1 == 0x80 || c0 == 0x9C)
return 1;
1905 if(c0 == 0x94)
return 1;
1908 if(c0 == 0xBB)
return 1;
1918 if(c0 == 0x95)
return 1;
1921 if(c0 == 0xA5)
return 1;
1928 if(c0 == 0x8D)
return 1;
1934 if(0xA0 <= c0 && c0 <= 0xA5)
return 1;
1949 #ifdef SHIFTJIS_CP932 1952 if (
e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1962 #ifdef UTF8_OUTPUT_ENABLE 1963 #define X0213_SURROGATE_FIND(tbl, size, euc) do { \ 1965 for (i = 0; i < size; i++) \ 1966 if (tbl[i][0] == euc) { \ 1975 const unsigned short *p;
1992 c2 = (c2&0x7f) - 0x21;
1993 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2002 c2 = (c2&0x7f) - 0x21;
2003 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2013 c1 = (c1 & 0x7f) - 0x21;
2014 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2016 if (
x0213_f && 0xD800<=val && val<=0xDBFF) {
2017 nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2038 for (i = 0; i < sizeof_x0213_combining_chars; i++)
2041 if (i >= sizeof_x0213_combining_chars)
2043 euc = (c2&0x7f)<<8 | (c1&0x7f);
2044 for (i = 0; i < sizeof_x0213_combining_table; i++)
2059 }
else if (0xc0 <= c2 && c2 <= 0xef) {
2061 #ifdef NUMCHAR_OPTION 2072 #ifdef UTF8_INPUT_ENABLE 2097 for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2104 for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2130 }
else if (c2 == 0x8f){
2134 if (!
cp51932_f && !
x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2139 c2 = (c2 << 8) | (c1 & 0x7f);
2141 #ifdef SHIFTJIS_CP932 2144 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2165 #ifdef SHIFTJIS_CP932 2166 if (
cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2168 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2192 }
else if ((c2 ==
EOF) || (c2 == 0) || c2 <
SP) {
2194 }
else if (!
x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2196 if(c1 == 0x7F)
return 0;
2201 if (ret)
return ret;
2211 for (i = 0; i < sizeof_x0213_combining_table; i++) {
2223 for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2235 static const char w_iconv_utf8_1st_byte[] =
2237 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2238 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2239 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2240 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2247 if (c1 < 0 || 0xff < c1) {
2248 }
else if (c1 == 0) {
2250 }
else if ((c1 & 0xC0) == 0x80) {
2253 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2255 if (c2 < 0x80 || 0xBF < c2)
return 0;
2258 if (c3 == 0)
return -1;
2259 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2264 if (c3 == 0)
return -1;
2265 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2269 if (c3 == 0)
return -1;
2270 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2274 if (c3 == 0)
return -2;
2275 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2279 if (c3 == 0)
return -2;
2280 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2284 if (c3 == 0)
return -2;
2285 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2293 if (c1 == 0 || c1 ==
EOF){
2294 }
else if ((c1 & 0xf8) == 0xf0) {
2300 ret =
w2e_conv(c1, c2, c3, &c1, &c2);
2319 #define NKF_ICONV_INVALID_CODE_RANGE -13 2320 #define NKF_ICONV_WAIT_COMBINING_CHAR -14 2321 #define NKF_ICONV_NOT_COMBINED -15 2331 }
else if ((wc>>11) == 27) {
2334 }
else if (wc < 0xFFFF) {
2338 if (ret)
return ret;
2339 }
else if (wc < 0x10FFFF) {
2357 }
else if ((wc2>>11) == 27) {
2360 }
else if (wc2 < 0xFFFF) {
2363 for (i = 0; i < sizeof_x0213_combining_table; i++) {
2372 }
else if (wc2 < 0x10FFFF) {
2391 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1 2392 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2 2404 if (0xD8 <= c1 && c1 <= 0xDB) {
2405 if (0xDC <= c3 && c3 <= 0xDF) {
2412 if (0xD8 <= c2 && c2 <= 0xDB) {
2413 if (0xDC <= c4 && c4 <= 0xDF) {
2430 if (0xD8 <= c3 && c3 <= 0xDB) {
2437 if (0xD8 <= c2 && c2 <= 0xDB) {
2480 wc = c2 << 16 | c3 << 8 | c4;
2483 wc = c3 << 16 | c2 << 8 | c1;
2486 wc = c1 << 16 | c4 << 8 | c3;
2489 wc = c4 << 16 | c1 << 8 | c2;
2539 #define output_ascii_escape_sequence(mode) do { \ 2540 if (output_mode != ASCII && output_mode != ISO_8859_1) { \ 2543 (*o_putc)(ascii_intro); \ 2544 output_mode = mode; \ 2594 #ifdef NUMCHAR_OPTION 2602 c2 = 0x7F + c1 / 94;
2603 c1 = 0x21 + c1 % 94;
2615 else if (c2 ==
EOF) {
2629 (*o_putc)(c2 & 0x7f);
2634 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2635 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1)
return;
2649 if (
x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2653 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2654 c1 = 0x21 + c1 % 94;
2657 (*o_putc)((c2 & 0x7f) | 0x080);
2658 (*o_putc)(c1 | 0x080);
2660 (*o_putc)((c2 & 0x7f) | 0x080);
2661 (*o_putc)(c1 | 0x080);
2673 }
else if (c2 == 0) {
2678 (*o_putc)(
SS2); (*o_putc)(c1|0x80);
2681 (*o_putc)(c1 | 0x080);
2685 #ifdef SHIFTJIS_CP932 2688 if (
e2s_conv(c2, c1, &s2, &s1) == 0){
2699 (*o_putc)((c2 & 0x7f) | 0x080);
2700 (*o_putc)(c1 | 0x080);
2703 (*o_putc)((c2 & 0x7f) | 0x080);
2704 (*o_putc)(c1 | 0x080);
2713 (*o_putc)(c2 | 0x080);
2714 (*o_putc)(c1 | 0x080);
2721 #ifdef NUMCHAR_OPTION 2726 if (!
x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2731 c1 += 0x40 + (c1 > 0x3e);
2745 }
else if (c2 == 0) {
2753 (*o_putc)(c1 | 0x080);
2757 if (
e2s_conv(c2, c1, &c2, &c1) == 0){
2770 #ifdef SHIFTJIS_CP932 2772 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2789 #ifdef UTF8_OUTPUT_ENABLE 2790 #define OUTPUT_UTF8(val) do { \ 2791 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \ 2793 if (c2) (*o_putc)(c2); \ 2794 if (c3) (*o_putc)(c3); \ 2795 if (c4) (*o_putc)(c4); \ 2835 #define OUTPUT_UTF16_BYTES(c1, c2) do { \ 2836 if (output_endian == ENDIAN_LITTLE){ \ 2845 #define OUTPUT_UTF16(val) do { \ 2846 if (nkf_char_unicode_bmp_p(val)) { \ 2847 c2 = (val >> 8) & 0xff; \ 2849 OUTPUT_UTF16_BYTES(c1, c2); \ 2851 val &= VALUE_MASK; \ 2852 if (val <= UNICODE_MAX) { \ 2853 c2 = (val >> 10) + NKF_INT32_C(0xD7C0); \ 2854 c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); \ 2855 OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \ 2856 OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \ 2889 #define OUTPUT_UTF32(c) do { \ 2890 if (output_endian == ENDIAN_LITTLE){ \ 2891 (*o_putc)( (c) & 0xFF); \ 2892 (*o_putc)(((c) >> 8) & 0xFF); \ 2893 (*o_putc)(((c) >> 16) & 0xFF); \ 2897 (*o_putc)(((c) >> 16) & 0xFF); \ 2898 (*o_putc)(((c) >> 8) & 0xFF); \ 2899 (*o_putc)( (c) & 0xFF); \ 2943 #define SCORE_L2 (1) 2944 #define SCORE_KANA (SCORE_L2 << 1) 2945 #define SCORE_DEPEND (SCORE_KANA << 1) 2946 #define SCORE_CP932 (SCORE_DEPEND << 1) 2947 #define SCORE_X0212 (SCORE_CP932 << 1) 2948 #define SCORE_X0213 (SCORE_X0212 << 1) 2949 #define SCORE_NO_EXIST (SCORE_X0213 << 1) 2950 #define SCORE_iMIME (SCORE_NO_EXIST << 1) 2951 #define SCORE_ERROR (SCORE_iMIME << 1) 2953 #define SCORE_INIT (SCORE_iMIME) 3002 ptr->
score &= ~score;
3013 }
else if (c2 ==
SS2){
3015 }
else if (c2 == 0x8f){
3016 if ((c1 & 0x70) == 0x20){
3018 }
else if ((c1 & 0x70) == 0x60){
3020 }
else if ((c1 & 0x70) == 0x70){
3025 #ifdef UTF8_OUTPUT_ENABLE 3029 }
else if ((c2 & 0x70) == 0x20){
3031 }
else if ((c2 & 0x70) == 0x70){
3033 }
else if ((c2 & 0x70) >= 0x50){
3094 }
else if (0xa1 <= c && c <= 0xdf){
3099 }
else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3102 }
else if (0xed <= c && c <= 0xee){
3105 #ifdef SHIFTJIS_CP932 3111 }
else if (0xf0 <= c && c <= 0xfc){
3120 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3130 #ifdef SHIFTJIS_CP932 3131 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3143 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3167 }
else if (
SS2 == c || (0xa1 <= c && c <= 0xfe)){
3171 }
else if (0x8f == c){
3180 if (0xa1 <= c && c <= 0xfe){
3190 if (0xa1 <= c && c <= 0xfe){
3200 #ifdef UTF8_INPUT_ENABLE 3213 }
else if (0xc0 <= c && c <= 0xdf){
3216 }
else if (0xe0 <= c && c <= 0xef){
3219 }
else if (0xf0 <= c && c <= 0xf4){
3228 if (0x80 <= c && c <= 0xbf){
3231 int bom = (ptr->
buf[0] == 0xef && ptr->
buf[1] == 0xbb
3232 && ptr->
buf[2] == 0xbf);
3234 &ptr->
buf[0], &ptr->
buf[1]);
3245 if (0x80 <= c && c <= 0xbf){
3262 int action_flag = 1;
3275 }
else if(p->
stat == 0){
3288 }
else if (c <=
DEL){
3308 #define STD_GC_BUFSIZE (256) 3362 hold_buf[hold_count++] = c2;
3363 return ((hold_count >=
HOLD_SIZE*2) ?
EOF : hold_count);
3420 while (hold_index < hold_count){
3421 c1 = hold_buf[hold_index++];
3426 else if (c1 <=
DEL){
3429 }
else if (
iconv ==
s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3434 if (hold_index < hold_count){
3435 c2 = hold_buf[hold_index++];
3446 switch ((*
iconv)(c1, c2, 0)) {
3449 if (hold_index < hold_count){
3450 c3 = hold_buf[hold_index++];
3451 }
else if ((c3 = (*
i_getc)(f)) ==
EOF) {
3456 if (hold_index < hold_count){
3457 c4 = hold_buf[hold_index++];
3458 }
else if ((c4 = (*
i_getc)(f)) ==
EOF) {
3463 (*iconv)(c1, c2, (c3<<8)|c4);
3467 if (hold_index < hold_count){
3468 c3 = hold_buf[hold_index++];
3470 }
else if ((c3 = (*
i_getc)(f)) ==
EOF) {
3474 if (hold_index < hold_count){
3475 c4 = hold_buf[hold_index++];
3477 }
else if ((c4 = (*
i_getc)(f)) ==
EOF) {
3479 if (fromhold_count <= 2)
3487 if (fromhold_count <= 2) {
3490 }
else if (fromhold_count == 3) {
3500 if (hold_index < hold_count){
3501 c3 = hold_buf[hold_index++];
3509 if ((*
iconv)(c1, c2, c3) == -3) {
3512 if (hold_index < hold_count){
3513 c4 = hold_buf[hold_index++];
3515 }
else if ((c4 = (*
i_getc)(f)) ==
EOF) {
3519 if (hold_index < hold_count){
3520 c5 = hold_buf[hold_index++];
3522 }
else if ((c5 = (*
i_getc)(f)) ==
EOF) {
3524 if (fromhold_count == 4)
3530 if (hold_index < hold_count){
3531 c6 = hold_buf[hold_index++];
3533 }
else if ((c6 = (*
i_getc)(f)) ==
EOF) {
3535 if (fromhold_count == 5) {
3537 }
else if (fromhold_count == 4) {
3548 if (fromhold_count == 6) {
3550 }
else if (fromhold_count == 5) {
3553 }
else if (fromhold_count == 4) {
3566 if (c3 ==
EOF)
break;
3579 switch(c2 = (*
i_getc)(f)){
3581 if((c2 = (*
i_getc)(f)) == 0x00){
3582 if((c2 = (*
i_getc)(f)) == 0xFE){
3583 if((c2 = (*
i_getc)(f)) == 0xFF){
3584 if(!input_encoding){
3592 (*i_ungetc)(0xFF,
f);
3594 (*i_ungetc)(0xFE,
f);
3595 }
else if(c2 == 0xFF){
3596 if((c2 = (*
i_getc)(
f)) == 0xFE){
3597 if(!input_encoding){
3604 (*i_ungetc)(0xFF,
f);
3606 (*i_ungetc)(0xFF,
f);
3608 (*i_ungetc)(0x00,
f);
3610 (*i_ungetc)(0x00,
f);
3613 if((c2 = (*
i_getc)(
f)) == 0xBB){
3614 if((c2 = (*
i_getc)(
f)) == 0xBF){
3615 if(!input_encoding){
3622 (*i_ungetc)(0xBF,
f);
3624 (*i_ungetc)(0xBB,
f);
3626 (*i_ungetc)(0xEF,
f);
3629 if((c2 = (*
i_getc)(
f)) == 0xFF){
3630 if((c2 = (*
i_getc)(
f)) == 0x00){
3631 if((c2 = (*
i_getc)(
f)) == 0x00){
3632 if(!input_encoding){
3639 (*i_ungetc)(0x00,
f);
3641 (*i_ungetc)(0x00,
f);
3643 if(!input_encoding){
3651 (*i_ungetc)(0xFF,
f);
3653 (*i_ungetc)(0xFE,
f);
3656 if((c2 = (*
i_getc)(
f)) == 0xFE){
3657 if((c2 = (*
i_getc)(
f)) == 0x00){
3658 if((c2 = (*
i_getc)(
f)) == 0x00){
3659 if(!input_encoding){
3667 (*i_ungetc)(0x00,
f);
3669 (*i_ungetc)(0x00,
f);
3671 if(!input_encoding){
3679 (*i_ungetc)(0xFE,
f);
3681 (*i_ungetc)(0xFF,
f);
3702 if (c1==
'@'|| c1==
'B') {
3714 if (c1==
'J'|| c1==
'B') {
3740 if (c2 == 0 && c1 ==
LF) {
3754 else if (c2 != 0 || c1 !=
LF) (*o_eol_conv)(c2, c1);
3811 #define char_size(c2,c1) (c2?2:1) 3824 }
else if (c1==
BS) {
3837 }
else if ((
f_prev == c1)
3863 }
else if (c1==
'\f') {
3867 }
else if ((c2==0 &&
nkf_isblank(c1)) || (c2 ==
'!' && c1 ==
'!')) {
3894 if (c1==(0xde&0x7f)) fold_state = 1;
3895 else if (c1==(0xdf&0x7f)) fold_state = 1;
3896 else if (c1==(0xa4&0x7f)) fold_state = 1;
3897 else if (c1==(0xa3&0x7f)) fold_state = 1;
3898 else if (c1==(0xa1&0x7f)) fold_state = 1;
3899 else if (c1==(0xb0&0x7f)) fold_state = 1;
3900 else if (
SP<=c1 && c1<=(0xdf&0x7f)) {
3924 }
else if ((prev0==
SP) ||
3934 if (c1==
'"') fold_state = 1;
3935 else if (c1==
'#') fold_state = 1;
3936 else if (c1==
'W') fold_state = 1;
3937 else if (c1==
'K') fold_state = 1;
3938 else if (c1==
'$') fold_state = 1;
3939 else if (c1==
'%') fold_state = 1;
3940 else if (c1==
'\'') fold_state = 1;
3941 else if (c1==
'(') fold_state = 1;
3942 else if (c1==
')') fold_state = 1;
3943 else if (c1==
'*') fold_state = 1;
3944 else if (c1==
'+') fold_state = 1;
3945 else if (c1==
',') fold_state = 1;
3961 switch(fold_state) {
3996 if (c1 == (0xde&0x7f)) {
4000 }
else if (c1 == (0xdf&0x7f) &&
ev[(
z_prev1-
SP)*2]) {
4020 (*o_zconv)(
cv[(c1-
SP)*2],
cv[(c1-
SP)*2+1]);
4031 if (
alpha_f&1 && c2 == 0x23) {
4034 }
else if (c2 == 0x21) {
4045 }
else if (
alpha_f&1 && 0x20<c1 && c1<0x7f &&
fv[c1-0x20]) {
4053 const char *entity = 0;
4055 case '>': entity =
">";
break;
4056 case '<': entity =
"<";
break;
4057 case '\"': entity =
""";
break;
4058 case '&': entity =
"&";
break;
4061 while (*entity) (*o_zconv)(0, *entity++);
4108 }
else if (c2 == 0x25) {
4110 static const int fullwidth_to_halfwidth[] =
4112 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4113 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4114 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4115 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4116 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4117 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4118 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4119 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4120 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4121 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4122 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4123 0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4125 if (fullwidth_to_halfwidth[c1-0x20]){
4126 c2 = fullwidth_to_halfwidth[c1-0x20];
4143 #define rot13(c) ( \ 4145 (c <= 'M') ? (c + 13): \ 4146 (c <= 'Z') ? (c - 13): \ 4148 (c <= 'm') ? (c + 13): \ 4149 (c <= 'z') ? (c - 13): \ 4153 #define rot47(c) ( \ 4155 ( c <= 'O') ? (c + 47) : \ 4156 ( c <= '~') ? (c - 47) : \ 4169 (*o_rot_conv)(c2,c1);
4177 if (0x20 < c1 && c1 < 0x74) {
4179 (*o_hira_conv)(c2,c1);
4184 (*o_hira_conv)(c2,c1);
4187 }
else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4189 (*o_hira_conv)(c2,c1);
4197 }
else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4199 }
else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4203 (*o_hira_conv)(c2,c1);
4210 #define RANGE_NUM_MAX 18 4234 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4238 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4244 start = range[i][0];
4247 if (c >= start && c <= end) {
4252 (*o_iso2022jp_check_conv)(c2,c1);
4259 (
const unsigned char *)
"\075?EUC-JP?B?",
4260 (
const unsigned char *)
"\075?SHIFT_JIS?B?",
4261 (
const unsigned char *)
"\075?ISO-8859-1?Q?",
4262 (
const unsigned char *)
"\075?ISO-8859-1?B?",
4263 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
4264 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
4265 (
const unsigned char *)
"\075?ISO-2022-JP?Q?",
4267 (
const unsigned char *)
"\075?UTF-8?B?",
4268 (
const unsigned char *)
"\075?UTF-8?Q?",
4270 (
const unsigned char *)
"\075?US-ASCII?Q?",
4278 #if defined(UTF8_INPUT_ENABLE) 4286 #if defined(UTF8_INPUT_ENABLE) 4294 'B',
'B',
'Q',
'B',
'B',
'B',
'Q',
4295 #if defined(UTF8_INPUT_ENABLE) 4305 #define MIME_BUF_SIZE (1024) 4306 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) 4307 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK] 4316 #define MAXRECOVER 20 4335 (*i_mungetc_buf)(c,
f);
4393 if (c==
'=' && d==
'?') {
4401 if (!( (c==
'+'||c==
'/'|| c==
'=' || c==
'?' ||
is_alnum(c))))
4420 const unsigned char *p,*q;
4426 p = mime_pattern[j];
4429 for(i=2;p[i]>
SP;i++) {
4433 while (mime_pattern[++j]) {
4434 p = mime_pattern[j];
4436 if (p[k]!=q[k])
break;
4439 p = mime_pattern[j];
4482 if (c1==
LF||c1==
SP||c1==
CR||
4483 c1==
'-'||c1==
'_'||
is_alnum(c1))
continue;
4494 if (!(++i<MAXRECOVER) || c1==
EOF)
break;
4495 if (c1==
'b'||c1==
'B') {
4497 }
else if (c1==
'q'||c1==
'Q') {
4503 if (!(++i<MAXRECOVER) || c1==
EOF)
break;
4535 fprintf(stderr,
"%s\n", str ? str :
"NULL");
4579 #if !defined(PERL_XS) && !defined(WIN32DLL) 4583 if (filename !=
NULL) printf(
"%s: ", filename);
4591 printf(
"%s%s%s%s\n",
4657 #ifdef NUMCHAR_OPTION 4673 if (buf[i] ==
'x' || buf[i] ==
'X'){
4674 for (j = 0; j < 7; j++){
4686 for (j = 0; j < 8; j++){
4719 #ifdef UNICODE_NORMALIZATION 4727 const unsigned char *array;
4728 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4731 if (c ==
EOF || c > 0xFF || (c & 0xc0) == 0x80)
return c;
4735 while (lower <= upper) {
4736 int mid = (lower+upper) / 2;
4739 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[
len]; len++) {
4744 lower = 1, upper = 0;
4750 if (array[len] <
nkf_buf_at(buf, len)) lower = mid + 1;
4751 else upper = mid - 1;
4760 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4765 }
while (lower <= upper);
4788 }
else if (c ==
'_') {
4793 }
else if (c >
'/') {
4795 }
else if (c ==
'+' || c ==
'-') {
4807 nkf_char t1, t2, t3, t4, mode, exit_mode;
4830 if (c1<=
SP ||
DEL<=c1) {
4844 lwsp_buf =
nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4874 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4875 if (lwsp_count++>lwsp_size){
4877 lwsp_buf_new =
nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4878 lwsp_buf = lwsp_buf_new;
4884 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4886 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4893 if (c1==
'='&&c2<
SP) {
4895 if (c1 ==
EOF)
return (
EOF);
4898 goto restart_mime_q;
4906 if (c2<=
SP)
return c2;
4940 if ((c1 ==
'?') && (c2 ==
'=')) {
4943 lwsp_buf =
nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4976 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4977 if (lwsp_count++>lwsp_size){
4979 lwsp_buf_new =
nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4980 lwsp_buf = lwsp_buf_new;
4986 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4988 for(lwsp_count--;lwsp_count>0;lwsp_count--)
5020 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
5023 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
5026 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
5037 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
5039 #define MIMEOUT_BUF_LENGTH 74 5050 const unsigned char *p;
5053 p = mime_pattern[0];
5054 for(i=0;mime_pattern[i];i++) {
5055 if (mode == mime_encode[i]) {
5056 p = mime_pattern[i];
5099 (*o_base64conv)(
EOF,0);
5101 (*o_base64conv)(0,
SP);
5106 (*o_base64conv)(
EOF,0);
5108 (*o_base64conv)(0,
SP);
5117 (*o_base64conv)(
EOF,0);
5119 (*o_base64conv)(0,
SP);
5143 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4)]);
5149 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2)]);
5172 (*o_mputc)(
bin2hex(((c>>4)&0xf)));
5182 (*o_mputc)(basis_64[c>>2]);
5187 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5193 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5194 (*o_mputc)(basis_64[c & 0x3F]);
5214 if (c!=
CR && c!=
LF) {
5277 if (c ==
CR || c ==
LF) {
5282 }
else if (c <=
SP) {
5321 if (c==
CR || c==
LF) {
5348 static const char *str =
"boundary=\"";
5349 static int len = 10;
5369 for (j = 0; j <= i; ++j) {
5387 if (lastchar==
CR || lastchar ==
LF){
5408 if (lastchar ==
CR || lastchar ==
LF){
5484 (*o_base64conv)(c2,c1);
5488 typedef struct nkf_iconv_t {
5491 size_t input_buffer_size;
5492 char *output_buffer;
5493 size_t output_buffer_size;
5497 nkf_iconv_new(
char *tocode,
char *fromcode)
5499 nkf_iconv_t converter;
5502 converter->input_buffer =
nkf_xmalloc(converter->input_buffer_size);
5503 converter->output_buffer_size =
IOBUF_SIZE * 2;
5504 converter->output_buffer =
nkf_xmalloc(converter->output_buffer_size);
5505 converter->cd = iconv_open(tocode, fromcode);
5506 if (converter->cd == (iconv_t)-1)
5510 perror(fprintf(
"iconv doesn't support %s to %s conversion.", fromcode, tocode));
5513 perror(
"can't iconv_open");
5519 nkf_iconv_convert(nkf_iconv_t *converter,
FILE *
input)
5521 size_t invalid = (size_t)0;
5522 char *input_buffer = converter->input_buffer;
5523 size_t input_length = (size_t)0;
5524 char *output_buffer = converter->output_buffer;
5525 size_t output_length = converter->output_buffer_size;
5531 input_buffer[input_length++] = c;
5532 if (input_length < converter->input_buffer_size)
break;
5536 size_t ret =
iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5537 while (output_length-- > 0) {
5538 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5540 if (ret == (
size_t) - 1) {
5543 if (input_buffer != converter->input_buffer)
5544 memmove(converter->input_buffer, input_buffer, input_length);
5547 converter->output_buffer_size *= 2;
5548 output_buffer =
realloc(converter->outbuf, converter->output_buffer_size);
5549 if (output_buffer ==
NULL) {
5550 perror(
"can't realloc");
5553 converter->output_buffer = output_buffer;
5556 perror(
"can't iconv");
5569 nkf_iconv_close(nkf_iconv_t *convert)
5573 iconv_close(converter->cd);
5602 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 5605 #ifdef UTF8_INPUT_ENABLE 5612 #ifdef UTF8_OUTPUT_ENABLE 5616 #ifdef UNICODE_NORMALIZATION 5632 #ifdef SHIFTJIS_CP932 5642 for (i = 0; i < 256; i++){
5688 input_encoding =
NULL;
5689 output_encoding =
NULL;
5700 if (!output_encoding) {
5703 if (!output_encoding) {
5767 #ifdef NUMCHAR_OPTION 5773 #ifdef UNICODE_NORMALIZATION 5787 if (input_encoding) {
5806 #if !defined(PERL_XS) && !defined(WIN32DLL) 5821 #define NEXT continue 5822 #define SKIP c2=0;continue 5823 #define MORE c2=c1;continue 5824 #define SEND (void)0 5826 #define set_input_mode(mode) do { \ 5827 input_mode = mode; \ 5829 set_input_codename("ISO-2022-JP"); \ 5830 debug("ISO-2022-JP"); \ 5839 int is_8bit =
FALSE;
5849 #if !defined(PERL_XS) && !defined(WIN32DLL) 5850 fprintf(stderr,
"no output encoding given\n");
5856 #ifdef UTF8_INPUT_ENABLE 5908 #ifdef INPUT_CODE_FIX 5909 if (!input_encoding)
5951 0xA1 <= c1 && c1 <= 0xDF) {
5956 }
else if (c1 >
DEL) {
5967 else if ((
iconv ==
s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5979 }
else if (
SP < c1 && c1 <
DEL) {
6003 }
else if (c1 ==
'?') {
6033 else if (c1 ==
'&') {
6041 else if (c1 ==
'$') {
6048 }
else if (c1 ==
'@' || c1 ==
'B') {
6052 }
else if (c1 ==
'(') {
6061 }
else if (c1 ==
'@'|| c1 ==
'B') {
6066 }
else if (c1 ==
'D'){
6070 }
else if (c1 ==
'O' || c1 ==
'Q'){
6073 }
else if (c1 ==
'P'){
6095 }
else if (c1 ==
'(') {
6103 else if (c1 ==
'I') {
6109 else if (c1 ==
'B' || c1 ==
'J' || c1 ==
'H') {
6124 else if (c1 ==
'.') {
6129 else if (c1 ==
'A') {
6140 else if (c1 ==
'N') {
6164 }
else if (c1 ==
'$') {
6168 }
else if ((
'E' <= c1 && c1 <=
'G') ||
6169 (
'O' <= c1 && c1 <=
'Q')) {
6177 static const nkf_char jphone_emoji_first_table[7] =
6178 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
6181 while (
SP <= c1 && c1 <=
'z') {
6182 (*oconv)(0, c1 + c3);
6199 }
else if (c1 ==
LF || c1 ==
CR) {
6218 }
else if (c1 ==
LF && (c1=(*
i_getc)(f))!=
EOF && c1 ==
SP) {
6238 switch ((*
iconv)(c2, c1, 0)) {
6246 (*iconv)(c2, c1, c3|c4);
6271 if ((*
iconv)(c2, c1, c3) == -3) {
6303 0x7F <= c2 && c2 <= 0x92 &&
6304 0x21 <= c1 && c1 <= 0x7E) {
6331 (*iconv)(
EOF, 0, 0);
6362 unsigned char *cp_back =
NULL;
6367 while(*cp && *cp++!=
'-');
6368 while (*cp || cp_back) {
6377 if (!*cp || *cp ==
SP) {
6383 for (j=0;*p && *p !=
'=' && *p == cp[j];p++, j++);
6384 if (*p == cp[j] || cp[j] ==
SP){
6391 #if !defined(PERL_XS) && !defined(WIN32DLL) 6392 fprintf(stderr,
"unknown long option: --%s\n", cp);
6396 while(*cp && *cp !=
SP && cp++);
6410 input_encoding = enc;
6417 output_encoding = enc;
6421 if (p[0] ==
'0' || p[0] ==
'1') {
6429 if (strcmp(
long_option[i].name,
"overwrite") == 0){
6435 if (strcmp(
long_option[i].name,
"overwrite=") == 0){
6443 if (strcmp(
long_option[i].name,
"in-place") == 0){
6449 if (strcmp(
long_option[i].name,
"in-place=") == 0){
6459 if (strcmp(
long_option[i].name,
"cap-input") == 0){
6463 if (strcmp(
long_option[i].name,
"url-input") == 0){
6468 #ifdef NUMCHAR_OPTION 6469 if (strcmp(
long_option[i].name,
"numchar-input") == 0){
6475 if (strcmp(
long_option[i].name,
"no-output") == 0){
6485 #ifdef SHIFTJIS_CP932 6489 #ifdef UTF8_OUTPUT_ENABLE 6494 if (strcmp(
long_option[i].name,
"no-cp932") == 0){
6495 #ifdef SHIFTJIS_CP932 6499 #ifdef UTF8_OUTPUT_ENABLE 6504 #ifdef SHIFTJIS_CP932 6505 if (strcmp(
long_option[i].name,
"cp932inv") == 0){
6523 if (strcmp(
long_option[i].name,
"exec-out") == 0){
6528 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE) 6529 if (strcmp(
long_option[i].name,
"no-cp932ext") == 0){
6533 if (strcmp(
long_option[i].name,
"no-best-fit-chars") == 0){
6557 if (strcmp(
long_option[i].name,
"fb-subchar") == 0){
6561 if (strcmp(
long_option[i].name,
"fb-subchar=") == 0){
6570 }
else if(p[1] ==
'x' || p[1] ==
'X'){
6588 #ifdef UTF8_OUTPUT_ENABLE 6589 if (strcmp(
long_option[i].name,
"ms-ucs-map") == 0){
6594 #ifdef UNICODE_NORMALIZATION 6595 if (strcmp(
long_option[i].name,
"utf8mac-input") == 0){
6608 #if !defined(PERL_XS) && !defined(WIN32DLL) 6609 fprintf(stderr,
"unsupported long option: --%s\n",
long_option[i].name);
6625 }
else if (*cp==
'2') {
6652 if (*cp==
'@'||*cp==
'B')
6657 if (*cp==
'J'||*cp==
'B'||*cp==
'H')
6665 if (
'9'>= *cp && *cp>=
'0')
6673 #if defined(MSDOS) || defined(__OS2__) 6688 #ifdef UTF8_OUTPUT_ENABLE 6701 if (
'1'== cp[0] &&
'6'==cp[1]) {
6704 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6715 }
else if (cp[0] ==
'B') {
6722 enc_idx = enc_idx ==
UTF_16 6726 enc_idx = enc_idx ==
UTF_16 6734 #ifdef UTF8_INPUT_ENABLE 6741 if (
'1'== cp[0] &&
'6'==cp[1]) {
6745 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6756 }
else if (cp[0] ==
'B') {
6760 enc_idx = (enc_idx ==
UTF_16 6785 while (
'0'<= *cp && *cp <=
'4') {
6786 alpha_f |= 1 << (*cp++ -
'0');
6811 while(
'0'<= *cp && *cp <=
'9') {
6820 while(
'0'<= *cp && *cp <=
'9') {
6828 if (*cp==
'B'||*cp==
'Q') {
6831 }
else if (*cp==
'N') {
6833 }
else if (*cp==
'S') {
6835 }
else if (*cp==
'0') {
6846 }
else if (*cp==
'Q') {
6858 if (
'9'>= *cp && *cp>=
'0')
6880 }
else if (*cp==
'm') {
6882 }
else if (*cp==
'w') {
6884 }
else if (*cp==
'0') {
6890 if (
'2' <= *cp && *cp <=
'9') {
6893 }
else if (*cp ==
'0' || *cp ==
'1') {
6903 while(*cp && *cp++!=
'-');
6906 #if !defined(PERL_XS) && !defined(WIN32DLL) 6907 fprintf(stderr,
"unknown option: -%c\n", *(cp-1));
6917 #include "nkf32dll.c" 6918 #elif defined(PERL_XS) 6926 char *outfname =
NULL;
6930 _BufferSize.y = 400;
6932 #ifdef DEFAULT_CODE_LOCALE 6933 setlocale(LC_CTYPE,
"");
6937 for (argc--,argv++; (argc > 0) && **argv ==
'-'; argc--, argv++) {
6938 cp = (
unsigned char *)*argv;
6943 if (pipe(fds) < 0 || (pid = fork()) < 0){
6954 execvp(argv[1], &argv[1]);
6974 int exec_f_back = exec_f;
6988 exec_f = exec_f_back;
6995 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 6996 if (freopen(
"",
"wb",stdout) ==
NULL)
7003 setbuf(stdout, (
char *)
NULL);
7009 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7010 if (freopen(
"",
"rb",stdin) == NULL)
return (-1);
7023 int is_argument_error =
FALSE;
7030 if ((fin = fopen((origfname = *argv++),
"r")) == NULL) {
7032 is_argument_error =
TRUE;
7045 +
strlen(
".nkftmpXXXXXX")
7047 strcpy(outfname, origfname);
7051 for (i =
strlen(outfname); i; --i){
7052 if (outfname[i - 1] ==
'/' 7053 || outfname[i - 1] ==
'\\'){
7059 strcat(outfname,
"ntXXXXXX");
7061 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7062 S_IREAD | S_IWRITE);
7064 strcat(outfname,
".nkftmpXXXXXX");
7065 fd = mkstemp(outfname);
7068 || (fd_backup = dup(
fileno(stdout))) < 0
7080 outfname =
"nkf.out";
7083 if(freopen(outfname,
"w", stdout) == NULL) {
7088 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7089 if (freopen(
"",
"wb",stdout) == NULL)
7097 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7098 if (freopen(
"",
"rb",fin) == NULL)
7107 char *filename =
NULL;
7109 if (nfiles > 1) filename = origfname;
7116 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__) 7127 if (
stat(origfname, &sb)) {
7128 fprintf(stderr,
"Can't stat %s\n", origfname);
7131 if (chmod(outfname, sb.st_mode)) {
7132 fprintf(stderr,
"Can't set permission %s\n", outfname);
7137 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__) 7138 tb[0] = tb[1] = sb.st_mtime;
7139 if (
utime(outfname, tb)) {
7140 fprintf(stderr,
"Can't set timestamp %s\n", outfname);
7145 if (
utime(outfname, &tb)) {
7146 fprintf(stderr,
"Can't set timestamp %s\n", outfname);
7153 unlink(backup_filename);
7155 if (rename(origfname, backup_filename)) {
7156 perror(backup_filename);
7157 fprintf(stderr,
"Can't rename %s to %s\n",
7158 origfname, backup_filename);
7163 if (unlink(origfname)){
7168 if (rename(outfname, origfname)) {
7170 fprintf(stderr,
"Can't rename %s to %s\n",
7171 outfname, origfname);
7178 if (is_argument_error)
7183 scanf(
"%d",&end_check);
#define nkf_char_unicode_new(c)
static int x0213_combining_p(nkf_char wc)
const unsigned short x0213_combining_chars[sizeof_x0213_combining_chars]
#define OUTPUT_UTF16(val)
#define output_ascii_escape_sequence(mode)
static nkf_char nkf_buf_at(nkf_buf_t *buf, int index)
static void status_check(struct input_code *ptr, nkf_char c)
static nkf_char hold_buf[HOLD_SIZE *2]
static void output_escape_sequence(int mode)
#define OUTPUT_UTF16_BYTES(c1, c2)
nkf_native_encoding NkfEncodingUTF_32
static nkf_char mime_begin(FILE *f)
static void set_iconv(nkf_char f, nkf_char(*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0))
static void encode_fallback_xml(nkf_char c)
#define NKF_ICONV_INVALID_CODE_RANGE
const unsigned short *const x0212_shiftjis[]
static void * nkf_xmalloc(size_t size)
static nkf_char url_ungetc(nkf_char c, FILE *f)
size_t strlen(const char *)
static size_t unicode_iconv(nkf_char wc, int nocombine)
static size_t nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static nkf_char nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static const nkf_char score_table_8FF0[]
#define NKF_ICONV_WAIT_COMBINING_CHAR
static nkf_char std_getc(FILE *f)
struct @43 encoding_name_to_id_table[]
const unsigned short *const *const utf8_to_euc_3bytes_932[]
static nkf_char mime_ungetc_buf(nkf_char c, FILE *f)
static nkf_char(* i_cungetc)(nkf_char c, FILE *f)
static void(* o_mputc)(nkf_char c)
static void nkf_buf_push(nkf_buf_t *buf, nkf_char c)
static size_t nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
#define nkf_enc_asciicompat(enc)
#define nkf_enc_name(enc)
nkf_native_encoding NkfEncodingASCII
static void eof_mime(void)
static nkf_char(* i_mungetc)(nkf_char c, FILE *f)
static nkf_char e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
static void s_status(struct input_code *, nkf_char)
static int h_conv(FILE *f, nkf_char c1, nkf_char c2)
static void e_oconv(nkf_char c2, nkf_char c1)
static nkf_char base64decode(nkf_char c)
static void(* o_iso2022jp_check_conv)(nkf_char c2, nkf_char c1)
static void(* o_eol_conv)(nkf_char c2, nkf_char c1)
static nkf_char(* iconv_for_check)(nkf_char c2, nkf_char c1, nkf_char c0)=0
static void j_oconv(nkf_char c2, nkf_char c1)
const unsigned short *const utf8_to_euc_2bytes_932[]
static void base64_conv(nkf_char c2, nkf_char c1)
SSL_METHOD *(* func)(void)
#define nkf_enc_to_iconv(enc)
nkf_encoding nkf_encoding_table[]
const unsigned short cp932inv[2][189]
static char * backup_suffix
#define nkf_char_unicode_p(c)
static size_t nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
static const unsigned char ev_x0213[]
static void nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
static void status_push_ch(struct input_code *ptr, nkf_char c)
static void status_reinit(struct input_code *ptr)
#define UTF16_TO_UTF32(lead, trail)
static nkf_char numchar_getc(FILE *f)
static struct @45 mime_input_state
#define nkf_char_unicode_value_p(c)
static void w_oconv(nkf_char c2, nkf_char c1)
static const char * input_codename
static nkf_char mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
static int is_x0213_2_in_x0212(nkf_char c1)
static int nkf_enc_find_index(const char *name)
#define nkf_buf_length(buf)
static nkf_char unicode_iconv_combine(nkf_char wc, nkf_char wc2)
static int kanji_convert(FILE *f)
static void switch_mime_getc(void)
const unsigned short *const euc_to_utf8_2bytes[]
#define nkf_noescape_mime(c)
static nkf_char(* i_ugetc)(FILE *)
#define nkf_char_unicode_bmp_p(c)
const nkf_native_encoding * base_encoding
static nkf_char mime_getc_buf(FILE *f)
static char * get_backup_filename(const char *suffix, const char *filename)
#define DEFAULT_CODE_LOCALE
static void no_connection(nkf_char c2, nkf_char c1)
static void status_clear(struct input_code *ptr)
static const unsigned char dv[]
#define MIME_DECODE_DEFAULT
static const nkf_char mime_encode_method[]
static void print_guessed_code(char *filename)
static unsigned char ascii_intro
#define NKF_ICONV_NOT_COMBINED
RUBY_EXTERN void * memmove(void *, const void *, size_t)
static void encode_fallback_subchar(nkf_char c)
static nkf_char(* iconv)(nkf_char c2, nkf_char c1, nkf_char c0)
static nkf_char x0212_shift(nkf_char c)
static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
static nkf_encoding * input_encoding
static unsigned char prefix_table[256]
static nkf_char(* mime_iconv_back)(nkf_char c2, nkf_char c1, nkf_char c0)
static void s_oconv(nkf_char c2, nkf_char c1)
static nkf_char(* i_getc)(FILE *f)
static nkf_char cap_getc(FILE *f)
static const char * nkf_locale_charmap(void)
static int mime_decode_mode
static nkf_char e2w_conv(nkf_char c2, nkf_char c1)
static nkf_char(* i_mgetc)(FILE *)
static void * nkf_xrealloc(void *ptr, size_t size)
static nkf_encoding * nkf_default_encoding(void)
static nkf_char w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
static unsigned char kanji_intro
const unsigned short x0213_2_surrogate_table[sizeof_x0213_2_surrogate_table][3]
const unsigned short euc_to_utf8_1byte[]
static void w_oconv32(nkf_char c2, nkf_char c1)
static nkf_char(* i_uungetc)(nkf_char c, FILE *f)
static void set_code_score(struct input_code *ptr, nkf_char score)
static void set_input_encoding(nkf_encoding *enc)
static int fold_preserve_f
static void nkf_state_init(void)
static const nkf_char mime_encode[]
struct input_code input_code_list[]
#define NKF_ICONV_NEED_TWO_MORE_BYTES
static void oconv_newline(void(*func)(nkf_char, nkf_char))
static nkf_char mime_integrity(FILE *f, const unsigned char *p)
#define nkf_enc_unicode_p(enc)
static size_t nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static const nkf_char score_table_F0[]
static void encode_fallback_java(nkf_char c)
#define nkf_buf_empty_p(buf)
nkf_native_encoding NkfEncodingISO_2022_JP
static int unicode_to_jis_common2(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
static nkf_char mime_getc(FILE *f)
static void mimeout_addchar(nkf_char c)
static nkf_char push_hold_buf(nkf_char c2)
static void iso2022jp_check_conv(nkf_char c2, nkf_char c1)
static void put_newline(void(*func)(nkf_char))
static int x0213_wait_combining_p(nkf_char wc)
#define set_input_mode(mode)
static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
const unsigned short *const euc_to_utf8_2bytes_ms[]
#define range(low, item, hi)
static const char * get_guessed_code(void)
static unsigned char stdobuf[IOBUF_SIZE]
static nkf_char(* i_nungetc)(nkf_char c, FILE *f)
static struct @46 mimeout_state
static nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
static nkf_char(* i_bgetc)(FILE *)
static const nkf_char score_table_8FA0[]
static nkf_char(* i_mungetc_buf)(nkf_char c, FILE *f)
static nkf_char cap_ungetc(nkf_char c, FILE *f)
#define is_ibmext_in_sjis(c2)
const unsigned short x0213_1_surrogate_table[sizeof_x0213_1_surrogate_table][3]
static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
static void code_score(struct input_code *ptr)
static nkf_char std_ungetc(nkf_char c, FILE *f)
static struct input_code * find_inputcode_byfunc(nkf_char(*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0))
static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
static void set_input_codename(const char *codename)
static int preserve_time_f
static void check_bom(FILE *f)
static void encode_fallback_html(nkf_char c)
static nkf_char nfc_getc(FILE *f)
static nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
static nkf_char(* i_mgetc_buf)(FILE *)
static nkf_char broken_ungetc(nkf_char c, FILE *f)
const unsigned short *const *const utf8_to_euc_3bytes_x0213[]
#define UTF8_INPUT_ENABLE
static const char basis_64[]
static void hira_conv(nkf_char c2, nkf_char c1)
const unsigned short x0213_combining_table[sizeof_x0213_combining_table][3]
static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
#define mime_input_buf(n)
static int nkf_str_caseeql(const char *src, const char *target)
static nkf_char url_getc(FILE *f)
#define nkf_enc_to_index(enc)
static const nkf_char score_table_8FE0[]
static int options(unsigned char *cp)
static nkf_char noconvert(FILE *f)
static void(* oconv)(nkf_char c2, nkf_char c1)
static nkf_char nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
static void mime_input_buf_unshift(nkf_char c)
static void show_configuration(void)
static void set_output_encoding(nkf_encoding *enc)
nkf_native_encoding NkfEncodingUTF_16
const unsigned short *const utf8_to_euc_2bytes_ms[]
static void shift(struct cparse_params *v, long act, VALUE tok, VALUE val)
static void(* o_hira_conv)(nkf_char c2, nkf_char c1)
static nkf_char(* i_bungetc)(nkf_char c, FILE *f)
#define nkf_byte_jisx0201_katakana_p(c)
register unsigned int len
const unsigned short *const *const utf8_to_euc_3bytes_mac[]
const unsigned short *const x0212_to_utf8_2bytes[]
const unsigned short *const euc_to_utf8_2bytes_x0213[]
static void z_conv(nkf_char c2, nkf_char c1)
static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
#define X0213_SURROGATE_FIND(tbl, size, euc)
const unsigned short shiftjis_x0212[3][189]
static void w_status(struct input_code *, nkf_char)
static void(* o_rot_conv)(nkf_char c2, nkf_char c1)
#define setvbuffer(fp, buf, size)
static void eol_conv(nkf_char c2, nkf_char c1)
static void mime_putc(nkf_char c)
static nkf_char(* i_ngetc)(FILE *)
static const nkf_char score_table_A0[]
int utime(const char *filename, const struct utimbuf *times)
const struct normalization_pair normalization_table[]
static void(* o_base64conv)(nkf_char c2, nkf_char c1)
static nkf_encoding * nkf_enc_find(const char *name)
const unsigned short *const euc_to_utf8_2bytes_mac[]
const unsigned short *const utf8_to_euc_2bytes[]
static const unsigned char cv[]
static nkf_char utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static const unsigned char fv[]
static nkf_char unicode_subchar
static int module_connection(void)
static void rot_conv(nkf_char c2, nkf_char c1)
static void debug(const char *str)
static void(* o_zconv)(nkf_char c2, nkf_char c1)
static size_t nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
static nkf_char(* i_nfc_getc)(FILE *)
const unsigned short shiftjis_cp932[3][189]
static const unsigned char ev[]
#define char_size(c2, c1)
static void(* o_putc)(nkf_char c)
static nkf_char nfc_ungetc(nkf_char c, FILE *f)
nkf_native_encoding NkfEncodingShift_JIS
static nkf_char(* i_nfc_ungetc)(nkf_char c, FILE *f)
static void(* encode_fallback)(nkf_char c)
static nkf_buf_t * nkf_buf_new(int length)
#define UTF8_OUTPUT_ENABLE
#define nkf_enc_cp5022x_p(enc)
static nkf_char(* i_ungetc)(nkf_char c, FILE *f)
static unsigned char stdibuf[IOBUF_SIZE]
static nkf_char x0212_unshift(nkf_char c)
int main(int argc, char **argv)
static void w_oconv16(nkf_char c2, nkf_char c1)
static void e_status(struct input_code *, nkf_char)
static void unswitch_mime_getc(void)
static nkf_char w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
RUBY_EXTERN int dup2(int, int)
const unsigned short *const *const utf8_to_euc_3bytes[]
static void mime_prechar(nkf_char c2, nkf_char c1)
const unsigned short *const x0212_to_utf8_2bytes_x0213[]
static nkf_char hex_getc(nkf_char ch, FILE *f, nkf_char(*g)(FILE *f), nkf_char(*u)(nkf_char c, FILE *f))
static int unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
static void clr_code_score(struct input_code *ptr, nkf_char score)
const unsigned short *const utf8_to_euc_2bytes_mac[]
static void version(void)
#define nkf_enc_to_oconv(enc)
#define MIMEOUT_BUF_LENGTH
static nkf_encoding * nkf_utf8_encoding(void)
const unsigned short *const *const utf8_to_euc_3bytes_ms[]
nkf_native_encoding NkfEncodingUTF_8
static nkf_encoding * output_encoding
static nkf_char(* i_cgetc)(FILE *)
static nkf_char numchar_ungetc(nkf_char c, FILE *f)
static void(* o_fconv)(nkf_char c2, nkf_char c1)
static nkf_char no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
static const unsigned char * mime_pattern[]
static void nkf_buf_clear(nkf_buf_t *buf)
static nkf_state_t * nkf_state
static void open_mime(nkf_char mode)
static int no_best_fit_chars_f
static nkf_char nkf_buf_pop(nkf_buf_t *buf)
nkf_char(* mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0)
static nkf_encoding * nkf_locale_encoding(void)
static ULONG(STDMETHODCALLTYPE AddRef)(IDispatch __RPC_FAR *This)
static void fold_conv(nkf_char c2, nkf_char c1)
static void encode_fallback_perl(nkf_char c)
static nkf_char mime_begin_strict(FILE *f)
const unsigned short *const utf8_to_euc_2bytes_x0213[]
static const struct @44 long_option[]
static void no_putc(nkf_char c)
static void status_reset(struct input_code *ptr)
static void close_mime(void)
static nkf_char broken_getc(FILE *f)
static void code_status(nkf_char c)
static void std_putc(nkf_char c)
static void nkf_each_char_to_hex(void(*f)(nkf_char c2, nkf_char c1), nkf_char c)
static nkf_encoding * nkf_enc_from_index(int idx)
static void status_disable(struct input_code *ptr)
nkf_native_encoding NkfEncodingEUC_JP