#include #include #include #include "transcode_data.h" #if defined(__GNUC__) # define ARG_UNUSED __attribute__ ((unused)) #else # define ARG_UNUSED #endif /* * Encode */ static int stateless_TADTextBE_mbc_enc_len(const OnigUChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { return 4; } static int stateless_TADTextBE_is_mbc_newline(const OnigUChar* p, const OnigUChar* e, OnigEncoding enc ARG_UNUSED) { if (p + 3 < e) { if (p[2] == 0 && (p[3] == 0x0a || p[3] == 0x0b || p[3] == 0x0c || p[3] == 0x0d)) return 1; } return 0; } static OnigCodePoint stateless_TADTextBE_mbc_to_code(const OnigUChar* p, const OnigUChar* e ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { return (OnigCodePoint)(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); } static int stateless_TADTextBE_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { return 4; } static int stateless_TADTextBE_code_to_mbc(OnigCodePoint code, OnigUChar *buf, OnigEncoding enc ARG_UNUSED) { OnigUChar* p = buf; *p++ = (OnigUChar)((code & 0xff000000) >> 24); *p++ = (OnigUChar)((code & 0xff0000) >> 16); *p++ = (OnigUChar)((code & 0xff00) >> 8); *p++ = (OnigUChar) (code & 0xff); return 4; } static int stateless_TADTextBE_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const OnigUChar** pp, const OnigUChar* e, OnigUChar* to, OnigEncoding enc) { int len; const OnigUChar* p = *pp; int i; /* TODO fold case */ len = stateless_TADTextBE_mbc_enc_len(p, e, enc); for (i = 0; i < len; ++i) { *to++ = *p++; } (*pp) += len; return len; /* return byte length of converted char to lower */ } static int stateless_TADTextBE_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigApplyAllCaseFoldFunc f ARG_UNUSED, void* arg ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { /* TODO what to do here ? */ return 0; } static int stateless_TADTextBE_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, const OnigUChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED, OnigCaseFoldCodeItem acs[] ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { /* TODO what to do here ? */ return 0; } static int stateless_TADTextBE_property_name_to_ctype(OnigEncoding enc ARG_UNUSED, OnigUChar* p ARG_UNUSED, OnigUChar* e ARG_UNUSED) { /* don't support property name */ return ONIGERR_INVALID_CHAR_PROPERTY_NAME; } static int stateless_TADTextBE_is_code_ctype(OnigCodePoint code ARG_UNUSED, OnigCtype ctype ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { /* don't support ctype */ return 0; } static int stateless_TADTextBE_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, OnigCodePoint* sb_out ARG_UNUSED, const OnigCodePoint* ranges[] ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { /* don't support ctype */ return ONIGERR_TYPE_BUG; } static OnigUChar* stateless_TADTextBE_left_adjust_char_head(const OnigUChar* start, const OnigUChar* s, const OnigUChar* e ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { int rem; if (s <= start) return (UChar*)s; rem = (s - start) % 4; return (OnigUChar*)(s - rem); } static int stateless_TADTextBE_is_allowed_reverse_match(const OnigUChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { /* always didn't allow */ return 0; } static OnigEncodingType stateless_TADTextBE = { &stateless_TADTextBE_mbc_enc_len, "stateless-TADTextBE", /* name */ 4, /* max byte length */ 4, /* min byte length */ &stateless_TADTextBE_is_mbc_newline, &stateless_TADTextBE_mbc_to_code, &stateless_TADTextBE_code_to_mbclen, &stateless_TADTextBE_code_to_mbc, &stateless_TADTextBE_mbc_case_fold, &stateless_TADTextBE_apply_all_case_fold, &stateless_TADTextBE_get_case_fold_codes_by_str, &stateless_TADTextBE_property_name_to_ctype, &stateless_TADTextBE_is_code_ctype, &stateless_TADTextBE_get_ctype_code_range, &stateless_TADTextBE_left_adjust_char_head, &stateless_TADTextBE_is_allowed_reverse_match }; /* * Transcode */ <% map = { "00{00-20}" => :func_so, # Control "{21-7e}{21-7e}" => :func_so, # A Zone "{80-fd}{21-7e}" => :func_so, # B Zone "{21-7e}{80-fd}" => :func_so, # C Zone "{80-fd}{80-fd}" => :func_so, # D Zone "fe{21-7e,80-fd}" => :func_so, # Script/Language Change "fefe" => :func_si, # Script/Language Escape } transcode_generate_node(ActionMap.parse(map), "TADTextBE_decoder") map = { "{00,fe}{21-7e,80-fd}" => :func_so, # Script/Language Change } transcode_generate_node(ActionMap.parse(map), "TADTextBE_decoder_lang_escape") %> <%= transcode_generated_code %> static int tadtext_init(void *statep) { uint16_t *sp = statep; *sp = 0x21; /* System Script */ return 0; } /* * fe21 - 0021 * fe7e - 007e * fefe 0021 - 0121 * fefe 007e - 017e * fefe fe21 - 0221 * fefe fe7e - 027e * fefe fefe 0021 - 0321 * fefe fefe 007e - 037e */ static VALUE fun_si_TADTextBE_decoder(void *statep, const unsigned char *s ARG_UNUSED, size_t l ARG_UNUSED) { uint16_t *sp = statep; if (*sp & 0x8000) { *sp += 0x200; *sp |= 0x8000; /* prevent overflow */ } else { *sp = 0x8100; } return (VALUE)TADTextBE_decoder_lang_escape; } static ssize_t fun_so_TADTextBE_decoder(void *statep, const unsigned char *s, size_t l ARG_UNUSED, unsigned char* o, size_t osize ARG_UNUSED) { uint16_t *sp = statep; if (*sp & 0x8000) { *sp &= 0x7f00; if (s[2] == 0xfe) { *sp += 0x100; } *sp |= s[3]; return 0; } if (s[0] == 0xfe) { *sp = s[1]; return 0; } else { o[0] = *sp >> 8; o[1] = *sp & 0xff; o[2] = s[0]; o[3] = s[1]; return 4; } } static const rb_transcoder rb_TADTextBE_decoder = { "TADTextBE", "stateless-TADTextBE", TADTextBE_decoder, TRANSCODE_TABLE_INFO, 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ incompat_converter, /* asciicompat_type */ 2, tadtext_init, tadtext_init, /* state_size, state_init, state_fini */ NULL, fun_si_TADTextBE_decoder, NULL, fun_so_TADTextBE_decoder }; /* * Initialize */ void Init_troncode(void) { rb_define_dummy_encoding("TADTextBE"); rb_define_dummy_encoding("TADTextLE"); #if 0 /* * ロードしなくてもEncoding.listやEncoding.name_listを出せるようにするために * rb_encdb_declare はあるので、後付けの場合は必要ない */ rb_encdb_declare("stateless-TADTextBE"); /* いらない */ #endif rb_enc_register("stateless-TADTextBE", &stateless_TADTextBE); rb_register_transcoder(&rb_TADTextBE_decoder); }