#! /usr/bin/perl
#
# Copyright (c) 2001-2022, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
#
# Generate UTF-8 <--> EUC_JP code conversion tables from
# map files provided by the Unicode organization.
# Unfortunately the organization prohibits redistribution of
# the map files, so to use this script you have to obtain
# CP932.TXT and JIS0212.TXT from the organization's ftp site.

use strict;
use warnings;

use convutils;

my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl';

# Read the JIS0212.TXT map file.  Each element of the returned list is
# used below as a hash reference with (at least) "code" and "ucs" keys.
my $jis0212 = &read_source("JIS0212.TXT");

# Collects every entry that is finally passed to print_conversion_tables().
my @mapping;

foreach my $entry (@$jis0212)
{
	# Raw code as read from the file, before any EUC_JP bits are added.
	my $jis = $entry->{code};

	# We have a different mapping for this in the EUC_JP to UTF-8
	# direction (the hand-written list further down maps 0x8fa2c3, i.e.
	# 0x2243 | 0x8f8080, to U+FFE4), so the entry from the file is only
	# used when converting from Unicode.
	$entry->{direction} = FROM_UNICODE if $jis == 0x2243;

	# This one is only used when converting to Unicode.
	$entry->{direction} = TO_UNICODE if $jis == 0x2271;

	# Entries whose Unicode code point is below 0x80 are not taken from
	# this file.
	next unless $entry->{ucs} >= 0x080;

	# OR in 0x8f8080: for a two-byte code this sets the high bit of each
	# byte and puts 0x8f in front of them.
	$entry->{code} = $jis | 0x8f8080;

	push @mapping, $entry;
}

# Read the CP932.TXT map file.  The "code" of each entry is treated as an
# SJIS code and is replaced below by the corresponding EUC_JP code.
my $ct932 = &read_source("CP932.TXT");

foreach my $entry (@$ct932)
{
	my $sjis = $entry->{code};

	# We have a different mapping for these in the EUC_JP to UTF-8
	# direction, so they are not taken from this file at all.
	next if grep { $sjis == $_ } (0xeefa, 0xeefb, 0xeefc);

	# Codes below 0xa1 are not taken from this file either.
	next unless $sjis >= 0xa1;

	my $jis = sjis2jis($sjis);

	# Choose the bits that are OR'ed into the converted code:
	#  - a result below 0x100 gets 0x8e put in front of it;
	#  - SJIS codes from 0xeffd up get the same 0x8f8080 treatment as the
	#    JIS0212.TXT entries;
	#  - everything else just gets the high bit of both bytes set.
	my $euc_bits;
	if ($jis < 0x100)
	{
		$euc_bits = 0x8e00;
	}
	elsif ($sjis >= 0xeffd)
	{
		$euc_bits = 0x8f8080;
	}
	else
	{
		$euc_bits = 0x8080;
	}
	$entry->{code} = $jis | $euc_bits;

	# Keep the original SJIS code around; the direction fix-up that
	# follows selects and classifies entries by it.
	$entry->{sjis} = $sjis;

	push @mapping, $entry;
}

# Individual SJIS codes that are only used in the EUC_JP to UTF-8
# direction (a contiguous range is handled separately in the loop).
my @to_unicode_only = (
	0x8790, 0x8791, 0x8792, 0x8795, 0x8796,
	0x8797, 0x879a, 0x879b, 0x879c);

# Adjust the conversion direction of the entries that carry an "sjis" key,
# i.e. those that were loaded from CP932.TXT.  Entries matching none of the
# cases below keep whatever direction they already have.
foreach my $entry (grep { defined $_->{sjis} } @mapping)
{
	my $sjis = $entry->{sjis};

	if (   ($sjis >= 0xed00 && $sjis <= 0xeef9)
		|| ($sjis >= 0xfa54 && $sjis <= 0xfa56)
		|| ($sjis >= 0xfa58 && $sjis <= 0xfc4b))
	{
		# These SJIS characters are excluded completely.
		$entry->{direction} = NONE;
	}
	elsif (grep { $sjis == $_ } (0xeefa, 0xeefb, 0xeefc))
	{
		# These SJIS characters are only in the UTF-8 to EUC_JP table.
		# NOTE(review): the CP932.TXT loop skips exactly these three
		# codes before they are pushed, so this branch does not appear to
		# be reachable -- confirm before relying on it.
		$entry->{direction} = FROM_UNICODE;
	}
	elsif ((grep { $sjis == $_ } @to_unicode_only)
		|| ($sjis >= 0xfa4a && $sjis <= 0xfa53))
	{
		# These SJIS characters are only in the EUC_JP to UTF-8 table.
		$entry->{direction} = TO_UNICODE;
	}
}

# Hand-written entries that are appended after the ones read from the map
# files.  Each row is [ ucs, code, comment ]; the rows are expanded into
# the same hash layout used by the other entries, in the order given.

# Entries used in both conversion directions.
foreach my $row (
	[ 0x4efc, 0x8ff4af, '# CJK(4EFC)' ],
	[ 0x50f4, 0x8ff4b0, '# CJK(50F4)' ],
	[ 0x51EC, 0x8ff4b1, '# CJK(51EC)' ],
	[ 0x5307, 0x8ff4b2, '# CJK(5307)' ],
	[ 0x5324, 0x8ff4b3, '# CJK(5324)' ],
	[ 0x548A, 0x8ff4b5, '# CJK(548A)' ],
	[ 0x5759, 0x8ff4b6, '# CJK(5759)' ],
	[ 0x589E, 0x8ff4b9, '# CJK(589E)' ],
	[ 0x5BEC, 0x8ff4ba, '# CJK(5BEC)' ],
	[ 0x5CF5, 0x8ff4bb, '# CJK(5CF5)' ],
	[ 0x5D53, 0x8ff4bc, '# CJK(5D53)' ],
	[ 0x5FB7, 0x8ff4be, '# CJK(5FB7)' ],
	[ 0x6085, 0x8ff4bf, '# CJK(6085)' ],
	[ 0x6120, 0x8ff4c0, '# CJK(6120)' ],
	[ 0x654E, 0x8ff4c1, '# CJK(654E)' ],
	[ 0x663B, 0x8ff4c2, '# CJK(663B)' ],
	[ 0x6665, 0x8ff4c3, '# CJK(6665)' ],
	[ 0x6801, 0x8ff4c6, '# CJK(6801)' ],
	[ 0x6A6B, 0x8ff4c9, '# CJK(6A6B)' ],
	[ 0x6AE2, 0x8ff4ca, '# CJK(6AE2)' ],
	[ 0x6DF2, 0x8ff4cc, '# CJK(6DF2)' ],
	[ 0x6DF8, 0x8ff4cb, '# CJK(6DF8)' ],
	[ 0x7028, 0x8ff4cd, '# CJK(7028)' ],
	[ 0x70BB, 0x8ff4ae, '# CJK(70BB)' ],
	[ 0x7501, 0x8ff4d0, '# CJK(7501)' ],
	[ 0x7682, 0x8ff4d1, '# CJK(7682)' ],
	[ 0x769E, 0x8ff4d2, '# CJK(769E)' ],
	[ 0x7930, 0x8ff4d4, '# CJK(7930)' ],
	[ 0x7AE7, 0x8ff4d9, '# CJK(7AE7)' ],
	[ 0x7DA0, 0x8ff4dc, '# CJK(7DA0)' ],
	[ 0x7DD6, 0x8ff4dd, '# CJK(7DD6)' ],
	[ 0x8362, 0x8ff4df, '# CJK(8362)' ],
	[ 0x85B0, 0x8ff4e1, '# CJK(85B0)' ],
	[ 0x8807, 0x8ff4e4, '# CJK(8807)' ],
	[ 0x8B7F, 0x8ff4e6, '# CJK(8B7F)' ],
	[ 0x8CF4, 0x8ff4e7, '# CJK(8CF4)' ],
	[ 0x8D76, 0x8ff4e8, '# CJK(8D76)' ],
	[ 0x90DE, 0x8ff4ec, '# CJK(90DE)' ],
	[ 0x9115, 0x8ff4ee, '# CJK(9115)' ],
	[ 0x9592, 0x8ff4f1, '# CJK(9592)' ],
	[ 0x973B, 0x8ff4f4, '# CJK(973B)' ],
	[ 0x974D, 0x8ff4f5, '# CJK(974D)' ],
	[ 0x9751, 0x8ff4f6, '# CJK(9751)' ],
	[ 0x999E, 0x8ff4fa, '# CJK(999E)' ],
	[ 0x9AD9, 0x8ff4fb, '# CJK(9AD9)' ],
	[ 0x9B72, 0x8ff4fc, '# CJK(9B72)' ],
	[ 0x9ED1, 0x8ff4fe, '# CJK(9ED1)' ],
	[ 0xF929, 0x8ff4c5, '# CJK COMPATIBILITY IDEOGRAPH-F929' ],
	[ 0xF9DC, 0x8ff4f2, '# CJK COMPATIBILITY IDEOGRAPH-F9DC' ],
	[ 0xFA0E, 0x8ff4b4, '# CJK COMPATIBILITY IDEOGRAPH-FA0E' ],
	[ 0xFA0F, 0x8ff4b7, '# CJK COMPATIBILITY IDEOGRAPH-FA0F' ],
	[ 0xFA10, 0x8ff4b8, '# CJK COMPATIBILITY IDEOGRAPH-FA10' ],
	[ 0xFA11, 0x8ff4bd, '# CJK COMPATIBILITY IDEOGRAPH-FA11' ],
	[ 0xFA12, 0x8ff4c4, '# CJK COMPATIBILITY IDEOGRAPH-FA12' ],
	[ 0xFA13, 0x8ff4c7, '# CJK COMPATIBILITY IDEOGRAPH-FA13' ],
	[ 0xFA14, 0x8ff4c8, '# CJK COMPATIBILITY IDEOGRAPH-FA14' ],
	[ 0xFA15, 0x8ff4ce, '# CJK COMPATIBILITY IDEOGRAPH-FA15' ],
	[ 0xFA16, 0x8ff4cf, '# CJK COMPATIBILITY IDEOGRAPH-FA16' ],
	[ 0xFA17, 0x8ff4d3, '# CJK COMPATIBILITY IDEOGRAPH-FA17' ],
	[ 0xFA18, 0x8ff4d5, '# CJK COMPATIBILITY IDEOGRAPH-FA18' ],
	[ 0xFA19, 0x8ff4d6, '# CJK COMPATIBILITY IDEOGRAPH-FA19' ],
	[ 0xFA1A, 0x8ff4d7, '# CJK COMPATIBILITY IDEOGRAPH-FA1A' ],
	[ 0xFA1B, 0x8ff4d8, '# CJK COMPATIBILITY IDEOGRAPH-FA1B' ],
	[ 0xFA1C, 0x8ff4da, '# CJK COMPATIBILITY IDEOGRAPH-FA1C' ],
	[ 0xFA1D, 0x8ff4db, '# CJK COMPATIBILITY IDEOGRAPH-FA1D' ],
	[ 0xFA1E, 0x8ff4de, '# CJK COMPATIBILITY IDEOGRAPH-FA1E' ],
	[ 0xFA1F, 0x8ff4e0, '# CJK COMPATIBILITY IDEOGRAPH-FA1F' ],
	[ 0xFA20, 0x8ff4e2, '# CJK COMPATIBILITY IDEOGRAPH-FA20' ],
	[ 0xFA21, 0x8ff4e3, '# CJK COMPATIBILITY IDEOGRAPH-FA21' ],
	[ 0xFA22, 0x8ff4e5, '# CJK COMPATIBILITY IDEOGRAPH-FA22' ],
	[ 0xFA23, 0x8ff4e9, '# CJK COMPATIBILITY IDEOGRAPH-FA23' ],
	[ 0xFA24, 0x8ff4ea, '# CJK COMPATIBILITY IDEOGRAPH-FA24' ],
	[ 0xFA25, 0x8ff4eb, '# CJK COMPATIBILITY IDEOGRAPH-FA25' ],
	[ 0xFA26, 0x8ff4ed, '# CJK COMPATIBILITY IDEOGRAPH-FA26' ],
	[ 0xFA27, 0x8ff4ef, '# CJK COMPATIBILITY IDEOGRAPH-FA27' ],
	[ 0xFA28, 0x8ff4f0, '# CJK COMPATIBILITY IDEOGRAPH-FA28' ],
	[ 0xFA29, 0x8ff4f3, '# CJK COMPATIBILITY IDEOGRAPH-FA29' ],
	[ 0xFA2A, 0x8ff4f7, '# CJK COMPATIBILITY IDEOGRAPH-FA2A' ],
	[ 0xFA2B, 0x8ff4f8, '# CJK COMPATIBILITY IDEOGRAPH-FA2B' ],
	[ 0xFA2C, 0x8ff4f9, '# CJK COMPATIBILITY IDEOGRAPH-FA2C' ],
	[ 0xFA2D, 0x8ff4fd, '# CJK COMPATIBILITY IDEOGRAPH-FA2D' ],
	[ 0xFF07, 0x8ff4a9, '# FULLWIDTH APOSTROPHE' ],
	[ 0xFFE4, 0x8fa2c3, '# FULLWIDTH BROKEN BAR' ])
{
	my ($ucs, $code, $comment) = @$row;

	push @mapping,
	  {
		direction => BOTH,
		ucs       => $ucs,
		code      => $code,
		comment   => $comment
	  };
}

# additional conversions for EUC_JP -> UTF-8 conversion
foreach my $row (
	[ 0x2116, 0x8ff4ac, '# NUMERO SIGN' ],
	[ 0x2121, 0x8ff4ad, '# TELEPHONE SIGN' ],
	[ 0x3231, 0x8ff4ab, '# PARENTHESIZED IDEOGRAPH STOCK' ])
{
	my ($ucs, $code, $comment) = @$row;

	push @mapping,
	  {
		direction => TO_UNICODE,
		ucs       => $ucs,
		code      => $code,
		comment   => $comment
	  };
}

# Hand the complete list over to convutils to produce the output for the
# "EUC_JP" encoding.
print_conversion_tables($this_script, "EUC_JP", \@mapping);


#######################################################################
# sjis2jis ; SJIS => JIS conversion
#
# Takes an SJIS code as an integer and returns the converted code as an
# integer.  Values up to and including 0x100 are returned unchanged; for
# anything larger the result is built as ((row + 0x21) << 8) + col + 0x21,
# where row and col are derived from the lead and trail bytes.
sub sjis2jis
{
	my ($sjis) = @_;

	# Nothing to convert for small values.
	if ($sjis <= 0x100)
	{
		return $sjis;
	}

	my $lead  = $sjis >> 8;
	my $trail = $sjis & 0xff;

	# Rebase the trail byte to zero; values from 0x80 up are first shifted
	# down by one.
	$trail-- if $trail >= 0x80;
	$trail -= 0x40;

	# Rebase the lead byte to zero; values from 0xe0 up are first shifted
	# down by 0x40.
	$lead -= 0x40 if $lead >= 0xe0;
	$lead -= 0x81;

	# Linear position, counting 0xbc trail slots per lead byte.
	my $pos = $lead * 0xbc + $trail;

	if ($pos >= 114 * 0x5e && $pos <= 115 * 0x5e + 0x1b)
	{

		# This region (115-ku) is outside the range of JIS codes, but to
		# make it convenient to generate codes in EUC CODESET 3 it is
		# moved to a seemingly duplicate region (83-84-ku).
		$pos -= (31 * 0x5e) + 12;

		# after 85-ku 82-ten needs to be moved 2 codepoints
		# NOTE(review): this threshold multiplies by 0x5c, not the 0x5e
		# row width used everywhere else; it evaluates to 7810 and is
		# deliberately kept exactly as it was.
		$pos -= 2 if ($pos >= 84 * 0x5c + 82);
	}

	# Split the position back into a row and a column of 0x5e cells.  The
	# division is truncated explicitly rather than left to the shift below.
	my $row = int($pos / 0x5e);
	my $col = $pos % 0x5e;

	return (($row + 0x21) << 8) + $col + 0x21;
}
