/*
    KMLOVoice

    A utility to process voice messages received with the ELSA
    MicroLink(tm) Office modem.

    Copyright (C) 2000 Oliver Gantz <Oliver.Gantz@epost.de>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    ------
    ELSA and MicroLink are trademarks of ELSA AG, Aachen.

    ===========================================================================
    This code was taken from Gert Doering's "mgetty+sendfax" distribution. It
    was found as "voice/libpvf/rockwell.c".

    A floating point version has been done by Torsten Duwe 1995. It was very
    remotely derived from Rockwell's "d.asm", converted to C and simplified.

    The fixed point version has been done by Peter Jaeckel 1996-1997.
    ===========================================================================
*/


#include <sys/types.h>

#include "adpcm.h"

/*
	PJ:
	A first attempt to implement basically all that the original
	Rockwell D.ASM code does in C. I never had to deal with assembler
	before, so be lenient when you judge me, please...

	NB: The guts of this code are not very pretty to look at.

	RV_ stands for Rockwell Voice.
*/

/*
	Fixed point constants.  Fractions are stored as 16 bit integers
	scaled by 32768 (e.g. 0.98 * 32768 = 32112.64, stored as 32113).
*/
#define RV_PNT98    32113		/* 0.98: factor applied to every predictor coefficient in RV_pzPred	*/
#define RV_PNT012     393		/* 0.012: update step for the first two coefficients (a1, a2)	*/
#define RV_PNT006     197		/* 0.006: update step for the remaining coefficients (b1..b6)	*/
#define RV_QDLMN   0x1F			/* QDLMN = IQDLMN = 2.87 mV. Start value and lower bound of RV_LastNu. */
#define RV_DEMPCF  0x3333		/* 0.4: de-emphasis filter coefficient used by RV_DecomOne	*/
#define RV_PDEMPCF 0x3333		/* 0.4: not referenced in the code visible in this file	*/
#define RV_QORDER  8				/*  Total delay line length for the pole and zero delay lines */

/*
	Design Notes: Relation of QDataIndex to position in QData Buffer.
	The rotation is done by the QDataIndex.  This variable always
	points to the data to be multiplied by the coefficient a1.  The
	coefficients, a1..a2 and b1..b6, stay in the same relative
	position in the coefficient array. Updates to these values are
	done in place.  The illustration below shows the value of QDataIndex
	and the Delay Line data in relation to the coefficient array.

	Position
	in Qdata  Start   2nd     3rd     4th
	-----------------------------------------
	0 a1      Y(n-1)  Y(n-2)  Q(n-1)  Q(n-2)
	1 a2      Y(n-2)  Q(n-1)  Q(n-2)  Q(n-3)
	2 b1      Q(n-1)  Q(n-2)  Q(n-3)  Q(n-4)
	3 b2      Q(n-2)  Q(n-3)  Q(n-4)  Q(n-5)
	4 b3      Q(n-3)  Q(n-4)  Q(n-5)  Q(n-6)
	5 b4      Q(n-4)  Q(n-5)  Q(n-6)  Y(n-1)
	6 b5      Q(n-5)  Q(n-6)  Y(n-1)  Y(n-2)
	7 b6      Q(n-6)  Y(n-1)  Y(n-2)  Q(n-1)
	-----------------------------------------
	QDataIndex   0       7       6       5
	-----------------------------------------
*/

/*
	Predictor state shared by RV_pzPred, RV_XpzCalc and RV_DecomOne.
	RV_pzTable is cleared by adpcm_decode() only; RV_Reset() clears the
	delay line and its index but leaves the coefficients untouched.
*/
static short RV_pzTable[8];				/* Coefficient Table for the pole and zero linear predictor. */
																	/* Order: a1 a2 b1 b2 b3 b4 b5 b6 */
static short RV_QdataIndex = 0;		/* Delay line pointer to the coefficient a1 (index into RV_Qdata). */
static short RV_Qdata[RV_QORDER];	/* Delay line, used as a ring buffer rotated via RV_QdataIndex. */

#ifdef POSTFILTER		/* DON'T USE THIS */
/*
	The POSTFILTER code is in Rockwell's original D.ASM, too.
	They don't use it in their distributed executables either.
	I have no idea under what circumstances it might be
	useful; I just left the code in here as I went through the
	effort of writing it before I realised that it appears to be
	useless here.

	NOTE(review): the POSTFILTER branch of RV_DecomOne calls
	RV_App1Calc() and RV_App2Calc(), which are not defined in the part
	of this file visible here, so a build with POSTFILTER defined
	presumably fails -- confirm before enabling.
*/
static short RV_QPdata[RV_QORDER];	/* Extra delay line, only cleared by RV_Reset in the visible code. */
static short RV_QPPdata[RV_QORDER];	/* Extra delay line, only cleared by RV_Reset in the visible code. */
#endif

/* Decoder state; all four are re-initialised by RV_Reset(). */
static short RV_LastNu = 0;			/* Last Nu value; kept within [RV_QDLMN, RV_QDelayMX] by RV_DecomOne. */
static short RV_Dempz = 0;			/* De-emphasis filter delay line (one element); also the decoded sample returned by RV_DecomOne. */
static short RV_NewQdata = 0;		/* Adaptive quantizer output Q(n). */
static short RV_NewAppData = 0;	/* Temporary data storage; only reset, never read, in the code visible here. */

/* ML2bps, ML3bps, and ML4bps are combined in mul[][], just like Torsten suggested */

/*
	Row index is bps-2 (selected in RV_Reset), column index is the
	code word.  Only the first 4, 8 and 16 entries of the rows are
	initialised explicitly; the remaining entries are zero and are
	never indexed as long as the code word has at most bps bits.
*/
static short RV_mul[3][16] =
{		/* Multiplier table to calculate new Nu for 2/3/4 BPS. */
	{0x3333, 0x199A, 0x199A, 0x3333},
	{0x3800, 0x2800, 0x1CCD, 0x1CCD, 0x1CCD, 0x1CCD, 0x2800, 0x3800},
	{0x4CCD, 0x4000, 0x3333, 0x2666, 0x1CCD, 0x1CCD, 0x1CCD, 0x1CCD, 0x1CCD, 0x1CCD, 0x1CCD, 0x1CCD, 0x2666, 0x3333, 0x4000, 0x4CCD}
};

/* Zeta2bps, Zeta3bps, and Zeta4bps are combined in Zeta[][], just like Torsten suggested */
/*
	Row index is bps-2, column index is the code word.  The table is
	declared unsigned but RV_Reset hands it out through a (short *),
	so entries of 0x8000 and above are read as negative values
	(assuming 16 bit two's complement shorts).
*/
static unsigned short RV_Zeta[3][16] =
{		/*  Multiplier table for 2/3/4 BPS to calculate inverse */
		/*  quantizer output.  This number, indexed by the code */
		/*  word, times Nu is the inverse quantizer output. */
	{0xCFAE, 0xF183, 0x0E7D, 0x3052},
	{0xBB23, 0xD4FE, 0xE7CF, 0xF828, 0x07D8, 0x1831, 0x2B02, 0x44DD},
	{0xA88B, 0xBDCB, 0xCC29, 0xD7CF, 0xE1D8, 0xEAFB, 0xF395, 0xFBE4, 0x041C, 0x0C6B, 0x1505, 0x1E28, 0x2831, 0x33C7, 0x4235, 0x5775}
};

/* Per bit rate selections; all of them are set by RV_Reset(bps) using index bps-2. */
static short *RV_mul_p;		/* Active row of RV_mul. */
static short *RV_Zeta_p;	/* Active row of RV_Zeta, viewed as signed shorts. */
/* 16 bit marker words for 2/3/4 BPS.  In adpcm_decode an input word equal */
/* to the active marker is followed by one 16 bit word that is passed to */
/* put_silence() as the number of samples to write. */
static unsigned short RV_silence_words[3] = {0x13ec, 0x23de, 0xc11c};
static unsigned short RV_silence_word;	/* Active entry of RV_silence_words. */

/* Maximum limit for Nu.  Changes based on 2, 3, or 4 BPS selected. Initialization routine updates this value. */
static short RV_QDelayMX = 0;
/*  Array indexed by BPS-2 for updating QDelayMX */
static short RV_QDelayTable[3] = {0x54C4,0x3B7A,0x2ED5}; /* 2.01V, 1.41V, 1.11V */

/*
	Macro definitions used in the decompression, interpolation, and
	compression functions.
*/

/*
	Saturation limits.  They are computed at run time by RV_Reset() as
	"all bits set, shifted right by one" for the maximum and
	-(maximum) - 1 for the minimum, i.e. 0x7fff / -0x8000 and
	0x7fffffff / -0x80000000 where unsigned short has 16 bits and
	unsigned int has 32 bits.  They are zero until RV_Reset() has run.
*/
static int RV_max_local_int16,RV_min_local_int16;
static int64_t RV_max_local_int32,RV_min_local_int32;

/*
	Clamp a value to the 16 bit / 32 bit limits above.  The argument is
	evaluated more than once, so it must not have side effects.
*/
#define RV_clip_16(a) ((a)<RV_min_local_int16?RV_min_local_int16:(a)>RV_max_local_int16?RV_max_local_int16:(a))
#define RV_clip_32(a) ((a)<RV_min_local_int32?RV_min_local_int32:(a)>RV_max_local_int32?RV_max_local_int32:(a))

/*
	Word and byte extraction helpers.  The argument is converted to
	unsigned int first, so a wider argument (e.g. int64_t) is reduced
	to its low bits before anything is extracted.

	Every use of the argument is parenthesized so that the cast applies
	to the whole argument expression and not only to its first operand
	(e.g. HIWORD(v / 2) must convert v / 2, not v).

	HIWORD(x)   bits 16 and up of (unsigned int)x
	LOWORD(x)   bits 0..15 of (unsigned int)x
	LOBYTE(x)   bits 0..7 of (unsigned int)x
*/
#define HIWORD(x) (((unsigned int)(x)) >> 16)
#define LOWORD(x) ((unsigned int)(((unsigned int)(x)) & 0xffff))
#define LOBYTE(x) ((unsigned int)(((unsigned int)(x)) & 0xff))
/*
	Round a 32 bit fixed point value to its upper 16 bits: take the
	high word and add bit 15 as the rounding increment.  The result is
	converted to short, so a sum of 0x8000 or more ends up negative on
	the usual two's complement implementations.  The argument is
	evaluated twice and must not have side effects.
*/
#define RV_round_32_into_16(x) ((short)((((unsigned int)(x))>>16)+((((unsigned int)(x))>>15)&0x0001)))

/*
	In order to stay as close as possible to the original assembler
	(a kludge, I know), we simulate the system's register(s) below
*/

/*
	Stand-in for the DI register.  RV_XpzCalc leaves it at
	(RV_QdataIndex + 7) % 8, and RV_DecomOne then uses it to pick the
	delay line slots it overwrites.
*/
static short RV_di;


/*
	Utilities. Routines that both the decompressor and the compressor use. */

/*
	pzPred
	Linear predictor coefficient update routine.  Local to this module.

	Each of the eight coefficients in RV_pzTable (a1 a2 b1 .. b6) is
	multiplied by 0.98 and then moved up or down by a fixed step,
	depending on the sign of Q(n) times the delay line entry that
	belongs to that coefficient.

	Inputs:
	cx = Q(n), i.e. RV_NewQdata (register CX in D.ASM).
	Output:
	RV_pzTable is updated in place.  This C version walks the delay
	line with a local index, so RV_di and RV_QdataIndex are left
	unchanged.
*/
static void RV_pzPred (short cx)
{
	/*
		A little explanation is required here. Rockwell uses 16bit
		integers to represent numbers in [-1,1). They take 0x8000 to be -1
		and 0x7fff to be 0.999969482, i.e. the ints -32768 to 32767 are
		normalised over 32768 into the required interval.  The product of
		two such numbers is supposed to be, again, in the range [-1,1).
		I know that this is mathematically incorrect, but that's how they
		do it, just read on.

		The "adjustment" that is mentioned in D.ASM can be understood by
		the following example: Assume you want to multiply -1 with -0.5. In
		integers, that's imul 0x8000, 0xc000 (I know, it does actually
		require a register), i.e. -32768*-16384. The result is 0x20000000.
		They only want to keep a 16 bit result, thus they need to round.
		First, however, an adjustment for the moved decimal point is
		required. The upper 16 bit of 0x20000000 are 0x2000 which
		corresponds only to 8192/32768=0.25 ! Thus, all the bits need to be
		left shifted by one place and the result will be 0x4000 which
		correctly corresponds to 0.5 now. This confusion is due to the fact
		that the original two numbers in total have two bits representing
		two different signs and the result, which is again represented by a
		total of 32 bits, needs only one sign bit.  Thus, the result in 32
		bits has effectively one more data bit available than the total of
		the two multiplicands. The nature of integer arithmetics feeds that
		bit in from the left behind the sign bit. A consequence of this is
		that the two leftmost bits of the resulting 32 bit integer are
		always equal, apart from one case, namely 0x8000*0x8000, or
		-32768*-32768, which results in 0x40000000. Arithmetically, we
		would expect this to be decimal-point-adjusted to the 16 bit
		representation 0x7fff. The Rockwell assembler code, however, just
		maps this to 0x8000, i.e. -1, by ignoring the special case of
		0x8000*0x8000. This explains the cryptic warnings like

		; Determine sign of Q(n) * Y(n-1)
		;
		; Do not change the sign determination method!

		in D.ASM. Personally, this is the first time ever I have seen
		anyone using arithmetics like -1 * -1 = -1 whilst -1 * -0.99 = 0.99 ;-).

		So, after this type of decimal-point-adjustment they then round off
		the lower 16 bit and just take the upper 16 to be the result.

		Implementation note: the one-bit adjustment is done on an
		unsigned copy of the product.  Left-shifting a negative (or
		overflowing) signed int is undefined behaviour in C, whereas the
		unsigned shift simply drops the top bit, which is exactly the
		register behaviour described above.
	*/

	short di = RV_QdataIndex;	/* walks the delay line, starting at the a1 entry */
	int i;

	/*  Calculate coefficients a1 a2 b1 b2 b3 b4 b5 b6 . */

	for (i = 0; i < 8; i++) {
		unsigned int prod;	/* 32 bit product, already decimal-point-adjusted */
		int coeff;		/* new coefficient before clipping */
		int corr;		/* rounded Q(n) * delay line entry; only its sign is used */

		/* 0.98 * coefficient, adjusted and rounded to 16 bits. */
		prod = ((unsigned int) (RV_pzTable[i] * (int) RV_PNT98)) << 1;
		coeff = RV_round_32_into_16 (prod);

		/* cx contains the NewQdata=Q(n) */
		prod = ((unsigned int) ((int) cx * (int) RV_Qdata[di])) << 1;
		corr = RV_round_32_into_16 (prod);

		/*
			A rounded product of zero counts as positive.
			i<2 ? The a's get RV_PNT012. All b's get RV_PNT006.
		*/
		coeff += (corr < 0 ? -1 : 1) * (i < 2 ? RV_PNT012 : RV_PNT006);

		/*
			The result of a multiplication needs adjusting & rounding.
			The result of an addition/subtraction needs clipping.
		*/
		RV_pzTable[i] = RV_clip_16 (coeff);
		di = (di + 1) % 8;
	}
}

/*
	Taken from D.ASM:

	Design Notes: Sum of Multiplications.

	Multiplications are 16-bit signed numbers producing a signed 32-bit
	result. The two operands are usually numbers less than one; this
	requires a 32-bit shift by the macro "adjust" to bring the decimal
	point in line.  The 32-bit addition is two 16-bit additions with
	carry.  The "clip" macro checks for overflow and limits the result of
	the addition to 0x7fffffff or 0x80000000 (for 32-bit results), or
	0x7fff or 0x8000 (for 16-bit results).  Note that the "clip" macro
	depends on the flags being set because of an addition; the 80x86
	processor does not update these flags because of a move operation.
*/

static int RV_XpzCalc (short cx)
{
	/*
		Linear pole and zero predictor calculate.  Local to this module.

		Computes cx plus the sum of the eight products
		RV_pzTable[i] * RV_Qdata[(RV_QdataIndex + i) % 8] in 32 bit fixed
		point, clipping the accumulator to the 32 bit range after every
		addition.

		Input:   cx = initial value; it is placed in the upper 16 bits of
		         the accumulator (the CX,BX register pair of D.ASM with
		         BX set to zero).
		Output:  the 32 bit accumulator; its upper 16 bits are the 16 bit
		         result.
		Also, RV_di is left at (RV_QdataIndex+7)%8.

		The multiplication by 65536 and the unsigned shift below replace
		signed left shifts, which are undefined behaviour in C when the
		operand is negative or the result overflows.  The conversion of
		the shifted product back to int is implementation-defined for
		0x8000*0x8000 (it wraps to the most negative int on the usual
		two's complement implementations), which is the Rockwell
		behaviour described in RV_pzPred.
	*/
	int64_t sum = (int64_t) cx * 65536;	/* 32 bit accumulator held in a wider type */
	int i;

	RV_di = RV_QdataIndex;

	for (i = 0; i < 8; i++) {
		unsigned int prod = (unsigned int) ((int) RV_pzTable[i] * (int) RV_Qdata[RV_di]);

		prod <<= 1;		/* Rockwell-adjust for decimal point shift. */
		sum += (int) prod;
		sum = RV_clip_32 (sum);
		RV_di = (RV_di + 1) % 8;
	}
	RV_di = (RV_QdataIndex + 7) % 8;

	return (int) sum;
}

static void RV_Reset (int bps)
{
	/*
		Bring the decoder state back to its start values for the given
		bit rate.

		bps selects, via bps-2, the rows of RV_mul and RV_Zeta, the Nu
		limit and the silence marker word; it is not range checked
		here, so the caller has to pass 2, 3 or 4.

		The predictor coefficients in RV_pzTable are deliberately not
		touched by this routine.
	*/
	const int row = bps - 2;
	/* Largest signed values: all bits set, shifted right by one. */
	const unsigned short top16 = (unsigned short) ((unsigned short) ~0U >> 1);
	const unsigned int top32 = ~0U >> 1;
	int k;

	/* Limits used by RV_clip_16 and RV_clip_32. */
	RV_max_local_int16 = top16;
	RV_min_local_int16 = -(int) top16 - 1;
	RV_max_local_int32 = top32;
	RV_min_local_int32 = -(int64_t) top32 - 1;

	/* Empty the delay line(s) and point the index back at slot 0. */
	RV_QdataIndex = 0;
	for (k = RV_QORDER; k-- > 0; ) {
		RV_Qdata[k] = 0;
#ifdef POSTFILTER
		RV_QPdata[k] = 0;
		RV_QPPdata[k] = 0;
#endif
	}

	/* Filter and quantizer state. */
	RV_Dempz = 0;
	RV_NewQdata = 0;
	RV_NewAppData = 0;
	RV_LastNu = RV_QDLMN;

	/* Bit rate dependent selections. */
	RV_QDelayMX = RV_QDelayTable[row];
	RV_silence_word = RV_silence_words[row];
	RV_mul_p = RV_mul[row];
	RV_Zeta_p = (short *)RV_Zeta[row];
}


static short RV_DecomOne (short ax, short bx)
{
	/*
		RVDecomOne

		Decode a code word.  Local to this module.

		Inputs:
		AX = ML, adaptive multiplier for Nu.
		BX = Zeta, base inverse quantizer value, modified by Nu.
		Returns the new content of the de-emphasis delay element
		RV_Dempz, which is the decoded 16 bit sample.
		Also, updates QdataIndex to (QdataIndex+7)%8 .

		All decimal point adjustments are done without left-shifting
		signed values, because that is undefined behaviour in C for
		negative operands (bx and RV_Dempz can be negative).
	*/
	const int prev_nu = RV_LastNu;	/* Nu(n-1); needed for both products below */
	unsigned int prod;		/* adjusted 32 bit product */
	int x;				/* local accumulator */
	int64_t sum;			/* 32 bit accumulator held in a wider type */
	short y_n;			/* Y(n), kept until the delay line is updated */

	/* New Nu: round (ML * Nu(n-1)) times 4, clipped and limited. */
	prod = ((unsigned int) ((int) ax * prev_nu)) << 1;	/* Rockwell-adjust for decimal point shift. */
	x = RV_round_32_into_16 (prod) * 4;			/* Round and Multiply by 4 */
	RV_LastNu = RV_clip_16 (x);
	if (RV_LastNu < RV_QDLMN)
		RV_LastNu = RV_QDLMN;
	else if (RV_LastNu > RV_QDelayMX)
		RV_LastNu = RV_QDelayMX;

	/* Q(n): round (Zeta * Nu(n-1)) times 4, clipped. */
	prod = ((unsigned int) ((int) bx * prev_nu)) << 1;	/* Rockwell-adjust for decimal point shift. */
	x = RV_round_32_into_16 (prod) * 4;
	RV_NewQdata = RV_clip_16 (x);

	sum = RV_XpzCalc (RV_NewQdata);		/*  Compute (Xp+z)(n) + Q(n)  */
	y_n = HIWORD(sum);			/*  Y(n) done, save for later */
#ifdef POSTFILTER
	sum = RV_App1Calc ((short)(HIWORD(sum)));
	sum = RV_App2Calc ((short)(HIWORD(sum)));
#endif
	/* Use a de-emphasis filter on Y(n) to remove the effects */
	/* of the emphasis filter used during compression. */
	/* The doubling is the decimal point adjustment; RV_DEMPCF is */
	/* small enough that the doubled product always fits an int. */
	sum += 2 * (RV_DEMPCF * (int) RV_Dempz);
	sum = RV_clip_32 (sum);
	RV_Dempz = HIWORD(sum);

	/*  Update predictor filter coefficients. */
	RV_pzPred (RV_NewQdata);

	/* RV_di still holds (QdataIndex+7)%8 as left by RV_XpzCalc. */
	RV_Qdata[RV_di] = y_n;			/* drop b6, now a1 Qdata */
	/*  Update delay line at the a1(n) table entry position. */
	RV_QdataIndex = RV_di;
	RV_di = (RV_di + 2) % 8;
	RV_Qdata[RV_di] = RV_NewQdata;		/*  drop a2, now b1 Qdata */

	return RV_Dempz;
}

static void put_silence (int num_samples, FILE *out_f)
{
	/*
		Emit num_samples bytes of value 0 to out_f.

		Writing stops silently at the first putc() failure; the caller
		gets no error indication.

		NOTE(review): adpcm_decode() adds 0x80 to every decoded sample
		before writing it, so a byte of 0 is not the zero level of that
		encoding -- confirm which value the consumer expects here.
	*/
	for (; num_samples != 0; num_samples--) {
		if (putc(0, out_f) == EOF)
			break;
	}
}


static int getcodeword (FILE *in_f, int *codeword)
{
	/*
		Read one 16 bit word, low byte first, from in_f.

		Rockwell modems always pass on 16bit ints in little-endian format.
		Therefore, we have to read the data the same way if we don't want
		to miss a silence codeword.

		Returns the number of valid bits stored in *codeword:
		 16  both bytes were read,
		  8  only the low byte was available before EOF,
		  0  nothing could be read; *codeword is left untouched.
	*/
	int low;
	int high;

	low = getc(in_f);
	if (low == EOF)
		return 0;
	*codeword = low;

	high = getc(in_f);
	if (high == EOF)
		return 8;
	*codeword |= (high << 8);

	return 16;
}


/*
	Decode a Rockwell ADPCM stream from in_f and write the result to
	out_f as one byte per sample (upper 8 bits of the decoded 16 bit
	sample, offset by 0x80).

	in_f   input stream of 16 bit little-endian words holding packed
	       code words of nbits bits each, interleaved with silence
	       markers (marker word followed by a 16 bit sample count).
	out_f  output stream; write errors are not reported.
	nbits  bits per code word; must be 2, 3 or 4.

	Returns 1 when the input has been consumed, or 0 without reading
	or writing anything if nbits is not 2, 3 or 4 (such a value used
	to index past the ends of the per-bit-rate tables).
*/
int adpcm_decode (FILE *in_f, FILE *out_f, int nbits)
{
	int w;						/* packed compressed codewords */
	int c;						/* single compressed codeword  */
	int mask;					/* bitmask for the decompression */
	int a = 0;					/* local accumulator */
	int valbits = 0;				/* number of bits valid in accumulator */

	/* The tables only have rows for 2, 3 and 4 bits per sample. */
	if (nbits < 2 || nbits > 4)
		return 0;
	mask = (1 << nbits) - 1;

	/* Any output header should have been written by now, start with the decompression */

	/* Reset the coefficient table for the pole and zero linear predictor. */
	for (w = 0; w < 8; w++)
		RV_pzTable[w] = 0;

	RV_Reset (nbits);
	/*
		The algorithm below (copied from Torsten Duwe's code)
		takes care of bit concatenation.
	*/
	while ((c = getcodeword (in_f, &w))) {
		/*
			Not using the pvf library generic read_bits interface because
			Rockwell modems always pass on 16bit ints in little-endian format.
			Therefore, we have to read the data the same way if we don't want
			to miss a silence codeword.
		*/
		if (w == RV_silence_word) {
			/*
				The marker must be followed by a complete 16 bit
				sample count.  If the input ends here, stop instead
				of using the stale or partial value as a count.
			*/
			if (getcodeword (in_f, &w) != 16)
				break;
			put_silence (w, out_f);
			RV_Reset (nbits);
			/*
				Drop bits left over from before the marker (there
				can be some in 3 bit mode); otherwise they would be
				OR-ed into the next input word.
			*/
			a = 0;
			valbits = 0;
			continue;
		}
		a |= w << valbits;
		valbits += c;
		while (valbits >= nbits) {
			c = a & mask;
			w = RV_DecomOne (RV_mul_p[c], RV_Zeta_p[c]);

			/* Keep the upper 8 bits and convert to offset binary. */
			w >>= 8;
			if (w > 0x7f)
				w = 0x7f;
			if (w < -0x80)
				w = -0x80;
			w += 0x80;
			/*
				putc() writes the value itself; writing the first
				byte of the int's storage would emit the wrong byte
				on big-endian hosts.
			*/
			putc (w, out_f);
			a >>= nbits;
			valbits -= nbits;
		}
	}

	return 1;
}
