;;; txt-tbl.el --- useful functions for converting UNICODE CONSORTIUM table

;; Copyright (C) 1997 Miyashita Hisashi

;; Keywords: mule, multilingual, 
;;           character set, coding-system, ISO10646, Unicode

;; This file is part of MULE-UCS

;; MULE-UCS is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; MULE-UCS is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; Comment:
;;  

(require 'trans-util)

(defconst ccl-c-notated-string-to-number
  (ccl-compile
   `(1
     ((r6 = 10)
      (r2 = 0)
      (read r0)
      (if (r0 == ?0) ((read r1)
		      (if (r1 == ?x)
			  ((r6 = 16) (read r0))
			((r6 = 8) (r0 = r1)))))
      (loop

       (if (r0 < ?0) ((r2 = -1) (break)))
       (r5 = (r0 > ?7)) (r5 &= (r6 == 8))
       (if r5 ((r2 = -1) (break)))

       (if (r0 > ?9)
	   ((r5 = (r6 != 16))
	    (r5 |= (r0 < ?A))
	    (if r5 ((r2 = -1) (break)))
	    (if (r0 > ?F)
		((r5 = (r0 < ?a))
		 (r5 |= (r0 > ?f))
		 (if r5 ((r2 = -1) (break)))
		 (r0 -= ,(- ?a 10)))
	      ((r0 -= ,(- ?A 10)))))
	 ((r0 -= ?0)))

       (r2 *= r6)
       (r2 += r0)
       (read r0)
       (repeat))))))
	   
(defun c-notated-string-to-number (string)
  (let ((vector [0 0 0 0 0 0 0 0 nil]))
    (ccl-execute-on-string ccl-c-notated-string-to-number
			   vector string)
  (aref vector 2)))

(defun make-table-region-unicode (begin end car-col cdr-col)
  (let ((cpoint begin)
	(maxcol (max car-col cdr-col))
	(match-regexp "^[^#]?")
	(i 0)
	endpoint
	vector table
	number1 number2)

    (while (> maxcol i)
      (if (/= i 0)
	  (setq match-regexp (concat match-regexp "[ \t]+")))
      (if (or (= i (1- car-col))
	      (= i (1- cdr-col)))
	  (setq match-regexp
		(concat match-regexp "\\([0-9][0-9a-fA-Fx]*\\)"))
	(setq match-regexp
	      (concat match-regexp "\\([^ \t]+\\)")))
      (setq i (1+ i)))

    (setq i 1)
    (while (progn 
	     (goto-char cpoint)
	     (forward-line 1)
	     (setq endpoint (point))
	     (goto-char cpoint)
	     (and (<= endpoint end)
		  (< cpoint endpoint)))
      (if (re-search-forward match-regexp
			     endpoint t)
	  (progn
	    (setq number1 (c-notated-string-to-number (match-string car-col))
		  number2 (c-notated-string-to-number (match-string cdr-col)))
	    (if (and (numberp number1)
		     (numberp number2)
		     (>= number1 0)
		     (>= number2 0))
		(setq table (cons (cons number1 number2)
				  table)))
	    (setq i (1+ i))
	    (message "Count: %d" i)))
	    (setq cpoint endpoint))

    (message "%d, %d" endpoint end)
    (nreverse table)))

(defun translate-unicode-table-file (filename charset
				     &optional car-col cdr-col)
  (interactive "fFilename: \nSCharset: ")
  (if (null car-col) (setq car-col 1))
  (if (null cdr-col) (setq cdr-col 2))
  (let ((buf (generate-new-buffer
	      (format "*%s-tbl-translate*" filename)))
	table)
    (switch-to-buffer buf)
    (insert-file-contents filename)
    (setq table (make-table-region-unicode
		 (point-min)
		 (point-max)
		 car-col
		 cdr-col))
    (erase-buffer)
    (mucs-print-character-a-list table 1 charset)))

(defun mucs-print-a-list (alist cols)
  (interactive "XA-list: \nnColumn: ")
  (let ((curlist alist) (i 1) curelem)
    (while (setq curelem (car curlist))
      (insert (format "(?\\x%x . ?\\x%x)" (car curelem) (cdr curelem)))
      (cond ((= i cols)
	     (insert "\n")
	     (setq i 1))
	    (t
	     (setq i (1+ i))))
      (setq curlist (cdr curlist)))))

(defun mucs-print-character-a-list (alist cols charset)
  (interactive "XA-list: \nnColumn: \nSCharset: ")
  (if (not (charsetp charset)) (error "%S must be charset." charset))
  (let ((curlist alist) (i 1) curelem)
    (while (setq curelem (car curlist))
      (insert (format "(?%s . ?\\x%x)"
		      (char-to-string
		       (make-char-from-codepoint charset (car curelem)))
		      (cdr curelem)))
      (cond ((= i cols)
	     (insert "\n")
	     (setq i 1))
	    (t
	     (setq i (1+ i))))
      (setq curlist (cdr curlist)))))


;;
;; Appendix.
;;

(defun translate-unicode-cns-table-file (filename)
  (interactive "fFilename:")
  (let ((buf (generate-new-buffer
	      (format "*%s-tbl-translate*" filename)))
	(car-col 1)
	(cdr-col 2)
	table code
	table1 table2 table3
	elem plane)
    (switch-to-buffer buf)
    (insert-file-contents filename)
    (setq table (make-table-region-unicode
		 (point-min)
		 (point-max)
		 car-col
		 cdr-col))
    (erase-buffer)
    (while (setq elem (car table))
      (setq plane (lsh (logand (car elem) ?\xf0000) -16)
	    code (logand (car elem) ?\xffff))
      (cond ((= plane 1)
	     (setq table1 (cons (cons code (cdr elem))
				table1)))
	    ((= plane 2)
	     (setq table2 (cons (cons code (cdr elem))
				table2)))
	    ((= plane ?\xe)
	     (setq table3 (cons (cons code (cdr elem))
				table3))))
      (setq table (cdr table)))
    (insert ";CNS 11643 Plane 1------------------------\n")
    (mucs-print-character-a-list table1 1 'chinese-cns11643-1)
    (insert ";CNS 11643 Plane 2------------------------\n")
    (mucs-print-character-a-list table2 1 'chinese-cns11643-2)
    (insert ";CNS 11643 Plane 3------------------------\n")
    (mucs-print-character-a-list table3 1 'chinese-cns11643-3)))

(provide 'txt-tbl)
