diff options
author | Eduardo Chappa <echappa@gmx.com> | 2013-02-03 00:59:38 -0700 |
---|---|---|
committer | Eduardo Chappa <echappa@gmx.com> | 2013-02-03 00:59:38 -0700 |
commit | 094ca96844842928810f14844413109fc6cdd890 (patch) | |
tree | e60efbb980f38ba9308ccb4fb2b77b87bbc115f3 /pith/charconv | |
download | alpine-094ca96844842928810f14844413109fc6cdd890.tar.xz |
Initial Alpine Version
Diffstat (limited to 'pith/charconv')
-rw-r--r-- | pith/charconv/Makefile.am | 19 | ||||
-rw-r--r-- | pith/charconv/Makefile.in | 527 | ||||
-rw-r--r-- | pith/charconv/filesys.c | 721 | ||||
-rw-r--r-- | pith/charconv/filesys.h | 50 | ||||
-rw-r--r-- | pith/charconv/makefile.wnt | 58 | ||||
-rw-r--r-- | pith/charconv/utf8.c | 2512 | ||||
-rw-r--r-- | pith/charconv/utf8.h | 106 |
7 files changed, 3993 insertions, 0 deletions
diff --git a/pith/charconv/Makefile.am b/pith/charconv/Makefile.am new file mode 100644 index 00000000..615a6082 --- /dev/null +++ b/pith/charconv/Makefile.am @@ -0,0 +1,19 @@ +## Process this file with automake to produce Makefile.in +## Use aclocal -I m4; automake + +# ======================================================================== +# Copyright 2006 University of Washington +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# ======================================================================== + +noinst_LIBRARIES = libpithcc.a + +libpithcc_a_SOURCES = filesys.c utf8.c + +AM_CPPFLAGS = -I@top_builddir@/include -I@top_srcdir@/include diff --git a/pith/charconv/Makefile.in b/pith/charconv/Makefile.in new file mode 100644 index 00000000..3a6f5220 --- /dev/null +++ b/pith/charconv/Makefile.in @@ -0,0 +1,527 @@ +# Makefile.in generated by automake 1.11.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# ======================================================================== +# Copyright 2006 University of Washington +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# ======================================================================== + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = pith/charconv +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/iconv.m4 \ + $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ + $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \ + $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/VERSION \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/include/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LIBRARIES = $(noinst_LIBRARIES) +ARFLAGS = cru +libpithcc_a_AR = $(AR) $(ARFLAGS) +libpithcc_a_LIBADD = +am_libpithcc_a_OBJECTS = filesys.$(OBJEXT) utf8.$(OBJEXT) +libpithcc_a_OBJECTS = $(am_libpithcc_a_OBJECTS) +DEFAULT_INCLUDES = +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libpithcc_a_SOURCES) +DIST_SOURCES = $(libpithcc_a_SOURCES) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_CFLAGS = @AM_CFLAGS@ +AM_LDFLAGS = @AM_LDFLAGS@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CP = @CP@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_CLIENT_CFLAGS = @C_CLIENT_CFLAGS@ +C_CLIENT_GCCOPTLEVEL = @C_CLIENT_GCCOPTLEVEL@ +C_CLIENT_LDFLAGS = @C_CLIENT_LDFLAGS@ +C_CLIENT_SPECIALS = @C_CLIENT_SPECIALS@ +C_CLIENT_TARGET = @C_CLIENT_TARGET@ +C_CLIENT_WITH_IPV6 = @C_CLIENT_WITH_IPV6@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +ISPELLPROG = @ISPELLPROG@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN = @LN@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKE = @MAKE@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MSGFMT = 
@MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NPA_PROG = @NPA_PROG@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +PWPROG = @PWPROG@ +RANLIB = @RANLIB@ +REGEX_BUILD = @REGEX_BUILD@ +RM = @RM@ +SED = @SED@ +SENDMAIL = @SENDMAIL@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SPELLPROG = @SPELLPROG@ +STRIP = @STRIP@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +WEB_BINDIR = @WEB_BINDIR@ +WEB_BUILD = @WEB_BUILD@ +WEB_PUBCOOKIE_BUILD = @WEB_PUBCOOKIE_BUILD@ +WEB_PUBCOOKIE_LIB = @WEB_PUBCOOKIE_LIB@ +WEB_PUBCOOKIE_LINK = @WEB_PUBCOOKIE_LINK@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +acx_pthread_config = @acx_pthread_config@ +alpine_interactive_spellcheck = @alpine_interactive_spellcheck@ +alpine_simple_spellcheck = @alpine_simple_spellcheck@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ 
+install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +noinst_LIBRARIES = libpithcc.a +libpithcc_a_SOURCES = filesys.c utf8.c +AM_CPPFLAGS = -I@top_builddir@/include -I@top_srcdir@/include +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign pith/charconv/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign pith/charconv/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) +libpithcc.a: $(libpithcc_a_OBJECTS) $(libpithcc_a_DEPENDENCIES) + -rm -f libpithcc.a + $(libpithcc_a_AR) libpithcc.a $(libpithcc_a_OBJECTS) $(libpithcc_a_LIBADD) + $(RANLIB) libpithcc.a + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filesys.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf8.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) 
'$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/pith/charconv/filesys.c b/pith/charconv/filesys.c new file mode 100644 index 00000000..c9ef0f05 --- /dev/null +++ b/pith/charconv/filesys.c @@ -0,0 +1,721 @@ +#if !defined(lint) && !defined(DOS) +static char rcsid[] = "$Id: filesys.c 770 2007-10-24 00:23:09Z hubert@u.washington.edu $"; +#endif + +/* + * ======================================================================== + * Copyright 2006-2007 University of Washington + * Copyright 2013 Eduardo Chappa + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * ======================================================================== + */ + +/* includable WITHOUT dependency on c-client */ +#include "../../c-client/mail.h" +#include "../../c-client/utf8.h" + +#ifdef _WINDOWS +/* wingdi.h uses ERROR (!) and we aren't using the c-client ERROR so... */ +#undef ERROR +#endif + +#include <system.h> +#include <general.h> + +#include "../../c-client/fs.h" + +/* includable WITHOUT dependency on pico */ +#include "../../pico/keydefs.h" +#ifdef _WINDOWS +#include "../../pico/osdep/mswin.h" +#endif + +#include "filesys.h" +#include "utf8.h" + + +#define bad_char ((UCS) '?') + + +/* + * Make it easier to use the convert_to_locale function for filenames + * and directory names. Note, only one at a time because there's only + * one buffer. + * This isn't being freed as it stands now. 
+ */ +char * +fname_to_locale(char *fname) +{ + static char *fname_locale_buf = NULL; + static size_t fname_locale_len = 0; + char *converted_fname, *p; + + p = convert_to_locale(fname); + if(p) + converted_fname = p; + else + converted_fname = fname; + + if(converted_fname){ + if(strlen(converted_fname)+1 > fname_locale_len){ + if(fname_locale_buf) + fs_give((void **) &fname_locale_buf); + + fname_locale_len = strlen(converted_fname)+1; + fname_locale_buf = (char *) fs_get(fname_locale_len * sizeof(char)); + } + + strncpy(fname_locale_buf, converted_fname, fname_locale_len); + fname_locale_buf[fname_locale_len-1] = '\0'; + } + else{ + if(fname_locale_len == 0){ + fname_locale_len = 1; + fname_locale_buf = (char *) fs_get(fname_locale_len * sizeof(char)); + } + + fname_locale_buf[0] = '\0'; + } + + if(p) + fs_give((void **) &p); + + return(fname_locale_buf); +} + + +/* + * Make it easier to use the convert_to_utf8 function for filenames + * and directory names. Note, only one at a time because there's only + * one buffer. + * This isn't being freed as it stands now. 
+ */ +char * +fname_to_utf8(char *fname) +{ + static char *fname_utf8_buf = NULL; + static size_t fname_utf8_len = 0; + char *converted_fname, *p; + + p = convert_to_utf8(fname, NULL, 0); + if(p) + converted_fname = p; + else + converted_fname = fname; + + if(converted_fname){ + if(strlen(converted_fname)+1 > fname_utf8_len){ + if(fname_utf8_buf) + fs_give((void **) &fname_utf8_buf); + + fname_utf8_len = strlen(converted_fname)+1; + fname_utf8_buf = (char *) fs_get(fname_utf8_len * sizeof(char)); + } + + strncpy(fname_utf8_buf, converted_fname, fname_utf8_len); + fname_utf8_buf[fname_utf8_len-1] = '\0'; + } + else{ + if(fname_utf8_len == 0){ + fname_utf8_len = 1; + fname_utf8_buf = (char *) fs_get(fname_utf8_len * sizeof(char)); + } + + fname_utf8_buf[0] = '\0'; + } + + if(p) + fs_give((void **) &p); + + return(fname_utf8_buf); +} + + +/* + * The fp file pointer is open for read on a file which has contents + * that are encoded in the user's locale charset. That multibyte stream + * of characters is converted to wide characters and returned one at + * a time. + * + * Not sure what to do if an uninterpretable character happens. Returning + * the bad character now. + */ +UCS +read_a_wide_char(FILE *fp, + void *input_cs) /* input_cs ignored in Windows */ +{ +#ifdef _WINDOWS + _TINT val; + + val = _fgettc(fp); + if(val == _TEOF) + return(CCONV_EOF); + + return((UCS) val); +#else /* UNIX */ + unsigned long octets_so_far, remaining_octets; + unsigned char *inputp; + unsigned char inputbuf[20]; + int c; + UCS ucs; + + c = fgetc(fp); + if(c == EOF) + return(CCONV_EOF); + + /* + * Read enough bytes to make up a character and convert it to UCS-4. 
+ */ + memset(inputbuf, 0, sizeof(inputbuf)); + inputbuf[0] = (unsigned char) c; + octets_so_far = 1; + for(;;){ + remaining_octets = octets_so_far; + inputp = inputbuf; + ucs = mbtow(input_cs, &inputp, &remaining_octets); + switch(ucs){ + case CCONV_BADCHAR: + return(bad_char); + + case CCONV_NEEDMORE: + if(octets_so_far >= sizeof(inputbuf)) + return(bad_char); + + c = fgetc(fp); + if(c == EOF) + return(CCONV_EOF); + + inputbuf[octets_so_far++] = (unsigned char) c; + break; + + default: + /* got a good UCS-4 character */ + return(ucs); + } + } + + return(bad_char); +#endif /* UNIX */ +} + + +int +write_a_wide_char(UCS ucs, FILE *fp) +{ +#ifdef _WINDOWS + int rv = 1; + TCHAR w; + + w = (TCHAR) ucs; + if(_fputtc(w, fp) == _TEOF) + rv = EOF; + + return(rv); +#else /* UNIX */ + int rv = 1; + int i, outchars; + unsigned char obuf[MAX(MB_LEN_MAX,32)]; + + if(ucs < 0x80){ + obuf[0] = (unsigned char) ucs; + outchars = 1; + } + else{ + outchars = wtomb((char *) obuf, ucs); + if(outchars < 0){ + outchars = 1; + obuf[0] = bad_char; /* ??? 
*/ + } + } + + for(i = 0; i < outchars; i++) + if(fputc(obuf[i], fp) == EOF){ + rv = EOF; + break; + } + + return(rv); +#endif /* UNIX */ +} + + +int +our_stat(char *filename, struct stat *sbuf) +{ +#ifdef _WINDOWS + LPTSTR f = NULL; + int ret = -1; + struct _stat s; + + f = utf8_to_lptstr((LPSTR) filename); + if(f){ + ret = _tstat(f, &s); + + sbuf->st_dev = s.st_dev; + sbuf->st_ino = s.st_ino; + sbuf->st_mode = s.st_mode; + sbuf->st_nlink = s.st_nlink; + sbuf->st_uid = s.st_uid; + sbuf->st_gid = s.st_gid; + sbuf->st_rdev = s.st_rdev; + sbuf->st_size = s.st_size; + sbuf->st_atime = (time_t) s.st_atime; + sbuf->st_mtime = (time_t) s.st_mtime; + sbuf->st_ctime = (time_t) s.st_ctime; + + fs_give((void **) &f); + } + + return ret; +#else /* UNIX */ + return(stat(fname_to_locale(filename), sbuf)); +#endif /* UNIX */ +} + + +int +our_lstat(char *filename, struct stat *sbuf) +{ +#ifdef _WINDOWS + assert(0); /* lstat not used in Windows */ + return(-1); +#else /* UNIX */ + return(lstat(fname_to_locale(filename), sbuf)); +#endif /* UNIX */ +} + + +FILE * +our_fopen(char *path, char *mode) +{ +#ifdef _WINDOWS + LPTSTR p = NULL, m = NULL; + FILE *ret = NULL; + char *mode_with_ccs = NULL; + char buf[500]; + size_t len; + + if(mode && (*mode == 'r' || *mode == 'a')){ + char *force_bom_check = ", ccs=UNICODE"; + + if(strchr(mode, 'b')) + mode_with_ccs = mode; + else{ + /* + * The docs seem to say that we don't need the ccs parameter and + * if the file has a BOM at the beginning it will notice that and + * use it. However, we're not seeing that. Instead, what we see is + * that giving a parameter of UNICODE causes the desired behavior. + * This causes it to check for a BOM and if it finds one it uses it. + * If it doesn't find one, it treats the file as ANSI, which is what + * we want. 
+ */ + if((len = strlen(mode) + strlen(force_bom_check)) < sizeof(buf)){ + len = sizeof(buf)-1; + mode_with_ccs = buf; + } + else + mode_with_ccs = (char *) MemAlloc((len+1) * sizeof(char)); + + if(mode_with_ccs) + snprintf(mode_with_ccs, len+1, "%s%s", mode, force_bom_check); + else + mode_with_ccs = mode; /* can't happen */ + } + } + else if(mode && (*mode == 'w')){ + char *force_utf8 = ", ccs=UTF-8"; + + if(strchr(mode, 'b')) + mode_with_ccs = mode; + else{ + if((len = strlen(mode) + strlen(force_utf8)) < sizeof(buf)){ + len = sizeof(buf)-1; + mode_with_ccs = buf; + } + else + mode_with_ccs = (char *) MemAlloc((len+1) * sizeof(char)); + + if(mode_with_ccs) + snprintf(mode_with_ccs, len+1, "%s%s", mode, force_utf8); + else + mode_with_ccs = mode; /* can't happen */ + } + } + + p = utf8_to_lptstr((LPSTR) path); + + if(p){ + m = utf8_to_lptstr((LPSTR) mode_with_ccs); + if(m){ + ret = _tfopen(p, m); + MemFree((void *) m); + } + + fs_give((void **) &p); + } + + if(mode_with_ccs && mode_with_ccs != buf && mode_with_ccs != mode) + MemFree((void *) mode_with_ccs); + + return ret; +#else /* UNIX */ + return(fopen(fname_to_locale(path), mode)); +#endif /* UNIX */ +} + + +int +our_open(char *path, int flags, mode_t mode) +{ +#ifdef _WINDOWS + LPTSTR p = NULL; + int ret = -1; + + /* + * Setting the _O_WTEXT flag when opening a file for reading + * will cause us to read the first few bytes to check for + * a BOM and to translate from that encoding if we find it. + * This only works with stream I/O, not low-level read/write. + * + * When opening for writing the flag _O_U8TEXT will cause + * us to put a UTF-8 BOM at the start of the file. + * + * O_TEXT will cause LF -> CRLF on output, opposite on input + * O_BINARY suppresses that. + * _O_U8TEXT implies O_TEXT. 
+ */ + + p = utf8_to_lptstr((LPSTR) path); + + if(p){ + ret = _topen(p, flags, mode); + fs_give((void **) &p); + } + + return ret; +#else /* UNIX */ + return(open(fname_to_locale(path), flags, mode)); +#endif /* UNIX */ +} + + +int +our_creat(char *path, mode_t mode) +{ +#ifdef _WINDOWS + LPTSTR p = NULL; + int ret = -1; + + p = utf8_to_lptstr((LPSTR) path); + + if(p){ + ret = _tcreat(p, mode); + fs_give((void **) &p); + } + + return ret; +#else /* UNIX */ + return(creat(fname_to_locale(path), mode)); +#endif /* UNIX */ +} + + +int +our_mkdir(char *path, mode_t mode) +{ +#ifdef _WINDOWS + /* mode is a noop for _WINDOWS */ + LPTSTR p = NULL; + int ret = -1; + + p = utf8_to_lptstr((LPSTR) path); + + if(p){ + ret = _tmkdir(p); + fs_give((void **) &p); + } + + return ret; +#else /* UNIX */ + return(mkdir(fname_to_locale(path), mode)); +#endif /* UNIX */ +} + + +int +our_rename(char *oldpath, char *newpath) +{ +#ifdef _WINDOWS + LPTSTR pold = NULL, pnew = NULL; + int ret = -1; + + pold = utf8_to_lptstr((LPSTR) oldpath); + pnew = utf8_to_lptstr((LPSTR) newpath); + + if(pold && pnew) + ret = _trename(pold, pnew); + + if(pold) + fs_give((void **) &pold); + if(pnew) + fs_give((void **) &pnew); + + return ret; +#else /* UNIX */ + char *p, *pold; + size_t len; + int ret = -1; + + p = fname_to_locale(oldpath); + if(p){ + len = strlen(p); + pold = (char *) fs_get((len+1) * sizeof(char)); + strncpy(pold, p, len+1); + pold[len] = '\0'; + + ret = rename(pold, fname_to_locale(newpath)); + fs_give((void **) &pold); + } + + return ret; +#endif /* UNIX */ +} + + +int +our_unlink(char *path) +{ +#ifdef _WINDOWS + LPTSTR p = NULL; + int ret = -1; + + p = utf8_to_lptstr((LPSTR) path); + + if(p){ + ret = _tunlink(p); + fs_give((void **) &p); + } + + return ret; +#else /* UNIX */ + return(unlink(fname_to_locale(path))); +#endif /* UNIX */ +} + + +int +our_link(char *oldpath, char *newpath) +{ +#ifdef _WINDOWS + assert(0); /* link not used in Windows */ + return(-1); +#else /* UNIX */ + 
/*
 * Change the permission bits of path.
 *
 * On Windows the path is converted with utf8_to_lptstr() and handed to
 * _tchmod(); -1 is returned if the conversion yields nothing.
 * Elsewhere the path goes through fname_to_locale() and then chmod().
 */
int
our_chmod(char *path, mode_t mode)
{
#ifdef _WINDOWS
    int    status = -1;
    LPTSTR wide_path = utf8_to_lptstr((LPSTR) path);

    if(wide_path){
        status = _tchmod(wide_path, mode);
        fs_give((void **) &wide_path);
    }

    return(status);
#else /* UNIX */
    return(chmod(fname_to_locale(path), mode));
#endif /* UNIX */
}
/*
 * Fgets that doesn't do any character encoding translation or any
 * of that Windows stuff.
 *
 * Args    s -- destination buffer
 *      size -- size of s in bytes, including room for the terminator
 *        fp -- stream to read from
 *
 * Returns s when at least one byte was stored (or when size is 1 and
 * there is only room for the terminator), NULL when s is NULL, size is
 * not positive, or nothing could be read.  Returning NULL at end of
 * file matters: callers that loop "while(fgets_binary(...))" need it
 * to terminate.
 */
char *
fgets_binary(char *s, int size, FILE *fp)
{
#ifdef _WINDOWS
    char *p;
    char  c;

    /*
     * Use fread low-level input instead of fgets.
     * Maybe if we understood better we wouldn't need this.
     */
    if(!s || size <= 0)
      return(NULL);

    p = s;
    while(p-s < size-1 && fread(&c, sizeof(c), (size_t) 1, fp) == 1){
        *p++ = c;
        if(c == '\n')           /* end of line, newline is kept */
          break;
    }

    *p = '\0';

    /* nothing stored although there was room: end of file or error */
    if(p == s && size > 1)
      return(NULL);

    return(s);

#else /* UNIX */
    return(fgets(s, size, fp));
#endif /* UNIX */
}
FILE *); + + +#endif /* PITH_CHARCONV_FILESYS_INCLUDED */ diff --git a/pith/charconv/makefile.wnt b/pith/charconv/makefile.wnt new file mode 100644 index 00000000..6700ec3b --- /dev/null +++ b/pith/charconv/makefile.wnt @@ -0,0 +1,58 @@ +# $Id: makefile.wnt 14098 2005-10-03 18:54:13Z jpf@u.washington.edu $ +# +# ======================================================================== +# Copyright 2006 University of Washington +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# ======================================================================== + +# +# +# Makefile for WIN NT version of libpithcc.lib +# +# +CC=cl +RM=del +CP=copy +RC=rc + +#includes symbol info for debugging +CDEBUG= #-Zi -Od +LDEBUG= /DEBUG /DEBUGTYPE:CV + +STDCFLAGS= -I..\..\include -I../../regex -nologo -MT -DWIN32 -DDOS -D_WINDOWS -DJOB_CONTROL -DMSC_MALLOC + +CFLAGS= $(CDEBUG) $(STDCFLAGS) $(NET) $(EXTRACFLAGS) + +LFLAGS= $(LDEBUG) $(EXTRALDFLAGS) + +RCFLAGS = + +# switches for library building +LIBER=lib +LIBARGS=/nologo /verbose + +HFILES= ../../include/system.h ../../include/general.h \ + filesys.h utf8.h + +OFILES= filesys.obj utf8.obj + +all: libpithcc.lib + +.c.obj: + $(CC) -c $(CFLAGS) "$(MAKEDIR)"\$*.c + +$(OFILES): $(HFILES) + +libpithcc.lib: $(OFILES) + $(RM) libpithcc.lib || rem + $(LIBER) /out:libpithcc.lib $(OFILES) + +clean: + $(RM) *.lib + $(RM) *.obj diff --git a/pith/charconv/utf8.c b/pith/charconv/utf8.c new file mode 100644 index 00000000..411e1ddd --- /dev/null +++ b/pith/charconv/utf8.c @@ -0,0 +1,2512 @@ +#if !defined(lint) && !defined(DOS) +static char rcsid[] = "$Id: utf8.c 1184 2008-12-16 23:52:15Z hubert@u.washington.edu $"; +#endif + +/* + * ======================================================================== + * Copyright 2006-2008 University of Washington + * Copyright 2013 
Eduardo Chappa + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * ======================================================================== + */ + + +/* includable WITHOUT dependency on c-client */ +#include "../../c-client/mail.h" +#include "../../c-client/utf8.h" + +#ifdef _WINDOWS +/* wingdi.h uses ERROR (!) and we aren't using the c-client ERROR so... */ +#undef ERROR +#endif + +#include <system.h> + +#include "../../c-client/fs.h" + +/* includable WITHOUT dependency on pico */ +#include "../../pico/keydefs.h" + +#include "../osdep/collate.h" +#include "../filttype.h" + +#include "utf8.h" + +#include <stdarg.h> + + +unsigned single_width_chars_a_to_b(UCS *, int, int); + + +static char locale_charmap[50]; + +static int native_utf8; +static void *display_data; + +void +init_utf8_display(int utf8, void *rmap) +{ + native_utf8 = utf8; + display_data = rmap; +} + + +/* + * Argument is a UCS-4 wide character. + * Returns the environment dependent cell width of the + * character when printed to the screen. + * This will be -1 if the character is not printable. + * It will be >= zero if it is printable. + * + * Note that in the case it is not printable but it is still sent to + * Writechar, Writechar will print a '?' with width 1. + */ +int +wcellwidth(UCS ucs) +{ + char dummy[32]; + long w; + + /* + * We believe that on modern unix systems wchar_t is a UCS-4 character. + * That's the assumption here. + */ + + if(native_utf8){ /* display is UTF-8 capable */ + w = ucs4_width((unsigned long) ucs); + return((w & U4W_ERROR) ? -1 : w); + } + else if(display_data){ + if(wtomb(dummy, ucs) < 0) + return(-1); + else{ + w = ucs4_width((unsigned long) ucs); + return((w & U4W_ERROR) ? 
-1 : w); + } + } +#ifndef _WINDOWS + else + return(wcwidth((wchar_t) ucs)); +#else + return(0); +#endif +} + + +/* + * Argument is a UCS-4 wide character. + * It is converted to the multibyte version (for example UTF8 or EUC-JP). + * Dest is a buffer at least xx chars wide where the multi-byte version + * of the wide character will be written. + * The returned value is the number of bytes written to dest or -1 + * if the conversion can't be done. + */ +int +wtomb(char *dest, UCS ucs) +{ + /* + * We believe that on modern unix systems wchar_t is a UCS-4 character. + * That's the assumption here. + */ + + if(native_utf8){ + unsigned char *newdptr; + + newdptr = utf8_put((unsigned char *) dest, (unsigned long) ucs); + return((newdptr == (unsigned char *) dest) ? -1 : newdptr - (unsigned char *) dest); + } + else if(display_data){ + unsigned long ucs4; + int ret; + + ucs4 = (unsigned long) ucs; + ret = ucs4_rmaplen(&ucs4, 1, (unsigned short *) display_data, 0); + if(ret >= 0) + ucs4_rmapbuf((unsigned char *) dest, &ucs4, 1, (unsigned short *) display_data, 0); + else + ret = -1; + + return(ret); + } + else + return(wcrtomb(dest, (wchar_t) ucs, NULL)); +} + + +/* + * This function does not necessarily update inputp and remaining_octets, so + * don't rely on that. The c-client version does but the other doesn't. + */ +UCS +mbtow(void *input_cs, unsigned char **inputp, unsigned long *remaining_octets) +{ + UCS ucs; + + if(input_cs){ + CHARSET *cast_input_cs; + + cast_input_cs = (CHARSET *) input_cs; + + switch((ucs = (UCS) ucs4_cs_get(cast_input_cs, inputp, remaining_octets))){ + case U8G_ENDSTRG: + case U8G_ENDSTRI: + return(CCONV_NEEDMORE); + + default: + if(ucs & U8G_ERROR || ucs == UBOGON) + return(CCONV_BADCHAR); + + return(ucs); + } + } + else{ + size_t ret; + wchar_t w; + + /* + * Warning: input_cs and remaining_octets are unused in this + * half of the if/else. 
+ * + * Unfortunately, we can't tell the difference between a source string + * that is just not long enough and one that has characters that can't + * be converted even though it is long enough. We return NEEDMORE in both cases. + */ + ret = mbstowcs(&w, (char *) (*inputp), 1); + if(ret == (size_t)(-1)) + return(CCONV_NEEDMORE); + else{ + ucs = (UCS) w; + return(ucs); + } + } +} + + +void +set_locale_charmap(char *charmap) +{ + if(charmap){ + strncpy(locale_charmap, charmap, sizeof(locale_charmap)); + locale_charmap[sizeof(locale_charmap)-1] = '\0'; + } + else + locale_charmap[0] = '\0'; +} + + +/* + * This ensures that the string is UTF-8. If str is already a UTF-8 string, + * NULL is returned. Otherwise, an allocated string which is UTF-8 is returned. + * The caller is responsible for freeing the returned value. + * + * Args str -- the string to convert + */ +char * +convert_to_utf8(char *str, char *fromcharset, int flags) +{ + char *ret = NULL; + char *fcharset; + SIZEDTEXT src, result; + const CHARSET *cs; + int try; + + src.data = (unsigned char *) str; + src.size = strlen(str); + + /* already UTF-8, return NULL */ + if(!(flags & CU8_NOINFER) + && (cs = utf8_infercharset(&src)) + && (cs->type == CT_ASCII || cs->type == CT_UTF8)) + return(ret); + + try = 1; + while(try < 5){ + switch(try){ + case 1: + fcharset = fromcharset; + if(fcharset && strucmp("UTF-8", fcharset) != 0) + break; /* give it a try */ + else + try++; /* fall through */ + + case 2: + if(!(flags & CU8_NOINFER)){ + fcharset = cs ? 
cs->name : NULL; + if(fcharset && strucmp("UTF-8", fcharset) != 0) + break; + else + try++; /* fall through */ + } + else + try++; /* fall through */ + + case 3: + fcharset = locale_charmap; + if(fcharset && strucmp("UTF-8", fcharset) != 0) + break; + else + try++; /* fall through */ + + default: + fcharset = "ISO-8859-1"; /* this will "work" */ + break; + } + + memset(&result, 0, sizeof(result)); + + if(fcharset && utf8_text(&src, fcharset, &result, 0L)){ + if(!(result.size == src.size && result.data == src.data)){ + ret = (char *) fs_get((result.size+1) * sizeof(char)); + strncpy(ret, (char *) result.data, result.size); + ret[result.size] = '\0'; + } + /* else no conversion necessary */ + + return(ret); + } + + try++; + } + + /* won't make it to here */ + return(ret); +} + + +/* + * Convert from UTF-8 to user's locale charset. + * This actually uses the wtomb routine to do the conversion, and that + * relies on setup_for_input_output having been called. + * If no conversion is necessary, NULL is returned, otherwise an allocated + * string in the locale charset is returned and the caller is responsible + * for freeing it. + */ +char * +convert_to_locale(char *utf8str) +{ +#define CHNK 500 + char *inp, *retp, *ret = NULL; + CBUF_S cb; + int r, alloced; + + if(native_utf8 || !utf8str || !utf8str[0]) + return(NULL); + + cb.cbuf[0] = '\0'; + cb.cbufp = cb.cbufend = cb.cbuf; + inp = utf8str; + + alloced = CHNK; + ret = (char *) fs_get(alloced * sizeof(char)); + retp = ret; + + /* + * There's gotta be a better way to do this but utf8_to_locale was + * available and everything looks like a nail when all you have + * is a hammer. + */ + while(*inp){ + /* + * We're placing the outgoing stream of characters in ret, a multi-byte + * array of characters in the user's locale charset. See if there is + * enough room for the next wide characters worth of output chars + * and allocate more space if not. 
+ */ + if((alloced - (retp-ret)) < MAX(MB_LEN_MAX,32)){ + alloced += CHNK; + fs_resize((void **) &ret, alloced * sizeof(char)); + } + + r = utf8_to_locale((int) *inp++, &cb, + (unsigned char *) retp, alloced-(retp-ret)); + + retp += r; + } + + *retp = '\0'; + + fs_resize((void **) &ret, strlen(ret)+1); + + return(ret); +} + + +/* + * Pass in a stream of UTF-8 characters in 'c' and return obuf + * filled in with multi-byte characters. The return value is the + * number of valid characters in obuf to be used. + */ +int +utf8_to_locale(int c, CBUF_S *cb, unsigned char obuf[], size_t obuf_size) +{ + int outchars = 0; + + if(!(cb && cb->cbufp)) + return(0); + + if(cb->cbufp < cb->cbuf+sizeof(cb->cbuf)){ + unsigned char *inputp; + unsigned long remaining_octets; + UCS ucs; + + *(cb->cbufp)++ = (unsigned char) c; + inputp = cb->cbuf; + remaining_octets = (cb->cbufp - cb->cbuf) * sizeof(unsigned char); + ucs = (UCS) utf8_get(&inputp, &remaining_octets); + + switch(ucs){ + case U8G_ENDSTRG: /* incomplete character, wait */ + case U8G_ENDSTRI: /* incomplete character, wait */ + break; + + default: + if(ucs & U8G_ERROR || ucs == UBOGON){ + /* + * None of these cases is supposed to happen. If it + * does happen then the input stream isn't UTF-8 + * so something is wrong. Treat each character in the + * input buffer as a separate error character and + * print a '?' for each. + */ + for(inputp = cb->cbuf; inputp < cb->cbufp; inputp++) + obuf[outchars++] = '?'; + + cb->cbufp = cb->cbuf; + } + else{ + if(ucs >= 0x80 && wcellwidth(ucs) < 0){ + /* + * This happens when we have a UTF-8 character that + * we aren't able to print in our locale. For example, + * if the locale is setup with the terminal + * expecting ISO-8859-1 characters then there are + * lots of UTF-8 characters that can't be printed. + * Print a '?' instead. + */ + obuf[outchars++] = '?'; + } + else{ + /* + * Convert the ucs into the multibyte + * character that corresponds to the + * ucs in the users locale. 
+ */ + outchars = wtomb((char *) obuf, ucs); + if(outchars < 0){ + obuf[0] = '?'; + outchars = 1; + } + } + + /* update the input buffer */ + if(inputp >= cb->cbufp) /* this should be the case */ + cb->cbufp = cb->cbuf; + else{ /* extra chars for some reason? */ + unsigned char *q, *newcbufp; + + newcbufp = (cb->cbufp - inputp) + cb->cbuf; + q = cb->cbuf; + while(inputp < cb->cbufp) + *q++ = *inputp++; + + cb->cbufp = newcbufp; + } + } + + break; + } + } + else{ /* error */ + obuf[0] = '?'; + outchars = 1; + cb->cbufp = cb->cbuf; /* start over */ + } + + return(outchars); +} + + +/* + * Returns the screen cells width of the UCS-4 string argument. + * The source string is zero terminated. + */ +unsigned +ucs4_str_width(UCS *ucsstr) +{ + unsigned width = 0; + int w; + + if(ucsstr) + while(*ucsstr){ + w = wcellwidth(*ucsstr++); + if(w != U4W_CTLSRGT) + width += (w < 0 ? 1 : w); + } + + return width; +} + + +/* + * Returns the screen cells width of the UCS-4 string argument + * from ucsstr[a] through (inclusive) ucsstr[b]. + * No checking is done to make sure a starts in the middle + * of a UCS-4 array. + */ +unsigned +ucs4_str_width_a_to_b(UCS *ucsstr, int a, int b) +{ + unsigned width = 0; + int i, w; + + if(ucsstr) + for(i = a; i <= b && ucsstr[i]; i++){ + w = wcellwidth(ucsstr[i]); + if(w != U4W_CTLSRGT) + width += (w < 0 ? 1 : w); + } + + return width; +} + + +/* + * Returns the screen cells width of the UCS-4 string argument + * from ustart through (exclusive) uend. + * No checking is done to make sure it starts in the middle + * of a UCS-4 array. + */ +unsigned +ucs4_str_width_ptr_to_ptr(UCS *ustart, UCS *uend) +{ + UCS *u; + unsigned width = 0; + int w; + + if(!ustart) + return width; + + if(ustart) + for(u = ustart; u < uend; u++){ + w = wcellwidth(*u); + if(w != U4W_CTLSRGT) + width += (w < 0 ? 
1 : w); + } + + return(width); +} + + +/* + * Return the largest possible pointer into ucs4str so that the width + * of the string from ucs4str to the pointer (exclusive) + * is maxwidth or less. Also stops at a null character. + */ +UCS * +ucs4_particular_width(UCS *ucs4str, int maxwidth) +{ + UCS *u; + int w_consumed = 0, w, done = 0; + + u = ucs4str; + + if(u) + while(!done && *u && w_consumed <= maxwidth){ + w = wcellwidth(*u); + w = (w >= 0 ? w : 1); + if(w_consumed + w <= maxwidth){ + w_consumed += w; + ++u; + } + else + ++done; + } + + return(u); +} + + +/* + * Convert and copy a UTF-8 string into a UCS-4 NULL + * terminated array. Just like cpystr only it converts + * from UTF-8 to UCS-4. + * + * Returned UCS-4 string needs to be freed by caller. + */ +UCS * +utf8_to_ucs4_cpystr(char *utf8src) +{ + size_t retsize; + UCS *ret = NULL; + UCS ucs; + unsigned long remaining_octets; + unsigned char *readptr; + size_t arrayindex; + + /* + * We don't know how big to allocate the return array + * because variable numbers of octets in the src array + * will combine to make UCS-4 characters. The number of + * UCS-4 characters is less than or equal to the number + * of src characters, though. + */ + + if(!utf8src) + return NULL; + + retsize = strlen(utf8src) + 1; + + ret = (UCS *) fs_get(retsize * sizeof(*ret)); + memset(ret, 0, retsize * sizeof(*ret)); + + readptr = (unsigned char *) utf8src; + remaining_octets = retsize-1; + arrayindex = 0; + + while(remaining_octets > 0 && *readptr && arrayindex < retsize-1){ + ucs = (UCS) utf8_get(&readptr, &remaining_octets); + + if(ucs & U8G_ERROR || ucs == UBOGON) + remaining_octets = 0; + else + ret[arrayindex++] = ucs; + } + + ret[arrayindex] = '\0'; + + /* get rid of excess size */ + if(arrayindex+1 < retsize) + fs_resize((void **) &ret, (arrayindex + 1) * sizeof(*ret)); + + return ret; +} + + +/* + * Convert and copy a UCS-4 zero-terminated array into a UTF-8 NULL + * terminated string. 
Just like cpystr only it converts + * from UCS-4 to UTF-8. + * + * Returned UTF-8 string needs to be freed by caller. + */ +char * +ucs4_to_utf8_cpystr(UCS *ucs4src) +{ + unsigned char *ret = NULL; + unsigned char *writeptr; + int i; + + if(!ucs4src) + return NULL; + + /* + * Over-allocate and then resize at the end. + */ + + /* count characters in source */ + for(i = 0; ucs4src[i]; i++) + ; + + ret = (unsigned char *) fs_get((6*i + 1) * sizeof(*ret)); + memset(ret, 0, (6*i + 1) * sizeof(*ret)); + + writeptr = ret; + for(i = 0; ucs4src[i]; i++) + writeptr = utf8_put(writeptr, (unsigned long) ucs4src[i]); + + /* get rid of excess size */ + fs_resize((void **) &ret, (writeptr - ret + 1) * sizeof(*ret)); + + return ((char *) ret); +} + + +/* + * Similar to above but copy a fixed number of source + * characters instead of going until null terminator. + */ +char * +ucs4_to_utf8_cpystr_n(UCS *ucs4src, int ucs4src_len) +{ + unsigned char *ret = NULL; + unsigned char *writeptr; + int i; + + if(!ucs4src) + return NULL; + + /* + * Over-allocate and then resize at the end. + */ + + ret = (unsigned char *) fs_get((6*ucs4src_len + 1) * sizeof(*ret)); + memset(ret, 0, (6*ucs4src_len + 1) * sizeof(*ret)); + + writeptr = ret; + for(i = 0; i < ucs4src_len; i++) + writeptr = utf8_put(writeptr, (unsigned long) ucs4src[i]); + + /* get rid of excess size */ + fs_resize((void **) &ret, (writeptr - ret + 1) * sizeof(*ret)); + + return ((char *) ret); +} + + +#ifdef _WINDOWS +/* + * Convert a UTF-8 argument into an LPTSTR version + * of that argument. The result is allocated here + * and should be freed by the caller. 
+ */ +LPTSTR +utf8_to_lptstr(LPSTR arg_utf8) +{ + int lptstr_len; + LPTSTR lptstr_ret = NULL; + + lptstr_len = MultiByteToWideChar( CP_UTF8, 0, arg_utf8, -1, NULL, 0 ); + if(lptstr_len > 0) + { + lptstr_ret = (LPTSTR)fs_get(lptstr_len * sizeof(TCHAR)); + lptstr_len = MultiByteToWideChar( CP_UTF8, 0, + arg_utf8, -1, lptstr_ret, lptstr_len ); + } + + if(!lptstr_len) + { + /* check GetLastError()? */ + lptstr_ret = (LPTSTR)fs_get(sizeof(TCHAR)); + lptstr_ret[0] = 0; + } + + return lptstr_ret; +} + + +/* + * Convert an LPTSTR argument into a UTF-8 version + * of that argument. The result is allocated here + * and should be freed by the caller. + */ +LPSTR +lptstr_to_utf8(LPTSTR arg_lptstr) +{ + int utf8str_len; + LPSTR utf8str_ret = NULL; + + utf8str_len = WideCharToMultiByte( CP_UTF8, 0, arg_lptstr, -1, NULL, 0, NULL, NULL ); + if(utf8str_len > 0) + { + utf8str_ret = (LPSTR)fs_get(utf8str_len * sizeof(CHAR)); + utf8str_len = WideCharToMultiByte( CP_UTF8, 0, + arg_lptstr, -1, utf8str_ret, utf8str_len, NULL, NULL ); + } + + if(!utf8str_len) + { + /* check GetLastError()? */ + utf8str_ret = (LPSTR)fs_get(sizeof(CHAR)); + utf8str_ret[0] = 0; + } + + return utf8str_ret; +} + + +/* + * Convert a UCS4 argument into an LPTSTR version + * of that argument. The result is allocated here + * and should be freed by the caller. + */ +LPTSTR +ucs4_to_lptstr(UCS *arg_ucs4) +{ + LPTSTR ret_lptstr = NULL; + size_t len; + size_t i; + + if(arg_ucs4){ + len = ucs4_strlen(arg_ucs4); + ret_lptstr = (LPTSTR) fs_get((len+1) * sizeof(TCHAR)); + /* bogus conversion ignores UTF-16 */ + for(i = 0; i < len; i++) + ret_lptstr[i] = arg_ucs4[i]; + + ret_lptstr[len] = '\0'; + } + + return(ret_lptstr); +} + + +/* + * Convert an LPTSTR argument into a UCS4 version + * of that argument. The result is MemAlloc'd here + * and should be freed by the caller. 
+ */ +UCS * +lptstr_to_ucs4(LPTSTR arg_lptstr) +{ + UCS *ret_ucs4 = NULL; + size_t len; + size_t i; + + if(arg_lptstr){ + len = _tcslen(arg_lptstr); + ret_ucs4 = (UCS *) fs_get((len+1)*sizeof(UCS)); + /* bogus conversion ignores UTF-16 */ + for(i = 0; i < len; i++) + ret_ucs4[i] = arg_lptstr[i]; + + ret_ucs4[len] = '\0'; + } + + return(ret_ucs4); +} + +#endif /* _WINDOWS */ + + +/* + * Pass in a stream of UTF-8 characters 1-at-a-time in 'c' and return obuf + * 1-at-a-time filled in with UCS characters. The return value is the + * number of valid characters in obuf to be used. It can only + * be 1 or 0 characters since we're only getting one UTF-8 character + * at a time. + */ +int +utf8_to_ucs4_oneatatime(int c, CBUF_S *cb, UCS *obuf, int *obufwidth) +{ + int width = 0, outchars = 0; + + if(!(cb && cb->cbufp)) + return(0); + + if(cb->cbufp < cb->cbuf+sizeof(cb->cbuf)){ + unsigned char *inputp; + unsigned long remaining_octets; + UCS ucs; + + *cb->cbufp++ = (unsigned char) c; + inputp = cb->cbuf; + remaining_octets = (cb->cbufp - cb->cbuf) * sizeof(unsigned char); + ucs = (UCS) utf8_get(&inputp, &remaining_octets); + + switch(ucs){ + case U8G_ENDSTRG: /* incomplete character, wait */ + case U8G_ENDSTRI: /* incomplete character, wait */ + break; + + default: + if(ucs & U8G_ERROR || ucs == UBOGON){ + /* + * None of these cases is supposed to happen. If it + * does happen then the input stream isn't UTF-8 + * so something is wrong. + */ + outchars++; + *obuf = '?'; + cb->cbufp = cb->cbuf; + width = 1; + } + else{ + outchars++; + if(ucs < 0x80 && ucs >= 0x20) + width = 1; + + if(ucs >= 0x80 && (width=wcellwidth(ucs)) < 0){ + /* + * This happens when we have a UTF-8 character that + * we aren't able to print in our locale. For example, + * if the locale is setup with the terminal + * expecting ISO-8859-1 characters then there are + * lots of UTF-8 characters that can't be printed. + * Print a '?' instead. + * Don't think this should happen in Windows. 
+ */ + *obuf = '?'; + } + else{ + *obuf = ucs; + } + + /* update the input buffer */ + if(inputp >= cb->cbufp) /* this should be the case */ + cb->cbufp = cb->cbuf; + else{ /* extra chars for some reason? */ + unsigned char *q, *newcbufp; + + newcbufp = (cb->cbufp - inputp) + cb->cbuf; + q = cb->cbuf; + while(inputp < cb->cbufp) + *q++ = *inputp++; + + cb->cbufp = newcbufp; + } + } + + break; + } + } + else{ /* error */ + *obuf = '?'; + outchars = 1; + width = 1; + cb->cbufp = cb->cbuf; /* start over */ + } + + if(obufwidth) + *obufwidth = width; + + return(outchars); +} + + +/* + * Return an allocated copy of a zero-terminated UCS-4 string. + */ +UCS * +ucs4_cpystr(UCS *ucs4src) +{ + size_t arraysize; + UCS *ret = NULL; + size_t i; + + if(!ucs4src) + return NULL; + + arraysize = ucs4_strlen(ucs4src); + + ret = (UCS *) fs_get((arraysize+1) * sizeof(*ret)); + memset(ret, 0, (arraysize+1) * sizeof(*ret)); + + for(i = 0; i < arraysize; i++) + ret[i] = ucs4src[i]; + + return ret; +} + + +UCS * +ucs4_strncpy(UCS *ucs4dst, UCS *ucs4src, size_t n) +{ + size_t i; + + if(ucs4src && ucs4dst){ + for(i = 0; i < n; i++){ + ucs4dst[i] = ucs4src[i]; + if(ucs4dst[i] == '\0') + break; + } + } + + return ucs4dst; +} + + +UCS * +ucs4_strncat(UCS *ucs4dst, UCS *ucs4src, size_t n) +{ + size_t i; + UCS *u; + + if(ucs4src && ucs4dst){ + for(u = ucs4dst; *u; u++) + ; + + for(i = 0; i < n; i++){ + u[i] = ucs4src[i]; + if(u[i] == '\0') + break; + } + + if(i == n) + u[i] = '\0'; + } + + return ucs4dst; +} + + +/* + * Like strlen only this returns the number of non-zero characters + * in a zero-terminated UCS-4 array. + */ +size_t +ucs4_strlen(UCS *ucs4str) +{ + size_t i = 0; + + if(ucs4str) + while(ucs4str[i]) + i++; + + return(i); +} + + +int +ucs4_strcmp(UCS *s1, UCS *s2) +{ + for(; *s1 == *s2; s1++, s2++) + if(*s1 == '\0') + return 0; + + return((*s1 < *s2) ? 
-1 : 1); +} + + +UCS * +ucs4_strchr(UCS *s, UCS c) +{ + if(!s) + return NULL; + + while(*s && *s != c) + s++; + + if(*s || !c) + return s; + else + return NULL; +} + + +UCS * +ucs4_strrchr(UCS *s, UCS c) +{ + UCS *ret = NULL; + + if(!s) + return ret; + + while(*s){ + if(*s == c) + ret = s; + + s++; + } + + return ret; +} + + +/* + * Returns the screen cells width of the UTF-8 string argument. + */ +unsigned +utf8_width(char *str) +{ + unsigned width = 0; + int this_width; + UCS ucs; + unsigned long remaining_octets; + char *readptr; + + if(!(str && *str)) + return(width); + + readptr = str; + remaining_octets = readptr ? strlen(readptr) : 0; + + while(remaining_octets > 0 && *readptr){ + + ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets); + + if(ucs & U8G_ERROR || ucs == UBOGON){ + /* + * This should not happen, but do something to handle it anyway. + * Treat each character as a single width character, which is what should + * probably happen when we actually go to write it out. + */ + remaining_octets--; + readptr++; + this_width = 1; + } + else{ + this_width = wcellwidth(ucs); + + /* + * If this_width is -1 that means we can't print this character + * with our current locale. Writechar will print a '?'. + */ + if(this_width < 0) + this_width = 1; + } + + width += (unsigned) this_width; + } + + return(width); +} + + +/* + * Copy UTF-8 characters from src into dst. + * This is intended to be used if you want to truncate a string at + * the start instead of the end. For example, you have a long string + * like + * this_is_a_long_string + * but not enough space to fit it into a particular field. You want to + * end up with + * s_a_long_string + * where that fits in a particular width. Perhaps you'd use this with ... + * to get + * ...s_a_long_string + * This right adjusts the end of the string in the width space and + * cuts it off at the start. If there is enough width for the whole + * string it will copy the string into dst with no padding. 
+ * + * Copy enough characters so that the result will have screen width of + * want_width screen cells in current locale. + * + * Dstlen is the available space in dst. No more than dstlen bytes will be written + * to dst. This is just for protection, it shouldn't be relied on to + * do anything useful. Dstlen should be large enough. Otherwise you'll get + * characters truncated in the middle or something like that. + * + * Returned value is the number of bytes written to dst, not including + * the possible terminating null. + * + * If we can't hit want_width exactly because of double width characters + * then we will pad the end of the string with space in order to make + * the width exact. + */ +size_t +utf8_to_width_rhs(char *dst, /* destination buffer */ + char *src, /* source string */ + size_t dstlen, /* space in dest */ + unsigned want_width) /* desired screen width */ +{ + int this_width; + unsigned width_consumed = 0; + UCS ucs; + unsigned long remaining_octets; + char *readptr, *goodreadptr, *savereadptr, *endptr; + size_t nb = 0; + + if(!src){ + if(dstlen > 0) + dst[0] = '\0'; + + return nb; + } + + /* + * Start at the end of the source string and go backwards until we + * get to the desired width, but not more than the width. + */ + readptr = src + strlen(src); + endptr = readptr; + goodreadptr = readptr; + width_consumed = 0; + savereadptr = readptr; + + for(readptr = savereadptr-1; readptr >= src && width_consumed < want_width && (endptr - readptr) < dstlen; + readptr = savereadptr-1){ + + savereadptr = readptr; + remaining_octets = goodreadptr - readptr; + ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets); + + /* + * Handling the error case is tough because an error will be the normal thing that + * happens as we back through the string. So we're just going to punt on the + * error for now. 
+ */ + if(!(ucs & U8G_ERROR || ucs == UBOGON)){ + if(remaining_octets > 0){ + /* + * This means there are some bad octets after this good + * character so things are not going to work out well. + * Bail out. + */ + savereadptr = src; /* we're done */ + } + else{ + this_width = wcellwidth(ucs); + + if(this_width < 0) + this_width = 1; + + if(width_consumed + (unsigned) this_width <= want_width){ /* ok */ + width_consumed += (unsigned) this_width; + goodreadptr = savereadptr; + } + else + savereadptr = src; /* we're done */ + } + } + } + + /* + * Copy characters from goodreadptr to endptr into dst. + */ + nb = MIN(endptr-goodreadptr, dstlen-1); + strncpy(dst, goodreadptr, nb); + dst[nb] = '\0'; + + /* + * Pad out with spaces in order to hit width exactly. + */ + while(width_consumed < want_width && nb < dstlen-1){ + dst[nb++] = ' '; + dst[nb] = '\0'; + width_consumed++; + } + + return nb; +} + + +/* + * The arguments being converted are UTF-8 strings. + * This routine attempts to make it possible to use screen cell + * widths in a format specifier. In a one-byte per screen cell + * world we might have used %10.10s to cause a string to occupy + * 10 screen positions. Since the width and precision are really + * referring to numbers of bytes instead of screen positions that + * won't work with UTF-8 input. We emulate that behavior with + * the format string %w. %m.nw means to use the m and n as + * screen width indicators instead of bytes indicators. + * + * There is no reason to use this routine unless you want to use + * min field with or precision with the specifier. A plain %w without + * widths is equivalent exactly to a plain %s in a regular printf. + * + * Double-width characters complicate things. It may not be possible + * to satisfy the request exactly. For example, %3w for an input + * string that is made up of two double-width characters. 
/*
 * Helper for utf8_snprintf(). Write one printf conversion specification
 * into newfmt: a '%', the flag characters in flags, the minimum field
 * width and the precision (a negative value means "not given"), the
 * length modifier (0 means none) and finally the conversion character.
 * The result is always null terminated and is cut short if it does not
 * fit in newfmtlen bytes.
 */
static void
utf8_snprintf_newfmt(char *newfmt, size_t newfmtlen, const char *flags,
                     int min_field_width, int field_precision,
                     int modifier, int conversion)
{
    char        num[20];
    char       *q, *last;
    const char *p;

    if(!newfmt || newfmtlen == 0)
      return;

    q = newfmt;
    last = newfmt + newfmtlen - 1;      /* reserved for the terminator */

    if(q < last)
      *q++ = '%';

    for(p = flags; p && *p && q < last; p++)
      *q++ = *p;

    if(min_field_width >= 0){
        snprintf(num, sizeof(num), "%d", min_field_width);
        for(p = num; *p && q < last; p++)
          *q++ = *p;
    }

    if(field_precision >= 0){
        if(q < last)
          *q++ = '.';

        snprintf(num, sizeof(num), "%d", field_precision);
        for(p = num; *p && q < last; p++)
          *q++ = *p;
    }

    if(modifier && q < last)
      *q++ = (char) modifier;

    if(q < last)
      *q++ = (char) conversion;

    *q = '\0';
}


/*
 * A restricted snprintf in which the strings being converted are UTF-8
 * and which understands the extra conversion %w.
 *
 * This routine attempts to make it possible to use screen cell widths in
 * a format specifier. In a one-byte per screen cell world we might have
 * used %10.10s to cause a string to occupy 10 screen positions. Since the
 * width and precision really refer to numbers of bytes instead of screen
 * positions that won't work with UTF-8 input. %m.nw means to use m and n
 * as screen width indicators instead of byte indicators.
 *
 * There is no reason to use this routine unless you want to use a minimum
 * field width or a precision with the specifier. A plain %w without
 * widths is exactly equivalent to a plain %s in a regular printf.
 *
 * Double-width characters complicate things. It may not be possible to
 * satisfy the request exactly, for example %3w for an input string that
 * is made up of two double-width characters. In that case a space is
 * used where half of a double-width character would have been needed.
 *
 * %w only works for strings (it's a %s replacement).
 *
 * Conversions handled: d i o x X u c s f e E g G p % and w. The 'l'
 * modifier is honored for the integer conversions and 'L' for the
 * floating point conversions, 'h' is accepted and ignored. Anything else
 * trips an assert.
 *
 * Buffer overflow is handled by the size argument. %.30s will work to
 * limit a particular string to 30 bytes, but you lose that ability with
 * %w, since it may write more than precision bytes in order to get to
 * the desired width. It is best to choose size large enough so that it
 * doesn't come into play, otherwise it may be possible to get partial
 * UTF-8 characters because of the truncation.
 *
 * The return value isn't quite the same as the return value of snprintf.
 * It is the number of bytes written, not counting the trailing null, but
 * if the output is truncated due to size the return value is the number
 * of bytes actually stored, not the number that would have been written.
 * The trailing null is only stored if there is still room for it, so a
 * return value equal to size means dest is NOT null terminated.
 */
int
utf8_snprintf(char *dest, size_t size, char *fmt, ...)
{
    char         newfmt[100], flags[6], *q, *pdest, *width_str, *end;
    char        *input_str;
    int          int_arg;
    long         long_arg;
    unsigned long ulong_arg;
    double       double_arg;
    long double  long_double_arg;
    void        *ptr_arg;
    unsigned     got_width;
    int          more_flags, ret, w, pad;
    int          min_field_width, field_precision, modifier;
    int          flags_minus, flags_plus, flags_space, flags_zero, flags_pound;
    va_list      args;

    newfmt[0] = '\0';
    pdest = dest;

#define IS_ROOM_IN_DEST(n_more_chars) \
    ((pdest - dest + (n_more_chars) <= size) ? 1 : 0)

    /*
     * Strategy: It would be nice to look through the fmt string for %w's,
     * replace them with %s's with adjusted width and precision, and then
     * call the regular system vsnprintf with the altered format string
     * but the same arguments. That doesn't quite work because a %*w would
     * need the integer argument the * refers to modified, and there is
     * no way to do that (or to remove or skip that argument).
     *
     * So instead the result is constructed one piece at a time, pasting
     * together the pieces from the different conversions.
     */
    va_start(args, fmt);

    while(*fmt && IS_ROOM_IN_DEST(1)){
        if(*fmt != '%'){
            *pdest++ = *fmt++;          /* ordinary character */
            continue;
        }

        fmt++;                          /* skip over the '%' */

        min_field_width = field_precision = -1;
        modifier = 0;
        w = 0;
        flags_minus = flags_plus = flags_space = flags_zero = flags_pound = 0;

        /* flags */
        more_flags = 1;
        while(more_flags){
            switch(*fmt){
              case '-':
                flags_minus++;
                fmt++;
                break;

              case '+':
                flags_plus++;
                fmt++;
                break;

              case ' ':
                flags_space++;
                fmt++;
                break;

              case '0':
                flags_zero++;
                fmt++;
                break;

              case '#':
                flags_pound++;
                fmt++;
                break;

              default:
                more_flags = 0;
                break;
            }
        }

        /* each flag is passed on at most once, always in this order */
        q = flags;
        if(flags_minus)
          *q++ = '-';
        if(flags_plus)
          *q++ = '+';
        if(flags_space)
          *q++ = ' ';
        if(flags_zero)
          *q++ = '0';
        if(flags_pound)
          *q++ = '#';

        *q = '\0';

        /* minimum field width */
        if(*fmt == '*'){
            min_field_width = va_arg(args, int);
            fmt++;
        }
        else if(*fmt >= '0' && *fmt <= '9'){
            width_str = fmt;
            while(*fmt >= '0' && *fmt <= '9')
              fmt++;

            min_field_width = atoi(width_str);  /* stops at first non-digit */
        }

        /* field precision */
        if(*fmt == '.'){
            fmt++;
            if(*fmt == '*'){
                field_precision = va_arg(args, int);
                fmt++;
            }
            else if(*fmt >= '0' && *fmt <= '9'){
                width_str = fmt;
                while(*fmt >= '0' && *fmt <= '9')
                  fmt++;

                field_precision = atoi(width_str);
            }
        }

        /* length modifier */
        if(*fmt == 'h' || *fmt == 'l' || *fmt == 'L')
          modifier = *fmt++;

        /* conversion character */
        switch(*fmt){
          case 'w':
            /*
             * Work with the string argument to figure out the byte width
             * and byte precision needed to produce the screen width and
             * screen precision asked for in the %w.
             */
            input_str = va_arg(args, char *);
            if(field_precision >= 0 || min_field_width >= 0)
              w = utf8_width(input_str);

            if(field_precision >= 0){
                if(w <= field_precision)
                  field_precision = -1;         /* print it all */
                else{
                    /*
                     * We need to cut off some of the input_str
                     * in this case.
                     */
                    end = utf8_count_forw_width(input_str, field_precision, &got_width);
                    field_precision = (int) (end - input_str);
                    /* new w with this field_precision */
                    w = got_width;
                }
            }

            /* need some padding */
            if(min_field_width >= 0){
                pad = min_field_width - w;
                if(pad < 0)
                  pad = 0;

                min_field_width = ((field_precision >= 0) ? field_precision
                                                          : (int) strlen(input_str)) + pad;
            }

            utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                 min_field_width, field_precision, 0, 's');
            snprintf(pdest, size - (pdest-dest), newfmt, input_str);
            pdest += strlen(pdest);
            break;

          case '\0':
            fmt--;              /* format ended early, stay on the null */
            break;

          case 'd': case 'i':
            /* the dynamic '*' arguments have already been folded in */
            if(modifier == 'l'){
                utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                     min_field_width, field_precision, 'l', *fmt);
                long_arg = va_arg(args, long);
                snprintf(pdest, size - (pdest-dest), newfmt, long_arg);
            }
            else{
                utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                     min_field_width, field_precision, 0, *fmt);
                int_arg = va_arg(args, int);
                snprintf(pdest, size - (pdest-dest), newfmt, int_arg);
            }

            pdest += strlen(pdest);
            break;

          case 'o': case 'x': case 'X': case 'u':
            if(modifier == 'l'){
                utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                     min_field_width, field_precision, 'l', *fmt);
                ulong_arg = va_arg(args, unsigned long);
                snprintf(pdest, size - (pdest-dest), newfmt, ulong_arg);
            }
            else{
                utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                     min_field_width, field_precision, 0, *fmt);
                int_arg = va_arg(args, int);
                snprintf(pdest, size - (pdest-dest), newfmt, int_arg);
            }

            pdest += strlen(pdest);
            break;

          case 'c':
            utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                 min_field_width, field_precision, 0, *fmt);
            int_arg = va_arg(args, int);
            snprintf(pdest, size - (pdest-dest), newfmt, int_arg);
            pdest += strlen(pdest);
            break;

          case 's':
            utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                 min_field_width, field_precision, 0, *fmt);
            input_str = va_arg(args, char *);
            snprintf(pdest, size - (pdest-dest), newfmt, input_str);
            pdest += strlen(pdest);
            break;

          case 'f': case 'e': case 'E':
          case 'g': case 'G':
            if(modifier == 'L'){
                utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                     min_field_width, field_precision, 'L', *fmt);
                long_double_arg = va_arg(args, long double);
                snprintf(pdest, size - (pdest-dest), newfmt, long_double_arg);
            }
            else{
                utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                     min_field_width, field_precision, 0, *fmt);
                double_arg = va_arg(args, double);
                snprintf(pdest, size - (pdest-dest), newfmt, double_arg);
            }

            pdest += strlen(pdest);
            break;

          case 'p':
            utf8_snprintf_newfmt(newfmt, sizeof(newfmt), flags,
                                 min_field_width, field_precision, 0, *fmt);
            ptr_arg = va_arg(args, void *);
            snprintf(pdest, size - (pdest-dest), newfmt, ptr_arg);
            pdest += strlen(pdest);
            break;

          case '%':
            if(IS_ROOM_IN_DEST(1))
              *pdest++ = '%';

            break;

          default:
            /* didn't think of this type */
            assert(0);
            break;
        }

        fmt++;
    }

    ret = pdest - dest;

    if(IS_ROOM_IN_DEST(1))
      *pdest++ = '\0';

    va_end(args);

    return ret;
}
+ */ +size_t +utf8_to_width(char *dst, /* destination buffer */ + char *src, /* source string */ + size_t dstlen, /* space in dst */ + unsigned want_width, /* desired screen width */ + unsigned *got_width) /* returned screen width in dst */ +{ + int this_width; + unsigned width_consumed = 0; + UCS ucs; + unsigned long remaining_octets; + char *writeptr, *readptr, *savereadptr, *endptr; + int ran_out_of_space = 0; + + readptr = src; + + remaining_octets = readptr ? strlen(readptr) : 0; + + writeptr = dst; + endptr = writeptr + dstlen; + + if(readptr && writeptr){ + while(width_consumed <= want_width && remaining_octets > 0 && writeptr < dst + dstlen && !ran_out_of_space){ + savereadptr = readptr; + ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets); + + if(ucs & U8G_ERROR || ucs == UBOGON) + remaining_octets = 0; + else{ + this_width = wcellwidth(ucs); + + /* + * If this_width is -1 that means we can't print this character + * with our current locale. Writechar will print a '?'. + */ + if(this_width < 0) + this_width = 1; + + if(width_consumed + (unsigned) this_width <= want_width){ + /* append this utf8 character to dst if it will fit */ + if(writeptr + (readptr - savereadptr) < endptr){ + width_consumed += this_width; + while(savereadptr < readptr) + *writeptr++ = *savereadptr++; + } + else + ran_out_of_space++; /* no more utf8 to dst */ + } + else + remaining_octets = 0; /* we're done */ + } + } + + if(writeptr < endptr) + *writeptr = '\0'; + } + + if(got_width) + *got_width = width_consumed; + + return(writeptr ? (writeptr - dst) : 0); +} + + +/* + * Str is a UTF-8 string. + * Count forward width screencell positions and return a pointer to the + * end of the string that is width wide. + * The returned pointer points at the next character (where the null would + * be placed). + * + * Got_width is another returned value. It is the width in screen cells of + * the string from str to the returned pointer. 
It will be the same as + * want_width if there are enough characters in the str to do that + * and if the character widths hit the width exactly. It will be less + * than want_width if we run out of characters or if the next character + * width would skip over the width we want, because it is double width. + */ +char * +utf8_count_forw_width(char *str, unsigned want_width, unsigned *got_width) +{ + int this_width; + unsigned width_consumed = 0; + UCS ucs; + unsigned long remaining_octets; + char *readptr; + char *retptr; + + retptr = readptr = str; + + remaining_octets = readptr ? strlen(readptr) : 0; + + while(width_consumed <= want_width && remaining_octets > 0){ + + ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets); + + if(ucs & U8G_ERROR || ucs == UBOGON){ + /* + * This should not happen, but do something to handle it anyway. + * Treat each character as a single width character, which is what should + * probably happen when we actually go to write it out. + */ + remaining_octets--; + readptr++; + this_width = 1; + } + else{ + this_width = wcellwidth(ucs); + + /* + * If this_width is -1 that means we can't print this character + * with our current locale. Writechar will print a '?'. + */ + if(this_width < 0) + this_width = 1; + } + + if(width_consumed + (unsigned) this_width <= want_width){ + width_consumed += (unsigned) this_width; + retptr = readptr; + } + else + remaining_octets = 0; /* we're done */ + } + + if(got_width) + *got_width = width_consumed; + + return(retptr); +} + + +/* + * Copy a null terminator into a UTF-8 string in place so that the string is + * no more than a certain screen width wide. If the string is already less + * than or equal in width to the requested width, no change is made. + * + * The actual width accomplished is returned. Note that it may be less than + * max_width due to double width characters as well as due to the fact that + * it fits wholly in the max_width. 
+ * + * Returned value is the actual screen width of str when done. + * + * A side effect is that a terminating null may have been written into + * the passed in string. + */ +unsigned +utf8_truncate(char *str, unsigned max_width) +{ + int this_width; + unsigned width_consumed = 0; + UCS ucs; + unsigned long remaining_octets; + char *readptr, *savereadptr; + + readptr = str; + + remaining_octets = readptr ? strlen(readptr) : 0; + + if(readptr){ + while(width_consumed <= max_width && remaining_octets > 0){ + + savereadptr = readptr; + ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets); + + if(ucs & U8G_ERROR || ucs == UBOGON){ + /* + * This should not happen, but do something to handle it anyway. + * Treat each character as a single width character, which is what should + * probably happen when we actually go to write it out. + */ + remaining_octets--; + readptr++; + this_width = 1; + } + else{ + this_width = wcellwidth(ucs); + + /* + * If this_width is -1 that means we can't print this character + * with our current locale. Writechar will print a '?'. + */ + if(this_width < 0) + this_width = 1; + } + + if(width_consumed + (unsigned) this_width <= max_width){ + width_consumed += (unsigned) this_width; + } + else{ + remaining_octets = 0; /* we're done */ + *savereadptr = '\0'; + } + } + } + + return(width_consumed); +} + + +/* + * Copy UTF-8 characters from src into dst. + * Copy enough characters so that the result will have screen width of + * want_width screen cells in current locale. + * If there aren't enough characters in src to get to want_width, pad on + * left or right according to left_adjust argument. + * + * Dstlen is the available space in dst. No more than dstlen bytes will be written + * to dst. Dst will be null terminated if there is enough room, but not + * if that would overflow dst's len. + * + * Returned value is the number of bytes written to dst, not including + * the possible terminating null. 
/*
 * Copy UTF-8 characters from src into dst so that the result has a
 * screen width of want_width screen cells in the current locale.
 * If there aren't enough characters in src to get to want_width, spaces
 * are added: after the text if left_adjust is set, in front of it
 * otherwise.
 *
 * Dstlen is the available space in dst. No more than dstlen bytes will
 * be written to dst. Dst will be null terminated if there is enough
 * room, but not if that would overflow dst's len.
 *
 * Returned value is the number of bytes written to dst, not including
 * the possible terminating null.
 */
size_t
utf8_pad_to_width(char *dst,            /* destination buffer */
                  char *src,            /* source string */
                  size_t dstlen,        /* space in dst */
                  unsigned want_width,  /* desired screen width */
                  int left_adjust)      /* adjust left or right in want_width columns */
{
    unsigned got_width = 0;
    int      shortfall, npad;
    size_t   room, nbytes;

    nbytes = utf8_to_width(dst, src, dstlen, want_width, &got_width);
    room = dstlen - nbytes;

    /* one space per missing cell, or however many will fit */
    shortfall = want_width - got_width;
    npad = MIN(shortfall, room);

    if(npad > 0){
        if(left_adjust){
            /* text stays put, spaces are appended */
            memset(dst + nbytes, ' ', (size_t) npad);
        }
        else{
            /* slide the text to the right and put the spaces in front */
            memmove(dst + npad, dst, nbytes);
            memset(dst, ' ', (size_t) npad);
        }

        nbytes += npad;
    }

    if(nbytes < dstlen)
      dst[nbytes] = '\0';

    return(nbytes);
}
+ */ +char * +utf8_count_back_width(char *str, char *start_here, unsigned want_width, unsigned *got_width) +{ + unsigned width_consumed = 0; + int this_width; + UCS ucs; + unsigned long remaining_octets; + char *ptr, *savereadptr, *goodreadptr; + + savereadptr = start_here; + goodreadptr = start_here; + + for(ptr = savereadptr - 1; width_consumed < want_width && ptr >= str; ptr = savereadptr - 1){ + + savereadptr = ptr; + remaining_octets = goodreadptr - ptr; + ucs = (UCS) utf8_get((unsigned char **) &ptr, &remaining_octets); + + if(!(ucs & U8G_ERROR || ucs == UBOGON)){ + if(remaining_octets > 0){ + /* + * This means there are some bad octets after this good + * character so things are not going to work out well. + * Bail out. + */ + savereadptr = str; /* we're done */ + } + else{ + this_width = wcellwidth(ucs); + + /* + * If this_width is -1 that means we can't print this character + * with our current locale. Writechar will print a '?'. + */ + if(this_width < 0) + this_width = 1; + + width_consumed += (unsigned) this_width; + goodreadptr = savereadptr; + } + } + } + + if(got_width) + *got_width = width_consumed; + + return(savereadptr); +} + + +/*---------------------------------------------------------------------- + copy the source string onto the destination string returning with + the destination string pointer at the end of the destination text + + motivation for this is to avoid twice passing over a string that's + being appended to twice (i.e., strcpy(t, x); t += strlen(t)) + + This doesn't really belong here but it is used here. + ----*/ +void +sstrncpy(char **d, char *s, int n) +{ + while(n-- > 0 && (**d = *s++) != '\0') + (*d)++; +} + + +/* + * If use_system_routines is set then NULL is the return value and it is + * not an error. Display_charmap and keyboard_charmap should come over as + * malloced strings and will be filled in with the result. + * + * Returns a void pointer to the input_cs CHARSET which is + * passed to mbtow via kbseq(). 
+ * If !use_system_routines && NULL is returned, that is an error and err should + * have a message. + * display_charmap and keyboard_charmap should be malloced data and may be + * realloced and changed here. + */ +int +setup_for_input_output(int use_system_routines, char **display_charmap, + char **keyboard_charmap, void **input_cs_arg, char **err) +{ + const CHARSET *cs; + const CHARSET *input_cs = NULL; + int already_tried = 0; + int supported = 0; + char buf[1000]; + +#define cpstr(s) strcpy((char *)fs_get(1+strlen(s)), s) + + if(err) + *err = NULL; + + if(!display_charmap || !keyboard_charmap || !input_cs_arg){ + *err = cpstr("Bad call to setup_for_input_output"); + return(-1); + } + + if(use_system_routines){ +#if PREREQ_FOR_SYS_TRANSLATION + char *dcm; + + dcm = nl_langinfo_codeset_wrapper(); + dcm = dcm ? dcm : "US-ASCII"; + + init_utf8_display(0, NULL); + if(*display_charmap){ + if(dcm && strucmp(*display_charmap, dcm)){ + snprintf(buf, sizeof(buf), + _("Display character set \"%s\" is ignored when using system translation"), + *display_charmap); + + *err = cpstr(buf); + } + + fs_give((void **) display_charmap); + } + + if(*keyboard_charmap){ + if(!*err && dcm && strucmp(*keyboard_charmap, dcm)){ + snprintf(buf, sizeof(buf), + _("Keyboard character set \"%s\" is ignored when using system translation"), + *keyboard_charmap); + + *err = cpstr(buf); + } + + fs_give((void **) keyboard_charmap); + } + + *display_charmap = cpstr(dcm); + *keyboard_charmap = cpstr(dcm); +#else + *err = cpstr("Bad call to setup_for_input_output"); +#endif + + *input_cs_arg = NULL; + return(0); + } + + +try_again1: + if(!(*display_charmap)) + *display_charmap = cpstr("US-ASCII"); + + if(!(*keyboard_charmap)) + *keyboard_charmap = cpstr(*display_charmap); + + if(*keyboard_charmap){ + supported = input_charset_is_supported(*keyboard_charmap); + + if(supported){ + if(!strucmp(*keyboard_charmap, "utf-8")) + input_cs = utf8_charset(*keyboard_charmap); + else if((cs = 
utf8_charset(*keyboard_charmap)) != NULL) + input_cs = cs; + } + else{ + if(err && !*err){ + int iso2022jp = 0; + + if(!strucmp(*keyboard_charmap, "ISO-2022-JP")) + iso2022jp = 1; + + snprintf(buf, sizeof(buf), + /* TRANSLATORS: The first argument is the name of the character + set the user is trying to use (which is unsupported by alpine). + The second argument is " (except for posting)" if they are + trying to use ISO-2022-JP for something other than posting. */ + _("Character set \"%s\" is unsupported%s, using US-ASCII"), + *keyboard_charmap, + iso2022jp ? _(" (except for posting)") : ""); + + *err = cpstr(buf); + } + + input_cs = NULL; + fs_give((void **) keyboard_charmap); + *keyboard_charmap = cpstr("US-ASCII"); + if(!already_tried){ + already_tried++; + goto try_again1; + } + } + } + + +try_again2: + if(!(*display_charmap)) + *display_charmap = cpstr("US-ASCII"); + + if(*display_charmap){ + supported = output_charset_is_supported(*display_charmap); + if(supported){ + if(!strucmp(*display_charmap, "utf-8")) + init_utf8_display(1, NULL); + else if((cs = utf8_charset(*display_charmap)) != NULL) + init_utf8_display(0, utf8_rmap_gen(cs, NULL)); + } + else{ + if(err && !*err){ + int iso2022jp = 0; + + if(!strucmp(*display_charmap, "ISO-2022-JP")) + iso2022jp = 1; + + snprintf(buf, sizeof(buf), + _("Character set \"%s\" is unsupported%s, using US-ASCII"), + *display_charmap, + iso2022jp ? 
_(" (except for posting)") : ""); + + *err = cpstr(buf); + } + + fs_give((void **) display_charmap); + if(!already_tried){ + already_tried++; + goto try_again2; + } + } + } + else{ + if(err && !*err) + *err = cpstr(_("Help, can't figure out display character set or even use US-ASCII.")); + } + +#undef cpstr + + *input_cs_arg = (void *) input_cs; + + return(0); +} + + +int +input_charset_is_supported(char *input_charset) +{ + const CHARSET *cs; + + if(!(input_charset && *input_charset)) + return 0; + + if(!strucmp(input_charset, "utf-8")) + return 1; + + if((cs = utf8_charset(input_charset)) != NULL){ + + /* + * This was true 2006-09-25. + */ + switch(cs->type){ + case CT_ASCII: case CT_1BYTE0: case CT_1BYTE: + case CT_1BYTE8: case CT_EUC: case CT_DBYTE: + case CT_DBYTE2: case CT_SJIS: case CT_UCS2: + case CT_UCS4: case CT_UTF16: + return 1; + break; + + default: + break; + } + } + + return 0; +} + + +int +output_charset_is_supported(char *output_charset) +{ + const CHARSET *cs; + + if(!(output_charset && *output_charset)) + return 0; + + if(!strucmp(output_charset, "utf-8")) + return 1; + + if((cs = utf8_charset(output_charset)) != NULL && utf8_rmap_gen(cs, NULL)) + return 1; + + return 0; +} + + +int +posting_charset_is_supported(char *posting_charset) +{ + return(posting_charset && *posting_charset + && (!strucmp(posting_charset, "ISO-2022-JP") + || output_charset_is_supported(posting_charset))); +} + + +/* + * This function is only defined in this special case and so calls + * to it should be wrapped in the same macro conditionals. + * + * Returns the default display charset for a UNIX terminal emulator, + * it is what nl_langinfo(CODESET) should return but we need to + * wrap nl_langinfo because we know of strange behaving implementations. 
/*
 * This function is only defined in this special case and so calls
 * to it should be wrapped in the same macro conditionals.
 *
 * Returns the default display charset for a UNIX terminal emulator,
 * it is what nl_langinfo(CODESET) should return but we need to
 * wrap nl_langinfo because we know of strange behaving implementations.
 *
 * The result is either NULL, when no usable charset could be worked
 * out, or a string which must not be freed or modified by the caller.
 * It may point at a static buffer which is overwritten by the next call.
 */
#if !defined(_WINDOWS) && HAVE_LANGINFO_H && defined(CODESET)
char *
nl_langinfo_codeset_wrapper(void)
{
    char *ret = NULL;
    char *p;

    ret = nl_langinfo(CODESET);

    /*
     * If the value returned from nl_langinfo() is not a real charset,
     * see if we can figure out what they meant. If we can't figure it
     * out return NULL and let the caller decide what to do.
     */
    if(ret && *ret && !output_charset_is_supported(ret)){
        if(!strcmp("ANSI_X3.4-1968", ret)
           || !strcmp("646", ret)
           || !strcmp("ASCII", ret)
           || !strcmp("C", ret)
           || !strcmp("POSIX", ret))
          ret = "US-ASCII";
        else if(!strucmp(ret, "UTF8"))
          ret = "UTF-8";
        else if(!strucmp(ret, "EUCJP"))
          ret = "EUC-JP";
        else if(!strucmp(ret, "EUCKR"))
          ret = "EUC-KR";               /* Korean EUC */
        else if(!strucmp(ret, "EUCKP"))
          ret = "EUC-KP";               /* historical spelling, kept as is */
        else if(!strucmp(ret, "SJIS"))
          ret = "SHIFT-JIS";
        else if((p = strstr(ret, "8859")) != NULL){
            /* check for digits after 8859 */
            p += 4;

            /*
             * Step over one separator such as '-' or '_', but never
             * over the end of the string.
             */
            if(*p && !isdigit((unsigned char) *p))
              p++;

            if(isdigit((unsigned char) *p)){
                static char buf[12];

                /* "ISO-8859-" followed by one or two digits */
                memset(buf, 0, sizeof(buf));
                memcpy(buf, "ISO-8859-", 9);
                buf[9] = *p++;
                if(isdigit((unsigned char) *p))
                  buf[10] = *p;

                ret = buf;
            }
        }
    }

    if(ret && !output_charset_is_supported(ret))
      ret = NULL;

    return(ret);
}
#endif
+ */ +char * +utf8_to_charset(char *orig, char *charset, int report_err) +{ + SIZEDTEXT src, dst; + char *ret = orig; + + if(!charset || !charset[0] || !orig || !orig[0] || !strucmp(charset, "utf-8")) + return ret; + + src.size = strlen(orig); + src.data = (unsigned char *) orig; + + if(!strucmp(charset, "us-ascii")){ + size_t i; + + for(i = 0; i < src.size; i++) + if(src.data[i] & 0x80) + return NULL; + + return ret; + } + + /* + * This works for ISO-2022-JP because of special code in utf8_cstext + * but not for other 2022 charsets. + */ + memset(&dst, 0, sizeof(dst)); + if(utf8_cstext(&src, charset, &dst, report_err ? 0 : '?') && dst.size > 0 && dst.data) + ret = (char *) dst.data; /* c-client already null terminates it */ + else + ret = NULL; + + if((unsigned char *) ret != dst.data && dst.data) + fs_give((void **) &dst.data); + + return ret; +} + + +/* + * Turn a number into a string with comma's + * + * Args: number -- The long to be turned into a string. + * + * Result: pointer to static string representing number with commas + * Can use up to 3 comatose results at once. + */ +char * +comatose(long int number) +{ + long i, x, done_one; + static char buf[3][50]; + static int whichbuf = 0; + char *b; + + whichbuf = (whichbuf + 1) % 3; + + if(number == 0){ + strncpy(buf[whichbuf], "0", sizeof(buf[0])); + buf[whichbuf][sizeof(buf[0])-1] = '\0'; + return(buf[whichbuf]); + } + + done_one = 0; + b = buf[whichbuf]; + for(i = 1000000000; i >= 1; i /= 1000) { + x = number / i; + number = number % i; + if(x != 0 || done_one) { + if(b != buf[whichbuf] && (b-buf[whichbuf]) < sizeof(buf[0])) + *b++ = ','; + + snprintf(b, sizeof(buf[0])-(b-buf[whichbuf]), done_one ? 
"%03ld" : "%ld", x); + b += strlen(b); + done_one = 1; + } + } + + if(b-buf[whichbuf] < sizeof(buf[0])) + *b = '\0'; + + return(buf[whichbuf]); +} + + +/* leave out the commas */ +char * +tose(long int number) +{ + static char buf[3][50]; + static int whichbuf = 0; + + whichbuf = (whichbuf + 1) % 3; + + snprintf(buf[whichbuf], sizeof(buf[0]), "%ld", number); + + return(buf[whichbuf]); +} + + +/* + * line_paint - where the real work of managing what is displayed gets done. + */ +void +line_paint(int offset, /* current dot offset into vl */ + struct display_line *displ, + int *passwd) /* flag to hide display of chars */ +{ + int i, w, w2, already_got_one = 0; + int vfirst, vlast, dfirst, dlast, vi, di; + int new_vbase; + unsigned (*width_a_to_b)(UCS *, int, int); + + /* + * Set passwd to 10 in caller if you want to conceal the + * password but not print asterisks for feedback. + * + * Set passwd to 1 in caller to conceal by printing asterisks. + */ + if(passwd && *passwd >= 10){ /* don't show asterisks */ + if(*passwd > 10) + return; + else + *passwd = 11; /* only blat once */ + + i = 0; + (*displ->movecursor)(displ->row, displ->col); + while(i++ <= displ->dwid) + (*displ->writechar)(' '); + + (*displ->movecursor)(displ->row, displ->col); + return; + } + + if(passwd && *passwd) + width_a_to_b = single_width_chars_a_to_b; + else + width_a_to_b = ucs4_str_width_a_to_b; + + /* + * vl is the virtual line (the actual data). We operate on it by typing + * characters to be added and deleting and so forth. In this routine we + * copy a subset of those UCS-4 characters in vl into dl, the display + * array, and show that subset on the screen. + * + * Offset is the location of the cursor in vl. + * + * We will display the string starting from vbase. + * We have dwid screen cells to work in. + * We may have to adjust vbase in order to display the + * part of the string that contains the cursor. 
+ * + * We'll make the display look like + * vl a b c d e f g h i j k l m + * xxxxxxxxxxxxx <- width dwid window + * < d e f g h > + * | + * vbase + * The < will be there if vbase > 0. + * The > will be there if the string from vbase to the + * end can't all fit in the window. + */ + + memset(displ->dl, 0, displ->dlen * sizeof(UCS)); + + /* + * Adjust vbase so offset is not out of the window to the right. + * (The +2 in w + 2 is for a possible " >" if the string goes past + * the right hand edge of the window and if the last visible character + * is double wide. We don't want the offset to be under that > character.) + */ + for(w = (*width_a_to_b)(displ->vl, displ->vbase, offset); + w + 2 + (displ->vbase ? 1 : 0) > displ->dwid; + w = (*width_a_to_b)(displ->vl, displ->vbase, offset)){ + /* + * offset is off the window to the right + * It looks like a b c d e f g h + * | | + * vbase offset + * and offset is either past the right edge, + * or right at the right edge (and maybe under >), + * or one before right at the edge (and maybe on space + * for half a character). + * + * Since the characters may be double width it is slightly + * complicated to figure out how far to increase vbase. + * We're going to scoot over past width w/2 characters and + * then see if that's sufficient. 
+ */ + new_vbase = displ->vbase + 1; + for(w2 = (*width_a_to_b)(displ->vl, displ->vbase+1, new_vbase); + w2 < displ->dwid/2; + w2 = (*width_a_to_b)(displ->vl, displ->vbase+1, new_vbase)) + new_vbase++; + + displ->vbase = new_vbase; + } + + /* adjust so offset is not out of the window to the left */ + while(displ->vbase > 0 && displ->vbase >= offset){ + /* add about dwid/2 more width */ + new_vbase = displ->vbase - 1; + for(w2 = (*width_a_to_b)(displ->vl, new_vbase, displ->vbase); + w2 < (displ->dwid+1)/2 && new_vbase > 0; + w2 = (*width_a_to_b)(displ->vl, new_vbase, displ->vbase)) + new_vbase--; + + /* but don't let it get too small, recheck off right end */ + for(w = (*width_a_to_b)(displ->vl, new_vbase, offset); + w + 2 + (new_vbase ? 1 : 0) > displ->dwid; + w = (*width_a_to_b)(displ->vl, displ->vbase, offset)) + new_vbase++; + + displ->vbase = MAX(new_vbase, 0); + } + + if(displ->vbase == 1 && ((passwd && *passwd) || wcellwidth(displ->vl[0]) == 1)) + displ->vbase = 0; + + vfirst = displ->vbase; + dfirst = 0; + if(displ->vbase > 0){ /* off screen cue left */ + dfirst = 1; /* index which matches vfirst */ + displ->dl[0] = '<'; + } + + vlast = displ->vused-1; /* end */ + w = (*width_a_to_b)(displ->vl, vfirst, vlast); + + if(w + dfirst > displ->dwid){ /* off window right */ + + /* find last ucs character to be printed */ + while(w + dfirst > displ->dwid - 1) /* -1 for > */ + w = (*width_a_to_b)(displ->vl, vfirst, --vlast); + + /* worry about double-width characters */ + if(w + dfirst == displ->dwid - 1){ /* no prob, hit it exactly */ + dlast = dfirst + vlast - vfirst + 1; /* +1 for > */ + displ->dl[dlast] = '>'; + } + else{ + dlast = dfirst + vlast - vfirst + 1; + displ->dl[dlast++] = ' '; + displ->dl[dlast] = '>'; + } + } + else + dlast = dfirst + vlast - vfirst; + + /* + * Copy the relevant part of the virtual line into the display line. 
+ */ + for(vi = vfirst, di = dfirst; vi <= vlast; vi++, di++) + if(passwd && *passwd) + displ->dl[di] = '*'; /* to conceal password */ + else + displ->dl[di] = displ->vl[vi]; + + /* + * Add spaces to clear the rest of the line. + * We have dwid total space to fill. + */ + w = (*width_a_to_b)(displ->dl, 0, dlast); /* width through dlast */ + for(di = dlast+1, i = displ->dwid - w; i > 0 ; i--) + displ->dl[di++] = ' '; + + /* + * Draw from left to right, skipping until we get to + * something that is different. Characters may be different + * widths than they were initially so paint from there the + * rest of the way. + */ + for(di = 0; displ->dl[di]; di++){ + if(already_got_one || displ->dl[di] != displ->olddl[di]){ + /* move cursor first time */ + if(!already_got_one++){ + w = (di > 0) ? (*width_a_to_b)(displ->dl, 0, di-1) : 0; + (*displ->movecursor)(displ->row, displ->col + w); + } + + (*displ->writechar)(displ->dl[di]); + displ->olddl[di] = displ->dl[di]; + } + } + + memset(&displ->olddl[di], 0, (displ->dlen - di) * sizeof(UCS)); + + /* + * Move the cursor to the offset. + * + * The offset is relative to the start of the virtual array. We need + * to find the location on the screen. The offset into the display array + * will be offset-vbase+dfirst. We want to be at the start of that + * character, so we need to find the width of all the characters up + * to that point. + */ + w = (offset > 0) ? (*width_a_to_b)(displ->dl, 0, offset-displ->vbase+dfirst-1) : 0; + + (*displ->movecursor)(displ->row, displ->col + w); +} + + +/* + * This is just like ucs4_str_width_a_to_b() except all of the characters + * are assumed to be of width 1. This is for printing out *'s when user + * enters a password, while still managing to use the same code to do the + * display. 
+ */ +unsigned +single_width_chars_a_to_b(UCS *ucsstr, int a, int b) +{ + unsigned width = 0; + int i; + + if(ucsstr) + for(i = a; i <= b && ucsstr[i]; i++) + width++; + + return width; +} diff --git a/pith/charconv/utf8.h b/pith/charconv/utf8.h new file mode 100644 index 00000000..d22a8a7c --- /dev/null +++ b/pith/charconv/utf8.h @@ -0,0 +1,106 @@ +/*----------------------------------------------------------------------- + $Id: utf8.h 1025 2008-04-08 22:59:38Z hubert@u.washington.edu $ + -----------------------------------------------------------------------*/ + +/* + * ======================================================================== + * Copyright 2006-2008 University of Washington + * Copyright 2013 Eduardo Chappa + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * ======================================================================== + */ + +#ifndef PITH_CHARCONV_UTF8_INCLUDED +#define PITH_CHARCONV_UTF8_INCLUDED + + +#include <general.h> +#include "../filttype.h" + + +/* flags for convert_to_utf8 */ +#define CU8_NONE 0x00 +#define CU8_NOINFER 0x01 /* Not ok to infer charset */ + + +/* + * The data in vl and dl is UCS-4 characters. + * They are arrays of size vlen and dlen of unsigned longs. 
+ */
+struct display_line {
+    int   row, col;     /* where display starts                 */
+    UCS  *vl;           /* virtual line, the actual data string */
+    int   vlen;         /* size of vl array                     */
+    int   vused;        /* elements of vl in use                */
+    int   vbase;        /* index into array, first virtual char on display */
+    UCS  *dl;           /* visible part of virtual line on display */
+    UCS  *olddl;        /* dl as last painted; line_paint diffs dl against it, zeroes it out to dlen */
+    int   dlen;         /* size of dl array                     */
+    int   dwid;         /* screenwidth avail for dl             */
+    void (*movecursor)(int, int);       /* line_paint calls this with (row, column) */
+    void (*writechar)(UCS);             /* line_paint calls this for each dl element it redraws */
+};
+
+
+/*
+ * Exported Prototypes
+ */
+void           init_utf8_display(int, void *);
+int            wcellwidth(UCS);
+int            wtomb(char *, UCS);
+UCS            mbtow(void *, unsigned char **, unsigned long *);
+void           set_locale_charmap(char *);
+char          *convert_to_utf8(char *, char *, int);
+char          *convert_to_locale(char *);
+int            utf8_to_locale(int c, CBUF_S *cb, unsigned char obuf[], size_t obuf_size);
+unsigned       ucs4_str_width(UCS *);
+unsigned       ucs4_str_width_a_to_b(UCS *, int, int);
+unsigned       ucs4_str_width_ptr_to_ptr(UCS *, UCS *);
+UCS           *ucs4_particular_width(UCS*, int);
+UCS           *utf8_to_ucs4_cpystr(char *);
+char          *ucs4_to_utf8_cpystr(UCS *);
+char          *ucs4_to_utf8_cpystr_n(UCS *, int);
+#ifdef _WINDOWS
+LPTSTR         utf8_to_lptstr(LPSTR);
+LPSTR          lptstr_to_utf8(LPTSTR);
+LPTSTR         ucs4_to_lptstr(UCS *);
+UCS           *lptstr_to_ucs4(LPTSTR);
+#endif /* _WINDOWS */
+int            utf8_to_ucs4_oneatatime(int, CBUF_S *, UCS *, int *);
+size_t         ucs4_strlen(UCS *s);
+int            ucs4_strcmp(UCS *s1, UCS *s2);
+UCS           *ucs4_cpystr(UCS *s);
+UCS           *ucs4_strncpy(UCS *ucs4dst, UCS *ucs4src, size_t n);
+UCS           *ucs4_strncat(UCS *ucs4dst, UCS *ucs4src, size_t n);
+UCS           *ucs4_strchr(UCS *s, UCS c);
+UCS           *ucs4_strrchr(UCS *s, UCS c);
+unsigned       utf8_width(char *);
+size_t         utf8_to_width_rhs(char *, char *, size_t, unsigned);
+int            utf8_snprintf(char *, size_t, char *, ...);
+size_t         utf8_to_width(char *, char *, size_t, unsigned, unsigned *);
+size_t         utf8_pad_to_width(char *, char *, size_t, unsigned, int);
+unsigned       utf8_truncate(char *, unsigned);
+char          *utf8_count_back_width(char *, char *, unsigned, unsigned *);
+char          *utf8_count_forw_width(char *, unsigned, unsigned *);
+void           sstrncpy(char **, char *, int);
+int            setup_for_input_output(int, char **, char **, void **, char **);
+int            input_charset_is_supported(char *);
+int            output_charset_is_supported(char *);
+int            posting_charset_is_supported(char *);
+char          *utf8_to_charset(char *, char *, int);
+char          *comatose(long);
+char          *tose(long);
+void           line_paint(int, struct display_line *, int *);  /* repaints the line, then moves the cursor; args presumably (offset into vl, line, password flag) -- confirm against utf8.c */
+
+#if !defined(_WINDOWS) && HAVE_LANGINFO_H && defined(CODESET)
+char          *nl_langinfo_codeset_wrapper(void);
+#endif
+
+
+#endif /* PITH_CHARCONV_UTF8_INCLUDED */ |