regedit: Allow importing UCS-2 and UTF-8 files created by Windows'
regedit v5
Paul Chitescu
paulc at voip.null.ro
Thu Oct 12 09:30:31 CDT 2006
Changelog: regedit: Allow importing UCS-2 and UTF-8 files created by Windows'
regedit v5.
The code tries to detect the encoding from the first bytes read. UTF-8 is left
unmodified (apart from skipping the leading magic bytes), while UCS-2 is converted by keeping only the low byte of each character.
-------------- next part --------------
--- ./programs/regedit/regproc.c.orig 2006-08-01 19:41:14.000000000 +0300
+++ ./programs/regedit/regproc.c 2006-10-12 17:21:23.000000000 +0300
@@ -881,6 +881,8 @@
{
LPSTR line = NULL; /* line read from input stream */
ULONG lineSize = REG_VAL_BUF_SIZE;
+ int encoding = -1; /* guessed file encoding */
+ int offset = 0; /* offset for encoding magic */
line = HeapAlloc(GetProcessHeap(), 0, lineSize);
CHECK_ENOUGH_MEMORY(line);
@@ -896,7 +898,7 @@
/* Do we need to expand the buffer ? */
assert (s >= line && s <= line + lineSize);
size_remaining = lineSize - (s-line);
- if (size_remaining < 2) /* room for 1 character and the \0 */
+ if (size_remaining < 2*((encoding > 0) ? encoding : 1)) /* room for 1 character and the \0 */
{
char *new_buffer;
size_t new_size = lineSize + REG_VAL_BUF_SIZE;
@@ -915,6 +917,8 @@
* eof, error, eol or getting the maximum amount. Abort on error.
*/
size_to_get = (size_remaining > INT_MAX ? INT_MAX : size_remaining);
+ if ((encoding < 0) || (encoding > 1))
+ memset(s, 0, size_to_get);
if (NULL == fgets (s, size_to_get, in)) {
if (ferror(in)) {
perror ("While reading input");
@@ -929,6 +933,49 @@
}
}
+ /* Attempt to guess the encoding from the first several bytes */
+ if (encoding < 0) {
+ if (line [0] == '\377' && line [1] == '\376') {
+ encoding = 2; /* FF FE => UCS-2 little endian */
+ offset = 2;
+ }
+ else if (line [0] == '\357' && line [1] == '\277' && line [2] == '\275') {
+ encoding = 1; /* EF BF BD => UTF-8 */
+ offset = 3;
+ if (line [3] == '\357' && line [4] == '\277' && line [5] == '\275')
+ offset += 3;
+ }
+ else
+ encoding = 0; /* default => ASCII (hope so...) */
+ if (encoding) {
+ fprintf(stderr,"%s: WARNING - %s encoding (detected %d magic bytes).\n",
+ getAppName(),((encoding == 1) ? "UTF-8" : "UCS-2"),offset);
+ }
+ }
+
+ /* Extremely crude routine to convert to 8-bit */
+ if (encoding) {
+ int i = 0;
+ char c;
+ if (encoding > 1) {
+ /* FIXME: this is ugly - we should read binary */
+ if (! (offset || *s))
+ offset = encoding-1;
+ /* FIXME: we should convert properly */
+ while ((c = s [(encoding*i)+offset]) != 0)
+ s [i++] = c;
+ if (s [(encoding*i)+offset+1] == 0x0a)
+ s [i++] = '\n';
+ }
+ else if (offset) {
+ /* just skip over the magic */
+ while ((c = s [i+offset]) != 0)
+ s [i++] = c;
+ }
+ s [i] = '\0';
+ offset = 0;
+ }
+
/* If we didn't read the eol nor the eof go around for the rest */
s_eol = strchr (s, '\n');
if (!feof (in) && !s_eol) {
@@ -958,8 +1005,8 @@
/* The following error protection could be made more self-
* correcting but I thought it not worth trying.
*/
- if ((c = fgetc (in)) == EOF || c != ' ' ||
- (c = fgetc (in)) == EOF || c != ' ')
+ if ((c = fgetc (in)) == EOF || (c == '\0' ? (c = fgetc(in)) : c) != ' ' ||
+ (c = fgetc (in)) == EOF || (c == '\0' ? (c = fgetc(in)) : c) != ' ')
fprintf(stderr,"%s: ERROR - invalid continuation.\n",
getAppName());
continue;
More information about the wine-patches
mailing list