Alexander Scott-Johns : notepad: Improve encoding detection when opening files.

Alexandre Julliard julliard at winehq.org
Thu Jul 2 08:25:09 CDT 2009


Module: wine
Branch: master
Commit: 8b6b7b2c39d77f7cd29657ecc3956955e5aa75c2
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=8b6b7b2c39d77f7cd29657ecc3956955e5aa75c2

Author: Alexander Scott-Johns <alexander.scott.johns at googlemail.com>
Date:   Mon Jun 29 22:24:59 2009 +0100

notepad: Improve encoding detection when opening files.

---

 programs/notepad/dialog.c |   97 +++++++++++++++++++++++++++++++++++++++++---
 programs/notepad/main.h   |    8 ++++
 2 files changed, 98 insertions(+), 7 deletions(-)

diff --git a/programs/notepad/dialog.c b/programs/notepad/dialog.c
index 026fe25..7f2fade 100644
--- a/programs/notepad/dialog.c
+++ b/programs/notepad/dialog.c
@@ -26,6 +26,7 @@
 #include <windows.h>
 #include <commdlg.h>
 #include <shlwapi.h>
+#include <winternl.h>
 
 #include "main.h"
 #include "dialog.h"
@@ -37,6 +38,13 @@ static const WCHAR helpfileW[] = { 'n','o','t','e','p','a','d','.','h','l','p',0
 
 static INT_PTR WINAPI DIALOG_PAGESETUP_DlgProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam);
 
+/* Swap the two bytes of each of the first num WCHARs of str, in place (big-endian <-> little-endian). */
+static inline void byteswap_wide_string(LPWSTR str, UINT num)
+{
+    UINT i;  /* index in WCHAR units, not bytes */
+    for (i = 0; i < num; i++) str[i] = RtlUshortByteSwap(str[i]);
+}
+
 VOID ShowLastError(void)
 {
     DWORD error = GetLastError();
@@ -195,6 +203,43 @@ BOOL DoCloseFile(void)
     return(TRUE);
 }
 
+static inline ENCODING detect_encoding_of_buffer(const void* buffer, int size)  /* size is compared against byte counts */
+{   /* Classify buffer as UTF-8, UTF-16LE, UTF-16BE or ANSI from its leading signature. */
+    static const char bom_utf8[] = { 0xef, 0xbb, 0xbf };  /* the three-byte UTF-8 signature */
+    if (size >= sizeof(bom_utf8) && !memcmp(buffer, bom_utf8, sizeof(bom_utf8)))
+        return ENCODING_UTF8;  /* buffer begins with EF BB BF */
+    else
+    {
+        int flags = IS_TEXT_UNICODE_SIGNATURE |           /* passed by address to IsTextUnicode, then tested below */
+                    IS_TEXT_UNICODE_REVERSE_SIGNATURE |
+                    IS_TEXT_UNICODE_ODD_LENGTH;           /* NOTE(review): requested here but never examined below */
+        IsTextUnicode(buffer, size, &flags);              /* return value is ignored; only flags is inspected */
+        if (flags & IS_TEXT_UNICODE_SIGNATURE)
+            return ENCODING_UTF16LE;
+        else if (flags & IS_TEXT_UNICODE_REVERSE_SIGNATURE)
+            return ENCODING_UTF16BE;
+        else
+            return ENCODING_ANSI;  /* fallback when neither signature bit is set */
+    }
+}
+
+/* Like SetWindowTextA, but the input is CP_UTF8 encoded instead of CP_ACP.
+ * lpTextInUtf8 must be NUL-terminated and must not include the BOM.
+ * Returns FALSE if sizing, allocation or conversion fails (so an unterminated
+ * buffer is never handed to SetWindowTextW), else TRUE/FALSE as SetWindowTextW.
+ */
+static BOOL SetWindowTextUtf8(HWND hwnd, LPCSTR lpTextInUtf8)
+{
+    BOOL ret;
+    LPWSTR textW;
+    int lenW = MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, NULL, 0);
+    if (!lenW || !(textW = HeapAlloc(GetProcessHeap(), 0, lenW * sizeof(WCHAR))))
+        return FALSE;
+    ret = MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, textW, lenW) &&
+          SetWindowTextW(hwnd, textW);
+    HeapFree(GetProcessHeap(), 0, textW);
+    return ret;
+}
 
 void DoOpenFile(LPCWSTR szFileName)
 {
@@ -203,6 +248,8 @@ void DoOpenFile(LPCWSTR szFileName)
     LPSTR pTemp;
     DWORD size;
     DWORD dwNumRead;
+    ENCODING enc;
+    BOOL succeeded;
     WCHAR log[5];
 
     /* Close any files and prompt to save changes */
@@ -224,9 +271,9 @@ void DoOpenFile(LPCWSTR szFileName)
 	ShowLastError();
 	return;
     }
-    size++;
 
-    pTemp = HeapAlloc(GetProcessHeap(), 0, size);
+    /* Extra memory for (WCHAR)'\0'-termination. */
+    pTemp = HeapAlloc(GetProcessHeap(), 0, size+2);
     if (!pTemp)
     {
 	CloseHandle(hFile);
@@ -243,12 +290,48 @@ void DoOpenFile(LPCWSTR szFileName)
     }
 
     CloseHandle(hFile);
-    pTemp[dwNumRead] = 0;
 
-    if((size -1) >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
-	SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
-    else
-	SetWindowTextA(Globals.hEdit, pTemp);
+    size = dwNumRead;
+    pTemp[size] = 0;    /* make sure it's  (char)'\0'-terminated */
+    pTemp[size+1] = 0;  /* make sure it's (WCHAR)'\0'-terminated */
+
+    enc = detect_encoding_of_buffer(pTemp, size);
+
+    /* SetWindowTextUtf8 and SetWindowTextA try to allocate memory, so we
+     * check if they succeed.
+     */
+    switch (enc)
+    {
+    case ENCODING_UTF16BE:
+        byteswap_wide_string((WCHAR*) pTemp, size/sizeof(WCHAR));
+        /* fall through */
+
+    case ENCODING_UTF16LE:
+        if (size >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
+            succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
+        else
+            succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp);
+        break;
+
+    case ENCODING_UTF8:
+        if (size >= 3 && (BYTE)pTemp[0] == 0xef && (BYTE)pTemp[1] == 0xbb &&
+                                                   (BYTE)pTemp[2] == 0xbf)
+            succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp+3);
+        else
+            succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp);
+        break;
+
+    default:
+        succeeded = SetWindowTextA(Globals.hEdit, pTemp);
+        break;
+    }
+
+    if (!succeeded)
+    {
+        ShowLastError();
+        HeapFree(GetProcessHeap(), 0, pTemp);
+        return;
+    }
 
     HeapFree(GetProcessHeap(), 0, pTemp);
 
diff --git a/programs/notepad/main.h b/programs/notepad/main.h
index f81c437..bb9b7cc 100644
--- a/programs/notepad/main.h
+++ b/programs/notepad/main.h
@@ -25,6 +25,14 @@
 
 #define MAX_STRING_LEN      255
 
+typedef enum  /* Text encodings notepad distinguishes when opening a file. */
+{
+    ENCODING_ANSI,     /* no signature recognized; DoOpenFile hands the text to SetWindowTextA */
+    ENCODING_UTF16LE,  /* IsTextUnicode reported IS_TEXT_UNICODE_SIGNATURE */
+    ENCODING_UTF16BE,  /* reverse signature; DoOpenFile byte-swaps the buffer before display */
+    ENCODING_UTF8      /* buffer begins with the bytes EF BB BF */
+} ENCODING;
+
 typedef struct
 {
   HANDLE   hInstance;




More information about the wine-cvs mailing list