[2/5] notepad: Improve encoding detection when opening files. (try 2)
Alexander Scott-Johns
alexander.scott.johns at googlemail.com
Wed Jul 1 13:55:28 CDT 2009
(Based on patch [1/4] of previous attempt.)
Try 2: Simplified the patch slightly by removing the enc argument from
DoOpenFile; that argument is added back in patch [4/5].
---
programs/notepad/dialog.c | 98 +++++++++++++++++++++++++++++++++++++++++----
programs/notepad/main.h | 8 ++++
2 files changed, 98 insertions(+), 8 deletions(-)
-------------- next part --------------
From 6198ce38a92cf9b75d19133642b7d62c11e3a911 Mon Sep 17 00:00:00 2001
From: Alexander Scott-Johns <alexander.scott.johns at googlemail.com>
Date: Mon, 29 Jun 2009 22:24:59 +0100
Subject: notepad: Improve encoding detection when opening files.
---
programs/notepad/dialog.c | 98 +++++++++++++++++++++++++++++++++++++++++----
programs/notepad/main.h | 8 ++++
2 files changed, 98 insertions(+), 8 deletions(-)
diff --git a/programs/notepad/dialog.c b/programs/notepad/dialog.c
index 026fe25..59fc61b 100644
--- a/programs/notepad/dialog.c
+++ b/programs/notepad/dialog.c
@@ -37,6 +37,14 @@ static const WCHAR helpfileW[] = { 'n','o','t','e','p','a','d','.','h','l','p',0
static INT_PTR WINAPI DIALOG_PAGESETUP_DlgProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam);
+/* Reverse the byte order of each of the first num WCHARs of str, in place.
+ * Converts a UTF-16 buffer between big-endian and little-endian.
+ */
+static inline void byteswap_wide_string(LPWSTR str, UINT num)
+{
+    LPWSTR end = str + num;
+    while (str < end)
+    {
+        WORD w = (WORD) *str;
+        /* low byte moves up, high byte moves down; the cast drops overflow bits */
+        *str++ = (WCHAR) (WORD) ((w << 8) | (w >> 8));
+    }
+}
+
VOID ShowLastError(void)
{
DWORD error = GetLastError();
@@ -195,6 +203,43 @@ BOOL DoCloseFile(void)
return(TRUE);
}
+/* Guess the encoding of a raw text buffer from its leading byte-order mark.
+ *
+ * buffer: start of the data; size: number of valid bytes in buffer.
+ *
+ * Returns ENCODING_UTF8 for an EF BB BF prefix, ENCODING_UTF16LE or
+ * ENCODING_UTF16BE when IsTextUnicode reports the (reverse) signature, and
+ * ENCODING_ANSI otherwise, including for buffers too short to hold a BOM.
+ */
+static inline ENCODING detect_encoding_of_buffer(const void* buffer, int size)
+{
+    /* BYTE, not char: these values do not fit in a signed char. */
+    static const BYTE bom_utf8[] = { 0xef, 0xbb, 0xbf };
+    /* In/out for IsTextUnicode: tests to run on input, tests passed on output. */
+    int flags = IS_TEXT_UNICODE_SIGNATURE |
+                IS_TEXT_UNICODE_REVERSE_SIGNATURE |
+                IS_TEXT_UNICODE_ODD_LENGTH;
+
+    /* Nothing shorter than one WCHAR can carry a BOM. Handling it here also
+     * rejects negative sizes and avoids relying on IsTextUnicode resetting
+     * flags for tiny buffers.
+     */
+    if (size < (int)sizeof(WCHAR))
+        return ENCODING_ANSI;
+
+    /* Compare as int so that size is never promoted to an unsigned type. */
+    if (size >= (int)sizeof(bom_utf8) && !memcmp(buffer, bom_utf8, sizeof(bom_utf8)))
+        return ENCODING_UTF8;
+
+    IsTextUnicode(buffer, size, &flags);
+    if (flags & IS_TEXT_UNICODE_SIGNATURE)
+        return ENCODING_UTF16LE;
+    if (flags & IS_TEXT_UNICODE_REVERSE_SIGNATURE)
+        return ENCODING_UTF16BE;
+    return ENCODING_ANSI;
+}
+
+/* Similar to SetWindowTextA, but uses a CP_UTF8 encoded input, not CP_ACP.
+ * lpTextInUtf8 should be NUL-terminated and not include the BOM.
+ *
+ * The text is converted to UTF-16 in a temporary heap buffer, which is
+ * released before returning.
+ *
+ * Returns FALSE on failure (conversion error, out of memory, or
+ * SetWindowTextW failing), TRUE on success, like SetWindowTextA/W.
+ */
+static BOOL SetWindowTextUtf8(HWND hwnd, LPCSTR lpTextInUtf8)
+{
+    BOOL ret;
+    LPWSTR textW;
+    int lenW = MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, NULL, 0);
+
+    /* With an input length of -1 the count includes the terminator, so a
+     * result of 0 can only mean the conversion failed.
+     */
+    if (lenW <= 0)
+        return FALSE;
+
+    textW = HeapAlloc(GetProcessHeap(), 0, lenW * sizeof(WCHAR));
+    if (!textW)
+        return FALSE;
+
+    /* Only hand the buffer to the edit control if it was actually filled. */
+    if (MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, textW, lenW))
+        ret = SetWindowTextW(hwnd, textW);
+    else
+        ret = FALSE;
+
+    HeapFree(GetProcessHeap(), 0, textW);
+    return ret;
+}
void DoOpenFile(LPCWSTR szFileName)
{
@@ -203,6 +248,8 @@ void DoOpenFile(LPCWSTR szFileName)
LPSTR pTemp;
DWORD size;
DWORD dwNumRead;
+ ENCODING enc;
+ BOOL succeeded;
WCHAR log[5];
/* Close any files and prompt to save changes */
@@ -224,9 +271,9 @@ void DoOpenFile(LPCWSTR szFileName)
ShowLastError();
return;
}
- size++;
- pTemp = HeapAlloc(GetProcessHeap(), 0, size);
+ /* Extra memory for (WCHAR)'\0'-termination. */
+ pTemp = HeapAlloc(GetProcessHeap(), 0, size+2);
if (!pTemp)
{
CloseHandle(hFile);
@@ -234,7 +281,7 @@ void DoOpenFile(LPCWSTR szFileName)
return;
}
- if (!ReadFile(hFile, pTemp, size, &dwNumRead, NULL))
+ if (!ReadFile(hFile, pTemp, size+2, &dwNumRead, NULL))
{
CloseHandle(hFile);
HeapFree(GetProcessHeap(), 0, pTemp);
@@ -243,12 +290,47 @@ void DoOpenFile(LPCWSTR szFileName)
}
CloseHandle(hFile);
- pTemp[dwNumRead] = 0;
- if((size -1) >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
- SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
- else
- SetWindowTextA(Globals.hEdit, pTemp);
+ pTemp[size] = 0; /* make sure it's (char)'\0'-terminated */
+ pTemp[size+1] = 0; /* make sure it's (WCHAR)'\0'-terminated */
+
+ enc = detect_encoding_of_buffer(pTemp, size);
+
+ /* SetWindowTextUtf8 and SetWindowTextA try to allocate memory, so we
+ * check if they succeed.
+ */
+ switch (enc)
+ {
+ case ENCODING_UTF16BE:
+ byteswap_wide_string((WCHAR*) pTemp, size/sizeof(WCHAR));
+ /* fall through */
+
+ case ENCODING_UTF16LE:
+ if (size >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
+ succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)(pTemp+2));
+ else
+ succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp);
+ break;
+
+ case ENCODING_UTF8:
+ if (size >= 3 && (BYTE)pTemp[0] == 0xef && (BYTE)pTemp[1] == 0xbb &&
+ (BYTE)pTemp[2] == 0xbf)
+ succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp+3);
+ else
+ succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp);
+ break;
+
+ default:
+ succeeded = SetWindowTextA(Globals.hEdit, pTemp);
+ break;
+ }
+
+ if (!succeeded)
+ {
+ ShowLastError();
+ HeapFree(GetProcessHeap(), 0, pTemp);
+ return;
+ }
HeapFree(GetProcessHeap(), 0, pTemp);
diff --git a/programs/notepad/main.h b/programs/notepad/main.h
index f81c437..bb9b7cc 100644
--- a/programs/notepad/main.h
+++ b/programs/notepad/main.h
@@ -25,6 +25,14 @@
#define MAX_STRING_LEN 255
+/* Text encodings that notepad tells apart when loading a file. */
+typedef enum
+{
+ ENCODING_ANSI, /* no byte-order mark recognised; text is passed to SetWindowTextA */
+ ENCODING_UTF16LE, /* UTF-16 little-endian (FF FE byte-order mark) */
+ ENCODING_UTF16BE, /* UTF-16 big-endian; the buffer is byte-swapped before display */
+ ENCODING_UTF8 /* UTF-8 (EF BB BF byte-order mark) */
+} ENCODING;
+
typedef struct
{
HANDLE hInstance;
--
1.5.6.3
More information about the wine-patches
mailing list