Alexander Scott-Johns : notepad: Improve encoding detection when opening files.
Alexandre Julliard
julliard at winehq.org
Thu Jul 2 08:25:09 CDT 2009
Module: wine
Branch: master
Commit: 8b6b7b2c39d77f7cd29657ecc3956955e5aa75c2
URL: http://source.winehq.org/git/wine.git/?a=commit;h=8b6b7b2c39d77f7cd29657ecc3956955e5aa75c2
Author: Alexander Scott-Johns <alexander.scott.johns at googlemail.com>
Date: Mon Jun 29 22:24:59 2009 +0100
notepad: Improve encoding detection when opening files.
---
programs/notepad/dialog.c | 97 +++++++++++++++++++++++++++++++++++++++++---
programs/notepad/main.h | 8 ++++
2 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/programs/notepad/dialog.c b/programs/notepad/dialog.c
index 026fe25..7f2fade 100644
--- a/programs/notepad/dialog.c
+++ b/programs/notepad/dialog.c
@@ -26,6 +26,7 @@
#include <windows.h>
#include <commdlg.h>
#include <shlwapi.h>
+#include <winternl.h>
#include "main.h"
#include "dialog.h"
@@ -37,6 +38,13 @@ static const WCHAR helpfileW[] = { 'n','o','t','e','p','a','d','.','h','l','p',0
static INT_PTR WINAPI DIALOG_PAGESETUP_DlgProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam);
+/* Convert a WCHAR buffer between big-endian and little-endian in place.
+ *
+ * str is the buffer to convert and num is the number of WCHAR elements in
+ * it; each element is replaced by the result of RtlUshortByteSwap.
+ */
+static inline void byteswap_wide_string(LPWSTR str, UINT num)
+{
+    LPWSTR cur = str;
+    UINT remaining;
+
+    for (remaining = num; remaining != 0; remaining--, cur++)
+        *cur = RtlUshortByteSwap(*cur);
+}
+
VOID ShowLastError(void)
{
DWORD error = GetLastError();
@@ -195,6 +203,43 @@ BOOL DoCloseFile(void)
return(TRUE);
}
+/* Guess the text encoding of a buffer from its leading bytes.
+ *
+ * buffer is the start of the data and size is its length in bytes.
+ *
+ * Returns ENCODING_UTF8 when the buffer starts with the UTF-8 byte order
+ * mark. Otherwise IsTextUnicode is consulted: ENCODING_UTF16LE is returned
+ * when it reports IS_TEXT_UNICODE_SIGNATURE, ENCODING_UTF16BE when it
+ * reports IS_TEXT_UNICODE_REVERSE_SIGNATURE, and ENCODING_ANSI in every
+ * other case.
+ */
+static inline ENCODING detect_encoding_of_buffer(const void* buffer, int size)
+{
+    /* unsigned char: all three BOM bytes are above 0x7f and would not fit
+     * in a plain char on platforms where that type is signed. */
+    static const unsigned char bom_utf8[] = { 0xef, 0xbb, 0xbf };
+    /* On input, the tests IsTextUnicode is asked to perform; the results
+     * are read back from the same variable after the call. */
+    int flags = IS_TEXT_UNICODE_SIGNATURE |
+                IS_TEXT_UNICODE_REVERSE_SIGNATURE |
+                IS_TEXT_UNICODE_ODD_LENGTH;
+
+    /* Compare as int: comparing size against the unsigned sizeof result
+     * directly would convert a negative size into a huge value and let
+     * memcmp read the buffer anyway. */
+    if (size >= (int)sizeof(bom_utf8) && !memcmp(buffer, bom_utf8, sizeof(bom_utf8)))
+        return ENCODING_UTF8;
+
+    IsTextUnicode(buffer, size, &flags);
+    if (flags & IS_TEXT_UNICODE_SIGNATURE)
+        return ENCODING_UTF16LE;
+    if (flags & IS_TEXT_UNICODE_REVERSE_SIGNATURE)
+        return ENCODING_UTF16BE;
+    return ENCODING_ANSI;
+}
+
+/* Similar to SetWindowTextA, but uses a CP_UTF8 encoded input, not CP_ACP.
+ * lpTextInUtf8 should be NUL-terminated and not include the BOM.
+ *
+ * Returns FALSE on failure, TRUE on success, like SetWindowTextA/W.
+ * A failed conversion to wide characters or a failed allocation of the
+ * temporary buffer is reported as failure as well.
+ */
+static BOOL SetWindowTextUtf8(HWND hwnd, LPCSTR lpTextInUtf8)
+{
+    BOOL ret = FALSE;
+    LPWSTR textW;
+    int lenW;
+
+    /* First pass: ask for the required length in WCHARs. Because the input
+     * length is -1, the count includes the terminating NUL, so a result of
+     * 0 can only mean the conversion failed. Stop here in that case rather
+     * than hand an empty, unterminated buffer to SetWindowTextW. */
+    lenW = MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, NULL, 0);
+    if (lenW <= 0)
+        return FALSE;
+
+    textW = HeapAlloc(GetProcessHeap(), 0, lenW * sizeof(WCHAR));
+    if (!textW)
+        return FALSE;
+
+    /* Second pass: do the conversion; only set the text if it worked. */
+    if (MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, textW, lenW))
+        ret = SetWindowTextW(hwnd, textW);
+
+    HeapFree(GetProcessHeap(), 0, textW);
+    return ret;
+}
void DoOpenFile(LPCWSTR szFileName)
{
@@ -203,6 +248,8 @@ void DoOpenFile(LPCWSTR szFileName)
LPSTR pTemp;
DWORD size;
DWORD dwNumRead;
+ ENCODING enc;
+ BOOL succeeded;
WCHAR log[5];
/* Close any files and prompt to save changes */
@@ -224,9 +271,9 @@ void DoOpenFile(LPCWSTR szFileName)
ShowLastError();
return;
}
- size++;
- pTemp = HeapAlloc(GetProcessHeap(), 0, size);
+ /* Extra memory for (WCHAR)'\0'-termination. */
+ pTemp = HeapAlloc(GetProcessHeap(), 0, size+2);
if (!pTemp)
{
CloseHandle(hFile);
@@ -243,12 +290,48 @@ void DoOpenFile(LPCWSTR szFileName)
}
CloseHandle(hFile);
- pTemp[dwNumRead] = 0;
- if((size -1) >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
- SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
- else
- SetWindowTextA(Globals.hEdit, pTemp);
+ size = dwNumRead;
+ pTemp[size] = 0; /* make sure it's (char)'\0'-terminated */
+ pTemp[size+1] = 0; /* make sure it's (WCHAR)'\0'-terminated */
+
+ enc = detect_encoding_of_buffer(pTemp, size);
+
+ /* SetWindowTextUtf8 and SetWindowTextA try to allocate memory, so we
+ * check if they succeed.
+ */
+ switch (enc)
+ {
+ case ENCODING_UTF16BE:
+ byteswap_wide_string((WCHAR*) pTemp, size/sizeof(WCHAR));
+ /* fall through */
+
+ case ENCODING_UTF16LE:
+ if (size >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
+ succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
+ else
+ succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp);
+ break;
+
+ case ENCODING_UTF8:
+ if (size >= 3 && (BYTE)pTemp[0] == 0xef && (BYTE)pTemp[1] == 0xbb &&
+ (BYTE)pTemp[2] == 0xbf)
+ succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp+3);
+ else
+ succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp);
+ break;
+
+ default:
+ succeeded = SetWindowTextA(Globals.hEdit, pTemp);
+ break;
+ }
+
+ if (!succeeded)
+ {
+ ShowLastError();
+ HeapFree(GetProcessHeap(), 0, pTemp);
+ return;
+ }
HeapFree(GetProcessHeap(), 0, pTemp);
diff --git a/programs/notepad/main.h b/programs/notepad/main.h
index f81c437..bb9b7cc 100644
--- a/programs/notepad/main.h
+++ b/programs/notepad/main.h
@@ -25,6 +25,14 @@
#define MAX_STRING_LEN 255
+/* Text encodings distinguished when the contents of a file are examined
+ * (see detect_encoding_of_buffer in dialog.c). */
+typedef enum
+{
+    ENCODING_ANSI    = 0, /* fallback when no byte order mark is detected */
+    ENCODING_UTF16LE = 1, /* IsTextUnicode reported IS_TEXT_UNICODE_SIGNATURE */
+    ENCODING_UTF16BE = 2, /* IsTextUnicode reported IS_TEXT_UNICODE_REVERSE_SIGNATURE */
+    ENCODING_UTF8    = 3  /* buffer starts with the bytes 0xef 0xbb 0xbf */
+} ENCODING;
+
typedef struct
{
HANDLE hInstance;
More information about the wine-cvs
mailing list