[1/4] notepad: Improve encoding detection when opening files.
Alexander Scott-Johns
alexander.scott.johns at googlemail.com
Mon Jun 29 19:24:50 CDT 2009
I have reorganized the patches - the Encoding drop-down box is now
added in the 3rd patch.
---
programs/notepad/dialog.c | 102 ++++++++++++++++++++++++++++++++++++++++----
programs/notepad/dialog.h | 2 +-
programs/notepad/main.c | 6 +-
programs/notepad/main.h | 9 ++++
4 files changed, 105 insertions(+), 14 deletions(-)
-------------- next part --------------
From c8110d47a2e66442a342f9373ff5c6e51338ed83 Mon Sep 17 00:00:00 2001
From: Alexander Scott-Johns <alexander.scott.johns at googlemail.com>
Date: Mon, 29 Jun 2009 22:24:59 +0100
Subject: notepad: Improve encoding detection when opening files.
---
programs/notepad/dialog.c | 102 ++++++++++++++++++++++++++++++++++++++++----
programs/notepad/dialog.h | 2 +-
programs/notepad/main.c | 6 +-
programs/notepad/main.h | 9 ++++
4 files changed, 105 insertions(+), 14 deletions(-)
diff --git a/programs/notepad/dialog.c b/programs/notepad/dialog.c
index d7354e7..606b777 100644
--- a/programs/notepad/dialog.c
+++ b/programs/notepad/dialog.c
@@ -37,6 +37,14 @@ static const WCHAR helpfileW[] = { 'n','o','t','e','p','a','d','.','h','l','p',0
static INT_PTR WINAPI DIALOG_PAGESETUP_DlgProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam);
+/* Reverse the byte order of each of the first num WCHARs of str, in place
+ * (converts between big-endian and little-endian UTF-16).
+ */
+static inline void byteswap_wide_string(LPWSTR str, UINT num)
+{
+    LPWSTR end = str + num;
+
+    for (; str < end; str++)
+    {
+        WORD unit = (WORD) *str;
+        *str = (WCHAR) MAKEWORD(HIBYTE(unit), LOBYTE(unit));
+    }
+}
+
VOID ShowLastError(void)
{
DWORD error = GetLastError();
@@ -195,14 +203,52 @@ BOOL DoCloseFile(void)
return(TRUE);
}
+/* Guess the encoding of a buffer from a leading byte-order mark.
+ *
+ * buffer: the raw bytes to inspect.
+ * size:   number of valid bytes in buffer.
+ *
+ * Returns ENCODING_UTF8, ENCODING_UTF16LE or ENCODING_UTF16BE when the
+ * corresponding signature is found, and ENCODING_ANSI otherwise.
+ */
+static inline ENCODING detect_encoding_of_buffer(const void* buffer, int size)
+{
+    /* unsigned char: every BOM byte is > 0x7f and does not fit a signed char. */
+    static const unsigned char bom_utf8[] = { 0xef, 0xbb, 0xbf };
+    /* In/out for IsTextUnicode: request only the tests that are examined below. */
+    int flags = IS_TEXT_UNICODE_SIGNATURE | IS_TEXT_UNICODE_REVERSE_SIGNATURE;
+
+    /* Compare as int so a negative size is not converted to a huge size_t. */
+    if (size >= (int) sizeof(bom_utf8) && !memcmp(buffer, bom_utf8, sizeof(bom_utf8)))
+        return ENCODING_UTF8;
+
+    /* A UTF-16 signature needs one whole WCHAR. Do not call IsTextUnicode on
+     * less: if it left flags untouched, the requested bits would be misread
+     * as test results.
+     */
+    if (size < (int) sizeof(WCHAR))
+        return ENCODING_ANSI;
+
+    IsTextUnicode(buffer, size, &flags);
+    if (flags & IS_TEXT_UNICODE_SIGNATURE)
+        return ENCODING_UTF16LE;
+    if (flags & IS_TEXT_UNICODE_REVERSE_SIGNATURE)
+        return ENCODING_UTF16BE;
+    return ENCODING_ANSI;
+}
-void DoOpenFile(LPCWSTR szFileName)
+/* Similar to SetWindowTextA, but uses a CP_UTF8 encoded input, not CP_ACP.
+ * lpTextInUtf8 should be NUL-terminated and not include the BOM.
+ *
+ * Returns FALSE on failure (conversion, allocation or SetWindowTextW),
+ * TRUE on success, like SetWindowTextA/W.
+ */
+static BOOL SetWindowTextUtf8(HWND hwnd, LPCSTR lpTextInUtf8)
+{
+    BOOL ret = FALSE;
+    LPWSTR textW;
+    int lenW = MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, NULL, 0);
+
+    /* 0 means the conversion failed: with a source length of -1 the count
+     * includes the terminator, so even an empty string needs one WCHAR.
+     */
+    if (lenW <= 0)
+        return FALSE;
+
+    textW = HeapAlloc(GetProcessHeap(), 0, lenW * sizeof(WCHAR));
+    if (!textW)
+        return FALSE;
+
+    /* Only hand the buffer to the window if it was actually filled in;
+     * otherwise it is uninitialised and not NUL-terminated.
+     */
+    if (MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, textW, lenW))
+        ret = SetWindowTextW(hwnd, textW);
+
+    HeapFree(GetProcessHeap(), 0, textW);
+    return ret;
+}
+
+void DoOpenFile(LPCWSTR szFileName, ENCODING enc)
{
static const WCHAR dotlog[] = { '.','L','O','G',0 };
HANDLE hFile;
LPSTR pTemp;
DWORD size;
DWORD dwNumRead;
+ BOOL succeeded;
WCHAR log[5];
/* Close any files and prompt to save changes */
@@ -224,9 +270,9 @@ void DoOpenFile(LPCWSTR szFileName)
ShowLastError();
return;
}
- size++;
- pTemp = HeapAlloc(GetProcessHeap(), 0, size);
+ /* Extra memory for (WCHAR)'\0'-termination. */
+ pTemp = HeapAlloc(GetProcessHeap(), 0, size+2);
if (!pTemp)
{
CloseHandle(hFile);
@@ -234,7 +280,7 @@ void DoOpenFile(LPCWSTR szFileName)
return;
}
- if (!ReadFile(hFile, pTemp, size, &dwNumRead, NULL))
+ if (!ReadFile(hFile, pTemp, size+2, &dwNumRead, NULL))
{
CloseHandle(hFile);
HeapFree(GetProcessHeap(), 0, pTemp);
@@ -243,12 +289,48 @@ void DoOpenFile(LPCWSTR szFileName)
}
CloseHandle(hFile);
- pTemp[dwNumRead] = 0;
- if((size -1) >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
- SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
- else
- SetWindowTextA(Globals.hEdit, pTemp);
+    /* ReadFile may deliver fewer bytes than size, which was determined before
+     * the read; never treat the unread (uninitialised) tail as text.
+     */
+    if (dwNumRead < size)
+        size = dwNumRead;
+
+    pTemp[size] = 0;   /* make sure it's (char)'\0'-terminated */
+    pTemp[size+1] = 0; /* room for a (WCHAR)'\0'; see the UTF-16 case below */
+
+    if (enc == ENCODING_AUTO)
+        enc = detect_encoding_of_buffer(pTemp, size);
+
+    /* SetWindowTextUtf8 and SetWindowTextA try to allocate memory, so we
+     * check if they succeed.
+     */
+    switch (enc)
+    {
+    case ENCODING_UTF16BE:
+    case ENCODING_UTF16LE:
+    {
+        LPWSTR textW = (LPWSTR) pTemp;
+        DWORD lenW = size / sizeof(WCHAR);
+
+        /* Terminate on a WCHAR boundary. When size is odd, the two zero bytes
+         * written above straddle two WCHARs and do not form a terminator, so
+         * SetWindowTextW would read past the size+2 byte allocation. The
+         * dangling odd byte cannot be part of a UTF-16 code unit; drop it.
+         */
+        textW[lenW] = 0;
+
+        if (enc == ENCODING_UTF16BE)
+            byteswap_wide_string(textW, lenW);
+
+        /* Skip the byte-order mark (little-endian after the swap above). */
+        if (size >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
+            textW++;
+
+        succeeded = SetWindowTextW(Globals.hEdit, textW);
+        break;
+    }
+
+    case ENCODING_UTF8:
+        /* Skip the UTF-8 byte-order mark if there is one. */
+        if (size >= 3 && (BYTE)pTemp[0] == 0xef && (BYTE)pTemp[1] == 0xbb &&
+            (BYTE)pTemp[2] == 0xbf)
+            succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp+3);
+        else
+            succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp);
+        break;
+
+    default:
+        succeeded = SetWindowTextA(Globals.hEdit, pTemp);
+        break;
+    }
+
+    if (!succeeded)
+    {
+        ShowLastError();
+        HeapFree(GetProcessHeap(), 0, pTemp);
+        return;
+    }
HeapFree(GetProcessHeap(), 0, pTemp);
@@ -308,7 +390,7 @@ VOID DIALOG_FileOpen(VOID)
if (GetOpenFileNameW(&openfilename))
- DoOpenFile(openfilename.lpstrFile);
+ DoOpenFile(openfilename.lpstrFile, ENCODING_AUTO);
}
diff --git a/programs/notepad/dialog.h b/programs/notepad/dialog.h
index d927143..7aabb6e 100644
--- a/programs/notepad/dialog.h
+++ b/programs/notepad/dialog.h
@@ -54,4 +54,4 @@ int DIALOG_StringMsgBox(HWND hParent, int formatId, LPCWSTR szString, DWORD dwFl
VOID ShowLastError(void);
BOOL FileExists(LPCWSTR szFilename);
BOOL DoCloseFile(void);
-void DoOpenFile(LPCWSTR szFileName);
+void DoOpenFile(LPCWSTR szFileName, ENCODING enc);
diff --git a/programs/notepad/main.c b/programs/notepad/main.c
index c195668..2156d6b 100644
--- a/programs/notepad/main.c
+++ b/programs/notepad/main.c
@@ -578,7 +578,7 @@ static LRESULT WINAPI NOTEPAD_WndProc(HWND hWnd, UINT msg, WPARAM wParam,
DragQueryFileW(hDrop, 0, szFileName, ARRAY_SIZE(szFileName));
DragFinish(hDrop);
- DoOpenFile(szFileName);
+ DoOpenFile(szFileName, ENCODING_AUTO);
break;
}
@@ -689,7 +689,7 @@ static void HandleCommandLine(LPWSTR cmdline)
if (file_exists)
{
- DoOpenFile(file_name);
+ DoOpenFile(file_name, ENCODING_AUTO);
InvalidateRect(Globals.hMainWnd, NULL, FALSE);
if (opt_print)
DIALOG_FilePrint();
@@ -698,7 +698,7 @@ static void HandleCommandLine(LPWSTR cmdline)
{
switch (AlertFileDoesNotExist(file_name)) {
case IDYES:
- DoOpenFile(file_name);
+ DoOpenFile(file_name, ENCODING_ANSI);
break;
case IDNO:
diff --git a/programs/notepad/main.h b/programs/notepad/main.h
index f81c437..465897b 100644
--- a/programs/notepad/main.h
+++ b/programs/notepad/main.h
@@ -25,6 +25,15 @@
#define MAX_STRING_LEN 255
+/* Text encodings that can be requested when opening a file. The values are
+ * written out explicitly and equal the implicit numbering (0..4).
+ */
+typedef enum
+{
+    ENCODING_AUTO    = 0, /* detect the encoding from the file contents */
+    ENCODING_ANSI    = 1, /* text is handed to SetWindowTextA as-is */
+    ENCODING_UTF16LE = 2, /* UTF-16, little-endian */
+    ENCODING_UTF16BE = 3, /* UTF-16, big-endian (byte-swapped on load) */
+    ENCODING_UTF8    = 4  /* UTF-8 */
+} ENCODING;
+
typedef struct
{
HANDLE hInstance;
--
1.5.6.3
More information about the wine-patches
mailing list