RichEdit EM_STREAMIN CP_UTF8 nulls out some input characters
- From: "Albert Oppenheimer" <spamforall@xxxxxxxx>
- Date: Sat, 4 Feb 2006 07:44:31 -0600
When I read a file encoded as UTF-8 into a RichEdit control, e.g.
SendMessage(hrichedit, EM_STREAMIN,
SF_TEXT | SF_USECODEPAGE | (CP_UTF8 << 16),
(LPARAM)(&editstream));
some of the characters from the input file are being replaced with nulls.
The first null usually occurs at offset 0xffd or 0xffe in the text buffer
(as retrieved by GetWindowText), and subsequent nulls occur after the same
interval, e.g. at 0x1ffb and 0x2ff9. Using a hex file viewer, I verified
that the input file was correct, with all normal ASCII characters (no
nulls), as originally input when the file was created. This occurs even on
files that contain only 7-bit ASCII characters, with nothing in them that
requires a UTF-8 multi-byte sequence. If I use Notepad to insert a UTF-8
byte-order mark (BOM) at the front of the file, the first null doesn't turn
up in the GetWindowText buffer until 0x1ff8.
This started occurring in a general-purpose text editor, so to isolate the
problem, I created a stripped-down program that did nothing except create
the window and read in a file. The problem still occurs.
Are there any known problems with streaming UTF-8 text in via EM_STREAMIN?
I am on XP with all high-priority updates applied as of right now.
In case anyone wants to look it over or try it out, I am including the test
program below. It has to be compiled with UNICODE defined. Here is the
command file I use to build it, using the Borland line-mode compiler:
bcc32 -c -6 -W -WU -Ie:\bcc55\include /Le:\bcc55\lib x.c
if ERRORLEVEL 1 goto :EOF
ilink32 /aa /Le:\bcc55\lib c0w32w.obj x.obj,x.exe,,import32.lib cw32i.lib
if ERRORLEVEL 1 goto :EOF
rem *** build complete ***
Thanks,
Allie
#include <wchar.h>
#include <windows.h>
#include <richedit.h>
/* Identity helper: hands its argument straight back to the caller.
   wWinMain passes its otherwise-unused parameters through it. */
long nothing(long l)
{
    return l;
}
/* Module instance handle saved by wWinMain; used when creating the RichEdit
   child window. */
static HINSTANCE instance;

/* Message record filled by the GetMessage loop in wWinMain. */
static MSG msg;

/* hmain: top-level frame window.  hedit: RichEdit child created in WM_CREATE. */
HWND hmain, hedit;

/* Running total of bytes handed to the control by StreamIn; reset by reader(). */
int lenin;

/* Name of the file to load; also used as the main window's title. */
WCHAR filename[80];

/* Scratch buffer for wsprintf-formatted message box text. */
WCHAR msgbuild[100];
/*
 * EM_STREAMIN callback.  Reads up to `count` bytes into `buffer` from the
 * file handle carried in `fd` (the EDITSTREAM cookie set up by reader()) and
 * stores the number of bytes obtained in *recount.
 *
 * Returns FALSE after a successful ReadFile, having added the byte count to
 * the global `lenin`.  If ReadFile fails, a message box shows the
 * GetLastError() code, *recount stays 0 and TRUE is returned.
 */
DWORD CALLBACK StreamIn(DWORD_PTR fd, LPBYTE buffer,
                        LONG count, LONG* recount)
{
    DWORD nread;
    BOOL ok;

    *recount = 0;
    ok = ReadFile((HANDLE)fd, buffer, count, &nread, 0);
    if (ok) {
        lenin += nread;
        *recount = nread;
        return FALSE;
    }

    /* Read failed: report the Win32 error code and signal failure. */
    wsprintf(msgbuild, L"File Read Error %d", GetLastError());
    MessageBox(hmain, msgbuild, L"debug", MB_OK);
    return TRUE;
}
/*
 * Streams the file named by the global `filename` into the RichEdit control
 * (hedit) as UTF-8 text via EM_STREAMIN, then copies the control's text back
 * out with GetWindowText and pops up one message box for each zero character
 * it finds, showing the bytes read (lenin), the buffer length and the offset
 * of the zero, each in decimal and hex.
 *
 * Failures (CreateFile, HeapAlloc, es.dwError) are reported with message
 * boxes; the function returns nothing.
 */
static void reader(void)
{
    int len;
    WCHAR* bufp;
    WCHAR* p;
    WCHAR* last;
    EDITSTREAM es;
    HANDLE fd;
    WPARAM stream;

    SetWindowText(hedit, L"");
    fd = CreateFile(filename, FILE_READ_DATA, 0, 0, OPEN_EXISTING, 0, 0);
    if (fd == INVALID_HANDLE_VALUE) {
        MessageBox(0, L"CreateFile failed", L"debug", MB_OK);
        return;
    }

    /* Plain text, interpreted using the code page in the high word (UTF-8). */
    stream = SF_TEXT | SF_USECODEPAGE | (CP_UTF8 << 16);
    lenin = 0;
    es.dwCookie = (DWORD_PTR)fd;
    es.dwError = FALSE;
    es.pfnCallback = (EDITSTREAMCALLBACK)StreamIn;
    SendMessage(hedit, EM_STREAMIN, stream, (LPARAM)(&es));

    len = GetWindowTextLength(hedit) + 1;

    /* Zero-filled so that every element GetWindowText does not write reads
       as 0; the last element is therefore always a terminator and the scan
       below cannot leave the allocation. */
    bufp = (WCHAR*)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
                             len * sizeof(WCHAR));
    if (!bufp) {
        MessageBox(0, L"HeapAlloc failed", L"debug", MB_OK);
    } else {
        GetWindowText(hedit, bufp, len);
        last = bufp + (len - 1);
        p = bufp;
        while (TRUE) {
            /* Advance to the next zero character, never past the buffer. */
            while (p < last && *p)
                ++p;
            wsprintf(msgbuild, L"read %d %x window %d %x null %d %x",
                     lenin, lenin, len, len,
                     (int)(p - bufp), (int)(p - bufp));
            MessageBox(hmain, msgbuild, L"debug", MB_OK);
            ++p;
            /* NOTE(review): len is already a character count, so stopping
               once past len / 2 means zeros in the second half are not all
               reported; possibly `len` was intended.  Left unchanged --
               confirm intent. */
            if (p - bufp > len / 2)
                break;
        }
        HeapFree(GetProcessHeap(), 0, bufp);
    }

    CloseHandle(fd);
    if (es.dwError)
        MessageBox(0, L"es.dwError", L"debug", MB_OK);
}
/*
 * Window procedure for the main frame window.
 *
 *   WM_DESTROY  - posts the quit message.
 *   WM_CREATE   - creates the RichEdit child (stored in the global hedit);
 *                 shows a message box with GetLastError() if that fails.
 *   WM_SIZE     - unless minimized, resizes the child to the new client size
 *                 taken from lparam.
 *   WM_SETFOCUS - forwards keyboard focus to the child.
 *
 * Those four messages return 0; everything else goes to DefWindowProc.
 */
LRESULT APIENTRY MainProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM lparam)
{
    if (msg == WM_DESTROY) {
        PostQuitMessage(0);
        return 0;
    }

    if (msg == WM_CREATE) {
        DWORD editstyle = WS_CHILD | WS_VISIBLE | WS_VSCROLL |
                          ES_AUTOVSCROLL | ES_MULTILINE | ES_NOHIDESEL;

        /* Created with zero size; WM_SIZE gives it its real dimensions. */
        hedit = CreateWindowEx(0, RICHEDIT_CLASS, L"", editstyle,
                               0, 0, 0, 0, hwnd, (HMENU)1, instance, 0);
        if (hedit == 0) {
            wsprintf(msgbuild, L"hedit error %d", GetLastError());
            MessageBox(0, msgbuild, L"debug", MB_OK);
        }
        return 0;
    }

    if (msg == WM_SIZE) {
        if (wparam != SIZE_MINIMIZED)
            MoveWindow(hedit, 0, 0, LOWORD(lparam), HIWORD(lparam), TRUE);
        return 0;
    }

    if (msg == WM_SETFOCUS) {
        SetFocus(hedit);
        return 0;
    }

    /* WM_SYSCOMMAND and every other message take the default handling. */
    return DefWindowProc(hwnd, msg, wparam, lparam);
}
/*
 * Program entry point.  Takes the file name from the command line (everything
 * after the program name, default "ed.c"), registers the "xMain" window
 * class, creates and shows the main window, calls reader() to load the file
 * into the RichEdit control, then runs the message loop.
 *
 * Returns 0 if class registration or window creation fails; otherwise the
 * wParam of the last message retrieved by the loop.
 */
int APIENTRY wWinMain(HINSTANCE hInst, HINSTANCE hdummy, LPCTSTR pdummy,
                      int idummy)
{
    WNDCLASS wc;
    const WCHAR* arg;

    /* Step over the program name.  It may be quoted when its path contains
       spaces, in which case it ends at the closing quote rather than at the
       first space. */
    arg = GetCommandLine();
    if (*arg == L'"') {
        ++arg;
        while (*arg && *arg != L'"')
            ++arg;
        if (*arg == L'"')
            ++arg;
    } else {
        while (*arg && *arg != L' ')
            ++arg;
    }
    while (*arg == L' ')
        ++arg;

    /* Bounded copy: lstrcpyn always null-terminates and never writes more
       than the given number of characters, so an over-long command line is
       truncated instead of overrunning filename[]. */
    lstrcpyn(filename, *arg ? arg : L"ed.c",
             (int)(sizeof filename / sizeof filename[0]));

    instance = hInst;

    /* Loading this DLL registers the RichEdit window class used in
       MainProc.  NOTE(review): the hard-coded path assumes Windows lives in
       C:\WINDOWS and the result is not checked; if loading fails the
       "hedit error" box in WM_CREATE is the only indication. */
    LoadLibrary(L"C:\\WINDOWS\\system32\\riched20.dll");

    wc.cbClsExtra = 0;
    wc.cbWndExtra = 0;
    wc.hbrBackground = 0;
    wc.hInstance = hInst;
    wc.hCursor = LoadCursor(NULL, IDC_ARROW);
    wc.hIcon = NULL;
    wc.lpfnWndProc = (WNDPROC)MainProc;
    wc.lpszClassName = L"xMain";
    wc.lpszMenuName = 0;
    wc.style = CS_HREDRAW | CS_VREDRAW;
    if (!RegisterClass(&wc)) {
        wsprintf(msgbuild, L"RegisterClass Error %d", GetLastError());
        MessageBox(0, msgbuild, L"debug", MB_OK);
        return 0;
    }

    hmain = CreateWindow(L"xMain", filename,
                         WS_OVERLAPPED | WS_CAPTION | WS_SYSMENU | WS_MINIMIZEBOX,
                         CW_USEDEFAULT, CW_USEDEFAULT, 770, 480, 0, 0, hInst, 0);
    if (!hmain) {
        wsprintf(msgbuild, L"hmain error %d", GetLastError());
        MessageBox(0, msgbuild, L"debug", MB_OK);
        return 0;
    }
    ShowWindow(hmain, SW_SHOW);
    UpdateWindow(hmain);

    reader();

    /* GetMessage returns -1 on error, which is non-zero; testing for > 0
       ends the loop on both WM_QUIT (0) and failure (-1). */
    while (GetMessage(&msg, 0, 0, 0) > 0) {
        TranslateMessage(&msg);
        DispatchMessage(&msg);
    }

    /* Touch the unused parameters. */
    nothing((long)hdummy);
    nothing((long)pdummy);
    nothing((long)idummy);
    return (int)msg.wParam;
}
.
- Follow-Ups:
- Re: RichEdit EM_STREAMIN CP_UTF8 nulls out some input characters
- From: Albert Oppenheimer
- Re: RichEdit EM_STREAMIN CP_UTF8 nulls out some input characters
- Prev by Date: Re: Problem with ShowWindow/SW_HIDE on XP
- Next by Date: Re: How to get rid of window owner?
- Previous by thread: A title of an MDI-Window
- Next by thread: Re: RichEdit EM_STREAMIN CP_UTF8 nulls out some input characters
- Index(es):
Relevant Pages
|