Re: Request-Reply Architecture with Sockets
- From: "Charles.Deisler@xxxxxxxxx" <Charles.Deisler@xxxxxxxxx>
- Date: 10 Nov 2005 09:47:04 -0800
Here are some code snippets from a crawler I wrote recently. You would need
to wire up the sockets yourself, but they show an async pattern:
/// <summary>
/// Issues an asynchronous HTTP request for <c>_url</c> and blocks the calling
/// thread on <c>allDone</c> until one of the callbacks signals completion.
/// </summary>
/// <returns>
/// <c>true</c> once <c>allDone</c> has been signalled; <c>false</c> if an
/// exception was thrown while setting up or waiting on the request. Note that
/// the callbacks also signal <c>allDone</c> from their own error handlers, so
/// <c>true</c> does not by itself mean the page was retrieved.
/// </returns>
public bool ScrapePage()
{
    // These are process-wide settings on ServicePointManager, not per-request ones.
    ServicePointManager.UseNagleAlgorithm = true;
    ServicePointManager.Expect100Continue = true;
    ServicePointManager.CheckCertificateRevocationList = true;
    ServicePointManager.DefaultConnectionLimit =
        ServicePointManager.DefaultPersistentConnectionLimit;

    RegisteredWaitHandle timeoutRegistration = null;
    try
    {
        // NOTE(review): the original copied _proxy into an unused local and never
        // assigned it to the request - confirm whether the request should use it.
        this._servicePoint = ServicePointManager.FindServicePoint(this._url);

        this._httpWebRequest = (HttpWebRequest)WebRequest.Create(this._url);
        this._httpWebRequest.Timeout = _timeout;
        this._httpWebRequest.UserAgent = this._userAgent;
        this._httpWebRequest.KeepAlive = false;
        this._httpWebRequest.ProtocolVersion = HttpVersion.Version10;

        this._requestState = new RequestState();
        this._requestState.request = this._httpWebRequest;

        IAsyncResult result = this._httpWebRequest.BeginGetResponse(
            new AsyncCallback(RespCallback), this._requestState);

        // The Timeout property is set above, but the registered wait uses
        // DefaultTimeout; TimeoutCallback is invoked when the wait handle is
        // signalled or that interval elapses.
        timeoutRegistration = ThreadPool.RegisterWaitForSingleObject(
            result.AsyncWaitHandle,
            new WaitOrTimerCallback(TimeoutCallback),
            this._httpWebRequest,
            DefaultTimeout,
            true);

        // Block until RespCallback/ReadCallBack report that they are finished.
        allDone.WaitOne();

        if (this._requestState.response != null)
        {
            this._requestState.response.Close();
        }
        return true;
    }
    catch (WebException ex)
    {
        MessageBox.Show(ex.Status.ToString(), "PageScraper WebException",
            MessageBoxButtons.OK, MessageBoxIcon.Information);
        allDone.Set();
        return false;
    }
    catch (Exception ex)
    {
        // TargetSite can be null, so guard it to avoid throwing from the handler.
        string targetSite = ex.TargetSite != null ? ex.TargetSite.ToString() : "";
        MessageBox.Show(ex.Message + "\n" + targetSite, "PageScraper Exception",
            MessageBoxButtons.OK, MessageBoxIcon.Information);
        allDone.Set();
        return false;
    }
    finally
    {
        // Release the thread-pool wait registration even though it was
        // registered to execute only once.
        if (timeoutRegistration != null)
        {
            timeoutRegistration.Unregister(null);
        }
    }
}
/// <summary>
/// Completion callback for <c>BeginGetResponse</c>. Ends the request, stores
/// the response and its stream on the <see cref="RequestState"/>, and starts
/// the first asynchronous read. On failure it reports the error and signals
/// <c>allDone</c> so the waiting caller is released.
/// </summary>
/// <param name="asynchronousResult">
/// Async result whose <c>AsyncState</c> is the <see cref="RequestState"/>
/// passed to <c>BeginGetResponse</c>.
/// </param>
private void RespCallback(IAsyncResult asynchronousResult)
{
    //this._controller.Status("Receiving page from host " + this._url.Host,"statusBar2");
    try
    {
        this._requestState = (RequestState)asynchronousResult.AsyncState;
        this._httpWebRequest = this._requestState.request;
        this._requestState.response =
            (HttpWebResponse)this._httpWebRequest.EndGetResponse(asynchronousResult);

        this._responseStream = this._requestState.response.GetResponseStream();
        this._requestState.streamResponse = this._responseStream;

        // Size the read from the actual buffer so it can never exceed the array.
        byte[] buffer = this._requestState.BufferRead;
        this._responseStream.BeginRead(buffer, 0, buffer.Length,
            new AsyncCallback(ReadCallBack), this._requestState);
    }
    catch (WebException ex)
    {
        //MessageBox.Show(ex.Message,this._url.AbsoluteUri, MessageBoxButtons.OK, MessageBoxIcon.Information);
        WebExceptionManager.LogWebException(ex.Status.ToString(), ex.Message,
            this._httpWebRequest.RequestUri.ToString());
        this._controller.Status(ex.Status.ToString(), "statusBar2");
        WebResponseManager.HandleWebException(this._worker.ownerAgent,
            this._worker.currentDomainSetIndex, this._url.AbsoluteUri);
        // No read was started, so release the thread blocked on allDone.
        allDone.Set();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "RespCallback Exception",
            MessageBoxButtons.OK, MessageBoxIcon.Information);
        allDone.Set();
    }
}
/// <summary>
/// Completion callback for <c>BeginRead</c> on the response stream. Appends
/// each chunk to <c>requestData</c> and re-issues the read until
/// <c>EndRead</c> returns 0, then queues the accumulated text for processing,
/// closes the stream and signals <c>allDone</c>.
/// </summary>
/// <param name="asyncResult">
/// Async result whose <c>AsyncState</c> is the <see cref="RequestState"/>
/// passed to <c>BeginRead</c>.
/// </param>
private void ReadCallBack(IAsyncResult asyncResult)
{
    //this._controller.Status("Reading response from host " + this._url,"statusBar2");
    try
    {
        this._requestState = (RequestState)asyncResult.AsyncState;
        this._responseStream = this._requestState.streamResponse;

        int read = this._responseStream.EndRead(asyncResult);
        if (read > 0)
        {
            byte[] buffer = this._requestState.BufferRead;

            // NOTE(review): the body is decoded as ASCII, so non-ASCII bytes are
            // not preserved - confirm this is acceptable for the crawled pages.
            this._requestState.requestData.Append(
                Encoding.ASCII.GetString(buffer, 0, read));

            // More data may follow: queue the next read, sized from the actual buffer.
            this._responseStream.BeginRead(buffer, 0, buffer.Length,
                new AsyncCallback(ReadCallBack), this._requestState);
            return;
        }

        // End of stream. The original branched on requestData.Length > 1, but
        // both branches did exactly the same thing, so they are merged here.
        this._controller.Status("Successful response from host " + this._url.Host,
            "statusBar2");
        WebResponseManager.QueueWebResponseForProcessing(
            this._worker.ownerAgent,
            this._worker.currentDomainSetIndex,
            this._requestState.requestData.ToString(),
            this._worker.processType,
            Convert.ToString(_worker.regexPattern),
            this._controller,
            this._url.ToString(),
            this._refdomain);

        this._responseStream.Close();
        allDone.Set();
    }
    catch (WebException)
    {
        //WebExceptionManager.LogWebException(ex.Status.ToString(),ex.Message,this._httpWebRequest.RequestUri.ToString());
        WebResponseManager.HandleWebException(this._worker.ownerAgent,
            this._worker.currentDomainSetIndex, this._url.AbsoluteUri);
        //MessageBox.Show(wex.Message,"PageScraper.ReadCallback WebException", MessageBoxButtons.OK, MessageBoxIcon.Information);
        allDone.Set();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "PageScraper.ReadCallback Exception",
            MessageBoxButtons.OK, MessageBoxIcon.Information);
        allDone.Set();
    }
}
}
/// <summary>
/// Holds the state shared between the asynchronous request callbacks: the
/// request, its response and stream, a read buffer, and the text accumulated
/// so far.
/// </summary>
public class RequestState
{
    // Length in bytes of the read buffer allocated for each instance.
    const int BUFFER_SIZE = 1024;

    // Text accumulated from the response; starts empty.
    public StringBuilder requestData = new StringBuilder("");

    // Scratch buffer that each asynchronous read fills.
    public byte[] BufferRead = new byte[BUFFER_SIZE];

    // The request this state belongs to; null until assigned by the caller.
    public HttpWebRequest request = null;

    // The response for the request; null until assigned by the caller.
    public HttpWebResponse response;

    // The response's stream; null until assigned by the caller.
    public Stream streamResponse = null;

    /// <summary>
    /// Creates a state object with an empty text accumulator, a freshly
    /// allocated read buffer, and no request, response or stream attached.
    /// </summary>
    public RequestState()
    {
    }
}
.
- References:
- Request-Reply Architecture with Sockets
- From: John
- Request-Reply Architecture with Sockets
- Prev by Date: Re: Problem with HttpWebRequest to read a web page
- Next by Date: Members type not available in the IDE
- Previous by thread: Request-Reply Architecture with Sockets
- Next by thread: Simple Question
- Index(es):
Loading