views:

622

answers:

5

New demo code:

I am trying to get the captcha image from AOL, and I keep getting an error 418.

unit imageunit;
///
///  h t t p s://new.aol.com/productsweb/
///
interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, IdIOHandler, IdIOHandlerSocket, IdIOHandlerStack, IdSSL,
  IdSSLOpenSSL, IdIntercept, IdZLibCompressorBase, IdCompressorZLib,
  IdCookieManager, IdBaseComponent, IdComponent, IdTCPConnection, IdTCPClient,
  IdHTTP,jpeg,GIFImg, ExtCtrls, PerlRegEx;

type
  // Demo form: on a button click it downloads the AOL sign-up page, extracts
  // the captcha image URL from it and shows the downloaded image.
  // The component fields below are streamed in from the form's DFM resource,
  // so their names and types must stay in sync with that file.
  TForm2 = class(TForm)
    // HTTP client used for both requests (page and captcha image).
    IdHTTP1: TIdHTTP;
    // Assigned as IdHTTP1.CookieManager in the DFM.
    IdCookieManager1: TIdCookieManager;
    // Assigned as IdHTTP1.Compressor in the DFM.
    IdCompressorZLib1: TIdCompressorZLib;
    // Assigned as Intercept of both IdHTTP1 and the SSL IO handler in the DFM.
    IdConnectionIntercept1: TIdConnectionIntercept;
    // Assigned as IdHTTP1.IOHandler in the DFM.
    IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL;
    Panel1: TPanel;
    // Receives the decoded captcha picture in Button1Click.
    Image1: TImage;
    Panel2: TPanel;
    // Its OnClick is wired to Button1Click in the DFM.
    Button1: TButton;
    // NOTE(review): not referenced by the code in this unit
    // (getaimcaptchaimage creates its own TPerlRegEx) - confirm it is needed.
    PerlRegEx1: TPerlRegEx;
    // Receives the text of the downloaded page in Button1Click.
    Memo1: TMemo;
    // Downloads the page and the captcha image and displays both.
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form2: TForm2;

implementation

{$R *.dfm}

// Extracts the query-string suffix of the captcha image URL from the HTML of
// the sign-up page.
//
//   data   - the HTML text of the page.
//   Result - the part of the image URL that follows 'WordVerImage'
//            (for example '?91868359'), or '' when no match is found.
//
// NOTE(review): the original regular expression was lost when this listing was
// extracted (the literal was cut off in the middle). The pattern below is
// reconstructed from the URL shape the caller builds
// ('.../productsweb/WordVerImage' + result, e.g. 'WordVerImage?91868359');
// confirm it against the real page markup.
function getaimcaptchaimage(data:string):string;
var
  Regex: TPerlRegEx;
begin
  Result := '';
  Regex := TPerlRegEx.Create(nil);
  try
    Regex.RegEx := 'WordVerImage(\?[0-9]+)';
    Regex.Subject := data;
    // SubExpressions[0] is the whole match; [1] is the captured '?digits' part.
    if Regex.Match and (Regex.SubExpressionCount >= 1) then
      Result := Regex.SubExpressions[1];
  finally
    // The original never released the regex object.
    Regex.Free;
  end;
end;


// Downloads the AOL sign-up page, shows its text in Memo1, derives the captcha
// image URL from it, downloads that image and shows it in Image1.
//
// The page is requested first with the same IdHTTP1 instance so that any
// cookies it sets are available for the image request (IdHTTP1 has
// AllowCookies and a cookie manager assigned in the form file).
//
// Any exception from the HTTP requests is reported in a message box and the
// handler returns without touching Image1. An exception while decoding the
// JPEG propagates to the caller.
procedure TForm2.Button1Click(Sender: TObject);
const
  // The spaces that were inserted into the scheme for posting have been
  // removed; 'h t t p s://' is not a valid URL.
  BaseUrl = 'https://new.aol.com/productsweb/';
var
  JPI : TJPEGImage;
  streamdata: TMemoryStream;
  SStream: TStringStream;
  page: string;
  website: string;
begin
  streamdata := TMemoryStream.Create;
  SStream := TStringStream.Create('');
  try
    try
      IdHTTP1.Get(BaseUrl, SStream);
      page := UTF8ToWideString(SStream.DataString);
      Memo1.Text := page;

      // Image URLs look like:
      //   https://new.aol.com/productsweb/WordVerImage?20890843
      //   https://new.aol.com/productsweb/WordVerImage?91868359
      website := BaseUrl + 'WordVerImage' + getaimcaptchaimage(page);
      // Use this instance's caption rather than the global Form2 variable.
      Caption := website;

      // This request was reported to fail with "418 unused".
      // NOTE(review): the form file sends a fixed Referer of
      // 'http://www.yahoo.com' - confirm whether the server expects the
      // sign-up page as referer.
      IdHTTP1.Get(website, streamdata);
    except
      on E: Exception do
      begin
        MessageDlg('Exception: '+E.Message, mtError, [mbOK], 0);
        // Nothing usable was downloaded; do not try to decode an empty stream.
        Exit;
      end;
    end;

    streamdata.Position := 0;
    JPI := TJPEGImage.Create;
    try
      JPI.LoadFromStream(streamdata);
      // Assign only after a successful load.
      Image1.Picture.Assign(JPI);
    finally
      JPI.Free;
    end;
  finally
    // Both streams are released on every path (SStream used to leak).
    SStream.Free;
    streamdata.Free;
  end;
end;

end.

Form:


object Form2: TForm2
  Left = 0
  Top = 0
  Caption = 'Form2'
  ClientHeight = 247
  ClientWidth = 480
  Color = clBtnFace
  Font.Charset = DEFAULT_CHARSET
  Font.Color = clWindowText
  Font.Height = -11
  Font.Name = 'Tahoma'
  Font.Style = []
  OldCreateOrder = False
  PixelsPerInch = 96
  TextHeight = 13
  object Panel1: TPanel
    Left = 0
    Top = 41
    Width = 480
    Height = 206
    Align = alClient
    TabOrder = 0
    object Image1: TImage
      Left = 1
      Top = 1
      Width = 478
      Height = 115
      Align = alClient
      ExplicitLeft = 5
      ExplicitTop = 17
      ExplicitWidth = 200
      ExplicitHeight = 70
    end
    object Memo1: TMemo
      Left = 1
      Top = 116
      Width = 478
      Height = 89
      Align = alBottom
      TabOrder = 0
      ExplicitLeft = 80
      ExplicitTop = 152
      ExplicitWidth = 185
    end
  end
  object Panel2: TPanel
    Left = 0
    Top = 0
    Width = 480
    Height = 41
    Align = alTop
    TabOrder = 1
    object Button1: TButton
      Left = 239
      Top = 6
      Width = 75
      Height = 25
      Caption = 'Button1'
      TabOrder = 0
      OnClick = Button1Click
    end
  end
  object IdHTTP1: TIdHTTP
    Intercept = IdConnectionIntercept1
    IOHandler = IdSSLIOHandlerSocketOpenSSL1
    MaxAuthRetries = 100
    AllowCookies = True
    HandleRedirects = True
    RedirectMaximum = 100
    ProxyParams.BasicAuthentication = False
    ProxyParams.ProxyPort = 0
    Request.ContentLength = -1
    Request.Accept = 
      'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-s' +
      'hockwave-flash, application/cade, application/xaml+xml, applicat' +
      'ion/vnd.ms-xpsdocument, application/x-ms-xbap, application/x-ms-' +
      'application, */*'
    Request.BasicAuthentication = False
    Request.Referer = 'http://www.yahoo.com'
    Request.UserAgent = 
      'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/201001' +
      '22 firefox/3.6.1'
    HTTPOptions = [hoForceEncodeParams]
    CookieManager = IdCookieManager1
    Compressor = IdCompressorZLib1
    Left = 40
    Top = 160
  end
  object IdCookieManager1: TIdCookieManager
    Left = 360
    Top = 136
  end
  object IdCompressorZLib1: TIdCompressorZLib
    Left = 408
    Top = 56
  end
  object IdConnectionIntercept1: TIdConnectionIntercept
    Left = 304
    Top = 72
  end
  object IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL
    Intercept = IdConnectionIntercept1
    MaxLineAction = maException
    Port = 0
    DefaultPort = 0
    SSLOptions.Mode = sslmUnassigned
    SSLOptions.VerifyMode = []
    SSLOptions.VerifyDepth = 0
    Left = 192
    Top = 136
  end
  object PerlRegEx1: TPerlRegEx
    Options = []
    Left = 120
    Top = 56
  end
end

If you go to h t t p s://new.aol.com/productsweb/ you will notice the captcha image has a url like h t t p s://new.aol.com/productsweb/WordVerImage?91868359

I put that url in the edit box and get an error.

What is wrong with this code?

*take the extra spaces out of the URLs

A: 

http://en.wikipedia.org/wiki/List_of_HTTP_status_codes search for 418

don't send spam :)

Andrey
Not sending spam... and that's the "I'm a teapot" code.
Bill
-1. Does not answer the question.
Rob Kennedy
+4  A: 

I think it means "go away". Somehow, through headers or something in your request, it's determining that you smell like a bot. Maybe because you're asking for an image that it knows it didn't just make for you. Yeah, that's probably it. If I go to your URL in my browser, I get an 418 too.

Chris Thornton
You are missing a cookie: http://stackoverflow.com/questions/2724233/2725126#2725126
Jim McKeeth
I've never done cookies with indy. As far as I could tell I just need to enable the cookie manager correct?
Bill
+2  A: 

It's not your code. try in a browser....
(You obviously need to remove the blanks from 'h t t p s'...)

This URL https://new.aol.com/productsweb/ apparently needs to be called before you can get a captcha image. Otherwise you get an (improper) error 418 Unused.
Sometimes I had to try twice with the image # as I first got a 420 Unused error...

You better ask them, as their API does not really seem stable...

RE: the http 418 joke. If you want some fun with the http error codes read on A Web Developer and His Girlfriend(s)

François
as for the spaces, I realize that, I had to put spaces in because I don't have enough points at stackoverflow to put urls
Bill
+5  A: 

There is a cookie involved. If you go straight to the captcha URL https://new.aol.com/productsweb/WordVerImage?91868359 in a browser that has not visited https://new.aol.com/productsweb/ then you get (after a refresh):

<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> 
<html><head> 
<title>418 unused</title> 
</head><body> 
<h1>unused</h1> 
<p>The server encountered an internal error or
misconfiguration and was unable to complete
your request.</p> 
<p>Please contact the server administrator,
 null and inform them of the time the error occurred,
and anything you might have done that may have
caused the error.</p> 
<p>More information about this error may be available
in the server error log.</p> 
</body></html> 

But if you visit https://new.aol.com/productsweb/ first, then you will get an image. Clear the cookie and you get the error again (although as Francois pointed out, you get no response first, and then on refresh you get the image.)

Indy supports cookies, so you will need to add support for cookies, and then either get the cookie by visiting the productsweb first, or simulate it from a known value.

You will notice that the image generated is not based on the number passed as a parameter alone, but also the cookie. Have two different browsers (Chrome and Firefox) that each have a different cookie, and then visit the same captcha URL and you will get two different images.

Curious what you are trying to accomplish with this.

Jim McKeeth
the main application goes to new.aol.com first, and has cookies enabled. I still get the problem I'll update the demo code to reflect this.
Bill
My employer wants me to do this. I don't question the person who writes my pay check in the current economy. I'm lucky to have a decent paying job.
Bill
+1 for knowing exactly what's going on here.
Chris Thornton
Bill, if you're doing something that AOL doesn't want you to (and I suspect you are), wouldn't it be better to figure that out *now*, rather than wait until you've deployed your program and then field lots of support calls when AOL discovers what you're doing and blocks your app? Who's writing that paycheck when your employer is giving refunds for a faulty product? "The current economy" is all the *more* reason to be careful before investing time and money.
Rob Kennedy
I'll take that into consideration. I'll see what he says in the morning.
Bill
A: 

In my old projects I read captchas from the web. I did this with the cache procedures of the embedded web browser component: if you can read the temporary internet files from your code, you can read the image data from the cache. I have added some simple code below.

The GetCachedFileFromURL and ClearAllEntries functions are declared in the TEmbeddedWebBrowser unit. In my solution I used only the code I copied out of it, to keep the exe size small, but you can use the component's up-to-date source instead; the component is open source.

uses
 WinInet;

// Looks up a URL in the WinINet (Internet Explorer) URL cache.
//
//   strUL        - the source URL to look for; compared for exact equality
//                  with each cache entry's lpszSourceUrlName.
//   strLocalFile - receives the entry's local file name when found; left
//                  unchanged otherwise.
//   Result       - True when a matching cache entry was found.
//
// The cache is enumerated entry by entry. Each entry is fetched in two steps:
// a probe call with a zero-sized buffer that reports the required size, then
// the real call with a buffer of that size.
function GetCachedFileFromURL(strUL: string; var strLocalFile: string): Boolean;
var
   lpEntryInfo: PInternetCacheEntryInfo;
   hCacheDir: THandle;
   dwEntrySize: DWORD;
begin
   Result := False;

   // Probe for the size of the first entry. Anything other than
   // ERROR_INSUFFICIENT_BUFFER means there is nothing to enumerate
   // (for example an empty cache) or the call failed outright.
   dwEntrySize := 0;
   FindFirstUrlCacheEntry(nil, TInternetCacheEntryInfo(nil^), dwEntrySize);
   if (GetLastError <> ERROR_INSUFFICIENT_BUFFER) or (dwEntrySize = 0) then
      Exit;

   // Begin the enumeration of the Internet cache.
   GetMem(lpEntryInfo, dwEntrySize);
   try
      hCacheDir := FindFirstUrlCacheEntry(nil, lpEntryInfo^, dwEntrySize);
      if hCacheDir = 0 then
         Exit;
      if strUL = lpEntryInfo^.lpszSourceUrlName then
         begin
            strLocalFile := lpEntryInfo^.lpszLocalFileName;
            Result := True;
         end;
   finally
      FreeMem(lpEntryInfo);
   end;

   try
      while not Result do
         begin
            // Probe for the size of the next entry.
            dwEntrySize := 0;
            FindNextUrlCacheEntry(hCacheDir, TInternetCacheEntryInfo(nil^), dwEntrySize);
            // ERROR_NO_MORE_ITEMS ends the enumeration; any other error also
            // stops it instead of looping forever.
            if GetLastError <> ERROR_INSUFFICIENT_BUFFER then
               Break;

            GetMem(lpEntryInfo, dwEntrySize);
            try
               if not FindNextUrlCacheEntry(hCacheDir, lpEntryInfo^, dwEntrySize) then
                  Break;
               if strUL = lpEntryInfo^.lpszSourceUrlName then
                  begin
                     strLocalFile := lpEntryInfo^.lpszLocalFileName;
                     Result := True;
                  end;
            finally
               // Runs on Break as well, so the buffer is never leaked.
               FreeMem(lpEntryInfo);
            end;
         end;
   finally
      // Release the enumeration handle (the original never closed it).
      FindCloseUrlCache(hCacheDir);
   end;
end;

// Empties the browser cache so that a stale image is not picked up later.
// SearchPattern and ClearAllEntries are not declared in this listing; per the
// accompanying text, ClearAllEntries comes from the TEmbeddedWebBrowser unit.
procedure TForm1.ClearCache();
begin
  // Must be set before ClearAllEntries is called.
  // NOTE(review): presumably spAll selects every kind of cache entry - confirm
  // against the TEmbeddedWB source.
  SearchPattern := spAll;
  ClearAllEntries;
end;

usage

// Usage example: clears the cache, looks up the captcha URL in the WinINet
// cache and loads the cached file as a JPEG image.
//
// Returns silently when the URL is not in the cache. An exception from
// LoadFromFile (unreadable or non-JPEG file) propagates to the caller.
procedure TForm1.Button1Click(Sender: TObject);
var
 fname:string;
 jpImg:TJPEGImage;
begin
  ClearCache;
  // NOTE(review): the page has to be loaded by the browser between clearing
  // the cache and this lookup, otherwise nothing can be found - confirm
  // against the full project.
  if not GetCachedFileFromURL('https://ebildirge.ssk.gov.tr/WPEB/PG',fname) then
    Exit; // no cache entry: fname is empty, so there is nothing to load

  // Create outside the try block so the finally only runs for a live object.
  jpImg:=TJPEGImage.Create;
  try
   jpImg.LoadFromFile(fname);
   // Use jpImg here (for example assign it to a TImage) before it is freed.
  finally
   // Was FreeAndNil(jpgImg): an undeclared identifier that did not compile.
   FreeAndNil(jpImg);
  end;
end;
sabri.arslan
I was trying to stay away from using ie, but thank you for the demo code.
Bill
I understand, but this code already uses the Internet Explorer cache. All you have to do is install the TEmbeddedWB component into Delphi. They also have a demo for this.
sabri.arslan
I'm going to end up using embeddedWeb for this project, since I cannot figure out the problem. I was trying to stay away from IE. I've got quite a bit of experience with embeddedweb, last time I used it, it had ie8 issues. I'll probably be using your code or similar. Thanks again.
Bill
Quick question, the clear cache, is that just something for your project or am I missing something?
Bill
Yes, the cache clearing is for my project. But if you don't clear the cache, IE can show an old cached captcha, which is why I clear the cache every time. I extracted all the necessary code from EmbeddedWB; if you want, I can send you the full project. Yesterday I tried to add it here, but there is a lot of code. Can you write your email address here?
sabri.arslan
I have added the full project. You can download it from http://sabriarslan.com/e-bildirge.zip — the code starts from the FormShow event in unit1.pas; you can follow it from there.
sabri.arslan
Thanks for the code. I got it figured out.
Bill