//=-- AfCommonLog.cpp - AccessFormatPlugin for Common Log Format files... -----=
//
//  This class defines an AccessFormatPlugin that understands the structure of 
//  the common log format, and can parse it... it has limited capabilities to
//  process fields tacked onto the end of the CLF lines, but isn't really 
//  designed to handle TOO many variants...
//
//=----------------------------------------------------------------------------=
//  This file is copyright (c) 1997-2000 Chris Lattner
//=----------------------------------------------------------------------------=

#include "AccessFormatPlugin.h"
#include "Date.h"
#include <string.h>

#define EXTRA_FIELD_DEBUG 0

// See below class def for Plugin Initialization.

class AfCommonLog : public AccessFormatPluginImpl<AfCommonLog> {
public:
  //=-- Static plugin description -------------------------------------------=

  // Version number of this plugin; 100 stands for version 1.00.
  static unsigned int GetCurrentVersion() { return 100; }

  // Name under which this plugin identifies itself.
  static const char *GetPluginName() { return "CommonLogFormat"; }

  // Priority value reported for this plugin.
  static int GetPriority() { return 100; }

  //=-- Construction --------------------------------------------------------=

  // Create an empty record.  Nothing is parsed, so CreateError is always 0.
  AfCommonLog(int &CreateError) : StatusCode(0), FileLength(0) {
    ClearExtraFields();
    CreateError = 0;
  }

  // Create a record from an example log line.  The line is parsed in
  // "diagnose" mode (ParseAccessEx with Diagnose == 1), which is the mode
  // that runs DetectExtraFields to fill in the ExtraFields table.
  // CreateError receives the parser's return value.
  AfCommonLog(const String &ExLine, int &CreateError)
    : StatusCode(0), FileLength(0) {
    ClearExtraFields();

    String Scratch = ExLine;     // Parse a copy; the caller's line is const
    CreateError = ParseAccessEx(Scratch, 1);
  }

  virtual ~AfCommonLog() {}

  //=-- Copying & parsing (defined out of line) ------------------------------=

  virtual void operator=(const AccessFormatPlugin &F);
  virtual int ParseAccess(String &Line);

  // Generic field access is not provided by this plugin:
  //virtual const String &GetField(int FieldNo) const;
  //virtual void SetField(int FieldNo, const String &FieldVal);

  //=-- Field accessors (const and mutable flavours) -------------------------=

  virtual const String &GetHost() const { return Host; }
  virtual String &GetHost() { return Host; }

  virtual const String &GetAuth() const { return Auth; }
  virtual String &GetAuth() { return Auth; }

  virtual const Date &GetDate() const { return RDate; }
  virtual Date &GetDate() { return RDate; }

  virtual const String &GetRetType() const { return RetrievalType; }
  virtual String &GetRetType() { return RetrievalType; }

  virtual const String &GetURL() const { return URL; }
  virtual String &GetURL() { return URL; }

  virtual const String &GetProtocol() const { return Protocol; }
  virtual String &GetProtocol() { return Protocol; }

  virtual const String &GetDomain() const { return Domain; }
  virtual String &GetDomain() { return Domain; }

  virtual const String &GetReferrer() const { return Referrer; }
  virtual String &GetReferrer() { return Referrer; }

  virtual const String &GetBrowser() const { return Browser; }
  virtual String &GetBrowser() { return Browser; }

  virtual int GetStatusCode() const { return StatusCode; }
  virtual int GetLength() const { return FileLength; }

  //=-- Setters --------------------------------------------------------------=

  virtual void SetStatusCode(int S) { StatusCode = S; }
  virtual void SetLength(int L) { FileLength = L; }

  // Note: this sets the *default* domain.  Domain itself is reloaded from
  // DefaultDomain at the start of every parse.
  virtual void SetDomain(const String &d) { DefaultDomain = d; }

private:
  int ParseAccessEx(String &Line, int Diagnose);
  void DetectExtraFields(String &Line);
  static int DeduceFieldType(const String &Field);

  // Mark every extra-field slot as unused (null).
  void ClearExtraFields() {
    for (unsigned Slot = 0;
         Slot < sizeof(ExtraFields) / sizeof(ExtraFields[0]); ++Slot)
      ExtraFields[Slot] = 0;
  }

  // Up to 3 extra fields may trail a line.  Slot i points at the String that
  // receives the i-th extra field; the first null slot ends the list.
  String *ExtraFields[3];

  String DefaultDomain;   // Value Domain is reset to before each parse

  //=-- Record data ----------------------------------------------------------=
  String Host;            // Host that downloaded the page
  String Auth;            // Username/password if authenticated
  Date   RDate;           // Retrieval date/time
  String RetrievalType;   // Method used to retrieve
  String URL;             // URL retrieved
  String Protocol;        // Protocol used to retrieve
  String Domain;          // Domain, common across the whole log file

  String Browser;         // Browser info string
  String Referrer;        // URL of referring page

  int StatusCode;         // Status code of operation
  int FileLength;         // Length of file downloaded
};

// INIT_PLUGIN includes the AfCommonLog plugin in the internal list of
//   available plugins.  (The macro is not defined in this file; presumably it
//   comes from AccessFormatPlugin.h -- confirm there.)
INIT_PLUGIN(AfCommonLog);


// ac-1.goldrush.com - - [22/Jan/1998:23:04:09 -0800] "GET / HTTP/1.0" 206 2445

// ParseAccess - Parse one log line into this record in normal (non-diagnose)
// mode.  Line is handed to ParseAccessEx by reference and is tokenized in
// place.  Returns ParseAccessEx's result: 0 on success, -1 on a bad line.
int AfCommonLog::ParseAccess(String &Line) {
  const int NoDiagnose = 0;
  return ParseAccessEx(Line, NoDiagnose);
}

// ParseAccessEx - Parse one Common Log Format line into this record.
//
//   Line     - The log line.  It is passed by non-const reference and is
//              consumed by the Tokenize/Right calls as parsing proceeds.
//   Diagnose - Nonzero: Line is a sample line; after the standard fields are
//              read, DetectExtraFields works out which extra fields trail
//              them and fills in the ExtraFields slot table.
//              Zero: the trailing fields are stored through the slots that
//              are already present in ExtraFields.
//
// Returns 0 on success, or -1 as soon as a mandatory field is empty or the
// date does not parse.  Members assigned before the failure keep their new
// values.
int AfCommonLog::ParseAccessEx(String &Line, int Diagnose) {
  String Temp;

  Domain = DefaultDomain;           // Default

  Line.Tokenize(" ", Host);         // Host is terminated by space
  if (Host.Length() == 0) return -1;

  // Authorization data is terminated by the start of the date [] field
  Line.Tokenize("[", Auth);
  if (Auth.Length() == 0) return -1;

  Line.Tokenize("]", Temp);
  if (Temp.Length() == 0) return -1;
  if (ParseDateString(Temp)) return -1;  // Nonzero means the date is invalid

  Line.Tokenize("\"", Temp);        // Ignore up through the quote
  Line.Tokenize("\"", Temp);        // Get the command into Temp
  if (Temp.Length() == 0) return -1;

  // The command is "<method> <url> <protocol>"; whatever is left after the
  // first two tokens is taken as the protocol.
  Temp.Tokenize(" ", RetrievalType);
  Temp.Tokenize(" ", URL);
  Protocol = Temp;

  if ((RetrievalType.Length() == 0)
   || (          URL.Length() == 0)) // Protocol may be empty...
    return -1;

  Line.Tokenize(" ", Temp);
  if (Temp.Length() == 0) return -1;
  StatusCode = Temp.atoi();

  Line.Tokenize(" ", Temp);
  if (Temp.Length() == 0) return -1;
  FileLength = Temp.atoi();

  if (Diagnose) {
    DetectExtraFields(Line);
    return 0;
  }

  // Parse Extra Fields:
  //
  // Walk the ExtraFields slots until we run out of slots or hit the first
  // unused (null) one.  If slot 0 is unused the loop body never runs.
  const unsigned NumSlots = sizeof(ExtraFields) / sizeof(ExtraFields[0]);
  unsigned i = 0;
  int LastQuoted = 0;               // Set once an opening quote was skipped

  while (i < NumSlots && ExtraFields[i] != 0) {
    if (Line.Length() == 0) {
      // The line ended before every expected field was seen.  Blank the
      // remaining fields explicitly instead of indexing into an empty line.
      *(ExtraFields[i++]) = "";
      continue;
    }

    if (Line[0] == '"') {
      LastQuoted = 1;
      Line.Right(Line.Length()-1);  // Drop the quote
      continue;
    }

    if (Line[0] == ' ' || Line[0] == '\t') {
      LastQuoted = 0;
      Line.Right(Line.Length()-1);  // Drop the whitespace
      continue;
    }

    if (LastQuoted)
      Line.Tokenize("\"", Temp);    // Grab up to the quote
    else
      Line.Tokenize(" ", Temp);     // Grab up 'till whitespace...

#if EXTRA_FIELD_DEBUG
    cout << "Read Field: " << Temp << endl;
#endif
    if (Temp.Length() == 1 && Temp[0] == '-') Temp = "";  // "-" means no value
    *(ExtraFields[i++]) = Temp;
  }

  return 0;
}

// DetectExtraFields - Examine whatever is left of a sample log line after the
// standard fields and record, slot by slot in ExtraFields, which member each
// trailing field should be stored into on later parses.
//
//   Line - Remainder of the sample line; consumed while scanning.
//
// Every field found takes one slot, so that slot i always describes the i-th
// trailing field.  A field whose type cannot be deduced is pointed at a
// throw-away String: later parses still read it (keeping the following fields
// lined up) but its value is discarded.  Previously such a field took no slot
// and every field after it was stored into the wrong member.
void AfCommonLog::DetectExtraFields(String &Line) {
  // Sink for fields we do not understand.  It is shared by all instances and
  // only ever written, never read.
  // NOTE(review): shared mutable state -- revisit if records are ever parsed
  // from more than one thread.
  static String IgnoredField;

  const unsigned NumSlots = sizeof(ExtraFields) / sizeof(ExtraFields[0]);
  String Temp;
  unsigned i = 0;
  int LastQuoted = 0;               // Set once an opening quote was skipped

  while (Line.Length() && i < NumSlots) {
    if (Line[0] == '"') {
      LastQuoted = 1;
      Line.Right(Line.Length()-1);  // Drop the quote
      continue;
    }

    if (Line[0] == ' ' || Line[0] == '\t') {
      LastQuoted = 0;
      Line.Right(Line.Length()-1);  // Drop the whitespace
      continue;
    }

    if (LastQuoted)
      Line.Tokenize("\"", Temp);    // Grab up to the quote
    else
      Line.Tokenize(" ", Temp);     // Grab up 'till whitespace...

    String *Target;
    switch (DeduceFieldType(Temp)) {
    case FEBrowser:  Target = &Browser;      break;
    case FEReferrer: Target = &Referrer;     break;
    case FEDomain:   Target = &Domain;       break;
    default:         Target = &IgnoredField; break;  // Unknown: keep position
    }
    ExtraFields[i++] = Target;
#if EXTRA_FIELD_DEBUG
    cout << "Tokenized: " << ExtraFields[i-1] 
         << " Temp='" << Temp << "'  Line= '" << Line << "'\n";
#endif
  }

  // Unknown fields at the very end need no slot: nothing follows them that
  // could get misaligned.  Dropping them also leaves slot 0 empty when no
  // field at all was recognized, so later parses can skip this work.
  while (i > 0 && ExtraFields[i-1] == &IgnoredField)
    ExtraFields[--i] = 0;
}



// DeduceFieldType - Use heuristics to try to guess what these fields are that
// are tacked onto the end of the log file line.  We start very specific (hoping
// that we will get lucky) and end up pretty general... if we don't know by
// the end, ignore it and let the user tell us in the config file.
//
// TODO: Should also test for virtual host appended, if I knew the format...
#include "PlatformAbstraction.h"
#ifndef WINDOWS
#warning "TODO Later"
#endif
// Returns FEBrowser or FEReferrer when Field looks like one of those, and 0
// when no guess can be made.  Every index into Field is checked against its
// length first, so short or empty fields are never read out of range.
int AfCommonLog::DeduceFieldType(const String &Field) {
  const int Len = Field.Length();

  if (Len == 1 && Field[0] == '-')
    return FEReferrer;    // Take a wild guess...

  // The single-character tests are cheap pre-checks before the search.
  if (Len > 0 && Field[0] == 'M' && Field.substr("Mozilla/") == 0)
    return FEBrowser;     // Kill an easy case...

  if (Len > 4 && Field[4] == ':' && Field.substr("http://") == 0)
    return FEReferrer;

  // Some other "scheme://" near (but not at) the start of the field.
  const int SchemePos = Field.substr("://");
  if (SchemePos > 0 && SchemePos < 15)
    return FEReferrer;

  // Look for a browser type.  We try to roughly match this pattern: 
  // "[a-zA-Z]+/[0-9.]+"
  int i = 0;
  while (i < Len && ((Field[i] >= 'a' && Field[i] <= 'z') ||
                     (Field[i] >= 'A' && Field[i] <= 'Z')))
    i++;                  // Just scan over the leading letters...

  // Need at least one letter, followed by a '/'.
  if (i == 0 || i >= Len || Field[i] != '/') return 0;
  i++;

  // Only the first character after the '/' is examined.
  if (i < Len && ((Field[i] >= '0' && Field[i] <= '9') || Field[i] == '.'))
    return FEBrowser;

  // TODO: Emit warning to user to tell them to customize with .cfg file.
  return 0;
}



// operator= - Copy the parsed record and the extra-field layout from another
// plugin object.
//
//   f - Source object.  It is cast to AfCommonLog without any check, so the
//       caller must pass an AfCommonLog.
//
// The ExtraFields table holds pointers to an object's own members, so it must
// not be copied bit for bit: the copy would then point into the source
// object, and later parses would write there (or through dangling pointers
// once the source is destroyed).  Each slot is instead re-pointed at the
// matching member of this object.  DefaultDomain is left untouched, as
// before.
void AfCommonLog::operator=(const AccessFormatPlugin &f) {
  const AfCommonLog &F = (const AfCommonLog &)f;
  if (this == &F) return;           // Self-assignment: nothing to do

  Host          = F.Host;
  Auth          = F.Auth;
  RDate         = F.RDate;
  RetrievalType = F.RetrievalType;
  URL           = F.URL;
  Protocol      = F.Protocol;
  Domain        = F.Domain;         // Previously not copied with the record
  StatusCode    = F.StatusCode;
  FileLength    = F.FileLength;
  Browser       = F.Browser;
  Referrer      = F.Referrer;

  const unsigned NumSlots = sizeof(ExtraFields) / sizeof(ExtraFields[0]);
  for (unsigned i = 0; i < NumSlots; ++i) {
    String *Src = F.ExtraFields[i];

    if (Src == &F.Browser)
      ExtraFields[i] = &Browser;
    else if (Src == &F.Referrer)
      ExtraFields[i] = &Referrer;
    else if (Src == &F.Domain)
      ExtraFields[i] = &Domain;
    else
      ExtraFields[i] = Src;         // Null, or not a member of F: keep as is
  }
}

