//=-- AfCommonLog.cpp - AccessFormatPlugin for Common Log Format files... -----= // // This class defines an AccessFormatPlugin that understands the structure of // the common log format, and can parse it... it has limited capabilities to // process fields tacked onto the end of the CLF lines, but isn't really // designed to handle TOO many variants... // //=----------------------------------------------------------------------------= // This file is copyright (c) 1997-2000 Chris Lattner //=----------------------------------------------------------------------------= #include "AccessFormatPlugin.h" #include "Date.h" #include #define EXTRA_FIELD_DEBUG 0 // See below class def for Plugin Initialization. class AfCommonLog : public AccessFormatPluginImpl { public : inline static unsigned int GetCurrentVersion() { return 100; // Version 1.00 } inline static const char *GetPluginName() { return "CommonLogFormat"; } inline static int GetPriority() { return 100; } AfCommonLog(int &CreateError) { StatusCode = 0; FileLength = 0; CreateError = 0; memset(ExtraFields, 0, sizeof(ExtraFields)); } AfCommonLog(const String &ExLine, int &CreateError) { StatusCode = 0; FileLength = 0; memset(ExtraFields, 0, sizeof(ExtraFields)); String TempLine = ExLine; CreateError = ParseAccessEx(TempLine, 1); } virtual ~AfCommonLog() {}; virtual void operator=(const AccessFormatPlugin &F); virtual int ParseAccess(String &Line); //virtual const String &GetField(int FieldNo) const; //virtual void SetField(int FieldNo, const String &FieldVal); virtual const String &GetHost() const { return Host; } virtual String &GetHost() { return Host; } virtual const String &GetAuth() const { return Auth; } virtual String &GetAuth() { return Auth; } virtual const Date &GetDate() const { return RDate; } virtual Date &GetDate() { return RDate; } virtual const String &GetRetType() const { return RetrievalType; } virtual String &GetRetType() { return RetrievalType; } virtual const String &GetURL() const { return URL; } virtual String &GetURL() { return URL; } virtual const String &GetProtocol() const { return Protocol; } virtual String &GetProtocol() { return Protocol; } virtual const String &GetDomain() const { return Domain; } virtual String &GetDomain() { return Domain; } virtual const String &GetReferrer() const { return Referrer; } virtual String &GetReferrer() { return Referrer; } virtual const String &GetBrowser() const { return Browser; } virtual String &GetBrowser() { return Browser; } virtual int GetStatusCode() const { return StatusCode; } virtual int GetLength() const { return FileLength; } virtual void SetStatusCode(int S) { StatusCode = S; } virtual void SetLength(int L) { FileLength = L; } virtual void SetDomain(const String &d) { DefaultDomain = d; } private : int ParseAccessEx(String &Line, int Diagnose); void DetectExtraFields(String &Line); static int DeduceFieldType(const String &Field); String *ExtraFields[3]; // Allow up to 3 extra fields at EOL String DefaultDomain; // Data // String Host; // Host that downloaded the page String Auth; // Username/Password if authenticated Date RDate; // Retrieval Date/Time String RetrievalType; // Method used to retrieve String URL; // URL Retrieved String Protocol; // Protocol used to retrieve String Domain; // Domain, common across the whole log file... String Browser; // Browser info string... String Referrer; // URL of referring page... int StatusCode; // Status code of operation int FileLength; // Length of file downloaded }; // INIT_PLUGINS includes the AccessLog plugin in the internal list of // available plugins INIT_PLUGIN(AfCommonLog); // ac-1.goldrush.com - - [22/Jan/1998:23:04:09 -0800] "GET / HTTP/1.0" 206 2445 int AfCommonLog::ParseAccess(String &Line) { return ParseAccessEx(Line, 0); } int AfCommonLog::ParseAccessEx(String &Line, int Diagnose) { String Temp; Domain = DefaultDomain; // Default //cout << "ParseAccess: " << Line << endl; Line.Tokenize(" ", Host); // Host is terminated by space if (Host.Length() == 0) return -1; // Authorization data is terminated by the start of the data [] field Line.Tokenize("[", Auth); if (Auth.Length() == 0) return -1; Line.Tokenize("]", Temp); if (Temp.Length() == 0) return -1; if (ParseDateString(Temp)) return -1; // Valid date? Line.Tokenize("\"", Temp); // Ignore up through the quote Line.Tokenize("\"", Temp); // Get the command into Temp if (Temp.Length() == 0) return -1; Temp.Tokenize(" ", RetrievalType); Temp.Tokenize(" ", URL); Protocol = Temp; if ((RetrievalType.Length() == 0) || ( URL.Length() == 0)) // Protocol may be empty... return -1; Line.Tokenize(" ", Temp); if (Temp.Length() == 0) return -1; StatusCode = Temp.atoi(); Line.Tokenize(" ", Temp); if (Temp.Length() == 0) return -1; FileLength = Temp.atoi(); if (!Diagnose) { // Parse Extra Fields: // if (ExtraFields[0] == 0) return 0; // Bail out early... // Loop through ExtraFields array until we run out or hit end... unsigned i = 0; int LastQuoted = 0; while (i < (sizeof(ExtraFields) / sizeof(ExtraFields[0])) && ExtraFields[i] != 0) { if (Line[0] == '"') { LastQuoted = 1; Line.Right(Line.Length()-1); continue; } if (Line[0] == ' ' || Line[0] == '\t') { LastQuoted = 0; Line.Right(Line.Length()-1); continue; } if (LastQuoted) Line.Tokenize("\"", Temp); // Grab up to the quote else Line.Tokenize(" ", Temp); // Grab up 'till whitespace... #if EXTRA_FIELD_DEBUG cout << "Read Field: " << Temp << endl; #endif if (Temp.Length() == 1 && Temp[0] == '-') Temp = ""; *(ExtraFields[i++]) = Temp; } } else { DetectExtraFields(Line); } return 0; } void AfCommonLog::DetectExtraFields(String &Line) { String Temp; unsigned i = 0; int LastQuoted = 0; while (Line.Length() && i < sizeof(ExtraFields) / sizeof(ExtraFields[0])) { if (Line[0] == '"') { LastQuoted = 1; Line.Right(Line.Length()-1); continue; } if (Line[0] == ' ' || Line[0] == '\t') { LastQuoted = 0; Line.Right(Line.Length()-1); continue; } if (LastQuoted) Line.Tokenize("\"", Temp); // Grab up to the quote else Line.Tokenize(" ", Temp); // Grab up 'till whitespace... switch (DeduceFieldType(Temp)) { case FEBrowser: ExtraFields[i++] = &Browser; break; case FEReferrer: ExtraFields[i++] = &Referrer; break; case FEDomain: ExtraFields[i++] = &Domain; break; } #if EXTRA_FIELD_DEBUG cout << "Tokenized: " << ExtraFields[i-1] << " Temp='" << Temp << "' Line= '" << Line << "'\n"; #endif } } // DeduceFieldType - Use heuristics to try to guess what these fields are that // are tacked onto the end of the log file line. We start very specific (hoping // that we will get lucky) and end up pretty general... if we don't know by // the end, ignore it and let the user tell us in the config file. // // TODO: Should also test for virtual host appended, if I knew the format... #include "PlatformAbstraction.h" #ifndef WINDOWS #warning "TODO Later" #endif int AfCommonLog::DeduceFieldType(const String &Field) { if (Field.Length() == 1 && Field[0] == '-') return FEReferrer; // Take a while guess... if (Field[0] == 'M' && Field.substr("Mozilla/") == 0) return FEBrowser; // Kill an easy case... if (Field[4] == ':' && Field.substr("http://") == 0) return FEReferrer; if (Field.substr("://") < 15 && Field.substr("://") > 0) return FEReferrer; // Look for a browser type. We try to roughly match this pattern: // "[a-zA-Z]+/[0-9.]+" int i; for (i = 0; (Field[i] >= 'a' && Field[i] <= 'z') || (Field[i] >= 'A' && Field[i] <= 'Z'); i++) { // Empty, just scan... } if (i == 0 || Field[i++] != '/') return 0; if ((Field[i] >= '0' && Field[i] <= '9') || Field[i] == '.') return FEBrowser; // TODO: Emit warning to user to tell them to customize with .cfg file. return 0; } void AfCommonLog::operator=(const AccessFormatPlugin &f) { const AfCommonLog &F = (const AfCommonLog &)f; Host = F.Host; Auth = F.Auth; RDate = F.RDate; RetrievalType = F.RetrievalType; URL = F.URL; Protocol = F.Protocol; StatusCode = F.StatusCode; FileLength = F.FileLength; Browser = F.Browser; Referrer = F.Referrer; memcpy(&ExtraFields, &F.ExtraFields, sizeof(ExtraFields)); }