- Joined
- Jul 11, 2006
- Messages
- 188
- Reaction score
- 184
This is my tokenizer
Makes it easier to load bags and things like that... ;P
ps. the current code parses files like these:
and the big commented section contains code for parsing Webzen-style files... it doesn't use boost::tokenizer, but I don't recommend using it because of memory leaks
!!comments after columns are not supported!!
like:
-----------
simple usage:
there are some functions for checking row count, column count, section count, if it exists or not, and things like that... but let intellisense help you with that
if you can't compile it, it's not my fault...
btw, you'll need boost lib installed...
Makes it easier to load bags and things like that... ;P
Code:
#pragma once
#include <map>
#include <iostream>
#include <fstream>
#include <string>
#include <boost/tokenizer.hpp>
#include <boost/algorithm/string.hpp>
namespace DarKTeaM {
// A single whitespace-delimited (optionally quoted) token from a parsed line,
// stored as raw text with on-demand numeric conversions.
class TokenizerField
{
protected:
std::string Value; // raw token text as read from the file
public:
TokenizerField();
TokenizerField(std::string Val);
int ToInteger(); // atoi semantics: 0 when the text is not numeric
long ToLong(); // atol semantics
double ToDouble(); // atof semantics
float ToFloat(); // atof narrowed to float
std::string ToString(); // copy of the raw text
char* ToStringPtr(); // NOTE: non-owning pointer into Value; do not free or outlive the field
};
// One parsed line: an ordered collection of fields indexed 0..FieldCount-1.
class TokenizerRow
{
public:
std::map<int, TokenizerField> Fields; // field index -> token
int FieldCount; // number of tokens parsed from the line
public:
TokenizerRow();
TokenizerRow(std::string strLine); // tokenizes strLine (whitespace-separated, quote-aware)
TokenizerField& operator[] (int FieldIndex); // default-inserts an empty field if absent
// GetField overloads: return the field converted to the requested type,
// or Default when FieldIndex does not exist.
char* GetField(int FieldIndex, char* Default);
std::string GetField(int FieldIndex, std::string Default);
float GetField(int FieldIndex, float Default);
int GetField(int FieldIndex, int Default);
long GetField(int FieldIndex, long Default);
double GetField(int FieldIndex, double Default);
bool FieldExists(int FieldIndex);
};
// One "<id> { ... }" block from the file: rows indexed 0..RowCount-1.
class TokenizerSection
{
public:
std::map<int, TokenizerRow> Rows; // row index -> parsed row
int RowCount; // number of rows stored in this section
public:
TokenizerSection();
TokenizerRow& operator[] (int RowIndex); // default-inserts an empty row if absent
bool RowExists(int RowIndex);
};
// Top-level parser: loads a whole file of "<id> { ... }" sections on construction.
class Tokenizer
{
public:
std::map<int, TokenizerSection> Sections; // section id -> section contents
bool Loaded; // true only if the file was opened and parsed
public:
Tokenizer(std::string szFile); // parses szFile immediately
TokenizerSection& operator[](int SectionIndex); // default-inserts an empty section if absent
bool SectionExists(int SectionIndex);
};
// Default-constructs an empty field.
TokenizerField::TokenizerField() : Value("") {}
// Constructs a field holding the given token text.
TokenizerField::TokenizerField(std::string Val) : Value(Val) {}
int TokenizerField::ToInteger() {
return atoi(this->Value.c_str());
}
long TokenizerField::ToLong() {
return atol(this->Value.c_str());
}
double TokenizerField::ToDouble() {
return atof(this->Value.c_str());
}
// Parses the field as a float; atof semantics (0.0f when not numeric).
// Uses static_cast for the intentional double->float narrowing instead of
// the original C-style cast, which hides intent and is not greppable.
float TokenizerField::ToFloat()
{
    return static_cast<float>(atof(this->Value.c_str()));
}
std::string TokenizerField::ToString() {
return this->Value;
}
// Returns a non-owning pointer into Value's internal buffer.
// WARNING: callers must neither write through this pointer nor keep it past
// the lifetime (or next mutation) of this field. The original used a C-style
// cast; const_cast makes the const-stripping explicit and greppable.
char* TokenizerField::ToStringPtr()
{
    return const_cast<char*>(this->Value.c_str());
}
// Default-constructs a row with no fields.
TokenizerRow::TokenizerRow() : FieldCount(0) {}
// Returns the field at FieldIndex; a missing index default-inserts an
// empty field (std::map::operator[] semantics).
TokenizerField& TokenizerRow::operator[](int FieldIndex)
{
    return Fields[FieldIndex];
}
char* TokenizerRow::GetField (int FieldIndex, char* Default)
{
if(!this->FieldExists(FieldIndex)) return Default;
return this->Fields[FieldIndex].ToStringPtr();
}
std::string TokenizerRow::GetField (int FieldIndex, std::string Default)
{
if(!this->FieldExists(FieldIndex)) return Default;
return this->Fields[FieldIndex].ToString();
}
float TokenizerRow::GetField (int FieldIndex, float Default)
{
if(!this->FieldExists(FieldIndex)) return Default;
return this->Fields[FieldIndex].ToFloat();
}
int TokenizerRow::GetField (int FieldIndex, int Default)
{
if(!this->FieldExists(FieldIndex)) return Default;
return this->Fields[FieldIndex].ToInteger();
}
long TokenizerRow::GetField (int FieldIndex, long Default)
{
if(!this->FieldExists(FieldIndex)) return Default;
return this->Fields[FieldIndex].ToLong();
}
double TokenizerRow::GetField (int FieldIndex, double Default)
{
if(!this->FieldExists(FieldIndex)) return Default;
return this->Fields[FieldIndex].ToDouble();
}
// Tokenizes one line into Fields using boost::escaped_list_separator:
// escape char '\', separators space and tab, quote char '"', so
//   12345 1234.5 "string column"   ->   three fields.
// Empty tokens produced by runs of consecutive separators are skipped.
// NOTE(review): escaped_list_separator throws boost::escaped_list_error on a
// dangling escape or unbalanced quote; that exception is not handled here —
// confirm callers can tolerate it.
TokenizerRow::TokenizerRow(std::string strLine)
{
this->FieldCount = 0;
std::string sep1 = "\\"; // escape character
std::string sep2 = " \t"; // field separators
std::string sep3 = "\""; // quote character
boost::escaped_list_separator<char> sep(sep1, sep2, sep3);
boost::tokenizer<boost::escaped_list_separator<char> > tok(strLine, sep);
for (boost::tokenizer<boost::escaped_list_separator<char> >::iterator it = tok.begin();
it != tok.end(); ++it)
{
if(*it == "") continue; // skip empties between consecutive separators
this->Fields[this->FieldCount++] = TokenizerField(std::string(*it));
}
}
// True when a field with the given index is present.
// The comparison is already a bool — the original's C-style (bool) cast and
// named iterator temporary were redundant.
bool TokenizerRow::FieldExists(int FieldIndex)
{
    return this->Fields.find(FieldIndex) != this->Fields.end();
}
// Default-constructs an empty section.
// Rows is default-constructed empty, so no explicit clear() is required.
TokenizerSection::TokenizerSection() : RowCount(0) {}
// Returns the row at RowIndex; a missing index default-inserts an empty row
// (std::map::operator[] semantics).
TokenizerRow& TokenizerSection::operator[] (int RowIndex)
{
    return Rows[RowIndex];
}
// True when a row with the given index is present.
// The comparison is already a bool — the original's C-style (bool) cast and
// named iterator temporary were redundant.
bool TokenizerSection::RowExists(int RowIndex)
{
    return this->Rows.find(RowIndex) != this->Rows.end();
}
// Loads and parses szFile. Expected format (see the post's example):
//   # comment line      or      // comment line
//   <sectionId> {
//   field field "quoted field"
//   }
// On success Loaded is set to true and each section is stored in Sections
// keyed by its integer id. Rows outside an open section (other than a valid
// "<id> {" header) are ignored, matching the original behavior.
//
// Fixes over the original:
//  * ZeroMemory is a windows.h macro that no included header declares here —
//    replaced by std::getline, which is portable and handles lines longer
//    than the old fixed 4096-byte buffer (which silently truncated them).
//  * while(!f.eof()) is the classic anti-pattern that can process the last
//    read twice/garbage; the getline loop condition is correct.
//  * Trailing '\r' from CRLF files is now trimmed (the legacy commented-out
//    parser stripped char 13; this version had lost that).
//  * The hand-rolled alternate front/back trim loop is replaced by the
//    equivalent find_first_not_of / find_last_not_of trim.
Tokenizer::Tokenizer(std::string szFile)
{
    this->Loaded = false;
    std::fstream f(szFile.c_str(), std::ios::in);
    if (!f.is_open() || !f.good())
        return;

    TokenizerSection sec = TokenizerSection();
    int CurrentSection = 0;
    int RowIndex = 0;
    bool SectionOpen = false;
    std::string line;
    while (std::getline(f, line))
    {
        // Trim leading/trailing whitespace, including CR from CRLF files.
        std::string::size_type first = line.find_first_not_of(" \t\r");
        if (first == std::string::npos)
            continue; // blank (or whitespace-only) line
        std::string::size_type last = line.find_last_not_of(" \t\r");
        line = line.substr(first, last - first + 1);

        // Skip full-line comments. Comments after columns remain unsupported.
        if (line.substr(0, 2) == "//" || line[0] == '#')
            continue;

        TokenizerRow row(line);
        if (!SectionOpen)
        {
            // Waiting for a section header of the form: <id> {
            if (row.FieldCount == 2 && row[1].ToString() == "{")
            {
                CurrentSection = row[0].ToInteger();
                SectionOpen = true;
                RowIndex = 0;
            }
        }
        else
        {
            // A lone "}" closes the current section.
            if (row.FieldCount == 1 && row[0].ToString() == "}")
            {
                this->Sections[CurrentSection] = sec;
                sec = TokenizerSection();
                SectionOpen = false;
                continue;
            }
            sec[RowIndex++] = row;
            sec.RowCount++;
        }
    }
    // Note: as in the original, an unterminated section at EOF is discarded.
    this->Loaded = true;
}
// Returns the section with the given id; a missing id default-inserts an
// empty section (std::map::operator[] semantics).
TokenizerSection& Tokenizer::operator[](int SectionIndex)
{
    return Sections[SectionIndex];
}
// True when a section with the given id was parsed from the file.
// The comparison is already a bool — the original's C-style (bool) cast and
// named iterator temporary were redundant.
bool Tokenizer::SectionExists(int SectionIndex)
{
    return this->Sections.find(SectionIndex) != this->Sections.end();
}
/*
Programado por WoLf
Versão antiga!
class TokenizerRow
{
public:
std::map<DWORD, std::string> Columns;
int ColumnCount;
std::string GetString(DWORD Column, std::string Default = "")
{
std::map<DWORD, std::string>::iterator it = this->Columns.find(Column);
if(it == this->Columns.end())
{
return Default;
}
return it->second;
}
char* GetStringPtr(DWORD Column, std::string Default = "")
{
std::map<DWORD, std::string>::iterator it = this->Columns.find(Column);
if(it == this->Columns.end())
{
return (char*)Default.c_str();
}
return (char*)it->second.c_str();
}
int GetInt(DWORD Column, DWORD Default = -1, BOOL Hex = FALSE)
{
std::map<DWORD, std::string>::iterator it = this->Columns.find(Column);
if(it == this->Columns.end())
{
return Default;
}
if(Hex)
{
unsigned int pOffset = Default;
sscanf_s(it->second.c_str(), "%x", &pOffset);
return pOffset;
}
return atoi(it->second.c_str());
}
double GetFloat(DWORD Column, double Default = 0.0f)
{
std::map<DWORD, std::string>::iterator it = this->Columns.find(Column);
if(it == this->Columns.end())
{
return Default;
}
return atof(it->second.c_str());
}
};
class TokenizerSection
{
public:
std::map<DWORD, TokenizerRow> Rows;
int RowCount;
};
class TokenizerGroup
{
public:
std::map<DWORD, TokenizerSection> Sections;
bool GetSection(DWORD Index, TokenizerSection & section)
{
std::map<DWORD, TokenizerSection>::iterator it = this->Sections.find(Index);
if(it == this->Sections.end())
{
return false;
}
else
{
section = it->second;
return true;
}
}
};
class Tokenizer
{
private:
char* m_pBuffer;
DWORD m_pBufferSize;
DWORD m_pBufferIndex;
public:
bool ParseLine(std::string line, TokenizerRow & pRow)
{
std::string data = "";
char* dump = (char*)line.c_str();
bool openstring = false;
bool clearingspace = true;
int column = 0;
for(unsigned int i = 0; i < line.length(); i++)
{
if(clearingspace)
{
if(dump[i] == ' ' || dump[i] == '\t')
{
continue;
}
clearingspace = false;
}
if(openstring)
{
if(dump[i] == '"')
{
openstring = false;
continue;
}
data += dump[i];
}
else
{
if(dump[i] == '"')
{
if(data != "")
{
return false;
}
openstring = true;
continue;
}
else
{
if(dump[i] == '\t' || dump[i] == ' ')
{
if(data != "")
{
pRow.Columns[column++] = data;
data = "";
}
continue;
}
data += dump[i];
}
}
}
if(data != "")
{
pRow.Columns[column++] = data;
}
data = "";
pRow.ColumnCount = column;
return true;
}
bool ParseFile(std::string file, TokenizerGroup & tok)
{
std::fstream f(file.c_str(), std::ios::in);
if(f.is_open())
{
if(f.good())
{
TokenizerSection sec = TokenizerSection();
int current_sec = 0;
int sec_index = 0;
bool sec_open = false;
while(!f.eof())
{
char temp[4096];
char* dump = NULL;
std::string line = "";
ZeroMemory(&temp[0], 4096);
f.getline(&temp[0], 4095);
line.assign(&temp[0]);
dump = (char*)line.c_str();
int start = 0;
int end = 0;
for(DWORD i = 0; i < line.length(); i++)
{
if(dump[i] != ' ' && dump[i] != '\t')
{
break;
}
start++;
}
for(DWORD i = line.length()-1; i >= 0; i--)
{
if(dump[i] != ' ' && dump[i] != '\t')
{
break;
}
end++;
}
line = line.substr(start, line.length() - end - start);
std::string::size_type k = 0;
while((k=line.find(13,k))!=line.npos)
{
line.erase(k, 1);
}
if(line.substr(0, 2) == "//") continue;
if(line[0] == '#') continue;
if(line.compare("end") == false)
{
if(sec_open == false)
{
return false; // falha de sintaxe
}
sec_open = false;
sec.RowCount = sec_index;
tok.Sections[current_sec] = sec;
continue;
}
if(line == "") continue;
TokenizerRow row;
if(!this->ParseLine(line, row))
return false;
if(row.ColumnCount == 1)
{
if(sec_open == false)
{
sec_index = 0;
current_sec = row.GetInt(0, 0);
sec = TokenizerSection();
sec_open = true;
continue;
}
}
sec.Rows[sec_index++] = row;
}
}
else
{
f.close();
return false;
}
f.close();
return true;
}
else
{
return false;
}
}
};
*/
}
ps. the current code parses files like these:
Code:
# comment line
// comment line
0 {
12345 1234.5 "string column"
}
and the big commented section contains code for parsing Webzen-style files... it doesn't use boost::tokenizer, but I don't recommend using it because of memory leaks
Code:
0
12345 1234.5 "string column"
end
!!comments after columns are not supported!!
like:
Code:
0 {
col1 col2 col3 // comment
}
-----------
simple usage:
Code:
DarKTeaM::Tokenizer token ("parsethis.txt");
token[0] = access to section 0
token[0][0] = access to section 0, and row 0
token[0][0][0] = access to section 0, row 0, and column 0
token[0][0][0].Function() = where Function converts to your requested type... like ToInteger(), ToString(), etc...
there are some functions for checking row count, column count, section count, if it exists or not, and things like that... but let intellisense help you with that
if you can't compile it, it's not my fault...
btw, you'll need boost lib installed...