您的位置:首页 > 其它

VW源码阅读笔记

2016-03-10 17:01 357 查看

以下代码摘自 global_data.h：

// Central per-session state object for Vowpal Wabbit. One instance holds the
// input parser, the learner (reduction) stack, feature-hashing configuration,
// regularization settings, and runtime accounting shared across the pipeline.
// NOTE(review): this is a declaration only — member semantics below that go
// beyond the original inline comments are inferred from names and are marked
// as assumptions to confirm against the implementation.
struct vw
{ shared_data* sd;              // statistics/state shared between parser and learner — presumably; confirm
parser* p;                      // example parser / input source
#ifndef _WIN32
pthread_t parse_thread;         // background parsing thread (POSIX)
#else
HANDLE parse_thread;            // background parsing thread (Windows)
#endif
AllReduceType all_reduce_type;  // which all-reduce implementation to use for cluster mode
AllReduce* all_reduce;          // all-reduce communication state (cluster/parallel training)

LEARNER::base_learner* l;//the top level learner
LEARNER::base_learner* scorer;//a scoring function
LEARNER::base_learner* cost_sensitive;//a cost sensitive learning algorithm.

void learn(example*);           // run one example through the learner stack

// Callback to update running min/max label bounds in sd — presumably; confirm.
void (*set_minmax)(shared_data* sd, float label);

size_t current_pass;            // index of the pass over the data currently in progress

uint32_t num_bits; // log_2 of the number of features.
bool default_bits; // true if num_bits was left at its default — presumably; confirm

string data_filename; // was vm["data"]

bool daemon;               // run in daemon (server) mode — presumably; confirm
size_t num_children;       // number of child processes/workers in daemon mode — presumably; confirm

bool save_per_pass;        // save the model after every pass
float initial_weight;      // initial value for each weight
float initial_constant;    // initial value of the constant (bias) feature

bool bfgs;                 // use L-BFGS optimization instead of SGD
bool hessian_on;           // use second-order (Hessian) information — presumably; confirm

bool save_resume;          // save extra state so training can be resumed
version_struct model_file_ver;  // version of the model file being read/written
double normalized_sum_norm_x;   // accumulated norm for normalized updates — presumably; confirm
bool vw_is_main;  // true if vw is executable; false in library mode

// Command-line handling (boost::program_options).
po::options_description opts;       // all registered options
po::options_description* new_opts;  // options added by the reduction currently being set up — presumably; confirm
po::variables_map vm;               // parsed option values
std::stringstream* file_options;    // options to be serialized into the model file — presumably; confirm
vector<std::string> args;           // raw command-line arguments

void* /*Search::search*/ searchstr; // opaque pointer to search task state (see commented type)

uint32_t wpp;                       // weights per problem — presumably; confirm

int stdout_fileno;                  // file descriptor used for stdout output

// Per-feature regularization files (input/output/text dump).
std::string per_feature_regularizer_input;
std::string per_feature_regularizer_output;
std::string per_feature_regularizer_text;

float l1_lambda; //the level of l_1 regularization to impose.
float l2_lambda; //the level of l_2 regularization to impose.
float power_t;//the power on learning rate decay.
int reg_mode;   // bitmask/selector of which regularizers are active — presumably; confirm

size_t pass_length;      // number of examples per pass (when not a full file pass) — presumably; confirm
size_t numpasses;        // total number of passes to run
size_t passes_complete;  // number of passes finished so far
size_t parse_mask; // (1 << num_bits) - 1; mask applied to hashed feature indices at parse time
bool permutations; // if true - permutations of features generated instead of simple combinations. false by default
v_array<v_string> interactions; // interactions of namespaces to cross.
std::vector<std::string> pairs; // pairs of features to cross.
std::vector<std::string> triples; // triples of features to cross.
bool ignore_some;        // true if any namespace is ignored
bool ignore[256];//a set of namespaces to ignore

bool redefine_some;          // --redefine param was used
unsigned char redefine[256]; // keeps new chars for namespaces

std::vector<std::string> ngram_strings;  // raw --ngram arguments
std::vector<std::string> skip_strings;   // raw --skips arguments
uint32_t ngram[256];//ngrams to generate.
uint32_t skips[256];//skips in ngrams.
std::vector<std::string> limit_strings; // descriptor of feature limits
uint32_t limit[256];//count to limit features by
uint32_t affix_features[256]; // affixes to generate (up to 8 per namespace)
bool     spelling_features[256]; // generate spelling features for which namespace
vector<string> dictionary_path;  // where to look for dictionaries
vector<feature_dict*> namespace_dictionaries[256]; // each namespace has a list of dictionaries attached to it
vector<dictionary_info> loaded_dictionaries; // which dictionaries have we loaded from a file to memory?

bool multilabel_prediction;  // predictions are multilabel — presumably; confirm
bool audit;//should I print lots of debugging information?
bool quiet;//Should I suppress progress-printing of updates?
bool training;//Should I train if label data is available?
bool active;                 // active learning mode
bool adaptive;//Should I use adaptive individual learning rates?
bool normalized_updates; //Should every feature be normalized
bool invariant_updates; //Should we use importance aware/safe updates
size_t random_seed;          // seed for the RNG
bool random_weights;         // initialize weights randomly
bool random_positive_weights; // for initialize_regressor w/ new_mf
bool add_constant;           // add the constant (bias) feature to every example
bool nonormalize;            // disable update normalization — presumably; confirm
bool do_reset_source;        // reset the input source between passes — presumably; confirm
bool holdout_set_off;        // true disables the holdout set
bool early_terminate;        // stop early when holdout loss stops improving — presumably; confirm
uint32_t holdout_period;     // every holdout_period-th example goes to the holdout set — presumably; confirm
uint32_t holdout_after;      // examples after this count go to the holdout set — presumably; confirm
size_t check_holdout_every_n_passes;  // default: 1, but search might want to set it higher if you spend multiple passes learning a single policy

size_t normalized_idx; //offset idx where the norm is stored (1 or 2 depending on whether adaptive is true)

uint32_t lda;                // number of LDA topics (0 = LDA off) — presumably; confirm

std::string text_regressor_name;     // file to write the human-readable model to
std::string inv_hash_regressor_name; // file to write the inverted-hash model to

// Total number of weight slots: 2^num_bits.
size_t length () { return ((size_t)1) << num_bits; };

// Factory functions for each reduction enabled on the command line — presumably; confirm.
v_array<LEARNER::base_learner* (*)(vw&)> reduction_stack;

//Prediction output
v_array<int> final_prediction_sink; // set to send global predictions to.
int raw_prediction; // file descriptors for text output.

void (*print)(int,float,float,v_array<char>);  // callback to emit a numeric prediction
void (*print_text)(int, string, v_array<char>); // callback to emit a text prediction
loss_function* loss;                            // loss function used for training

char* program_name;  // argv[0]

bool stdin_off;      // do not read examples from stdin

//runtime accounting variables.
float initial_t;        // initial value of the example counter t
float eta;//learning rate control.
float eta_decay_rate;   // multiplicative decay of eta per pass — presumably; confirm
time_t init_time;       // wall-clock time at initialization

std::string final_regressor_name; // file to save the final model to
regressor reg;                    // the weight vector(s)

size_t max_examples; // for TLC

bool hash_inv;       // keep the inverse hash (feature-name) table — presumably; confirm
bool print_invert;   // print the inverted-hash model — presumably; confirm

// Set by --progress <arg>
bool  progress_add;   // additive (rather than multiplicative) progress dumps
float progress_arg;   // next update progress dump multiplier

bool seeded; // whether the instance is sharing model state with others

// Maps feature names back to indices — presumably; confirm.
std::map< std::string, size_t> name_index_map;

vw();  // constructor sets defaults (defined in the .cc)
};
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  源码