//*input documents and features --------
KEYWORDS_1_ENABLE = true;
KEYWORDS_2_ENABLE = true;
TEXT_ENABLE = true;

KEYWORDS_1_WEIGHT = 1;
KEYWORDS_2_WEIGHT = 1;
TEXT_WEIGHT = .5;

DATA_KEYWORDS_1_PATH = data/keywords_1/;
DATA_KEYWORDS_2_PATH = data/keywords_2/;
DATA_TEXT_PATH = data/text/;
DATA_TOPIC_PATH = data/topic/; //***NOT USED***
DATA_DATE_PATH = data/date; //***NOT USED***

//*KeyGraph Settings -----------------
HARD_CLUSTERING = false; // KeyGraph by default does soft clustering by allowing documents to be assigned to multiple topics. By setting this parameter to 'true' KeyGraph will not assign a document to multiple topics.

CENTROID_KEYWORD_DF_MIN = 3; //***NOT USED***
SIMILARITY_KEYWORD_DF_MIN = 5; //***NOT USED***

NODE_DF_MIN = 7; // This parameter is used to filter rare words from the KeyGraph. Its value is the minimum document frequency.
NODE_DF_MAX = .084; // This parameter is used to filter common words from the KeyGraph. Its value is the maximum document frequency, specified as a percentage.

EDGE_CORRELATION_MIN = .15434; // This parameter is used to filter rare links from the KeyGraph. Its value is the minimum correlation between the corresponding words.
EDGE_DF_MIN = 5; // This parameter is used to filter rare links from the KeyGraph. Its value is the minimum document frequency.
EDGE_CP_MIN_TO_DUPLICATE = .70; //***NOT USED***

DOC_KEYWORDS_SIZE_MIN = 4; // This parameter is used to filter small documents from the dataset. Its value is the minimum number of keywords for a document.
DOC_SIM2KEYGRAPH_MIN = .3524; // This parameter is used to assign documents to topics. Its value specifies the minimum cosine similarity between a document and the topic feature.
DOC_CHAR_SIZE_MAX = 2000; //***NOT USED***
DOC_SIM2CENTROID_MIN = 0.00; //***NOT USED***

CLUSTER_NODE_SIZE_MAX = 500;
CLUSTER_NODE_SIZE_MIN = 5;
CLUSTER_VAR_MAX = 1.99; //***NOT USED***
CLUSTER_INTERSECT_MIN = .15;

TOPIC_MAX = 1000; //***NOT USED***
TOPIC_MIN_SIZE = 5;

CLUSTERING_ALG= betweenness;

Last edited May 2, 2011 at 6:12 PM by sayyadi, version 10

Comments

No comments yet.