2021
Zhang, Jiajia; Feng, Tao; Timmermans, Harry; Lin, Zhengkui
Improved imputation of rule sets in class association rule modeling: application to transportation mode choice Journal Article
In: Transportation, 2021, ISSN: 0049-4488, (Funding Information: This work was supported by China Scholarship Council.).
Abstract | Links | BibTeX | Tags: Class association rules, FP-tree, Rule merging, Transportation mode choice
% Journal article in Transportation (Springer); the citation key is unchanged.
% The exported volume/number values were the placeholders "XX" and "X", so both
% fields are left out until the real values are known.
% The percent sign in the abstract is escaped so that it cannot start a LaTeX
% comment if the field is ever typeset.
@article{c8074a50b1504dc9bbb68a3a8a592d14,
  author    = {Zhang, Jiajia and Feng, Tao and Timmermans, Harry and Lin, Zhengkui},
  title     = {Improved imputation of rule sets in class association rule modeling: application to transportation mode choice},
  journal   = {Transportation},
  publisher = {Springer},
  year      = {2021},
  date      = {2021-11-26},
  doi       = {10.1007/s11116-021-10238-9},
  issn      = {0049-4488},
  abstract  = {Predicting transportation mode choice is a critical component of forecasting travel demand. Recently, machine learning methods have become increasingly more popular in predicting transportation mode choice. Class association rules (CARs) have been applied to transportation mode choice, but the application of the imputed rules for prediction remains a long-standing challenge. Based on CARs, this paper proposes a new rule merging approach, called CARM, to improve predictive accuracy. In the suggested approach, first, CARs are imputed from the frequent pattern tree (FP-tree) based on the frequent pattern growth (FP-growth) algorithm. Next, the rules are pruned based on the concept of pessimistic error rate. Finally, the rules are merged to form new rules without increasing predictive error. Using the 2015 Dutch National Travel Survey, the performance of suggested model is compared with the performance of CARIG that uses the information gain statistic to generate new rules, class-based association rules (CBA), decision trees (DT) and the multinomial logit (MNL) model. In addition, the proposed model is assessed using a ten-fold cross validation test. The results show that the accuracy of the proposed model is 91.1\%, which outperforms CARIG, CBA, DT and the MNL model.},
  note      = {Funding Information: This work was supported by China Scholarship Council.},
  keywords  = {Class association rules, FP-tree, Rule merging, Transportation mode choice},
  pubstate  = {published},
  tppubtype = {article}
}
Zhang, Jiajia; Feng, Tao; Lin, Zhengkui; Timmermans, Harry J P
Advancing Association Rule Base on Gini Impurity Statistic for Predicting Transportation Mode Choice Proceedings Article
In: 100th Transportation Research Board Annual Meeting, 2021, (Conference date: 21-01-2021 Through 29-01-2021).
Abstract | BibTeX | Tags: Class association rules, Gini impurity, Transportation mode choice, Weight of rules
% Conference paper presented at the 100th Transportation Research Board Annual
% Meeting; the citation key is unchanged.
% The meeting name now lives in booktitle (a required field for this entry
% type) instead of note; note keeps only the conference dates.
% "Gini" is brace-protected so sentence-casing styles keep its capital letter.
@inproceedings{Zhang2021,
  author    = {Zhang, Jiajia and Feng, Tao and Lin, Zhengkui and Timmermans, Harry J P},
  title     = {Advancing Association Rule Base on {Gini} Impurity Statistic for Predicting Transportation Mode Choice},
  booktitle = {100th Transportation Research Board Annual Meeting},
  year      = {2021},
  date      = {2021-01-01},
  abstract  = {Recently, machine learning approaches have been applied to predict transportation mode choice as an alternative to the more commonly used discrete choice models. General class association rules (CARs) have been introduced as a promising machine learning method, but the interpretability of the prediction results in terms of the underlying behavioral decision-making process has remained a concern. In an attempt to improve CARs, this study proposes a more advanced association rule model (named CARGIGI) with stronger interpretability. Based on the original CARIG approach that uses information gain (IG) statistic for improving the predictive accuracy, in this model, the Gini impurity (GI) statistic is used to generate new rules for improving predictive accuracy and calculate the relative importance of the variables, that of the variable levels and the weight of rules in transportation mode decision process. The weight of rules is introduced as a new pruning indicator to improve the predictive accuracy, while the relative importance of the level of a variable is used to enhance the behavioral interpretability of the results. The suggested approach is applied to the 2015 Dutch National Travel Survey. Results indicate that travel distance, OV card usage frequency, travel time, and travel purpose are the most important variables, while travel party and gender are the least important variables for predicting transportation mode choice. In addition, a 10-fold cross validation test is conducted to validate the advanced model. The results show that the newly proposed model outperform both the selected machine learning algorithms and the MNL model.},
  note      = {Conference date: 21-01-2021 Through 29-01-2021},
  keywords  = {Class association rules, Gini impurity, Transportation mode choice, Weight of rules},
  pubstate  = {published},
  tppubtype = {inproceedings}
}