% "Intelligent Autonomous Systems" is the title of the containing volume, not a
% publisher, so it is recorded as booktitle of a proceedings paper.
% NOTE(review): entry type inferred from the venue name -- confirm against the
% publication record.
@inproceedings{CTT100420620,
  author    = {Kimura, Hajime},
  title     = {Reinforcement Learning for Continuous Action using Stochastic Gradient Ascent},
  booktitle = {Intelligent Autonomous Systems},
  year      = {1998},
}

% Journal name and author spacing follow the spelling used by CTT100433451 so
% the same journal and author are not stored under two spellings.
@article{CTT100471594,
  author  = {木村　元},
  title   = {強化学習による４足ロボットの歩行動作獲得},
  journal = {電気学会電子・情報・システム部門誌},
  year    = {2002},
}

% The ASCII term in the title is braced so sentence-casing styles keep its capitals.
@article{CTT100458005,
  author  = {佐藤　誠 and 木村　元 and 小林　重信},
  title   = {報酬の分散を推定するＴＤアルゴリズムと{Mean-Variance}強化学習法の提案},
  journal = {人工知能学会論文誌},
  year    = {2001},
}

% Conference presentation: the meeting name belongs in booktitle, not journal.
@inproceedings{CTT100458014,
  author    = {荒牧　岳志 and 木村　元 and 小俣　透 and 小林　重信},
  title     = {強化学習による２アクチュエータ５リンク環状ロボットの移動動作獲得},
  booktitle = {第１９回日本ロボット学会学術講演会},
  year      = {2001},
}

% Symposium paper: the meeting name belongs in booktitle, not journal.
@inproceedings{CTT100458013,
  author    = {後藤　正徳 and 木村　元 and 小林　重信},
  title     = {トランザクション処理におけるタイムアウト間隔の学習},
  booktitle = {計測自動制御学会第２８回知能システムシンポジウム},
  year      = {2001},
}

% Symposium paper: the meeting name belongs in booktitle, not journal.
@inproceedings{CTT100458012,
  author    = {青木　圭 and 木村　元 and 小林　重信},
  title     = {強化学習による上下水道系の制御},
  booktitle = {計測自動制御学会第２８回知能システムシンポジウム},
  year      = {2001},
}

% The ASCII term in the title is braced so sentence-casing styles keep its capitals.
@article{CTT100457988,
  author  = {木村　元 and 小林　重信},
  title   = {確率的２分木の行動選択を用いた{Actor-Critic}アルゴリズム：多数の行動を扱う強化学習},
  journal = {計測自動制御学会論文集},
  year    = {2001},
}

% Proceedings paper: the proceedings title belongs in booktitle, not journal.
@inproceedings{CTT100457989,
  author    = {Kimura, Hajime and Yamashita, Toru and Kobayashi, Shigenobu},
  title     = {Reinforcement learning of walking behavior for a four-legged robot},
  booktitle = {Proceedings of the 40th {IEEE} Conference on Decision and Control},
  year      = {2001},
}

% Symposium paper: the meeting name belongs in booktitle, not journal.
% The ASCII term in the title is braced so sentence-casing styles keep its capitals.
@inproceedings{CTT100457991,
  author    = {木村　元 and 小林　重信},
  title     = {２分木構造の{Actor-Critic}による２自由度ロボットの強化学習},
  booktitle = {計測自動制御学会第１３回自律分散システムシンポジウム},
  year      = {2001},
}

% Symposium paper: the meeting name belongs in booktitle, not journal.
@inproceedings{CTT100457992,
  author    = {山下　透 and 木村　元 and 小林　重信},
  title     = {強化学習による多足歩行ロボットの実現},
  booktitle = {計測自動制御学会第１３回自律分散システムシンポジウム},
  year      = {2001},
}

% Symposium paper: the meeting name belongs in booktitle, not journal.
@inproceedings{CTT100458004,
  author    = {井口　圭一 and 木村　元 and 小林　重信},
  title     = {ＧＡによる並列二重倒立振子の振り上げ安定化制御},
  booktitle = {計測自動制御学会第１３回自律分散システムシンポジウム},
  year      = {2001},
}

% ASCII terms in the title are braced so sentence-casing styles keep their capitals.
@article{CTT100440443,
  author  = {木村　元 and 小林　重信},
  title   = {{Actor}に適正度の履歴を用いた{Actor-Critic}アルゴリズム--不完全な{Value-Function}のもとでの強化学習},
  journal = {人工知能学会誌},
  year    = {2000},
}

% Symposium paper: the meeting name belongs in booktitle, not journal.
% Author spacing matches the form used for the same author elsewhere in this file.
@inproceedings{CTT100440444,
  author    = {木村　元},
  title     = {確率的２分木の行動選択を用いた強化学習による多数の類似行動の扱いについて},
  booktitle = {計測自動制御学会 第27回知能システムシンポジウム},
  year      = {2000},
}

% Journal name corrected ("Intelligerce" was a typo); the trailing period was
% dropped from the title because the bibliography style adds punctuation itself.
@article{CTT100420617,
  author  = {Kimura, Hajime},
  title   = {Reinforcement Learning for Crawling Robot Motion Using Stochastic Gradient Ascent},
  journal = {Journal of Japanese Society for Artificial Intelligence},
  year    = {1999},
}

% Same paper as CTT100431822 in this file; both keys are kept so existing
% citations of either one still resolve. The venue is written out in full, in
% the same form as that record, and stored as booktitle of a proceedings paper.
@inproceedings{CTT100433463,
  author    = {Kimura, Hajime and Kobayashi, Shigenobu},
  title     = {Stochastic Real-Valued Reinforcement Learning to Solve a Non-Linear Control Problem},
  booktitle = {1999 {IEEE} International Conference on Systems, Man, and Cybernetics},
  year      = {1999},
}

% Same paper as CTT100433463 in this file; both keys are kept so existing
% citations of either one still resolve. Conference name moved from journal to
% booktitle; stray double spaces removed.
@inproceedings{CTT100431822,
  author    = {Kimura, Hajime and Kobayashi, Shigenobu},
  title     = {Stochastic Real-Valued Reinforcement Learning to Solve a Non-Linear Control Problem},
  booktitle = {1999 {IEEE} International Conference on Systems, Man, and Cybernetics},
  year      = {1999},
}

% Family/given-name separator added to the second author so both names in the
% field use the same form as the rest of the file.
@article{CTT100433451,
  author  = {木村　元 and 小林　重信},
  title   = {確率的傾斜法を用いた強化学習とロボットへの適用},
  journal = {電気学会電子・情報・システム部門誌},
  year    = {1999},
}

% Proceedings paper. Page range and publisher, previously packed into the
% journal string, now live in their own fields; the repeated year was dropped.
@inproceedings{CTT100431821,
  author    = {Kimura, Hajime and Kobayashi, Shigenobu},
  title     = {Efficient Non-Linear Control by Combining {Q-learning} with Local Linear Controllers},
  booktitle = {Proceedings of the 16th International Conference on Machine Learning},
  pages     = {210--219},
  publisher = {Morgan Kaufmann Publishers},
  year      = {1999},
}

% Family/given-name separators added so all three names use the same form as
% the rest of the file.
@article{CTT100433450,
  author  = {木村　元 and 宮崎　和光 and 小林　重信},
  title   = {強化学習システムの設計指針},
  journal = {計測と制御},
  year    = {1999},
}

% Proceedings paper: the proceedings title belongs in booktitle, not journal.
@inproceedings{CTT100420618,
  author    = {Kimura, Hajime and Miyazaki, Kazuteru and Kobayashi, Shigenobu},
  title     = {An Analysis of Actor/Critic Algorithms using Eligibility Traces : Reinforcement Learning with Imperfect Value Function},
  booktitle = {Proceedings of the 15th International Conference on Machine Learning},
  year      = {1998},
}

% Title stored in consistent Title Case without the hard-coded trailing period;
% the proper noun is braced so sentence-casing styles keep its capital.
@article{CTT100420623,
  author  = {Kimura, Hajime},
  title   = {Reinforcement Learning for Partially Observable {Markov} Decision Processes},
  journal = {Journal of Japanese Society for Artificial Intelligence},
  year    = {1997},
}

% Proceedings paper: the proceedings title belongs in booktitle, not journal.
% The acronym is braced so sentence-casing styles do not lowercase it.
@inproceedings{CTT100420619,
  author    = {Kimura, Hajime},
  title     = {Reinforcement Learning in {POMDPs} with Function Approximation},
  booktitle = {Proceedings of the 14th International Conference on Machine Learning},
  year      = {1997},
}

% Title stored in consistent Title Case; the proper noun is braced so
% sentence-casing styles keep its capital.
@article{CTT100420616,
  author  = {Kimura, Hajime},
  title   = {Reinforcement Learning in Partially Observable {Markov} Decision Processes : A Stochastic Gradient Method},
  journal = {Journal of Japanese Society for Artificial Intelligence},
  year    = {1996},
}

% Proceedings paper: the proceedings title belongs in booktitle, not journal.
@inproceedings{CTT100420621,
  author    = {Kimura, Hajime},
  title     = {Reinforcement Learning by Stochastic Hill Climbing on Discounted Reward},
  booktitle = {Proceedings of the 12th International Conference on Machine Learning},
  year      = {1995},
}

% Proceedings paper: the proceedings title belongs in booktitle, not journal.
@inproceedings{CTT100420622,
  author    = {Kimura, Hajime},
  title     = {Reinforcement learning with delayed rewards on continuous state space},
  booktitle = {Proceedings of the 3rd International Conference on Fuzzy Logic, Neural Nets and Soft Computing},
  year      = {1994},
}

% This key was previously defined twice (a misc stub and a phdthesis record with
% identical author, title and year), which BibTeX reports as a repeated entry.
% The two are merged into the more specific thesis record, which also carries
% the school; the key is unchanged so existing citations still resolve.
@phdthesis{CTT100594339,
  author = {木村　元},
  title  = {部分観測マルコフ決定過程下での強化学習：確率的傾斜法による接近},
  school = {東京工業大学},
  year   = {1997},
}