% Journal article in Applied Energy (2024), cited by key schmitz2024123688.
% The pages field appears to carry the article number (it matches the DOI suffix), not a page range.
% The doi field stores the bare identifier; bibliography styles add the resolver prefix themselves.
% The month field uses the predefined macro so each style can render it in its own format.
@article{schmitz2024123688,
  author   = {Schmitz, Simon and Brucke, Karoline and Kasturi, Pranay and Ansari, Esmail and Klement, Peter},
  title    = {Forecast-based and data-driven reinforcement learning for residential heat pump operation},
  journal  = {Applied Energy},
  year     = {2024},
  month    = oct,
  pages    = {123688},
  doi      = {10.1016/j.apenergy.2024.123688},
  type     = {article},
  abstract = {Electrified residential heating systems have great potential for flexibility provision to the electricity grid with advanced operation control mechanisms being able to harness that. In this work, we therefore apply a reinforcement learning (RL) approach for the operation of a residential heat pump in a simulation study and compare the results with a classical rule-based approach. Doing so, we consider an apartment complex with 100 living units and a central heat pump along with a central hot water tank serving as heat storage. Unlike other studies in the field, we focus on a data driven approach where no building model is required and living comfort of the residents is never compromised. Both factors maximize the applicability in real world buildings. Additionally, we examine the effects of uncertainty on the heat pump operation. This is carried out by testing four different observation spaces each with different data visibility and availability to the RL agent. With that we also simulate the heat pump operation under forecast conditions which has not been done before to the best of our knowledge. We find that the inertia of typical residential heat systems is high enough so that missing or uncertain information has only a minor effect on the operation. Compared to the rule-based approach all RL agents are able to exploit variable electricity prices and the flexibility of the heat storage in such a way, that electricity costs and energy consumption can be significantly reduced.
Additionally, a large proportion of the nominal electrical power of the installed heat pump could be saved with the presented intelligent operation. The robustness of the approach is shown by running ten independent training and testing cycles for all setups with reproducible results.},
}

@COMMENT{Bibtex file generated on }