Kea 1.9.11
ha_service.cc
Go to the documentation of this file.
1// Copyright (C) 2018-2021 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
9#include <command_creator.h>
10#include <ha_log.h>
11#include <ha_service.h>
12#include <ha_service_states.h>
14#include <cc/data.h>
15#include <config/timeouts.h>
16#include <dhcp/iface_mgr.h>
17#include <dhcpsrv/cfgmgr.h>
18#include <dhcpsrv/lease_mgr.h>
20#include <http/date_time.h>
21#include <http/response_json.h>
24#include <util/stopwatch.h>
25#include <boost/pointer_cast.hpp>
26#include <boost/make_shared.hpp>
27#include <boost/weak_ptr.hpp>
28#include <functional>
29#include <sstream>
30
31using namespace isc::asiolink;
32using namespace isc::config;
33using namespace isc::data;
34using namespace isc::dhcp;
35using namespace isc::hooks;
36using namespace isc::http;
37using namespace isc::log;
38using namespace isc::util;
39namespace ph = std::placeholders;
40
41namespace isc {
42namespace ha {
43
52
// Constructs the HA service.
//
// Stores the supplied IO service, network state, configuration and server
// type, initializes the query filter from the configuration and sizes the
// lease update backlog using the configured delayed updates limit. The
// network state is reset for the HA_COMMAND origin. When multi-threading is
// not enabled in the configuration a client is created in ST mode on the IO
// service. Otherwise an MT-mode client is created and, if the configuration
// requests a dedicated listener, a CmdHttpListener is instantiated for the
// address and port taken from this server's URL.
//
// io_service - IO service used by the server.
// network_state - object holding the state of the DHCP service.
// config - HA configuration.
// server_type - selects between the DHCPv4 and the other server variant.
//
// Throws Unexpected when a dedicated listener is configured and the hostname
// in this server's URL cannot be converted to an IP address.
53HAService::HAService(const IOServicePtr& io_service, const NetworkStatePtr& network_state,
54 const HAConfigPtr& config, const HAServerType& server_type)
55 : io_service_(io_service), network_state_(network_state), config_(config),
56 server_type_(server_type), client_(), listener_(), communication_state_(),
57 query_filter_(config), mutex_(), pending_requests_(),
58 lease_update_backlog_(config->getDelayedUpdatesLimit()) {
59
60 if (server_type == HAServerType::DHCPv4) {
62
63 } else {
65 }
66
67 network_state_->reset(NetworkState::Origin::HA_COMMAND);
68
70
71 // Create the client and(or) listener as appropriate.
72 if (!config_->getEnableMultiThreading()) {
73 // Not configured for multi-threading, start a client in ST mode.
74 client_.reset(new HttpClient(*io_service_, 0));
75 } else {
76 // Create an MT-mode client.
78 config_->getHttpClientThreads(), true));
79
80 // If we're configured to use our own listener create and start it.
81 if (config_->getHttpDedicatedListener()) {
82 // Get the server address and port from this server's URL.
83 auto my_url = config_->getThisServerConfig()->getUrl();
84 IOAddress server_address(IOAddress::IPV4_ZERO_ADDRESS());
85 try {
86 // Since we do not currently support hostname resolution,
87 // we need to make sure we have an IP address here.
88 server_address = IOAddress(my_url.getStrippedHostname());
89 } catch (const std::exception& ex) {
// Any failure to build the address is reported uniformly as Unexpected.
90 isc_throw(Unexpected, "server Url:" << my_url.getStrippedHostname()
91 << " is not a valid IP address");
92 }
93
94 // Fetch how many threads the listener will use.
95 uint32_t listener_threads = config_->getHttpListenerThreads();
96
97 // Instantiate the listener.
98 listener_.reset(new CmdHttpListener(server_address, my_url.getPort(),
99 listener_threads));
100 }
101 }
102
104 .arg(HAConfig::HAModeToString(config->getHAMode()))
105 .arg(HAConfig::PeerConfig::roleToString(config->getThisServerConfig()->getRole()));
106}
107
109 // Stop client and/or listener.
111
112 network_state_->reset(NetworkState::Origin::HA_COMMAND);
113}
114
115void
117 StateModel::defineEvents();
118
119 defineEvent(HA_HEARTBEAT_COMPLETE_EVT, "HA_HEARTBEAT_COMPLETE_EVT");
120 defineEvent(HA_LEASE_UPDATES_COMPLETE_EVT, "HA_LEASE_UPDATES_COMPLETE_EVT");
121 defineEvent(HA_SYNCING_FAILED_EVT, "HA_SYNCING_FAILED_EVT");
122 defineEvent(HA_SYNCING_SUCCEEDED_EVT, "HA_SYNCING_SUCCEEDED_EVT");
123 defineEvent(HA_MAINTENANCE_NOTIFY_EVT, "HA_MAINTENANCE_NOTIFY_EVT");
124 defineEvent(HA_MAINTENANCE_START_EVT, "HA_MAINTENANCE_START_EVT");
125 defineEvent(HA_MAINTENANCE_CANCEL_EVT, "HA_MAINTENANCE_CANCEL_EVT");
126}
127
128void
130 StateModel::verifyEvents();
131
139}
140
141void
143 StateModel::defineStates();
144
146 std::bind(&HAService::backupStateHandler, this),
147 config_->getStateMachineConfig()->getStateConfig(HA_BACKUP_ST)->getPausing());
148
151 config_->getStateMachineConfig()->getStateConfig(HA_COMMUNICATION_RECOVERY_ST)->getPausing());
152
154 std::bind(&HAService::normalStateHandler, this),
155 config_->getStateMachineConfig()->getStateConfig(HA_HOT_STANDBY_ST)->getPausing());
156
158 std::bind(&HAService::normalStateHandler, this),
159 config_->getStateMachineConfig()->getStateConfig(HA_LOAD_BALANCING_ST)->getPausing());
160
162 std::bind(&HAService::inMaintenanceStateHandler, this),
163 config_->getStateMachineConfig()->getStateConfig(HA_IN_MAINTENANCE_ST)->getPausing());
164
166 std::bind(&HAService::partnerDownStateHandler, this),
167 config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_DOWN_ST)->getPausing());
168
171 config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_IN_MAINTENANCE_ST)->getPausing());
172
174 std::bind(&HAService::passiveBackupStateHandler, this),
175 config_->getStateMachineConfig()->getStateConfig(HA_PASSIVE_BACKUP_ST)->getPausing());
176
178 std::bind(&HAService::readyStateHandler, this),
179 config_->getStateMachineConfig()->getStateConfig(HA_READY_ST)->getPausing());
180
182 std::bind(&HAService::syncingStateHandler, this),
183 config_->getStateMachineConfig()->getStateConfig(HA_SYNCING_ST)->getPausing());
184
186 std::bind(&HAService::terminatedStateHandler, this),
187 config_->getStateMachineConfig()->getStateConfig(HA_TERMINATED_ST)->getPausing());
188
190 std::bind(&HAService::waitingStateHandler, this),
191 config_->getStateMachineConfig()->getStateConfig(HA_WAITING_ST)->getPausing());
192}
193
194void
196 if (doOnEntry()) {
199
200 // Log if the state machine is paused.
202 }
203
204 // There is nothing to do in that state. This server simply receives
205 // lease updates from the partners.
207}
208
209void
211 if (doOnEntry()) {
214
215 // Log if the state machine is paused.
217 }
218
220
223
224 // Check if the clock skew is still acceptable. If not, transition to
225 // the terminated state.
226 } else if (shouldTerminate()) {
228
229 } else if (isPartnerStateInvalid()) {
231
232 } else {
233
234 // Transitions based on the partner's state.
235 switch (communication_state_->getPartnerState()) {
238 break;
239
242 break;
243
246 break;
247
248 case HA_TERMINATED_ST:
250 break;
251
253 if (shouldPartnerDown()) {
255
256 } else {
258 }
259 break;
260
261 case HA_WAITING_ST:
262 case HA_SYNCING_ST:
263 case HA_READY_ST:
264 // The partner seems to be waking up, perhaps after communication-recovery.
265 // If our backlog queue is overflown we need to synchronize our lease database.
266 // There is no need to send ha-reset to the partner because the partner is
267 // already synchronizing its lease database.
268 if (!communication_state_->isCommunicationInterrupted() &&
271 } else {
272 // Backlog was not overflown, so there is no need to synchronize our
273 // lease database. Let's wait until our partner completes synchronization
274 // and transitions to the load-balancing state.
276 }
277 break;
278
279 default:
280 // If the communication is still interrupted, let's continue sitting
281 // in this state until it is resumed or until the transition to the
282 // partner-down state, depending on what happens first.
283 if (communication_state_->isCommunicationInterrupted()) {
285 break;
286 }
287
288 // The communication has been resumed. The partner server must be in a state
289 // in which it can receive outstanding lease updates we collected. The number of
290 // outstanding lease updates must not exceed the configured limit. Finally, the
291 // lease updates must be successfully sent. If that all works, we will transition
292 // to the normal operation.
293 if ((communication_state_->getPartnerState() == getNormalState()) ||
294 (communication_state_->getPartnerState() == HA_COMMUNICATION_RECOVERY_ST)) {
296 // If our lease backlog was overflown or we were unable to send lease
297 // updates to the partner we should notify the partner that it should
298 // synchronize the lease database. We do it by sending ha-reset command.
299 if (sendHAReset()) {
301 }
302 break;
303 }
304 // The backlog was not overflown and we successfully sent our lease updates.
305 // We can now transition to the normal operation state. If the partner
306 // fails to send his outstanding lease updates to us it should send the
307 // ha-reset command to us.
309 break;
310 }
311
312 // The partner appears to be in unexpected state, we have exceeded the number
313 // of lease updates in a backlog or an attempt to send lease updates failed.
314 // In all these cases we follow plan B and transition to the waiting state.
315 // The server will then attempt to synchronize the entire lease database.
317 }
318 }
319
320 // When exiting this state we must ensure that lease updates backlog is cleared.
321 if (doOnExit()) {
323 }
324}
325
326void
328 // If we are transitioning from another state, we have to define new
329 // serving scopes appropriate for the new state. We don't do it if
330 // we remain in this state.
331 if (doOnEntry()) {
334
335 // Log if the state machine is paused.
337 }
338
340
343 return;
344 }
345
346 // Check if the clock skew is still acceptable. If not, transition to
347 // the terminated state.
348 if (shouldTerminate()) {
350 return;
351 }
352
353 // Check if the partner state is valid per current configuration. If it is
354 // in an invalid state let's transition to the waiting state and stay there
355 // until the configuration is corrected.
356 if (isPartnerStateInvalid()) {
358 return;
359 }
360
361 switch (communication_state_->getPartnerState()) {
364 break;
365
368 break;
369
372 break;
373
374 case HA_TERMINATED_ST:
376 break;
377
379 if (shouldPartnerDown()) {
381
382 } else if (config_->amAllowingCommRecovery()) {
384
385 } else {
387 }
388 break;
389
390 default:
392 }
393
394 if (doOnExit()) {
395 // Do nothing here but doOnExit() call clears the "on exit" flag
396 // when transitioning to the communication-recovery state. In that
397 // state we need this flag to be cleared.
398 }
399}
400
401void
403 // If we are transitioning from another state, we have to define new
404 // serving scopes appropriate for the new state. We don't do it if
405 // we remain in this state.
406 if (doOnEntry()) {
407 // In this state the server remains silent and waits for being
408 // shutdown.
411
412 // Log if the state machine is paused.
414
416 }
417
419
420 // We don't transition out of this state unless explicitly mandated
421 // by the administrator via a dedicated command which cancels
422 // the maintenance.
424}
425
426void
428 // If we are transitioning from another state, we have to define new
429 // serving scopes appropriate for the new state. We don't do it if
430 // we remain in this state.
431 if (doOnEntry()) {
432
433 bool maintenance = (getLastEvent() == HA_MAINTENANCE_START_EVT);
434
435 // It may be administratively disabled to handle partner's scope
436 // in case of failure. If this is the case we'll just handle our
437 // default scope (or no scope at all). The user will need to
438 // manually enable this server to handle partner's scope.
439 // If we're in the maintenance mode we serve all scopes because
440 // it is not a failover situation.
441 if (maintenance || config_->getThisServerConfig()->isAutoFailover()) {
443 } else {
445 }
447
448 // Log if the state machine is paused.
450
451 if (maintenance) {
452 // If we ended up in the partner-down state as a result of
453 // receiving the ha-maintenance-start command let's log it.
455 }
456 }
457
459
462 return;
463 }
464
465 // Check if the clock skew is still acceptable. If not, transition to
466 // the terminated state.
467 if (shouldTerminate()) {
469 return;
470 }
471
472 // Check if the partner state is valid per current configuration. If it is
473 // in an invalid state let's transition to the waiting state and stay there
474 // until the configuration is corrected.
475 if (isPartnerStateInvalid()) {
477 return;
478 }
479
480 switch (communication_state_->getPartnerState()) {
487 break;
488
489 case HA_READY_ST:
492 break;
493
494 case HA_TERMINATED_ST:
496 break;
497
498 default:
500 }
501}
502
503void
505 // If we are transitioning from another state, we have to define new
506 // serving scopes appropriate for the new state. We don't do it if
507 // we remain in this state.
508 if (doOnEntry()) {
510
512
513 // Log if the state machine is paused.
515
517 }
518
520
521 if (isModelPaused()) {
523 return;
524 }
525
526 // Check if the clock skew is still acceptable. If not, transition to
527 // the terminated state.
528 if (shouldTerminate()) {
530 return;
531 }
532
533 switch (communication_state_->getPartnerState()) {
536 break;
537 default:
539 }
540}
541
542void
544 // If we are transitioning from another state, we have to define new
545 // serving scopes appropriate for the new state. We don't do it if
546 // we remain in this state.
547 if (doOnEntry()) {
550
551 // In the passive-backup state we don't send heartbeat.
552 communication_state_->stopHeartbeat();
553
554 // Log if the state machine is paused.
556 }
558}
559
560void
562 // If we are transitioning from another state, we have to define new
563 // serving scopes appropriate for the new state. We don't do it if
564 // we remain in this state.
565 if (doOnEntry()) {
568
569 // Log if the state machine is paused.
571 }
572
574
577 return;
578 }
579
580 // Check if the clock skew is still acceptable. If not, transition to
581 // the terminated state.
582 if (shouldTerminate()) {
584 return;
585 }
586
587 // Check if the partner state is valid per current configuration. If it is
588 // in an invalid state let's transition to the waiting state and stay there
589 // until the configuration is corrected.
590 if (isPartnerStateInvalid()) {
592 return;
593 }
594
595 switch (communication_state_->getPartnerState()) {
600 break;
601
604 break;
605
608 break;
609
610 case HA_READY_ST:
611 // If both servers are ready, the primary server "wins" and is
612 // transitioned first.
613 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
616 } else {
618 }
619 break;
620
621 case HA_TERMINATED_ST:
623 break;
624
626 if (shouldPartnerDown()) {
628
629 } else {
631 }
632 break;
633
634 default:
636 }
637}
638
639void
641 // If we are transitioning from another state, we have to define new
642 // serving scopes appropriate for the new state. We don't do it if
643 // we remain in this state.
644 if (doOnEntry()) {
647
648 // Log if the state machine is paused.
650 }
651
654 return;
655 }
656
657 // Check if the clock skew is still acceptable. If not, transition to
658 // the terminated state.
659 if (shouldTerminate()) {
661 return;
662 }
663
664 // Check if the partner state is valid per current configuration. If it is
665 // in an invalid state let's transition to the waiting state and stay there
666 // until the configuration is corrected.
667 if (isPartnerStateInvalid()) {
669 return;
670 }
671
672 // We don't want to perform synchronous attempt to synchronize with
673 // a partner until we know that the partner is responding. Therefore,
674 // we wait for the heartbeat to complete successfully before we
675 // initiate the synchronization.
676 switch (communication_state_->getPartnerState()) {
677 case HA_TERMINATED_ST:
679 return;
680
682 // If the partner appears to be offline, let's transition to the partner
683 // down state. Otherwise, we'd be stuck trying to synchronize with a
684 // dead partner.
685 if (shouldPartnerDown()) {
687
688 } else {
690 }
691 break;
692
693 default:
694 // We don't want the heartbeat to interfere with the synchronization,
695 // so let's temporarily stop it.
696 communication_state_->stopHeartbeat();
697
698 // Timeout is configured in milliseconds. Need to convert to seconds.
699 unsigned int dhcp_disable_timeout =
700 static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
701 if (dhcp_disable_timeout == 0) {
702 ++dhcp_disable_timeout;
703 }
704
705 // Perform synchronous leases update.
706 std::string status_message;
707 int sync_status = synchronize(status_message,
708 config_->getFailoverPeerConfig()->getName(),
709 dhcp_disable_timeout);
710
711 // If the leases synchronization was successful, let's transition
712 // to the ready state.
713 if (sync_status == CONTROL_RESULT_SUCCESS) {
715
716 } else {
717 // If the synchronization was unsuccessful we're back to the
718 // situation that the partner is unavailable and therefore
719 // we stay in the syncing state.
721 }
722 }
723
724 // Make sure that the heartbeat is re-enabled.
726}
727
728void
730 // If we are transitioning from another state, we have to define new
731 // serving scopes appropriate for the new state. We don't do it if
732 // we remain in this state.
733 if (doOnEntry()) {
736
737 // In the terminated state we don't send heartbeat.
738 communication_state_->stopHeartbeat();
739
740 // Log if the state machine is paused.
742
744 }
745
747}
748
749void
751 // If we are transitioning from another state, we have to define new
752 // serving scopes appropriate for the new state. We don't do it if
753 // we remain in this state.
754 if (doOnEntry()) {
757
758 // Log if the state machine is paused.
760 }
761
762 // Only schedule the heartbeat for non-backup servers.
763 if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
764 (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
766 }
767
770 return;
771 }
772
773 // Backup server must remain in its own state.
774 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
776 return;
777 }
778
779 // We're not a backup server, so we're either primary or secondary. If this is
780 // a passive-backup mode of operation, we're primary and we should transition
781 // to the passive-backup state.
782 if (config_->getHAMode() == HAConfig::PASSIVE_BACKUP) {
784 return;
785 }
786
787 // Check if the clock skew is still acceptable. If not, transition to
788 // the terminated state.
789 if (shouldTerminate()) {
791 return;
792 }
793
794 // Check if the partner state is valid per current configuration. If it is
795 // in an invalid state let's sit in the waiting state until the configuration
796 // is corrected.
797 if (isPartnerStateInvalid()) {
799 return;
800 }
801
802 switch (communication_state_->getPartnerState()) {
809 case HA_READY_ST:
810 // If we're configured to not synchronize lease database, proceed directly
811 // to the "ready" state.
812 verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
813 break;
814
815 case HA_SYNCING_ST:
817 break;
818
819 case HA_TERMINATED_ST:
820 // We have checked above whether the clock skew is exceeding the threshold
821 // and we should terminate. If we're here, it means that the clock skew
822 // is acceptable. The partner may be still in the terminated state because
823 // it hasn't been restarted yet. Probably, this server is the first one
824 // being restarted after syncing the clocks. Let's just sit in the waiting
825 // state until the partner gets restarted.
828 break;
829
830 case HA_WAITING_ST:
831 // If both servers are waiting, the primary server 'wins' and is
832 // transitioned to the next state first.
833 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
834 // If we're configured to not synchronize lease database, proceed directly
835 // to the "ready" state.
836 verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
837
838 } else {
840 }
841 break;
842
844 if (shouldPartnerDown()) {
846
847 } else {
849 }
850 break;
851
852 default:
854 }
855}
856
// Transitions the state machine to the given state and reports the change.
//
// The labels of the current and the new state are obtained with
// getStateLabel() and converted to upper case. Unless the server runs in the
// passive-backup mode, the partner's state label is prepared in the same way.
// The transition itself is performed by calling transition() with the event
// returned by getNextEvent(). Afterwards, unless this is the passive-backup
// mode or this server has the backup role, one of three lease update related
// branches is taken, selected by shouldSendLeaseUpdates() for the failover
// peer and by amSendingLeaseUpdates().
//
// state - identifier of the state to transition to.
857void
858HAService::verboseTransition(const unsigned state) {
859 // Get current and new state name.
860 std::string current_state_name = getStateLabel(getCurrState());
861 std::string new_state_name = getStateLabel(state);
862
863 // Turn them to upper case so as they are better visible in the logs.
864 boost::to_upper(current_state_name);
865 boost::to_upper(new_state_name);
866
867 if (config_->getHAMode() != HAConfig::PASSIVE_BACKUP) {
868 // If this is load-balancing or hot-standby mode we also want to log
869 // partner's state.
870 auto partner_state = communication_state_->getPartnerState();
871 std::string partner_state_name = getStateLabel(partner_state);
872 boost::to_upper(partner_state_name);
873
874 // Log the transition.
876 .arg(current_state_name)
877 .arg(new_state_name)
878 .arg(partner_state_name);
879
880 } else {
881 // In the passive-backup mode we don't know the partner's state.
883 .arg(current_state_name)
884 .arg(new_state_name);
885 }
886
887 // If we're transitioning directly from the "waiting" to "ready"
888 // state it indicates that the database synchronization is
889 // administratively disabled. Let's remind the user about this
890 // configuration setting.
// Note that this check must precede transition(): it compares against the
// state the machine is in before the change.
891 if ((state == HA_READY_ST) && (getCurrState() == HA_WAITING_ST)) {
893 }
894
895 // Do the actual transition.
896 transition(state, getNextEvent());
897
898 // Inform the administrator whether or not lease updates are generated.
899 // Updates are never generated by a backup server so it doesn't make
900 // sense to log anything for the backup server.
901 if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
902 (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
903 if (shouldSendLeaseUpdates(config_->getFailoverPeerConfig())) {
905 .arg(new_state_name);
906
907 } else if (!config_->amSendingLeaseUpdates()) {
908 // Lease updates are administratively disabled.
910 .arg(new_state_name);
911
912 } else {
913 // Lease updates are not administratively disabled, but they
914 // are not issued because this is the backup server or because
915 // in this state the server should not generate lease updates.
917 .arg(new_state_name);
918 }
919 }
920}
921
922int
924 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
925 return (HA_BACKUP_ST);
926 }
927
928 switch (config_->getHAMode()) {
930 return (HA_LOAD_BALANCING_ST);
932 return (HA_HOT_STANDBY_ST);
933 default:
934 return (HA_PASSIVE_BACKUP_ST);
935 }
936}
937
938bool
940 if (isModelPaused()) {
942 unpauseModel();
943 return (true);
944 }
945 return (false);
946}
947
948void
950 // Inform the administrator if the state machine is paused.
951 if (isModelPaused()) {
952 std::string state_name = stateToString(getCurrState());
953 boost::to_upper(state_name);
955 .arg(state_name);
956 }
957}
958
959void
962}
963
964bool
966 return (inScopeInternal(query4));
967}
968
969bool
971 return (inScopeInternal(query6));
972}
973
974template<typename QueryPtrType>
975bool
976HAService::inScopeInternal(QueryPtrType& query) {
977 // Check if the query is in scope (should be processed by this server).
978 std::string scope_class;
979 const bool in_scope = query_filter_.inScope(query, scope_class);
980 // Whether or not the query is going to be processed by this server,
981 // we associate the query with the appropriate class.
982 query->addClass(dhcp::ClientClass(scope_class));
983 // The following is the part of the server failure detection algorithm.
984 // If the query should be processed by the partner we need to check if
985 // the partner responds. If the number of unanswered queries exceeds a
986 // configured threshold, we will consider the partner to be offline.
987 if (!in_scope && communication_state_->isCommunicationInterrupted()) {
988 communication_state_->analyzeMessage(query);
989 }
990 // Indicate if the query is in scope.
991 return (in_scope);
992}
993
994void
996 std::string current_state_name = getStateLabel(getCurrState());
997 boost::to_upper(current_state_name);
998
999 // DHCP service should be enabled in the following states.
1000 const bool should_enable = ((getCurrState() == HA_COMMUNICATION_RECOVERY_ST) ||
1007
1008 if (!should_enable && network_state_->isServiceEnabled()) {
1009 std::string current_state_name = getStateLabel(getCurrState());
1010 boost::to_upper(current_state_name);
1012 .arg(config_->getThisServerName())
1013 .arg(current_state_name);
1014 network_state_->disableService(NetworkState::Origin::HA_COMMAND);
1015
1016 } else if (should_enable && !network_state_->isServiceEnabled()) {
1017 std::string current_state_name = getStateLabel(getCurrState());
1018 boost::to_upper(current_state_name);
1020 .arg(config_->getThisServerName())
1021 .arg(current_state_name);
1022 network_state_->enableService(NetworkState::Origin::HA_COMMAND);
1023 }
1024}
1025
1026bool
1028 // Checking whether the communication with the partner is OK is the
1029 // first step towards verifying if the server is up.
1030 if (communication_state_->isCommunicationInterrupted()) {
1031 // If the communication is interrupted, we also have to check
1032 // whether the partner answers DHCP requests. The only cases
1033 // when we don't (can't) do it are: the hot standby configuration
1034 // in which this server is a primary and when the DHCP service is
1035 // disabled so we can't analyze incoming traffic. Note that the
1036 // primary server can't check delayed responses to the partner
1037 // because the partner doesn't respond to any queries in this
1038 // configuration.
1039 if (network_state_->isServiceEnabled() &&
1040 ((config_->getHAMode() == HAConfig::LOAD_BALANCING) ||
1041 (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::STANDBY))) {
1042 return (communication_state_->failureDetected());
1043 }
1044
1045 // Hot standby / primary case.
1046 return (true);
1047 }
1048
1049 // Shouldn't transition to the partner down state.
1050 return (false);
1051}
1052
1053bool
1055 // Check if skew is fatally large.
1056 bool should_terminate = communication_state_->clockSkewShouldTerminate();
1057
1058 // If not issue a warning if it's getting large.
1059 if (!should_terminate) {
1060 communication_state_->clockSkewShouldWarn();
1061 }
1062
1063 return (should_terminate);
1064}
1065
1066bool
1069}
1070
1071bool
1073 switch (communication_state_->getPartnerState()) {
1075 if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1077 return (true);
1078 }
1079 break;
1080
1081 case HA_HOT_STANDBY_ST:
1082 if (config_->getHAMode() != HAConfig::HOT_STANDBY) {
1084 return (true);
1085 }
1086 break;
1087
1089 if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1091 return (true);
1092 }
1093 break;
1094
1095 default:
1096 ;
1097 }
1098 return (false);
1099}
1100
1101size_t
1103 const dhcp::Lease4CollectionPtr& leases,
1104 const dhcp::Lease4CollectionPtr& deleted_leases,
1105 const hooks::ParkingLotHandlePtr& parking_lot) {
1106
1107 // Get configurations of the peers. Exclude this instance.
1108 HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1109
1110 size_t sent_num = 0;
1111
1112 // Schedule sending lease updates to each peer.
1113 for (auto p = peers_configs.begin(); p != peers_configs.end(); ++p) {
1114 HAConfig::PeerConfigPtr conf = p->second;
1115
1116 // Check if the lease updates should be queued. This is the case when the
1117 // server is in the communication-recovery state. Queued lease updates may
1118 // be sent when the communication is re-established.
1119 if (shouldQueueLeaseUpdates(conf)) {
1120 // Lease updates for deleted leases.
1121 for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1123 }
1124
1125 // Lease updates for new allocations and updated leases.
1126 for (auto l = leases->begin(); l != leases->end(); ++l) {
1128 }
1129
1130 continue;
1131 }
1132
1133 // Check if the lease update should be sent to the server. If we're in
1134 // the partner-down state we don't send lease updates to the partner.
1135 if (!shouldSendLeaseUpdates(conf)) {
1136 continue;
1137 }
1138
1139 // Lease updates for deleted leases.
1140 for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1142 parking_lot);
1143 }
1144
1145 // Lease updates for new allocations and updated leases.
1146 for (auto l = leases->begin(); l != leases->end(); ++l) {
1148 parking_lot);
1149 }
1150
1151 // If we're contacting a backup server from which we don't expect a
1152 // response prior to responding to the DHCP client we don't count
1153 // it.
1154 if ((config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP))) {
1155 ++sent_num;
1156 }
1157 }
1158
1159 return (sent_num);
1160}
1161
1162size_t
1164 const dhcp::Lease6CollectionPtr& leases,
1165 const dhcp::Lease6CollectionPtr& deleted_leases,
1166 const hooks::ParkingLotHandlePtr& parking_lot) {
1167
1168 // Get configurations of the peers. Exclude this instance.
1169 HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1170
1171 size_t sent_num = 0;
1172
1173 // Schedule sending lease updates to each peer.
1174 for (auto p = peers_configs.begin(); p != peers_configs.end(); ++p) {
1175 HAConfig::PeerConfigPtr conf = p->second;
1176
1177 // Check if the lease updates should be queued. This is the case when the
1178 // server is in the communication-recovery state. Queued lease updates may
1179 // be sent when the communication is re-established.
1180 if (shouldQueueLeaseUpdates(conf)) {
1181 for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1183 }
1184
1185 // Lease updates for new allocations and updated leases.
1186 for (auto l = leases->begin(); l != leases->end(); ++l) {
1188 }
1189
1190 continue;
1191 }
1192
1193 // Check if the lease update should be sent to the server. If we're in
1194 // the partner-down state we don't send lease updates to the partner.
1195 if (!shouldSendLeaseUpdates(conf)) {
1196 continue;
1197 }
1198
1199 // If we're contacting a backup server from which we don't expect a
1200 // response prior to responding to the DHCP client we don't count
1201 // it.
1202 if (config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP)) {
1203 ++sent_num;
1204 }
1205
1206 // Send new/updated leases and deleted leases in one command.
1207 asyncSendLeaseUpdate(query, conf, CommandCreator::createLease6BulkApply(leases, deleted_leases),
1208 parking_lot);
1209 }
1210
1211 return (sent_num);
1212}
1213
1214template<typename QueryPtrType>
1215bool
1217 const ParkingLotHandlePtr& parking_lot) {
1218 if (MultiThreadingMgr::instance().getMode()) {
1219 std::lock_guard<std::mutex> lock(mutex_);
1220 return (leaseUpdateCompleteInternal(query, parking_lot));
1221 } else {
1222 return (leaseUpdateCompleteInternal(query, parking_lot));
1223 }
1224}
1225
1226template<typename QueryPtrType>
1227bool
1228HAService::leaseUpdateCompleteInternal(QueryPtrType& query,
1229 const ParkingLotHandlePtr& parking_lot) {
1230 auto it = pending_requests_.find(query);
1231
1232 // If there are no more pending requests for this query, let's unpark
1233 // the DHCP packet.
1234 if (it == pending_requests_.end() || (--pending_requests_[query] <= 0)) {
1235 parking_lot->unpark(query);
1236
1237 // If we have unparked the packet we can clear pending requests for
1238 // this query.
1239 if (it != pending_requests_.end()) {
1240 pending_requests_.erase(it);
1241 }
1242 return (true);
1243 }
1244 return (false);
1245}
1246
1247template<typename QueryPtrType>
1248void
1250 if (MultiThreadingMgr::instance().getMode()) {
1251 std::lock_guard<std::mutex> lock(mutex_);
1252 updatePendingRequestInternal(query);
1253 } else {
1254 updatePendingRequestInternal(query);
1255 }
1256}
1257
1258template<typename QueryPtrType>
1259void
1260HAService::updatePendingRequestInternal(QueryPtrType& query) {
1261 if (pending_requests_.count(query) == 0) {
1262 pending_requests_[query] = 1;
1263 } else {
1264 ++pending_requests_[query];
1265 }
1266}
1267
1268template<typename QueryPtrType>
1269void
1270HAService::asyncSendLeaseUpdate(const QueryPtrType& query,
1271 const HAConfig::PeerConfigPtr& config,
1272 const ConstElementPtr& command,
1273 const ParkingLotHandlePtr& parking_lot) {
1274 // Create HTTP/1.1 request including our command.
1275 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1276 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1277 HostHttpHeader(config->getUrl().getHostname()));
1278 config->addBasicAuthHttpHeader(request);
1279 request->setBodyAsJson(command);
1280 request->finalize();
1281
1282 // Response object should also be created because the HTTP client needs
1283 // to know the type of the expected response.
1284 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1285
1286 // When possible we prefer to pass weak pointers to the queries, rather
1287 // than shared pointers, to avoid memory leaks in case cross reference
1288 // between the pointers.
1289 boost::weak_ptr<typename QueryPtrType::element_type> weak_query(query);
1290
1291 // Schedule asynchronous HTTP request.
1292 client_->asyncSendRequest(config->getUrl(), config->getTlsContext(),
1293 request, response,
1294 [this, weak_query, parking_lot, config]
1295 (const boost::system::error_code& ec,
1296 const HttpResponsePtr& response,
1297 const std::string& error_str) {
1298 // Get the shared pointer of the query. The server should keep the
1299 // pointer to the query and then park it. Therefore, we don't really
1300 // expect it to be null. If it is null, something is really wrong.
1301 QueryPtrType query = weak_query.lock();
1302 if (!query) {
1303 isc_throw(Unexpected, "query is null while receiving response from"
1304 " HA peer. This is programmatic error");
1305 }
1306
1307 // There are three possible groups of errors during the lease update.
1308 // One is the IO error causing issues in communication with the peer.
1309 // Another one is an HTTP parsing error. The last type of error is
1310 // when non-success error code is returned in the response carried
1311 // in the HTTP message or if the JSON response is otherwise broken.
1312
1313 bool lease_update_success = true;
1314
1315 // Handle first two groups of errors.
1316 if (ec || !error_str.empty()) {
1317 LOG_WARN(ha_logger, HA_LEASE_UPDATE_COMMUNICATIONS_FAILED)
1318 .arg(query->getLabel())
1319 .arg(config->getLogLabel())
1320 .arg(ec ? ec.message() : error_str);
1321
1322 // Communication error, so let's drop parked packet. The DHCP
1323 // response will not be sent.
1324 lease_update_success = false;
1325
1326 } else {
1327
1328 // Handle third group of errors.
1329 try {
1330 int rcode = 0;
1331 auto args = verifyAsyncResponse(response, rcode);
1332 // In the v6 case the server may return a list of failed lease
1333 // updates and we should log them.
1334 logFailedLeaseUpdates(query, args);
1335
1336 } catch (const std::exception& ex) {
1338 .arg(query->getLabel())
1339 .arg(config->getLogLabel())
1340 .arg(ex.what());
1341
1342 // Error while doing an update. The DHCP response will not be sent.
1343 lease_update_success = false;
1344 }
1345 }
1346
1347 // We don't care about the result of the lease update to the backup server.
1348 // It is a best effort update.
1349 if (config->getRole() != HAConfig::PeerConfig::BACKUP) {
1350 if (lease_update_success) {
1351 // If the lease update was successful and we have sent it to the server
1352 // to which we also send heartbeats (primary, secondary or standby) we
1353 // can assume that the server is online and we can defer next heartbeat.
1354 communication_state_->poke();
1355
1356 } else {
1357 communication_state_->setPartnerState("unavailable");
1358 }
1359 }
1360
1361 // It is possible to configure the server to not wait for a response from
1362 // the backup server before we unpark the packet and respond to the client.
1363 // Here we check if we're dealing with such situation.
1364 if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1365 // We're expecting a response from the backup server or it is not
1366 // a backup server and the lease update was unsuccessful. In such
1367 // case the DHCP exchange fails.
1368 if (!lease_update_success) {
1369 parking_lot->drop(query);
1370 }
1371 } else {
1372 // This was a response from the backup server and we're configured to
1373 // not wait for their acknowledgments, so there is nothing more to do.
1374 return;
1375 }
1376
1377 if (leaseUpdateComplete(query, parking_lot)) {
1378 // If we have finished sending the lease updates we need to run the
1379 // state machine until the state machine finds that additional events
1380 // are required, such as next heartbeat or a lease update. The runModel()
1381 // may transition to another state, schedule asynchronous tasks etc.
1382 // Then it returns control to the DHCP server.
1383 runModel(HA_LEASE_UPDATES_COMPLETE_EVT);
1384 }
1385 },
1387 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1388 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1389 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1390 );
1391
1392 // The number of pending requests is the number of requests for which we
1393 // expect an acknowledgment prior to responding to the DHCP clients. If
1394 // we're configured to wait for the acks from the backups or it is not
1395 // a backup increase the number of pending requests.
1396 if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1397 // Request scheduled, so update the request counters for the query.
1398 updatePendingRequest(query);
1399 }
1400}
1401
1402bool
1403HAService::shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr& peer_config) const {
1404 // Never send lease updates if they are administratively disabled.
1405 if (!config_->amSendingLeaseUpdates()) {
1406 return (false);
1407 }
1408
1409 // Always send updates to the backup server.
1410 if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1411 return (true);
1412 }
1413
1414 // Never send updates if this is a backup server.
1415 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
1416 return (false);
1417 }
1418
1419 // In other case, whether we send lease updates or not depends on our
1420 // state.
1421 switch (getCurrState()) {
1422 case HA_HOT_STANDBY_ST:
1425 return (true);
1426
1427 default:
1428 ;
1429 }
1430
1431 return (false);
1432}
1433
1434bool
1435HAService::shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr& peer_config) const {
1436 if (!config_->amSendingLeaseUpdates()) {
1437 return (false);
1438 }
1439
1440 if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1441 return (false);
1442 }
1443
1444 return (getCurrState() == HA_COMMUNICATION_RECOVERY_ST);
1445}
1446
1447void
1448HAService::logFailedLeaseUpdates(const PktPtr& query,
1449 const ConstElementPtr& args) const {
1450 // If there are no arguments, it means that the update was successful.
1451 if (!args || (args->getType() != Element::map)) {
1452 return;
1453 }
1454
1455 // Instead of duplicating the code between the failed-deleted-leases and
1456 // failed-leases, let's just have one function that does it for both.
1457 auto log_proc = [](const PktPtr query, const ConstElementPtr& args,
1458 const std::string& param_name, const log::MessageID& mesid) {
1459
1460 // Check if there are any failed leases.
1461 auto failed_leases = args->get(param_name);
1462
1463 // The failed leases must be a list.
1464 if (failed_leases && (failed_leases->getType() == Element::list)) {
1465 // Go over the failed leases and log each of them.
1466 for (int i = 0; i < failed_leases->size(); ++i) {
1467 auto lease = failed_leases->get(i);
1468 if (lease->getType() == Element::map) {
1469
1470 // ip-address
1471 auto ip_address = lease->get("ip-address");
1472
1473 // lease type
1474 auto lease_type = lease->get("type");
1475
1476 // error-message
1477 auto error_message = lease->get("error-message");
1478
1479 LOG_INFO(ha_logger, mesid)
1480 .arg(query->getLabel())
1481 .arg(lease_type && (lease_type->getType() == Element::string) ?
1482 lease_type->stringValue() : "(unknown)")
1483 .arg(ip_address && (ip_address->getType() == Element::string) ?
1484 ip_address->stringValue() : "(unknown)")
1485 .arg(error_message && (error_message->getType() == Element::string) ?
1486 error_message->stringValue() : "(unknown)");
1487 }
1488 }
1489 }
1490 };
1491
1492 // Process "failed-deleted-leases"
1493 log_proc(query, args, "failed-deleted-leases", HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER);
1494
1495 // Process "failed-leases".
1496 log_proc(query, args, "failed-leases", HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER);
1497}
1498
1500HAService::processStatusGet() const {
1501 ElementPtr ha_servers = Element::createMap();
1502
1503 // Local part
1504 ElementPtr local = Element::createMap();
1506 role = config_->getThisServerConfig()->getRole();
1507 std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1508 local->set("role", Element::create(role_txt));
1509 int state = getCurrState();
1510 try {
1511 local->set("state", Element::create(stateToString(state)));
1512
1513 } catch (...) {
1514 // Empty string on error.
1515 local->set("state", Element::create(std::string()));
1516 }
1517 std::set<std::string> scopes = query_filter_.getServedScopes();
1518 ElementPtr list = Element::createList();
1519 for (std::string scope : scopes) {
1520 list->add(Element::create(scope));
1521 }
1522 local->set("scopes", list);
1523 ha_servers->set("local", local);
1524
1525 // Do not include remote server information if this is a backup server or
1526 // we're in the passive-backup mode.
1527 if ((config_->getHAMode() == HAConfig::PASSIVE_BACKUP) ||
1528 (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP)) {
1529 return (ha_servers);
1530 }
1531
1532 // Remote part
1533 ElementPtr remote = communication_state_->getReport();
1534
1535 try {
1536 role = config_->getFailoverPeerConfig()->getRole();
1537 std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1538 remote->set("role", Element::create(role_txt));
1539
1540 } catch (...) {
1541 remote->set("role", Element::create(std::string()));
1542 }
1543 ha_servers->set("remote", remote);
1544
1545 return (ha_servers);
1546}
1547
1549HAService::processHeartbeat() {
1550 ElementPtr arguments = Element::createMap();
1551 std::string state_label = getState(getCurrState())->getLabel();
1552 arguments->set("state", Element::create(state_label));
1553
1554 std::string date_time = HttpDateTime().rfc1123Format();
1555 arguments->set("date-time", Element::create(date_time));
1556
1557 auto scopes = query_filter_.getServedScopes();
1558 ElementPtr scopes_list = Element::createList();
1559 for (auto scope : scopes) {
1560 scopes_list->add(Element::create(scope));
1561 }
1562 arguments->set("scopes", scopes_list);
1563
1564 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA peer status returned.",
1565 arguments));
1566}
1567
1569HAService::processHAReset() {
1570 if (getCurrState() == HA_WAITING_ST) {
1571 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine already in WAITING state."));
1572 }
1573 verboseTransition(HA_WAITING_ST);
1574 runModel(NOP_EVT);
1575 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine reset."));
1576}
1577
1578void
1579HAService::asyncSendHeartbeat() {
1580 HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
1581
1582 // Create HTTP/1.1 request including our command.
1583 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1584 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1585 HostHttpHeader(partner_config->getUrl().getHostname()));
1586 partner_config->addBasicAuthHttpHeader(request);
1587 request->setBodyAsJson(CommandCreator::createHeartbeat(server_type_));
1588 request->finalize();
1589
1590 // Response object should also be created because the HTTP client needs
1591 // to know the type of the expected response.
1592 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1593
1594 // Schedule asynchronous HTTP request.
1595 client_->asyncSendRequest(partner_config->getUrl(),
1596 partner_config->getTlsContext(),
1597 request, response,
1598 [this, partner_config]
1599 (const boost::system::error_code& ec,
1600 const HttpResponsePtr& response,
1601 const std::string& error_str) {
1602
1603 // There are three possible groups of errors during the heartbeat.
1604 // One is the IO error causing issues in communication with the peer.
1605 // Another one is an HTTP parsing error. The last type of error is
1606 // when non-success error code is returned in the response carried
1607 // in the HTTP message or if the JSON response is otherwise broken.
1608
1609 bool heartbeat_success = true;
1610
1611 // Handle first two groups of errors.
1612 if (ec || !error_str.empty()) {
1613 LOG_WARN(ha_logger, HA_HEARTBEAT_COMMUNICATIONS_FAILED)
1614 .arg(partner_config->getLogLabel())
1615 .arg(ec ? ec.message() : error_str);
1616 heartbeat_success = false;
1617
1618 } else {
1619
1620 // Handle third group of errors.
1621 try {
1622 // Response must contain arguments and the arguments must
1623 // be a map.
1624 int rcode = 0;
1625 ConstElementPtr args = verifyAsyncResponse(response, rcode);
1626 if (!args || args->getType() != Element::map) {
1627 isc_throw(CtrlChannelError, "returned arguments in the response"
1628 " must be a map");
1629 }
1630 // Response must include partner's state.
1631 ConstElementPtr state = args->get("state");
1632 if (!state || state->getType() != Element::string) {
1633 isc_throw(CtrlChannelError, "server state not returned in response"
1634 " to a ha-heartbeat command or it is not a string");
1635 }
1636 // Remember the partner's state. This may throw if the returned
1637 // state is invalid.
1638 communication_state_->setPartnerState(state->stringValue());
1639
1640 ConstElementPtr date_time = args->get("date-time");
1641 if (!date_time || date_time->getType() != Element::string) {
1642 isc_throw(CtrlChannelError, "date-time not returned in response"
1643 " to a ha-heartbeat command or it is not a string");
1644 }
1645 // Note the time returned by the partner to calculate the clock skew.
1646 communication_state_->setPartnerTime(date_time->stringValue());
1647
1648 // Remember the scopes served by the partner.
1649 try {
1650 auto scopes = args->get("scopes");
1651 communication_state_->setPartnerScopes(scopes);
1652
1653 } catch (...) {
1654 // We don't want to fail if the scopes are missing because
1655 // this would be incompatible with old HA hook library
1656 // versions. We may make it mandatory one day, but during
1657 // upgrades of existing HA setup it would be a real issue
1658 // if we failed here.
1659 }
1660
1661 } catch (const std::exception& ex) {
1663 .arg(partner_config->getLogLabel())
1664 .arg(ex.what());
1665 heartbeat_success = false;
1666 }
1667 }
1668
1669 // If heartbeat was successful, let's mark the connection with the
1670 // peer as healthy.
1671 if (heartbeat_success) {
1672 communication_state_->poke();
1673
1674 } else {
1675 // We were unable to retrieve partner's state, so let's mark it
1676 // as unavailable.
1677 communication_state_->setPartnerState("unavailable");
1678 // Log if the communication is interrupted.
1679 if (communication_state_->isCommunicationInterrupted()) {
1681 .arg(partner_config->getName());
1682 }
1683 }
1684
1685 // Whatever the result of the heartbeat was, the state machine needs
1686 // to react to this. Let's run the state machine until the state machine
1687 // finds that some new events are required, i.e. next heartbeat or
1688 // lease update. The runModel() may transition to another state, schedule
1689 // asynchronous tasks etc. Then it returns control to the DHCP server.
1690 startHeartbeat();
1691 runModel(HA_HEARTBEAT_COMPLETE_EVT);
1692 },
1694 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1695 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1696 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1697 );
1698}
1699
1700void
1701HAService::scheduleHeartbeat() {
1702 if (!communication_state_->isHeartbeatRunning()) {
1703 startHeartbeat();
1704 }
1705}
1706
1707void
1708HAService::startHeartbeat() {
1709 if (config_->getHeartbeatDelay() > 0) {
1710 communication_state_->startHeartbeat(config_->getHeartbeatDelay(),
1711 std::bind(&HAService::asyncSendHeartbeat,
1712 this));
1713 }
1714}
1715
1716void
1717HAService::asyncDisableDHCPService(HttpClient& http_client,
1718 const std::string& server_name,
1719 const unsigned int max_period,
1720 PostRequestCallback post_request_action) {
1721 HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
1722
1723 // Create HTTP/1.1 request including our command.
1724 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1725 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1726 HostHttpHeader(remote_config->getUrl().getHostname()));
1727
1728 remote_config->addBasicAuthHttpHeader(request);
1729 request->setBodyAsJson(CommandCreator::createDHCPDisable(max_period,
1730 server_type_));
1731 request->finalize();
1732
1733 // Response object should also be created because the HTTP client needs
1734 // to know the type of the expected response.
1735 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1736
1737 // Schedule asynchronous HTTP request.
1738 http_client.asyncSendRequest(remote_config->getUrl(),
1739 remote_config->getTlsContext(),
1740 request, response,
1741 [this, remote_config, post_request_action]
1742 (const boost::system::error_code& ec,
1743 const HttpResponsePtr& response,
1744 const std::string& error_str) {
1745
1746 // There are three possible groups of errors during the heartbeat.
1747 // One is the IO error causing issues in communication with the peer.
1748 // Another one is an HTTP parsing error. The last type of error is
1749 // when non-success error code is returned in the response carried
1750 // in the HTTP message or if the JSON response is otherwise broken.
1751
1752 std::string error_message;
1753
1754 // Handle first two groups of errors.
1755 if (ec || !error_str.empty()) {
1756 error_message = (ec ? ec.message() : error_str);
1757 LOG_ERROR(ha_logger, HA_DHCP_DISABLE_COMMUNICATIONS_FAILED)
1758 .arg(remote_config->getLogLabel())
1759 .arg(error_message);
1760
1761 } else {
1762
1763 // Handle third group of errors.
1764 try {
1765 int rcode = 0;
1766 static_cast<void>(verifyAsyncResponse(response, rcode));
1767
1768 } catch (const std::exception& ex) {
1769 error_message = ex.what();
1771 .arg(remote_config->getLogLabel())
1772 .arg(error_message);
1773 }
1774 }
1775
1776 // If there was an error communicating with the partner, mark the
1777 // partner as unavailable.
1778 if (!error_message.empty()) {
1779 communication_state_->setPartnerState("unavailable");
1780 }
1781
1782 // Invoke post request action if it was specified.
1783 if (post_request_action) {
1784 post_request_action(error_message.empty(),
1785 error_message);
1786 }
1787 },
1789 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1790 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1791 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1792 );
1793}
1794
1795void
1796HAService::asyncEnableDHCPService(HttpClient& http_client,
1797 const std::string& server_name,
1798 PostRequestCallback post_request_action) {
1799 HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
1800
1801 // Create HTTP/1.1 request including our command.
1802 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1803 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1804 HostHttpHeader(remote_config->getUrl().getHostname()));
1805 remote_config->addBasicAuthHttpHeader(request);
1806 request->setBodyAsJson(CommandCreator::createDHCPEnable(server_type_));
1807 request->finalize();
1808
1809 // Response object should also be created because the HTTP client needs
1810 // to know the type of the expected response.
1811 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1812
1813 // Schedule asynchronous HTTP request.
1814 http_client.asyncSendRequest(remote_config->getUrl(),
1815 remote_config->getTlsContext(),
1816 request, response,
1817 [this, remote_config, post_request_action]
1818 (const boost::system::error_code& ec,
1819 const HttpResponsePtr& response,
1820 const std::string& error_str) {
1821
1822 // There are three possible groups of errors during the heartbeat.
1823 // One is the IO error causing issues in communication with the peer.
1824 // Another one is an HTTP parsing error. The last type of error is
1825 // when non-success error code is returned in the response carried
1826 // in the HTTP message or if the JSON response is otherwise broken.
1827
1828 std::string error_message;
1829
1830 // Handle first two groups of errors.
1831 if (ec || !error_str.empty()) {
1832 error_message = (ec ? ec.message() : error_str);
1833 LOG_ERROR(ha_logger, HA_DHCP_ENABLE_COMMUNICATIONS_FAILED)
1834 .arg(remote_config->getLogLabel())
1835 .arg(error_message);
1836
1837 } else {
1838
1839 // Handle third group of errors.
1840 try {
1841 int rcode = 0;
1842 static_cast<void>(verifyAsyncResponse(response, rcode));
1843
1844 } catch (const std::exception& ex) {
1845 error_message = ex.what();
1847 .arg(remote_config->getLogLabel())
1848 .arg(error_message);
1849 }
1850 }
1851
1852 // If there was an error communicating with the partner, mark the
1853 // partner as unavailable.
1854 if (!error_message.empty()) {
1855 communication_state_->setPartnerState("unavailable");
1856 }
1857
1858 // Invoke post request action if it was specified.
1859 if (post_request_action) {
1860 post_request_action(error_message.empty(),
1861 error_message);
1862 }
1863 },
1865 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1866 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1867 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1868 );
1869}
1870
1871void
1872HAService::localDisableDHCPService() {
1873 network_state_->disableService(NetworkState::Origin::HA_COMMAND);
1874}
1875
1876void
1877HAService::localEnableDHCPService() {
1878 network_state_->enableService(NetworkState::Origin::HA_COMMAND);
1879}
1880
1881void
1882HAService::asyncSyncLeases() {
1883 PostSyncCallback null_action;
1884
1885 // Timeout is configured in milliseconds. Need to convert to seconds.
1886 unsigned int dhcp_disable_timeout =
1887 static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
1888 if (dhcp_disable_timeout == 0) {
1889 // Ensure that we always use at least 1 second timeout.
1890 dhcp_disable_timeout = 1;
1891 }
1892
1893 asyncSyncLeases(*client_, config_->getFailoverPeerConfig()->getName(),
1894 dhcp_disable_timeout, LeasePtr(), null_action);
1895}
1896
1897void
1898HAService::asyncSyncLeases(http::HttpClient& http_client,
1899 const std::string& server_name,
1900 const unsigned int max_period,
1901 const dhcp::LeasePtr& last_lease,
1902 PostSyncCallback post_sync_action,
1903 const bool dhcp_disabled) {
1904 // Synchronization starts with a command to disable DHCP service of the
1905 // peer from which we're fetching leases. We don't want the other server
1906 // to allocate new leases while we fetch from it. The DHCP service will
1907 // be disabled for a certain amount of time and will be automatically
1908 // re-enabled if we die during the synchronization.
1909 asyncDisableDHCPService(http_client, server_name, max_period,
1910 [this, &http_client, server_name, max_period, last_lease,
1911 post_sync_action, dhcp_disabled]
1912 (const bool success, const std::string& error_message) {
1913
1914 // If we have successfully disabled the DHCP service on the peer,
1915 // we can start fetching the leases.
1916 if (success) {
1917 // The last argument indicates that disabling the DHCP
1918 // service on the partner server was successful.
1919 asyncSyncLeasesInternal(http_client, server_name, max_period,
1920 last_lease, post_sync_action, true);
1921
1922 } else {
1923 post_sync_action(success, error_message, dhcp_disabled);
1924 }
1925 });
1926}
1927
1928void
1929HAService::asyncSyncLeasesInternal(http::HttpClient& http_client,
1930 const std::string& server_name,
1931 const unsigned int max_period,
1932 const dhcp::LeasePtr& last_lease,
1933 PostSyncCallback post_sync_action,
1934 const bool dhcp_disabled) {
1935
1936 HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
1937
1938 // Create HTTP/1.1 request including our command.
1939 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1940 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1941 HostHttpHeader(partner_config->getUrl().getHostname()));
1942 partner_config->addBasicAuthHttpHeader(request);
1943 if (server_type_ == HAServerType::DHCPv4) {
1944 request->setBodyAsJson(CommandCreator::createLease4GetPage(
1945 boost::dynamic_pointer_cast<Lease4>(last_lease), config_->getSyncPageLimit()));
1946
1947 } else {
1948 request->setBodyAsJson(CommandCreator::createLease6GetPage(
1949 boost::dynamic_pointer_cast<Lease6>(last_lease), config_->getSyncPageLimit()));
1950 }
1951 request->finalize();
1952
1953 // Response object should also be created because the HTTP client needs
1954 // to know the type of the expected response.
1955 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1956
1957 // Schedule asynchronous HTTP request.
1958 http_client.asyncSendRequest(partner_config->getUrl(),
1959 partner_config->getTlsContext(),
1960 request, response,
1961 [this, partner_config, post_sync_action, &http_client, server_name,
1962 max_period, dhcp_disabled]
1963 (const boost::system::error_code& ec,
1964 const HttpResponsePtr& response,
1965 const std::string& error_str) {
1966
1967 // Holds last lease received on the page of leases. If the last
1968 // page was hit, this value remains null.
1969 LeasePtr last_lease;
1970
1971 // There are three possible groups of errors during the heartbeat.
1972 // One is the IO error causing issues in communication with the peer.
1973 // Another one is an HTTP parsing error. The last type of error is
1974 // when non-success error code is returned in the response carried
1975 // in the HTTP message or if the JSON response is otherwise broken.
1976
1977 std::string error_message;
1978
1979 // Handle first two groups of errors.
1980 if (ec || !error_str.empty()) {
1981 error_message = (ec ? ec.message() : error_str);
1982 LOG_ERROR(ha_logger, HA_LEASES_SYNC_COMMUNICATIONS_FAILED)
1983 .arg(partner_config->getLogLabel())
1984 .arg(error_message);
1985
1986 } else {
1987 // Handle third group of errors.
1988 try {
1989 int rcode = 0;
1990 ConstElementPtr args = verifyAsyncResponse(response, rcode);
1991
1992 // Arguments must be a map.
1993 if (args && (args->getType() != Element::map)) {
1994 isc_throw(CtrlChannelError,
1995 "arguments in the received response must be a map");
1996 }
1997
1998 ConstElementPtr leases = args->get("leases");
1999 if (!leases || (leases->getType() != Element::list)) {
2000 isc_throw(CtrlChannelError,
2001 "server response does not contain leases argument or this"
2002 " argument is not a list");
2003 }
2004
2005 // Iterate over the leases and update the database as appropriate.
2006 const auto& leases_element = leases->listValue();
2007
2008 LOG_INFO(ha_logger, HA_LEASES_SYNC_LEASE_PAGE_RECEIVED)
2009 .arg(leases_element.size())
2010 .arg(server_name);
2011
2012 for (auto l = leases_element.begin(); l != leases_element.end(); ++l) {
2013 try {
2014
2015 if (server_type_ == HAServerType::DHCPv4) {
2016 Lease4Ptr lease = Lease4::fromElement(*l);
2017
2018 // Check if there is such lease in the database already.
2019 Lease4Ptr existing_lease = LeaseMgrFactory::instance().getLease4(lease->addr_);
2020 if (!existing_lease) {
2021 // There is no such lease, so let's add it.
2022 LeaseMgrFactory::instance().addLease(lease);
2023
2024 } else if (existing_lease->cltt_ < lease->cltt_) {
2025 // If the existing lease is older than the fetched lease, update
2026 // the lease in our local database.
2027 // Update lease current expiration time with value received from the
2028 // database. Some database backends reject operations on the lease if
2029 // the current expiration time value does not match what is stored.
2030 Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2031 LeaseMgrFactory::instance().updateLease4(lease);
2032
2033 } else {
2034 LOG_DEBUG(ha_logger, DBGLVL_TRACE_BASIC, HA_LEASE_SYNC_STALE_LEASE4_SKIP)
2035 .arg(lease->addr_.toText())
2036 .arg(lease->subnet_id_);
2037 }
2038
2039 // If we're not on the last page and we're processing final lease on
2040 // this page, let's record the lease as input to the next
2041 // lease4-get-page command.
2042 if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2043 (l + 1 == leases_element.end())) {
2044 last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2045 }
2046
2047 } else {
2048 Lease6Ptr lease = Lease6::fromElement(*l);
2049
2050 // Check if there is such lease in the database already.
2051 Lease6Ptr existing_lease = LeaseMgrFactory::instance().getLease6(lease->type_,
2052 lease->addr_);
2053 if (!existing_lease) {
2054 // There is no such lease, so let's add it.
2055 LeaseMgrFactory::instance().addLease(lease);
2056
2057 } else if (existing_lease->cltt_ < lease->cltt_) {
2058 // If the existing lease is older than the fetched lease, update
2059 // the lease in our local database.
2060 // Update lease current expiration time with value received from the
2061 // database. Some database backends reject operations on the lease if
2062 // the current expiration time value does not match what is stored.
2063 Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2064 LeaseMgrFactory::instance().updateLease6(lease);
2065
2066 } else {
2067 LOG_DEBUG(ha_logger, DBGLVL_TRACE_BASIC, HA_LEASE_SYNC_STALE_LEASE6_SKIP)
2068 .arg(lease->addr_.toText())
2069 .arg(lease->subnet_id_);
2070 }
2071
2072 // If we're not on the last page and we're processing final lease on
2073 // this page, let's record the lease as input to the next
2074 // lease6-get-page command.
2075 if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2076 (l + 1 == leases_element.end())) {
2077 last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2078 }
2079 }
2080
2081 } catch (const std::exception& ex) {
2082 LOG_WARN(ha_logger, HA_LEASE_SYNC_FAILED)
2083 .arg((*l)->str())
2084 .arg(ex.what());
2085 }
2086 }
2087
2088 } catch (const std::exception& ex) {
2089 error_message = ex.what();
2091 .arg(partner_config->getLogLabel())
2092 .arg(error_message);
2093 }
2094 }
2095
2096 // If there was an error communicating with the partner, mark the
2097 // partner as unavailable.
2098 if (!error_message.empty()) {
2099 communication_state_->setPartnerState("unavailable");
2100
2101 } else if (last_lease) {
2102 // This indicates that there are more leases to be fetched.
2103 // Therefore, we have to send another leaseX-get-page command.
2104 asyncSyncLeases(http_client, server_name, max_period, last_lease,
2105 post_sync_action, dhcp_disabled);
2106 return;
2107 }
2108
2109 // Invoke post synchronization action if it was specified.
2110 if (post_sync_action) {
2111 post_sync_action(error_message.empty(),
2112 error_message,
2113 dhcp_disabled);
2114 }
2115 },
2116 HttpClient::RequestTimeout(config_->getSyncTimeout()),
2117 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2118 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2119 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2120 );
2121
2122}
2123
2125HAService::processSynchronize(const std::string& server_name,
2126 const unsigned int max_period) {
2127 std::string answer_message;
2128 int sync_status = synchronize(answer_message, server_name, max_period);
2129 return (createAnswer(sync_status, answer_message));
2130}
2131
// Synchronously pulls leases from the partner named by server_name.
//
// The asynchronous asyncSyncLeases() call is driven by an IOService that is
// local to this function: io_service.run() returns only after one of the
// completion callbacks below calls io_service.stop().
//
// status_message is an in/out argument. It receives the error text on
// failure, or a fixed success text otherwise.
// NOTE(review): it is not cleared on entry and any non-empty value after the
// run is treated as a failure -- callers presumably pass an empty string;
// confirm.
//
// Returns CONTROL_RESULT_ERROR when an error message was recorded and
// CONTROL_RESULT_SUCCESS otherwise. HA_SYNCING_FAILED_EVT or
// HA_SYNCING_SUCCEEDED_EVT is posted accordingly before returning.
2132 int
2133 HAService::synchronize(std::string& status_message, const std::string& server_name,
2134 const unsigned int max_period) {
2135 IOService io_service;
2136 HttpClient client(io_service);
2137
// The callbacks capture locals by reference; this is only valid because
// io_service.run() below blocks until they have completed.
2138 asyncSyncLeases(client, server_name, max_period, Lease4Ptr(),
2139 [&](const bool success, const std::string& error_message,
2140 const bool dhcp_disabled) {
2141 // If there was a fatal error while fetching the leases, let's
2142 // log an error message so as it can be included in the response
2143 // to the controlling client.
2144 if (!success) {
2145 status_message = error_message;
2146 }
2147
2148 // Whether or not there was an error while fetching the leases,
2149 // we need to re-enable the DHCP service on the peer if the
2150 // DHCP service was disabled in the course of synchronization.
2151 if (dhcp_disabled) {
// The parameters of this inner callback shadow those of the enclosing one.
2152 asyncEnableDHCPService(client, server_name,
2153 [&](const bool success,
2154 const std::string& error_message) {
2155 // It is possible that we have already recorded an error
2156 // message while synchronizing the lease database. Don't
2157 // override the existing error message.
2158 if (!success && status_message.empty()) {
2159 status_message = error_message;
2160 }
2161
2162 // The synchronization process is completed, so let's break
2163 // the IO service so as we can return the response to the
2164 // controlling client.
2165 io_service.stop();
2166 });
2167
2168 } else {
2169 // Also stop IO service if there is no need to enable DHCP
2170 // service.
2171 io_service.stop();
2172 }
2173 });
2174
2175 LOG_INFO(ha_logger, HA_SYNC_START).arg(server_name);
2176
2177 // Measure duration of the synchronization.
2178 Stopwatch stopwatch;
2179
2180 // Run the IO service until it is stopped by any of the callbacks. This
2181 // makes it synchronous.
2182 io_service.run();
2183
2184 // End measuring duration.
2185 stopwatch.stop();
2186
2187 // If an error message has been recorded, return an error to the controlling
2188 // client.
2189 if (!status_message.empty()) {
2190 postNextEvent(HA_SYNCING_FAILED_EVT);
2191
// NOTE(review): the listing skips line 2192 here; the .arg() calls below have
// no visible receiver (presumably a logging macro). Restore it from the
// upstream file.
2193 .arg(server_name)
2194 .arg(status_message);
2195
2196 return (CONTROL_RESULT_ERROR);
2197
2198 }
2199
2200 // Everything was fine, so let's return a success.
2201 status_message = "Lease database synchronization complete.";
2202 postNextEvent(HA_SYNCING_SUCCEEDED_EVT);
2203
// NOTE(review): the listing skips line 2204 here; the .arg() calls below have
// no visible receiver (presumably a logging macro). Restore it from the
// upstream file.
2205 .arg(server_name)
2206 .arg(stopwatch.logFormatLastDuration());
2207
2208 return (CONTROL_RESULT_SUCCESS);
2209 }
2210
// Sends the lease updates held in lease_update_backlog_ to the given peer.
//
// If the backlog is empty, post_request_action(true, "") is invoked at once.
// Otherwise one command is built and posted over HTTP. When the response is
// accepted, the function calls itself again from the response handler; when
// an error occurs, it is reported through post_request_action.
//
// http_client is captured by reference by the response handler, so it must
// stay valid until post_request_action has been invoked.
2211 void
2212 HAService::asyncSendLeaseUpdatesFromBacklog(HttpClient& http_client,
2213 const HAConfig::PeerConfigPtr& config,
2214 PostRequestCallback post_request_action) {
2215 if (lease_update_backlog_.size() == 0) {
2216 post_request_action(true, "");
2217 return;
2218 }
2219
2220 ConstElementPtr command;
2221 if (server_type_ == HAServerType::DHCPv4) {
// NOTE(review): the listing skips line 2222 here; op_type used below has no
// visible declaration. Restore it from the upstream file.
// NOTE(review): the result of dynamic_pointer_cast is dereferenced below
// without a null check -- confirm the backlog can only hold Lease4 objects on
// this path.
2223 Lease4Ptr lease = boost::dynamic_pointer_cast<Lease4>(lease_update_backlog_.pop(op_type));
2224 if (op_type == LeaseUpdateBacklog::ADD) {
2225 command = CommandCreator::createLease4Update(*lease);
2226 } else {
2227 command = CommandCreator::createLease4Delete(*lease);
2228 }
2229
2230 } else {
2231 command = CommandCreator::createLease6BulkApply(lease_update_backlog_);
2232 }
2233
2234 // Create HTTP/1.1 request including our command.
2235 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2236 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2237 HostHttpHeader(config->getUrl().getHostname()));
2238 config->addBasicAuthHttpHeader(request);
2239 request->setBodyAsJson(command);
2240 request->finalize();
2241
2242 // Response object should also be created because the HTTP client needs
2243 // to know the type of the expected response.
2244 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2245
2246 http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2247 request, response,
2248 [this, &http_client, config, post_request_action]
2249 (const boost::system::error_code& ec,
2250 const HttpResponsePtr& response,
2251 const std::string& error_str) {
2252
2253 std::string error_message;
2254
// First two groups of errors: an IO error code or an error string reported
// alongside the response.
2255 if (ec || !error_str.empty()) {
2256 error_message = (ec ? ec.message() : error_str);
2257 LOG_WARN(ha_logger, HA_LEASES_BACKLOG_COMMUNICATIONS_FAILED)
2258 .arg(config->getLogLabel())
2259 .arg(ec ? ec.message() : error_str);
2260
2261 } else {
2262 // Handle third group of errors.
2263 try {
2264 int rcode = 0;
// args is not used afterwards; the call is made because it throws when the
// response is not acceptable.
2265 auto args = verifyAsyncResponse(response, rcode);
2266 } catch (const std::exception& ex) {
2267 error_message = ex.what();
// NOTE(review): the listing skips line 2268 here; the .arg() calls below have
// no visible receiver (presumably a logging macro).
2269 .arg(config->getLogLabel())
2270 .arg(ex.what());
2271 }
2272 }
2273
2274 // Recursively send all outstanding lease updates or break when an
2275 // error occurs. In DHCPv6, this is a single iteration because we use
2276 // lease6-bulk-apply, which combines many lease updates in a single
2277 // transaction. In the case of DHCPv4, each update is sent in its own
2278 // transaction.
2279 if (error_message.empty()) {
2280 asyncSendLeaseUpdatesFromBacklog(http_client, config, post_request_action);
2281 } else {
// error_message is non-empty on this path, so the first argument is false.
2282 post_request_action(error_message.empty(), error_message);
2283 }
2284 });
2285 }
2286
// Sends all backlogged lease updates to the failover peer and blocks until
// the exchange has finished.
//
// Returns true immediately when the backlog is empty. Otherwise it drives
// asyncSendLeaseUpdatesFromBacklog() on an IOService local to this function
// and returns the success flag delivered to the completion callback.
2287 bool
2288 HAService::sendLeaseUpdatesFromBacklog() {
2289 auto num_updates = lease_update_backlog_.size();
2290 if (num_updates == 0) {
// NOTE(review): the listing skips line 2291 here (content not visible).
2292 return (true);
2293 }
2294
2295 IOService io_service;
2296 HttpClient client(io_service);
2297 auto remote_config = config_->getFailoverPeerConfig();
2298 bool updates_successful = true;
2299
// NOTE(review): the listing skips line 2300 here; the .arg() calls below have
// no visible receiver (presumably a logging macro).
2301 .arg(num_updates)
2302 .arg(remote_config->getName());
2303
// The callback captures locals by reference; this is valid because
// io_service.run() below blocks until the callback has stopped the service.
2304 asyncSendLeaseUpdatesFromBacklog(client, remote_config,
2305 [&](const bool success, const std::string&) {
2306 io_service.stop();
2307 updates_successful = success;
2308 });
2309
2310 // Measure duration of the updates.
2311 Stopwatch stopwatch;
2312
2313 // Run the IO service until it is stopped by the callback. This makes it synchronous.
2314 io_service.run();
2315
2316 // End measuring duration.
2317 stopwatch.stop();
2318
2319 if (updates_successful) {
// NOTE(review): the listing skips line 2320 here; the .arg() calls below have
// no visible receiver (presumably a logging macro).
2321 .arg(remote_config->getName())
2322 .arg(stopwatch.logFormatLastDuration());
2323 }
2324
2325 return (updates_successful);
2326 }
2327
// Posts the command built by CommandCreator::createHAReset() to the given
// peer and reports the outcome through post_request_action.
//
// post_request_action receives true and an empty string when the response
// passes verifyAsyncResponse(); otherwise it receives false and the error
// text.
2328 void
2329 HAService::asyncSendHAReset(HttpClient& http_client,
2330 const HAConfig::PeerConfigPtr& config,
2331 PostRequestCallback post_request_action) {
2332 ConstElementPtr command = CommandCreator::createHAReset(server_type_);
2333
2334 // Create HTTP/1.1 request including our command.
2335 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2336 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2337 HostHttpHeader(config->getUrl().getHostname()));
2338 config->addBasicAuthHttpHeader(request);
2339 request->setBodyAsJson(command);
2340 request->finalize();
2341
2342 // Response object should also be created because the HTTP client needs
2343 // to know the type of the expected response.
2344 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2345
2346 http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2347 request, response,
2348 [this, config, post_request_action]
2349 (const boost::system::error_code& ec,
2350 const HttpResponsePtr& response,
2351 const std::string& error_str) {
2352
2353 std::string error_message;
2354
// First two groups of errors: an IO error code or an error string reported
// alongside the response.
2355 if (ec || !error_str.empty()) {
2356 error_message = (ec ? ec.message() : error_str);
2357 LOG_WARN(ha_logger, HA_RESET_COMMUNICATIONS_FAILED)
2358 .arg(config->getLogLabel())
2359 .arg(ec ? ec.message() : error_str);
2360
2361 } else {
2362 // Handle third group of errors.
2363 try {
2364 int rcode = 0;
// args is not used afterwards; the call is made because it throws when the
// response is not acceptable.
2365 auto args = verifyAsyncResponse(response, rcode);
2366 } catch (const std::exception& ex) {
2367 error_message = ex.what();
// NOTE(review): the listing skips line 2368 here; the .arg() calls below have
// no visible receiver (presumably a logging macro).
2369 .arg(config->getLogLabel())
2370 .arg(ex.what());
2371 }
2372 }
2373
// An empty error_message means that no error was detected above.
2374 post_request_action(error_message.empty(), error_message);
2375 });
2376 }
2377
2378bool
2379HAService::sendHAReset() {
2380 IOService io_service;
2381 HttpClient client(io_service);
2382 auto remote_config = config_->getFailoverPeerConfig();
2383 bool reset_successful = true;
2384
2385 asyncSendHAReset(client, remote_config,
2386 [&](const bool success, const std::string&) {
2387 io_service.stop();
2388 reset_successful = success;
2389 });
2390
2391 // Run the IO service until it is stopped by the callback. This makes it synchronous.
2392 io_service.run();
2393
2394 return (reset_successful);
2395}
2396
2398HAService::processScopes(const std::vector<std::string>& scopes) {
2399 try {
2400 query_filter_.serveScopes(scopes);
2401 adjustNetworkState();
2402
2403 } catch (const std::exception& ex) {
2404 return (createAnswer(CONTROL_RESULT_ERROR, ex.what()));
2405 }
2406
2407 return (createAnswer(CONTROL_RESULT_SUCCESS, "New HA scopes configured."));
2408}
2409
2411HAService::processContinue() {
2412 if (unpause()) {
2413 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine continues."));
2414 }
2415 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine is not paused."));
2416}
2417
// Handles the ha-maintenance-notify command.
//
// With cancel set, the server must be in HA_IN_MAINTENANCE_ST; it then posts
// HA_MAINTENANCE_CANCEL_EVT and transitions back to the previous state.
// Without cancel, the server transitions to HA_IN_MAINTENANCE_ST unless its
// current state is one of those listed in the switch below, in which case
// HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED is returned.
//
// NOTE(review): the listing omits the line preceding the function name
// (presumably the return type; every return statement yields the result of
// createAnswer()).
2419HAService::processMaintenanceNotify(const bool cancel) {
2420 if (cancel) {
2421 if (getCurrState() != HA_IN_MAINTENANCE_ST) {
2422 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel the"
2423 " maintenance for the server not in the"
2424 " in-maintenance state."));
2425 }
2426
2427 postNextEvent(HA_MAINTENANCE_CANCEL_EVT);
2428 verboseTransition(getPrevState());
2429 runModel(NOP_EVT);
2430 return (createAnswer(CONTROL_RESULT_SUCCESS, "Server maintenance canceled."));
2431 }
2432
2433 switch (getCurrState()) {
2434 case HA_BACKUP_ST:
// NOTE(review): the listing skips line 2435 here (possibly another case
// label); check the upstream file.
2436 case HA_TERMINATED_ST:
2437 // The reason why we don't return an error result here is that we have to
2438 // have a way to distinguish between the errors caused by the communication
2439 // issues and the cases when there is no communication error but the server
2440 // is not allowed to enter the in-maintenance state. In the former case, the
2441 // partner would go to partner-down. In the case signaled by the special
2442 // result code entering the maintenance state is not allowed.
2443 return (createAnswer(HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED,
2444 "Unable to transition the server from the "
2445 + stateToString(getCurrState()) + " to"
2446 " in-maintenance state."));
2447 default:
2448 verboseTransition(HA_IN_MAINTENANCE_ST);
2449 runModel(HA_MAINTENANCE_NOTIFY_EVT);
2450 }
2451 return (createAnswer(CONTROL_RESULT_SUCCESS, "Server is in-maintenance state."));
2452 }
2453
// Handles the ha-maintenance-start command.
//
// After rejecting the states listed in the first switch, the function sends
// ha-maintenance-notify (cancel flag false) to the failover peer using an
// IOService local to this call, and then selects the next state from the
// captured error code and result code:
//  - IO error or CONTROL_RESULT_ERROR: transition to HA_PARTNER_DOWN_ST,
//  - CONTROL_RESULT_SUCCESS: transition to HA_PARTNER_IN_MAINTENANCE_ST,
//  - any other result code: no transition, an error answer is returned.
//
// NOTE(review): the listing omits the line preceding the function name
// (presumably the return type) and several lines inside the body; each gap
// is marked below.
2455HAService::processMaintenanceStart() {
2456 switch (getCurrState()) {
2457 case HA_BACKUP_ST:
// NOTE(review): the listing skips lines 2458-2459 here (possibly further case
// labels); check the upstream file.
2460 case HA_TERMINATED_ST:
2461 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition the server from"
2462 " the " + stateToString(getCurrState()) + " to"
2463 " partner-in-maintenance state."));
2464 default:
2465 ;
2466 }
2467
2468 HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2469
2470 // Create HTTP/1.1 request including ha-maintenance-notify command
2471 // with the cancel flag set to false.
2472 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2473 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2474 HostHttpHeader(remote_config->getUrl().getHostname()));
2475 remote_config->addBasicAuthHttpHeader(request);
2476 request->setBodyAsJson(CommandCreator::createMaintenanceNotify(false, server_type_));
2477 request->finalize();
2478
2479 // Response object should also be created because the HTTP client needs
2480 // to know the type of the expected response.
2481 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2482
2483 IOService io_service;
2484 HttpClient client(io_service);
2485
// Results copied out of the response handler for use after io_service.run().
2486 boost::system::error_code captured_ec;
2487 std::string captured_error_message;
2488 int captured_rcode = 0;
2489
2490 // Schedule asynchronous HTTP request.
2491 client.asyncSendRequest(remote_config->getUrl(),
2492 remote_config->getTlsContext(),
2493 request, response,
2494 [this, remote_config, &io_service, &captured_ec, &captured_error_message,
2495 &captured_rcode]
2496 (const boost::system::error_code& ec,
2497 const HttpResponsePtr& response,
2498 const std::string& error_str) {
2499
2500 io_service.stop();
2501
2502 // There are three possible groups of errors. One is the IO error
2503 // causing issues in communication with the peer. Another one is
2504 // an HTTP parsing error. The last type of error is when non-success
2505 // error code is returned in the response carried in the HTTP message
2506 // or if the JSON response is otherwise broken.
2507
2508 std::string error_message;
2509
2510 // Handle first two groups of errors.
2511 if (ec || !error_str.empty()) {
2512 error_message = (ec ? ec.message() : error_str);
2513 LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED)
2514 .arg(remote_config->getLogLabel())
2515 .arg(error_message);
2516
2517 } else {
2518
2519 // Handle third group of errors.
2520 try {
// captured_rcode is written by verifyAsyncResponse() even when it throws.
2521 static_cast<void>(verifyAsyncResponse(response, captured_rcode));
2522
2523 } catch (const std::exception& ex) {
2524 error_message = ex.what();
// NOTE(review): the listing skips line 2525 here; the .arg() calls below have
// no visible receiver (presumably a logging macro).
2526 .arg(remote_config->getLogLabel())
2527 .arg(error_message);
2528 }
2529 }
2530
2531 // If there was an error communicating with the partner, mark the
2532 // partner as unavailable.
2533 if (!error_message.empty()) {
2534 communication_state_->setPartnerState("unavailable");
2535 }
2536
2537 captured_ec = ec;
2538 captured_error_message = error_message;
2539 },
// NOTE(review): the listing skips line 2540 here; an argument between the
// response handler and the connect handler is not visible.
2541 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2542 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2543 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2544 );
2545
2546 // Run the IO service until it is stopped by any of the callbacks. This
2547 // makes it synchronous.
2548 io_service.run();
2549
2550 // If there was a communication problem with the partner we assume that
2551 // the partner is already down while we receive this command.
2552 if (captured_ec || (captured_rcode == CONTROL_RESULT_ERROR)) {
2553 postNextEvent(HA_MAINTENANCE_START_EVT);
2554 verboseTransition(HA_PARTNER_DOWN_ST);
2555 runModel(NOP_EVT);
// NOTE(review): the listing skips line 2556 here; the string literals below
// have no visible receiver (presumably the opening of a return statement
// calling createAnswer()).
2557 "Server is now in the partner-down state as its"
2558 " partner appears to be offline for maintenance."));
2559
2560 } else if (captured_rcode == CONTROL_RESULT_SUCCESS) {
2561 // If the partner responded indicating no error it means that the
2562 // partner has been transitioned to the in-maintenance state. In that
2563 // case we transition to the partner-in-maintenance state.
2564 postNextEvent(HA_MAINTENANCE_START_EVT);
2565 verboseTransition(HA_PARTNER_IN_MAINTENANCE_ST);
2566 runModel(NOP_EVT);
2567
2568 } else {
2569 // Partner server returned a special status code which means that it can't
2570 // transition to the partner-in-maintenance state.
2571 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition to the"
2572 " partner-in-maintenance state. The partner server responded"
2573 " with the following message to the ha-maintenance-notify"
2574 " command: " + captured_error_message + "."));
2575
2576 }
2577
// NOTE(review): the listing skips line 2578 here; the string literals below
// have no visible receiver (presumably the opening of a return statement
// calling createAnswer()).
2579 "Server is now in the partner-in-maintenance state"
2580 " and its partner is in-maintenance state. The partner"
2581 " can be now safely shut down."));
2582 }
2583
// Handles the ha-maintenance-cancel command.
//
// The command is only accepted in HA_PARTNER_IN_MAINTENANCE_ST. The function
// sends ha-maintenance-notify with the cancel flag set to true to the
// failover peer, using an IOService local to this call. If no error was
// recorded, it posts HA_MAINTENANCE_CANCEL_EVT and transitions back to the
// previous state.
//
// NOTE(review): the listing omits the line preceding the function name
// (presumably the return type) and several lines inside the body; each gap
// is marked below.
2585HAService::processMaintenanceCancel() {
2586 if (getCurrState() != HA_PARTNER_IN_MAINTENANCE_ST) {
2587 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel maintenance"
2588 " request because the server is not in the"
2589 " partner-in-maintenance state."));
2590 }
2591
2592 HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2593
2594 // Create HTTP/1.1 request including ha-maintenance-notify command
2595 // with the cancel flag set to true.
2596 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2597 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2598 HostHttpHeader(remote_config->getUrl().getHostname()));
2599 remote_config->addBasicAuthHttpHeader(request);
2600 request->setBodyAsJson(CommandCreator::createMaintenanceNotify(true, server_type_));
2601 request->finalize();
2602
2603 // Response object should also be created because the HTTP client needs
2604 // to know the type of the expected response.
2605 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2606
2607 IOService io_service;
2608 HttpClient client(io_service);
2609
// Written by the response handler and read after io_service.run() returns.
2610 std::string error_message;
2611
2612 // Schedule asynchronous HTTP request.
2613 client.asyncSendRequest(remote_config->getUrl(),
2614 remote_config->getTlsContext(),
2615 request, response,
2616 [this, remote_config, &io_service, &error_message]
2617 (const boost::system::error_code& ec,
2618 const HttpResponsePtr& response,
2619 const std::string& error_str) {
2620
2621 io_service.stop();
2622
2623 // Handle first two groups of errors.
2624 if (ec || !error_str.empty()) {
2625 error_message = (ec ? ec.message() : error_str);
2626 LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_CANCEL_COMMUNICATIONS_FAILED)
2627 .arg(remote_config->getLogLabel())
2628 .arg(error_message);
2629
2630 } else {
2631
2632 // Handle third group of errors.
2633 try {
2634 int rcode = 0;
2635 static_cast<void>(verifyAsyncResponse(response, rcode));
2636
2637 } catch (const std::exception& ex) {
2638 error_message = ex.what();
// NOTE(review): the listing skips line 2639 here; the .arg() calls below have
// no visible receiver (presumably a logging macro).
2640 .arg(remote_config->getLogLabel())
2641 .arg(error_message);
2642 }
2643 }
2644
2645 // If there was an error communicating with the partner, mark the
2646 // partner as unavailable.
2647 if (!error_message.empty()) {
2648 communication_state_->setPartnerState("unavailable");
2649 }
2650 },
// NOTE(review): the listing skips line 2651 here; an argument between the
// response handler and the connect handler is not visible.
2652 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2653 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2654 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2655 );
2656
2657 // Run the IO service until it is stopped by any of the callbacks. This
2658 // makes it synchronous.
2659 io_service.run();
2660
2661 // There was an error in communication with the partner or the
2662 // partner was unable to revert its state.
2663 if (!error_message.empty()) {
// NOTE(review): the listing skips line 2664 here; the string literals below
// have no visible receiver (presumably the opening of a return statement
// calling createAnswer()).
2665 "Unable to cancel maintenance. The partner server responded"
2666 " with the following message to the ha-maintenance-notify"
2667 " command: " + error_message + "."));
2668 }
2669
2670 // Successfully reverted partner's state. Let's also revert our state to the
2671 // previous one.
2672 postNextEvent(HA_MAINTENANCE_CANCEL_EVT);
2673 verboseTransition(getPrevState());
2674 runModel(NOP_EVT);
2675
// NOTE(review): the listing skips line 2676 here; the string literal below
// has no visible receiver (presumably the opening of a return statement
// calling createAnswer()).
2677 "Server maintenance successfully canceled."));
2678 }
2679
2681HAService::verifyAsyncResponse(const HttpResponsePtr& response, int& rcode) {
2682 // Set the return code to error in case of early throw.
2683 rcode = CONTROL_RESULT_ERROR;
2684 // The response must cast to JSON type.
2685 HttpResponseJsonPtr json_response =
2686 boost::dynamic_pointer_cast<HttpResponseJson>(response);
2687 if (!json_response) {
2688 isc_throw(CtrlChannelError, "no valid HTTP response found");
2689 }
2690
2691 // Body holds the response to our command.
2692 ConstElementPtr body = json_response->getBodyAsJson();
2693 if (!body) {
2694 isc_throw(CtrlChannelError, "no body found in the response");
2695 }
2696
2697 // Body should contain a list of responses from multiple servers.
2698 if (body->getType() != Element::list) {
2699 // Some control agent errors are returned as a map.
2700 if (body->getType() == Element::map) {
2701 ElementPtr list = Element::createList();
2702 ElementPtr answer = Element::createMap();
2703 answer->set(CONTROL_RESULT, Element::create(rcode));
2704 ConstElementPtr text = body->get(CONTROL_TEXT);
2705 if (text) {
2706 answer->set(CONTROL_TEXT, text);
2707 }
2708 list->add(answer);
2709 body = list;
2710 } else {
2711 isc_throw(CtrlChannelError, "body of the response must be a list");
2712 }
2713 }
2714
2715 // There must be at least one response.
2716 if (body->empty()) {
2717 isc_throw(CtrlChannelError, "list of responses must not be empty");
2718 }
2719
2720 // Check if the status code of the first response. We don't support multiple
2721 // at this time, because we always send a request to a single location.
2722 ConstElementPtr args = parseAnswer(rcode, body->get(0));
2723 if ((rcode != CONTROL_RESULT_SUCCESS) &&
2724 (rcode != CONTROL_RESULT_EMPTY)) {
2725 std::ostringstream s;
2726 // Include an error text if available.
2727 if (args && args->getType() == Element::string) {
2728 s << args->stringValue() << ", ";
2729 }
2730 // Include an error code.
2731 s << "error code " << rcode;
2732 isc_throw(CtrlChannelError, s.str());
2733 }
2734
2735 return (args);
2736}
2737
2738bool
2739HAService::clientConnectHandler(const boost::system::error_code& ec, int tcp_native_fd) {
2740
2741 // If client is running it's own IOService we do NOT want to
2742 // register the socket with IfaceMgr.
2743 if (client_->getThreadIOService()) {
2744 return (true);
2745 }
2746
2747 // If things look OK register the socket with Interface Manager. Note
2748 // we don't register if the FD is < 0 to avoid an exception throw.
2749 // It is unlikely that this will occur but we want to be liberal
2750 // and avoid issues.
2751 if ((!ec || (ec.value() == boost::asio::error::in_progress))
2752 && (tcp_native_fd >= 0)) {
2753 // External socket callback is a NOP. Ready events handlers are
2754 // run by an explicit call IOService ready in kea-dhcp<n> code.
2755 // We are registering the socket only to interrupt main-thread
2756 // select().
2757 IfaceMgr::instance().addExternalSocket(tcp_native_fd,
2758 std::bind(&HAService::socketReadyHandler, this, ph::_1)
2759 );
2760 }
2761
2762 // If ec.value() == boost::asio::error::already_connected, we should already
2763 // be registered, so nothing to do. If it is any other value, then connect
2764 // failed and Connection logic should handle that, not us, so no matter
2765 // what happens we're returning true.
2766 return (true);
2767}
2768
2769void
2770HAService::socketReadyHandler(int tcp_native_fd) {
2771 // If the socket is ready but does not belong to one of our client's
2772 // ongoing transactions, we close it. This will unregister it from
2773 // IfaceMgr and ensure the client starts over with a fresh connection
2774 // if it needs to do so.
2775 client_->closeIfOutOfBand(tcp_native_fd);
2776}
2777
2778void
2779HAService::clientCloseHandler(int tcp_native_fd) {
2780 if (tcp_native_fd >= 0) {
2781 IfaceMgr::instance().deleteExternalSocket(tcp_native_fd);
2782 }
2783};
2784
2785size_t
2786HAService::pendingRequestSize() {
2787 if (MultiThreadingMgr::instance().getMode()) {
2788 std::lock_guard<std::mutex> lock(mutex_);
2789 return (pending_requests_.size());
2790 } else {
2791 return (pending_requests_.size());
2792 }
2793}
2794
2795template<typename QueryPtrType>
2796int
2797HAService::getPendingRequest(const QueryPtrType& query) {
2798 if (MultiThreadingMgr::instance().getMode()) {
2799 std::lock_guard<std::mutex> lock(mutex_);
2800 return (getPendingRequestInternal(query));
2801 } else {
2802 return (getPendingRequestInternal(query));
2803 }
2804}
2805
2806template<typename QueryPtrType>
2807int
2808HAService::getPendingRequestInternal(const QueryPtrType& query) {
2809 if (pending_requests_.count(query) == 0) {
2810 return (0);
2811 } else {
2812 return (pending_requests_[query]);
2813 }
2814}
2815
// Critical section callback which calls checkPermissions() on the client and
// on the listener, when they exist.
//
// MultiThreadingInvalidOperation is rethrown; any other std::exception is
// caught here and not propagated.
2816 void
2817 HAService::checkPermissionsClientAndListener() {
2818 // Since this function is used as CS callback all exceptions must be
2819 // suppressed (except the @ref MultiThreadingInvalidOperation), unlikely
2820 // though they may be.
2821 // The @ref MultiThreadingInvalidOperation is propagated to the scope of the
2822 // @ref MultiThreadingCriticalSection constructor.
2823 try {
2824 if (client_) {
2825 client_->checkPermissions();
2826 }
2827
2828 if (listener_) {
2829 listener_->checkPermissions();
2830 }
2831 } catch (const isc::MultiThreadingInvalidOperation& ex) {
// NOTE(review): the listing skips line 2832 here; the .arg() call below has
// no visible receiver (presumably a logging macro).
2833 .arg(ex.what());
2834 // The exception needs to be propagated to the caller of the
2835 // @ref MultiThreadingCriticalSection constructor.
2836 throw;
2837 } catch (const std::exception& ex) {
// NOTE(review): the listing skips line 2838 here; the .arg() call below has
// no visible receiver (presumably a logging macro).
2839 .arg(ex.what());
2840 }
2841 }
2842
2843void
2844HAService::startClientAndListener() {
2845 // Add critical section callbacks.
2846 MultiThreadingMgr::instance().addCriticalSectionCallbacks("HA_MT",
2847 std::bind(&HAService::checkPermissionsClientAndListener, this),
2848 std::bind(&HAService::pauseClientAndListener, this),
2849 std::bind(&HAService::resumeClientAndListener, this));
2850
2851 if (client_) {
2852 client_->start();
2853 }
2854
2855 if (listener_) {
2856 listener_->start();
2857 }
2858}
2859
// Critical section callback which pauses the client and the listener, when
// they exist. Any std::exception raised while pausing is caught here and not
// propagated.
2860 void
2861 HAService::pauseClientAndListener() {
2862 // Since this function is used as CS callback all exceptions must be
2863 // suppressed, unlikely though they may be.
2864 try {
2865 if (client_) {
2866 client_->pause();
2867 }
2868
2869 if (listener_) {
2870 listener_->pause();
2871 }
2872 } catch (const std::exception& ex) {
// NOTE(review): the listing skips line 2873 here; the .arg() call below has
// no visible receiver (presumably a logging macro).
2874 .arg(ex.what());
2875 }
2876 }
2877
// Critical section callback which resumes the client and the listener, when
// they exist. Any std::exception raised while resuming is caught here and
// not propagated.
2878 void
2879 HAService::resumeClientAndListener() {
2880 // Since this function is used as CS callback all exceptions must be
2881 // suppressed, unlikely though they may be.
2882 try {
2883 if (client_) {
2884 client_->resume();
2885 }
2886
2887 if (listener_) {
2888 listener_->resume();
2889 }
// NOTE(review): this handler catches by non-const reference, whereas the
// sibling pause/checkPermissions handlers catch const std::exception&.
// Consider making it const for consistency.
2890 } catch (std::exception& ex) {
// NOTE(review): the listing skips line 2891 here; the .arg() call below has
// no visible receiver (presumably a logging macro).
2892 .arg(ex.what());
2893 }
2894 }
2895
2896void
2897HAService::stopClientAndListener() {
2898 // Remove critical section callbacks.
2899 MultiThreadingMgr::instance().removeCriticalSectionCallbacks("HA_MT");
2900
2901 if (client_) {
2902 client_->stop();
2903 }
2904
2905 if (listener_) {
2906 listener_->stop();
2907 }
2908}
2909
2910 // Explicit instantiations.
// The getPendingRequest template is defined in this source file; these are
// the two instantiations it provides, for Pkt4Ptr and Pkt6Ptr queries.
2911 template int HAService::getPendingRequest(const Pkt4Ptr&);
2912 template int HAService::getPendingRequest(const Pkt6Ptr&);
2913
2914} // end of namespace isc::ha
2915} // end of namespace isc
if(!(yy_init))
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
Exception thrown when a worker thread is trying to stop or pause the respective thread pool (which wo...
A generic exception that is thrown when an unexpected error condition occurs.
A multi-threaded HTTP listener that can process API commands requests.
A standard control channel exception that is thrown if there is a problem with one of t...
static data::ConstElementPtr createLease4Delete(const dhcp::Lease4 &lease4)
Creates lease4-del command.
static data::ConstElementPtr createLease4Update(const dhcp::Lease4 &lease4)
Creates lease4-update command.
static data::ConstElementPtr createLease6BulkApply(const dhcp::Lease6CollectionPtr &leases, const dhcp::Lease6CollectionPtr &deleted_leases)
Creates lease6-bulk-apply command.
Holds communication state between DHCPv4 servers.
Holds communication state between DHCPv6 servers.
Role
Server's role in the High Availability setup.
Definition: ha_config.h:70
static std::string roleToString(const HAConfig::PeerConfig::Role &role)
Returns role name.
Definition: ha_config.cc:79
std::map< std::string, PeerConfigPtr > PeerConfigMap
Map of the servers' configurations.
Definition: ha_config.h:232
static std::string HAModeToString(const HAMode &ha_mode)
Returns HA mode name.
Definition: ha_config.cc:224
boost::shared_ptr< PeerConfig > PeerConfigPtr
Pointer to the server's configuration.
Definition: ha_config.h:229
static const int HA_MAINTENANCE_START_EVT
ha-maintenance-start command received.
Definition: ha_service.h:62
bool inScope(dhcp::Pkt4Ptr &query4)
Checks if the DHCPv4 query should be processed by this server.
Definition: ha_service.cc:965
void adjustNetworkState()
Enables or disables network state depending on the served scopes.
Definition: ha_service.cc:995
void stopClientAndListener()
Stop the client and(or) listener instances.
Definition: ha_service.cc:2897
int getNormalState() const
Returns normal operation state for the current configuration.
Definition: ha_service.cc:923
bool shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be queued.
Definition: ha_service.cc:1435
static const int HA_HEARTBEAT_COMPLETE_EVT
Finished heartbeat command.
Definition: ha_service.h:47
bool isMaintenanceCanceled() const
Convenience method checking if the current state is a result of canceling the maintenance.
Definition: ha_service.cc:1067
void asyncSendLeaseUpdate(const QueryPtrType &query, const HAConfig::PeerConfigPtr &config, const data::ConstElementPtr &command, const hooks::ParkingLotHandlePtr &parking_lot)
Asynchronously sends lease update to the peer.
Definition: ha_service.cc:1270
void verboseTransition(const unsigned state)
Transitions to a desired state and logs it.
Definition: ha_service.cc:858
bool sendLeaseUpdatesFromBacklog()
Attempts to send all lease updates from the backlog synchronously.
Definition: ha_service.cc:2288
config::CmdHttpListenerPtr listener_
HTTP listener instance used to receive and respond to HA commands and lease updates.
Definition: ha_service.h:1123
bool leaseUpdateComplete(QueryPtrType &query, const hooks::ParkingLotHandlePtr &parking_lot)
Handle last pending request for this query.
Definition: ha_service.cc:1216
HAConfigPtr config_
Pointer to the HA hooks library configuration.
Definition: ha_service.h:1113
bool shouldTerminate() const
Indicates if the server should transition to the terminated state as a result of high clock skew.
Definition: ha_service.cc:1054
void terminatedStateHandler()
Handler for "terminated" state.
Definition: ha_service.cc:729
dhcp::NetworkStatePtr network_state_
Pointer to the state of the DHCP service (enabled/disabled).
Definition: ha_service.h:1110
HAService(const asiolink::IOServicePtr &io_service, const dhcp::NetworkStatePtr &network_state, const HAConfigPtr &config, const HAServerType &server_type=HAServerType::DHCPv4)
Constructor.
Definition: ha_service.cc:53
void scheduleHeartbeat()
Schedules asynchronous heartbeat to a peer if it is not scheduled.
Definition: ha_service.cc:1701
void passiveBackupStateHandler()
Handler for "passive-backup" state.
Definition: ha_service.cc:543
QueryFilter query_filter_
Selects queries to be processed/dropped.
Definition: ha_service.h:1129
static const int HA_MAINTENANCE_NOTIFY_EVT
ha-maintenance-notify command received.
Definition: ha_service.h:59
void inMaintenanceStateHandler()
Handler for the "in-maintenance" state.
Definition: ha_service.cc:402
virtual void verifyEvents()
Verifies events used by the HA service.
Definition: ha_service.cc:129
void conditionalLogPausedState() const
Logs if the server is paused in the current state.
Definition: ha_service.cc:949
bool unpause()
Unpauses the HA state machine with logging.
Definition: ha_service.cc:939
static const int HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED
Control result returned in response to ha-maintenance-notify.
Definition: ha_service.h:68
void serveDefaultScopes()
Instructs the HA service to serve default scopes.
Definition: ha_service.cc:960
size_t asyncSendLeaseUpdates(const dhcp::Pkt4Ptr &query, const dhcp::Lease4CollectionPtr &leases, const dhcp::Lease4CollectionPtr &deleted_leases, const hooks::ParkingLotHandlePtr &parking_lot)
Schedules asynchronous IPv4 leases updates.
Definition: ha_service.cc:1102
static const int HA_SYNCING_SUCCEEDED_EVT
Lease database synchronization succeeded.
Definition: ha_service.h:56
bool sendHAReset()
Sends ha-reset command to partner synchronously.
Definition: ha_service.cc:2379
virtual void defineEvents()
Defines events used by the HA service.
Definition: ha_service.cc:116
asiolink::IOServicePtr io_service_
Pointer to the IO service object shared between this hooks library and the DHCP server.
Definition: ha_service.h:1107
CommunicationStatePtr communication_state_
Holds communication state with a peer.
Definition: ha_service.h:1126
LeaseUpdateBacklog lease_update_backlog_
Backlog of DHCP lease updates.
Definition: ha_service.h:1243
virtual ~HAService()
Destructor.
Definition: ha_service.cc:108
static const int HA_SYNCING_FAILED_EVT
Lease database synchronization failed.
Definition: ha_service.h:53
static const int HA_MAINTENANCE_CANCEL_EVT
ha-maintenance-cancel command received.
Definition: ha_service.h:65
std::function< void(const bool, const std::string &)> PostRequestCallback
Callback invoked when a request was sent and a response was received or an error occurred.
Definition: ha_service.h:77
void readyStateHandler()
Handler for "ready" state.
Definition: ha_service.cc:561
virtual void defineStates()
Defines states of the HA service.
Definition: ha_service.cc:142
void backupStateHandler()
Handler for the "backup" state.
Definition: ha_service.cc:195
void communicationRecoveryHandler()
Handler for the "communication-recovery" state.
Definition: ha_service.cc:210
bool isPartnerStateInvalid() const
Indicates if the partner's state is invalid.
Definition: ha_service.cc:1072
int synchronize(std::string &status_message, const std::string &server_name, const unsigned int max_period)
Synchronizes lease database with a partner.
Definition: ha_service.cc:2133
void normalStateHandler()
Handler for the "hot-standby" and "load-balancing" states.
Definition: ha_service.cc:327
void waitingStateHandler()
Handler for "waiting" state.
Definition: ha_service.cc:750
bool shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be sent as a result of lease allocation or release.
Definition: ha_service.cc:1403
static const int HA_LEASE_UPDATES_COMPLETE_EVT
Finished lease updates commands.
Definition: ha_service.h:50
void partnerDownStateHandler()
Handler for "partner-down" state.
Definition: ha_service.cc:427
http::HttpClientPtr client_
HTTP client instance used to send HA commands and lease updates.
Definition: ha_service.h:1119
void updatePendingRequest(QueryPtrType &query)
Update pending request counter for this query.
Definition: ha_service.cc:1249
bool shouldPartnerDown() const
Indicates if the server should transition to the partner down state.
Definition: ha_service.cc:1027
std::function< void(const bool, const std::string &, const bool)> PostSyncCallback
Callback invoked when lease database synchronization is complete.
Definition: ha_service.h:86
void syncingStateHandler()
Handler for "syncing" state.
Definition: ha_service.cc:640
void partnerInMaintenanceStateHandler()
Handler for "partner-in-maintenance" state.
Definition: ha_service.cc:504
bool push(const OpType op_type, const dhcp::LeasePtr &lease)
Appends lease update to the queue.
OpType
Type of the lease update (operation type).
void clear()
Removes all lease updates from the queue.
bool wasOverflown()
Checks if the queue has overflowed.
bool inScope(const dhcp::Pkt4Ptr &query4, std::string &scope_class) const
Checks if this server should process the DHCPv4 query.
void serveFailoverScopes()
Enable scopes required in failover case.
void serveDefaultScopes()
Serve default scopes for the given HA mode.
void serveNoScopes()
Disables all scopes.
Represents HTTP Host header.
Definition: http_header.h:68
HTTP client class.
Definition: client.h:87
void asyncSendRequest(const Url &url, const asiolink::TlsContextPtr &tls_context, const HttpRequestPtr &request, const HttpResponsePtr &response, const RequestHandler &request_callback, const RequestTimeout &request_timeout=RequestTimeout(10000), const ConnectHandler &connect_callback=ConnectHandler(), const HandshakeHandler &handshake_callback=HandshakeHandler(), const CloseHandler &close_callback=CloseHandler())
Queues new asynchronous HTTP request for a given URL.
Definition: client.cc:1932
This class parses and generates time values used in HTTP.
Definition: date_time.h:41
std::string rfc1123Format() const
Returns time value formatted as specified in RFC 1123.
Definition: date_time.cc:30
const EventPtr & getEvent(unsigned int value)
Fetches the event referred to by value.
Definition: state_model.cc:186
std::string getStateLabel(const int state) const
Fetches the label associated with a state value.
Definition: state_model.cc:421
void unpauseModel()
Unpauses state model.
Definition: state_model.cc:276
bool isModelPaused() const
Returns whether or not the model is paused.
Definition: state_model.cc:415
void postNextEvent(unsigned int event)
Sets the next event to the given event value.
Definition: state_model.cc:320
void defineState(unsigned int value, const std::string &label, StateHandler handler, const StatePausing &state_pausing=STATE_PAUSE_NEVER)
Adds a state value and associated label to the set of states.
Definition: state_model.cc:196
bool doOnExit()
Checks if on exit flag is true.
Definition: state_model.cc:347
unsigned int getNextEvent() const
Fetches the model's next event.
Definition: state_model.cc:373
void defineEvent(unsigned int value, const std::string &label)
Adds an event value and associated label to the set of events.
Definition: state_model.cc:170
void transition(unsigned int state, unsigned int event)
Sets up the model to transition into given state with a given event.
Definition: state_model.cc:264
bool doOnEntry()
Checks if on entry flag is true.
Definition: state_model.cc:339
static const int NOP_EVT
Signifies that no event has occurred.
Definition: state_model.h:292
void startModel(const int start_state)
Begins execution of the model.
Definition: state_model.cc:100
unsigned int getLastEvent() const
Fetches the model's last event.
Definition: state_model.cc:367
unsigned int getCurrState() const
Fetches the model's current state.
Definition: state_model.cc:355
Utility class to measure code execution times.
Definition: stopwatch.h:35
void stop()
Stops the stopwatch.
Definition: stopwatch.cc:35
std::string logFormatLastDuration() const
Returns the last measured duration in the format directly usable in log messages.
Definition: stopwatch.cc:75
This file contains several functions and constants that are used for handling commands and responses ...
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
An abstract API for lease database.
#define LOG_ERROR(LOGGER, MESSAGE)
Macro to conveniently test error output and log it.
Definition: macros.h:32
#define LOG_INFO(LOGGER, MESSAGE)
Macro to conveniently test info output and log it.
Definition: macros.h:20
#define LOG_WARN(LOGGER, MESSAGE)
Macro to conveniently test warn output and log it.
Definition: macros.h:26
const int CONTROL_RESULT_EMPTY
Status code indicating that the specified command was completed correctly, but failed to produce any ...
const char * CONTROL_TEXT
String used for storing textual description ("text")
constexpr long TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST
Timeout for the HTTP clients awaiting a response to a request.
Definition: timeouts.h:38
const int CONTROL_RESULT_ERROR
Status code indicating a general failure.
ConstElementPtr createAnswer(const int status_code, const std::string &text, const ConstElementPtr &arg)
ConstElementPtr parseAnswer(int &rcode, const ConstElementPtr &msg)
const char * CONTROL_RESULT
String used for result, i.e. integer status ("result")
const int CONTROL_RESULT_SUCCESS
Status code indicating a successful operation.
boost::shared_ptr< const Element > ConstElementPtr
Definition: data.h:27
boost::shared_ptr< Element > ElementPtr
Definition: data.h:24
boost::shared_ptr< isc::dhcp::Pkt > PktPtr
A pointer to either Pkt4 or Pkt6 packet.
Definition: pkt.h:797
std::string ClientClass
Defines a single class name.
Definition: classify.h:37
boost::shared_ptr< Lease4Collection > Lease4CollectionPtr
A shared pointer to the collection of IPv4 leases.
Definition: lease.h:490
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:544
boost::shared_ptr< Lease > LeasePtr
Pointer to the lease object.
Definition: lease.h:26
boost::shared_ptr< NetworkState > NetworkStatePtr
Pointer to the NetworkState object.
boost::shared_ptr< Lease6Collection > Lease6CollectionPtr
A shared pointer to the collection of IPv6 leases.
Definition: lease.h:644
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition: pkt6.h:28
boost::shared_ptr< Lease4 > Lease4Ptr
Pointer to a Lease4 structure.
Definition: lease.h:283
const isc::log::MessageID HA_INVALID_PARTNER_STATE_LOAD_BALANCING
Definition: ha_messages.h:51
const isc::log::MessageID HA_RESUME_CLIENT_LISTENER_FAILED
Definition: ha_messages.h:93
const isc::log::MessageID HA_LOCAL_DHCP_ENABLE
Definition: ha_messages.h:76
const isc::log::MessageID HA_LEASES_BACKLOG_NOTHING_TO_SEND
Definition: ha_messages.h:58
const isc::log::MessageID HA_LEASES_BACKLOG_FAILED
Definition: ha_messages.h:57
const isc::log::MessageID HA_SYNC_FAILED
Definition: ha_messages.h:100
const isc::log::MessageID HA_TERMINATED_RESTART_PARTNER
Definition: ha_messages.h:105
const int HA_PASSIVE_BACKUP_ST
In passive-backup state with a single active server and backup servers.
const int HA_HOT_STANDBY_ST
Hot standby state.
const isc::log::MessageID HA_INVALID_PARTNER_STATE_COMMUNICATION_RECOVERY
Definition: ha_messages.h:49
const isc::log::MessageID HA_LEASES_BACKLOG_SUCCESS
Definition: ha_messages.h:60
const int HA_COMMUNICATION_RECOVERY_ST
Communication recovery state.
const isc::log::MessageID HA_STATE_MACHINE_CONTINUED
Definition: ha_messages.h:96
isc::log::Logger ha_logger("ha-hooks")
Definition: ha_log.h:17
const isc::log::MessageID HA_LEASES_SYNC_FAILED
Definition: ha_messages.h:62
const isc::log::MessageID HA_SYNC_SUCCESSFUL
Definition: ha_messages.h:103
const int HA_UNAVAILABLE_ST
Special state indicating that this server is unable to communicate with the partner.
const isc::log::MessageID HA_CONFIG_LEASE_UPDATES_DISABLED_REMINDER
Definition: ha_messages.h:33
const isc::log::MessageID HA_SERVICE_STARTED
Definition: ha_messages.h:95
const int HA_TERMINATED_ST
HA service terminated state.
const int HA_IN_MAINTENANCE_ST
In maintenance state.
const int HA_LOAD_BALANCING_ST
Load balancing state.
const isc::log::MessageID HA_DHCP_ENABLE_FAILED
Definition: ha_messages.h:42
const isc::log::MessageID HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER
Definition: ha_messages.h:71
const isc::log::MessageID HA_LEASES_BACKLOG_START
Definition: ha_messages.h:59
const isc::log::MessageID HA_SYNC_START
Definition: ha_messages.h:102
const isc::log::MessageID HA_HEARTBEAT_FAILED
Definition: ha_messages.h:44
const int HA_PARTNER_DOWN_ST
Partner down state.
const isc::log::MessageID HA_LEASE_UPDATES_ENABLED
Definition: ha_messages.h:68
const isc::log::MessageID HA_INVALID_PARTNER_STATE_HOT_STANDBY
Definition: ha_messages.h:50
const isc::log::MessageID HA_STATE_MACHINE_PAUSED
Definition: ha_messages.h:97
const isc::log::MessageID HA_TERMINATED
Definition: ha_messages.h:104
const isc::log::MessageID HA_DHCP_DISABLE_FAILED
Definition: ha_messages.h:40
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition: ha_config.h:760
const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN
Definition: ha_messages.h:85
const int HA_PARTNER_IN_MAINTENANCE_ST
Partner in-maintenance state.
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_FAILED
Definition: ha_messages.h:81
const int HA_WAITING_ST
Server waiting state, i.e. waiting for another server to be ready.
HAServerType
Lists possible server types for which HA service is created.
const int HA_BACKUP_ST
Backup state.
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_ILLEGAL
Definition: ha_messages.h:89
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_FAILED
Definition: ha_messages.h:88
const isc::log::MessageID HA_MAINTENANCE_SHUTDOWN_SAFE
Definition: ha_messages.h:83
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_CANCEL_FAILED
Definition: ha_messages.h:79
const isc::log::MessageID HA_LEASE_UPDATES_DISABLED
Definition: ha_messages.h:67
const isc::log::MessageID HA_LOCAL_DHCP_DISABLE
Definition: ha_messages.h:75
const int HA_SYNCING_ST
Synchronizing database state.
const isc::log::MessageID HA_RESET_FAILED
Definition: ha_messages.h:91
const isc::log::MessageID HA_STATE_TRANSITION
Definition: ha_messages.h:98
const isc::log::MessageID HA_CONFIG_LEASE_SYNCING_DISABLED_REMINDER
Definition: ha_messages.h:30
std::string stateToString(int state)
Returns state name.
const int HA_READY_ST
Server ready state, i.e. synchronized database, can enable DHCP service.
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED
Definition: ha_messages.h:20
const isc::log::MessageID HA_MAINTENANCE_STARTED
Definition: ha_messages.h:84
const isc::log::MessageID HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER
Definition: ha_messages.h:70
const isc::log::MessageID HA_LEASE_UPDATE_FAILED
Definition: ha_messages.h:72
const isc::log::MessageID HA_STATE_TRANSITION_PASSIVE_BACKUP
Definition: ha_messages.h:99
boost::shared_ptr< ParkingLotHandle > ParkingLotHandlePtr
Pointer to the parking lot handle.
Definition: parking_lots.h:375
boost::shared_ptr< PostHttpRequestJson > PostHttpRequestJsonPtr
Pointer to PostHttpRequestJson.
boost::shared_ptr< HttpResponseJson > HttpResponseJsonPtr
Pointer to the HttpResponseJson object.
Definition: response_json.h:24
boost::shared_ptr< HttpResponse > HttpResponsePtr
Pointer to the HttpResponse object.
Definition: response.h:78
const char * MessageID
Definition: message_types.h:15
Definition: edns.h:19
Defines the logger used by the top-level component of kea-lfc.
HTTP request/response timeout value.
Definition: client.h:90