IPSec traffic stops, no errors, but link stays up



  • I have a strange issue between 2 pfSense servers.  One is for production and the other a development environment.  We link the two via IPSec.

    During the period covered by the attached log the connection is up, but I am not able to connect to a host on the "other" end.

    From /var/etc/ipsec/ipsec.conf

    conn con4000
    fragmentation = yes
    keyexchange = ikev1
    reauth = yes
    forceencaps = no
    mobike = no

    rekey = yes
    installpolicy = yes
    type = tunnel
    dpdaction = restart
    dpddelay = 1s
    dpdtimeout = 3s
    auto = route
    left = 129.232.232.130
    right = 196.30.52.140
    leftid = 129.232.232.130
    ikelifetime = 604800s
    lifetime = 3600s
    ike = aes256-sha1-modp1024!
    esp = aes256-sha1-modp1024!
    leftauth = psk
    rightauth = psk
    rightid = 196.30.52.140
    aggressive = no
    rightsubnet = 196.30.30.98
    leftsubnet = 41.75.111.176|192.168.0.22

    Options for the charon IKE daemon.

    charon {

    # Accept unencrypted ID and HASH payloads in IKEv1 Main Mode.
        # accept_unencrypted_mainmode_messages = no

    # Maximum number of half-open IKE_SAs for a single peer IP.
        # block_threshold = 5

    # Whether Certificate Revocation Lists (CRLs) fetched via HTTP or LDAP should
        # be saved under a unique file name derived from the public key of the
        # Certification Authority (CA) to /etc/ipsec.d/crls (stroke) or
        # /etc/swanctl/x509crl (vici), respectively.
        # cache_crls = no

    # Whether relations in validated certificate chains should be cached in
        # memory.
        # cert_cache = yes

    # Send Cisco Unity vendor ID payload (IKEv1 only).
        # cisco_unity = no

    # Close the IKE_SA if setup of the CHILD_SA along with IKE_AUTH failed.
        # close_ike_on_child_failure = no

    # Number of half-open IKE_SAs that activate the cookie mechanism.
        # cookie_threshold = 10

    # Delete CHILD_SAs right after they got successfully rekeyed (IKEv1 only).
        # delete_rekeyed = no

    # Delay in seconds until inbound IPsec SAs are deleted after rekeyings
        # (IKEv2 only).
        # delete_rekeyed_delay = 5

    # Use ANSI X9.42 DH exponent size or optimum size matched to cryptographic
        # strength.
        # dh_exponent_ansi_x9_42 = yes

    # Use RTLD_NOW with dlopen when loading plugins and IMV/IMCs to reveal
        # missing symbols immediately.
        # dlopen_use_rtld_now = no

    # DNS server assigned to peer via configuration payload (CP).
        # dns1 =

    # DNS server assigned to peer via configuration payload (CP).
        # dns2 =

    # Enable Denial of Service protection using cookies and aggressiveness
        # checks.
        # dos_protection = yes

    # Compliance with the errata for RFC 4753.
        # ecp_x_coordinate_only = yes

    # Free objects during authentication (might conflict with plugins).
        # flush_auth_cfg = no

    # Whether to follow IKEv2 redirects (RFC 5685).
        # follow_redirects = yes

    # Maximum size (complete IP datagram size in bytes) of a sent IKE fragment
        # when using proprietary IKEv1 or standardized IKEv2 fragmentation, defaults
        # to 1280 (use 0 for address family specific default values, which uses a
        # lower value for IPv4).  If specified this limit is used for both IPv4 and
        # IPv6.
        # fragment_size = 1280

    # Name of the group the daemon changes to after startup.
        # group =

    # Timeout in seconds for connecting IKE_SAs (also see IKE_SA_INIT DROPPING).
        # half_open_timeout = 30

    # Enable hash and URL support.
        # hash_and_url = no

    # Allow IKEv1 Aggressive Mode with pre-shared keys as responder.
        # i_dont_care_about_security_and_use_aggressive_mode_psk = no

    # Whether to ignore the traffic selectors from the kernel's acquire events
        # for IKEv2 connections (they are not used for IKEv1).
        # ignore_acquire_ts = no

    # A space-separated list of routing tables to be excluded from route
        # lookups.
        # ignore_routing_tables =

    # Maximum number of IKE_SAs that can be established at the same time before
        # new connection attempts are blocked.
        # ikesa_limit = 0

    # Number of exclusively locked segments in the hash table.
        # ikesa_table_segments = 1

    # Size of the IKE_SA hash table.
        # ikesa_table_size = 1

    # Whether to close IKE_SA if the only CHILD_SA closed due to inactivity.
        # inactivity_close_ike = no

    # Limit new connections based on the current number of half open IKE_SAs,
        # see IKE_SA_INIT DROPPING in strongswan.conf(5).
        # init_limit_half_open = 0

    # Limit new connections based on the number of queued jobs.
        # init_limit_job_load = 0

    # Causes charon daemon to ignore IKE initiation requests.
        # initiator_only = no

    # Install routes into a separate routing table for established IPsec
        # tunnels.
        # install_routes = yes

    # Install virtual IP addresses.
        # install_virtual_ip = yes

    # The name of the interface on which virtual IP addresses should be
        # installed.
        # install_virtual_ip_on =

    # Check daemon, libstrongswan and plugin integrity at startup.
        # integrity_test = no

    # A comma-separated list of network interfaces that should be ignored, if
        # interfaces_use is specified this option has no effect.
        # interfaces_ignore =

    # A comma-separated list of network interfaces that should be used by
        # charon. All other interfaces are ignored.
        # interfaces_use =

    # NAT keep alive interval.
        # keep_alive = 20s

    # Plugins to load in the IKE daemon charon.
        # load =

    # Determine plugins to load via each plugin's load option.
        # load_modular = no

    # Initiate IKEv2 reauthentication with a make-before-break scheme.
        # make_before_break = no

    # Maximum number of IKEv1 phase 2 exchanges per IKE_SA to keep state about
        # and track concurrently.
        # max_ikev1_exchanges = 3

    # Maximum packet size accepted by charon.
        # max_packet = 10000

    # Enable multiple authentication exchanges (RFC 4739).
        # multiple_authentication = yes

    # WINS servers assigned to peer via configuration payload (CP).
        # nbns1 =

    # WINS servers assigned to peer via configuration payload (CP).
        # nbns2 =

    # UDP port used locally. If set to 0 a random port will be allocated.
        # port = 500

    # UDP port used locally in case of NAT-T. If set to 0 a random port will be
        # allocated.  Has to be different from charon.port, otherwise a random port
        # will be allocated.
        # port_nat_t = 4500

    # Whether to prefer updating SAs to the path with the best route.
        # prefer_best_path = no

    # Prefer locally configured proposals for IKE/IPsec over supplied ones as
        # responder (disabling this can avoid keying retries due to
        # INVALID_KE_PAYLOAD notifies).
        # prefer_configured_proposals = yes

    # By default public IPv6 addresses are preferred over temporary ones (RFC
        # 4941), to make connections more stable. Enable this option to reverse
        # this.
        # prefer_temporary_addrs = no

    # Process RTM_NEWROUTE and RTM_DELROUTE events.
        # process_route = yes

    # Delay in ms for receiving packets, to simulate larger RTT.
        # receive_delay = 0

    # Delay request messages.
        # receive_delay_request = yes

    # Delay response messages.
        # receive_delay_response = yes

    # Specific IKEv2 message type to delay, 0 for any.
        # receive_delay_type = 0

    # Size of the AH/ESP replay window, in packets.
        # replay_window = 32

    # Base to use for calculating exponential back off, see IKEv2 RETRANSMISSION
        # in strongswan.conf(5).
        # retransmit_base = 1.8

    # Maximum jitter in percent to apply randomly to calculated retransmission
        # timeout (0 to disable).
        # retransmit_jitter = 0

    # Upper limit in seconds for calculated retransmission timeout (0 to
        # disable).
        # retransmit_limit = 0

    # Timeout in seconds before sending first retransmit.
        # retransmit_timeout = 4.0

    # Number of times to retransmit a packet before giving up.
        # retransmit_tries = 5

    # Interval in seconds to use when retrying to initiate an IKE_SA (e.g. if
        # DNS resolution failed), 0 to disable retries.
        # retry_initiate_interval = 0

    # Initiate CHILD_SA within existing IKE_SAs (always enabled for IKEv1).
        # reuse_ikesa = yes

    # Numerical routing table to install routes to.
        # routing_table =

    # Priority of the routing table.
        # routing_table_prio =

    # Delay in ms for sending packets, to simulate larger RTT.
        # send_delay = 0

    # Delay request messages.
        # send_delay_request = yes

    # Delay response messages.
        # send_delay_response = yes

    # Specific IKEv2 message type to delay, 0 for any.
        # send_delay_type = 0

    # Send strongSwan vendor ID payload
        # send_vendor_id = no

    # Whether to enable Signature Authentication as per RFC 7427.
        # signature_authentication = yes

    # Whether to enable constraints against IKEv2 signature schemes.
        # signature_authentication_constraints = yes

    # The upper limit for SPIs requested from the kernel for IPsec SAs.
        # spi_max = 0xcfffffff

    # The lower limit for SPIs requested from the kernel for IPsec SAs.
        # spi_min = 0xc0000000

    # Number of worker threads in charon.
        # threads = 16

    # Name of the user the daemon changes to after startup.
        # user =

    crypto_test {

    # Benchmark crypto algorithms and order them by efficiency.
            # bench = no

    # Buffer size used for crypto benchmark.
            # bench_size = 1024

    # Number of iterations to test each algorithm.
            # bench_time = 50

    # Test crypto algorithms during registration (requires test vectors
            # provided by the test-vectors plugin).
            # on_add = no

    # Test crypto algorithms on each crypto primitive instantiation.
            # on_create = no

    # Strictly require at least one test vector to enable an algorithm.
            # required = no

    # Whether to test RNG with TRUE quality; requires a lot of entropy.
            # rng_true = no

    }

    host_resolver {

    # Maximum number of concurrent resolver threads (they are terminated if
            # unused).
            # max_threads = 3

    # Minimum number of resolver threads to keep around.
            # min_threads = 0

    }

    leak_detective {

    # Includes source file names and line numbers in leak detective output.
            # detailed = yes

    # Threshold in bytes for leaks to be reported (0 to report all).
            # usage_threshold = 10240

    # Threshold in number of allocations for leaks to be reported (0 to
            # report all).
            # usage_threshold_count = 0

    }

    processor {

    # Section to configure the number of reserved threads per priority class
            # see JOB PRIORITY MANAGEMENT in strongswan.conf(5).
            priority_threads {

    }

    }

    # Section containing a list of scripts (name = path) that are executed when
        # the daemon is started.
        start-scripts {

    }

    # Section containing a list of scripts (name = path) that are executed when
        # the daemon is terminated.
        stop-scripts {

    }

    tls {

    # List of TLS encryption ciphers.
            # cipher =

    # List of TLS key exchange methods.
            # key_exchange =

    # List of TLS MAC algorithms.
            # mac =

    # List of TLS cipher suites.
            # suites =

    }

    x509 {

    # Discard certificates with unsupported or unknown critical extensions.
            # enforce_critical = yes

    }

    }

    ipsec.log.txt



  • What can I do to try to troubleshoot this?  If the logs don't show anything, I'm pretty much stumped, aren't I?



  • Check the other side and verify all the settings match. Verify the phase two IDs match.



  • @dotdash:

    Check the other side and verify all the settings match. Verify the phase two IDs match.

    The connection is established and stays established for phase 1 and 2.  If there was a mismatch, they wouldn't connect to begin with.  I have checked both sides many times and everything matches.

    The link stays up, but if nothing is sent over it for a while, something happens that puts the link into a state where no traffic passes over it.  Then, when I attempt to connect to a database service on the other end, the link wakes up after about 30-60 seconds.