dns and software, was Re: Reliable Cloud host ?

Mark Andrews marka at isc.org
Thu Mar 1 21:45:44 UTC 2012


In message <CAC38B59-1F54-4788-87A2-A1A8BE453500 at delong.com>, Owen DeLong writes:
> >
> > It's deeper than just that, though.  The whole paradigm is messy, from
> > the point of view of someone who just wants to get stuff done.  The
> > examples are (almost?) all fatally flawed.  The code that actually gets
> > at least some of it right ends up being too complex and too hard for
> > people to understand why things are done the way they are.
> >
> > Even in the "old days", before IPv6, geez, look at this:
> >
> > bcopy(host->h_addr_list[n], (char *)&addr->sin_addr.s_addr, sizeof(addr->sin_addr.s_addr));
> >
> > That's real comprehensible - and it's essentially the data interface
> > between the resolver library and the system's addressing structures
> > for syscalls.
> >
> > On one hand, it's "great" that they wanted to abstract the dirty details
> > of DNS away from users, but I'd say they failed pretty much even at that.
> >
> > ... JG
> > -- 
> > Joe Greco - sol.net Network Services - Milwaukee, WI - http://www.sol.net
> > "We call it the 'one bite at the apple' rule. Give me one chance [and] then I
> > won't contact you again." - Direct Marketing Ass'n position on e-mail spam(CNN)
> > With 24 million small businesses in the US alone, that's way too many apples.
> 
> I think that the modern set of getaddrinfo and connect is actually not
> that complicated:
> 
>   /* Hints for getaddrinfo() (tell it what we want) */
>   memset(&addrinfo, 0, sizeof(addrinfo));	/* Zero out the buffer */
>   addrinfo.ai_family=PF_UNSPEC;			/* Any and all address families */
>   addrinfo.ai_socktype=SOCK_STREAM;		/* Stream Socket */
>   addrinfo.ai_protocol=IPPROTO_TCP;		/* TCP */
>   /* Ask the resolver library for the information. Exit on failure. */
>   /* argv[1] is the hostname passed in by the user. "demo" is the service name */
>   if (rval = getaddrinfo(argv[1], "demo", &addrinfo, &res) != 0) {
>     fprintf(stderr, "%s: Failed to resolve address information.\n", argv[0]);
>     exit(2);
>   }
> 
>   /* Iterate through the results */
>   for (r=res; r; r = r->ai_next) {
>     /* Create a socket configured for the next candidate */
>     sockfd6 = socket(r->ai_family, r->ai_socktype, r->ai_protocol);
>     /* Try to connect */
>     if (connect(sockfd6, r->ai_addr, r->ai_addrlen) < 0)
>     {
>       /* Failed to connect */
>       e_save = errno;
>       /* Destroy socket */
>       (void) close(sockfd6);
>       /* Recover the error information */
>       errno = e_save;
>       /* Tell the user that this attempt failed */
>       fprintf(stderr, "%s: Failed attempt to %s.\n", argv[0],
> 		get_ip_str((struct sockaddr *)r->ai_addr, buf, BUFLEN));
>       /* Give error details */
>       perror("Socket error");
>     } else {			/* Success! */
>       /* Inform the user */
>       snprintf(s, BUFLEN, "%s: Succeeded to %s.", argv[0],
> 		get_ip_str((struct sockaddr *)r->ai_addr, buf, BUFLEN));
>       debug(5, argv[0], s);
>       /* Flag our success */
>       success++;
>       /* Stop iterating */
>       break;
>     }
>   }
>   /* Out of the loop. Either we succeeded or ran out of possibilities */
>   if (success == 0) /* If we ran out of possibilities... */
>   {
>     /* Inform the user, free up the resources, and exit */
>     fprintf(stderr, "%s: Failed to connect to %s.\n", argv[0], argv[1]);
>     freeaddrinfo(res);
>     exit(5);
>   }
>   /* Succeeded. Inform the user and continue with the application */
>   printf("%s: Successfully connected to %s at %s on FD %d.\n", argv[0], argv[1],
> 	get_ip_str((struct sockaddr *)r->ai_addr, buf, BUFLEN),
> 	sockfd6);
>   /* Free up the memory held by the resolver results */
>   freeaddrinfo(res);
> 
> It's really hard to make a case that this is all that complex.
> 
> I put a lot of extra comments in there to make it clear what's happening
> for people who may not be used to coding in C. It also contains a whole
> lot of extra user notification and debugging instrumentation because it
> is designed as an example people can use to learn with.
> 
> Yes, this was a lot messier and a lot stranger and harder to get right
> with get*by{name,addr}, but, those days are long gone and anyone still
> coding with those needs to move forward.
> 
> Owen
> 

These days you want something more complicated, as everyone is, or
soon will be, multi-homed.  The basic loop above has very bad error
characteristics if the first machines are not reachable.  I've got
working select, poll and thread based examples here:

http://www.isc.org/community/blog/201101/how-to-connect-to-a-multi-homed-server-over-tcp.

From http://www.isc.org/files/imce/select-connect_0.c:

/*
 * Copyright (C) 2011  Internet Systems Consortium, Inc. ("ISC")
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */


#define TIMEOUT 500 /* ms */

/*
 * Connect to the first address in the list 'res0' that accepts a
 * connection, starting attempts to later addresses while earlier ones are
 * still pending.
 *
 * Each address is tried with a non-blocking connect().  After starting an
 * attempt we wait for any outstanding attempt to complete; while more
 * addresses remain the wait is bounded (initially TIMEOUT ms, halved every
 * time a wait expires), and once the last address has been started we wait
 * without a time limit until every outstanding attempt has resolved.
 *
 * 'res0' is the head of an addrinfo list (as produced by getaddrinfo());
 * it is only read here and is not freed.
 *
 * Returns a connected descriptor, switched back to blocking mode, or -1 if
 * no address could be connected (including an empty list or an allocation
 * failure).  Every other descriptor created along the way is closed before
 * returning.  Failures of individual system calls are reported with
 * perror().
 */
int
connect_to_host(struct addrinfo *res0) {
	struct addrinfo *res;
	int fd = -1, n, i = 0, j, flags, count, max = -1, *fds;
	struct timeval *timeout, tv;
	long wait_usec = TIMEOUT * 1000;	/* current bounded wait */
	fd_set fdset, wrset;

	/*
	 * Work out how many possible descriptors we could use.
	 */
	for (res = res0, count = 0; res; res = res->ai_next)
		count++;

	/*
	 * Nothing to try.  Bail out before calloc(): a zero-sized request
	 * may legitimately return NULL, which would otherwise be reported
	 * as an allocation failure.
	 */
	if (count == 0)
		return (-1);

	fds = calloc(count, sizeof(*fds));
	if (fds == NULL) {
		perror("calloc");
		goto cleanup;
	}
	FD_ZERO(&fdset);
	/* From here on 'count' is the number of attempts still pending. */
	for (res = res0, i = 0, count = 0; res; res = res->ai_next) {
		fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
		if (fd == -1) {
			/*
			 * If AI_ADDRCONFIG is not supported we will get
			 * EAFNOSUPPORT returned.  Behave as if the address
			 * was not there.
			 */
			if (errno != EAFNOSUPPORT)
				perror("socket");
			else if (res->ai_next != NULL)
				continue;
		} else if (fd >= FD_SETSIZE) {
			/* Cannot be represented in an fd_set; skip it. */
			close(fd);
		} else if ((flags = fcntl(fd, F_GETFL)) == -1) {
			perror("fcntl");
			close(fd);
		} else if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
			perror("fcntl");
			close(fd);
		} else if (connect(fd, res->ai_addr, res->ai_addrlen) == -1) {
			if (errno != EINPROGRESS) {
				perror("connect");
				close(fd);
			} else {
				/*
				 * Record the information for this descriptor.
				 */
				fds[i] = fd;
				FD_SET(fd, &fdset);
				if (max == -1 || fd > max)
					max = fd;
				count++;
				i++;
			}
		} else  {
			/*
			 * We connected without blocking.
			 */
			goto done;
		}

		/* Nothing outstanding to wait for; try the next address. */
		if (count == 0)
			continue;

		assert(max != -1);
		do {
			/*
			 * select() is allowed to modify the timeout it is
			 * given (Linux stores the unslept remainder in it),
			 * so hand it a scratch copy that is rebuilt for
			 * every call instead of the persistent wait value.
			 */
			if (res->ai_next != NULL) {
				tv.tv_sec = 0;
				tv.tv_usec = wait_usec;
				timeout = &tv;
			} else
				timeout = NULL;

			/* The write bit is set on both success and failure. */
			wrset = fdset;
			n = select(max + 1, NULL, &wrset, NULL, timeout);
			if (n == 0) {
				/* Timed out: wait half as long next time. */
				wait_usec >>= 1;
				break;
			}
			if (n < 0) {
				/*
				 * Note: 'continue' re-evaluates the loop
				 * condition, so an interrupted bounded wait
				 * moves on to the next address.
				 */
				if (errno == EAGAIN || errno == EINTR)
					continue;
				perror("select");
				fd = -1;
				goto done;
			}
			for (fd = 0; fd <= max; fd++) {
				if (FD_ISSET(fd, &wrset)) {
					socklen_t len;
					int err;
					for (j = 0; j < i; j++)
						if (fds[j] == fd)
							break;
					assert(j < i);
					/*
					 * Test to see if the connect
					 * succeeded.
					 */
					len = sizeof(err);
					n = getsockopt(fd, SOL_SOCKET,
						       SO_ERROR, &err, &len);
					if (n != 0 || err != 0) {
						close(fd);
						FD_CLR(fd, &fdset);
						fds[j] = -1;
						count--;
						continue;
					}
					/* Connect succeeded. */
					goto done;
				}
			}
		} while (timeout == NULL && count != 0);
	}

	/* We failed to connect. */
	fd = -1;

 done:
	/* Close all other descriptors we have created. */
	for (j = 0; j < i; j++)
		if (fds[j] != fd && fds[j] != -1) {
			close(fds[j]);
		}

	if (fd != -1) {
		/* Restore default blocking behaviour.  */
		if ((flags = fcntl(fd, F_GETFL)) != -1) {
			flags &= ~O_NONBLOCK;
			if (fcntl(fd, F_SETFL, flags) == -1)
				perror("fcntl");
		} else
			perror("fcntl");
	}

 cleanup:
	/* Free everything (free(NULL) is a no-op). */
	free(fds);

	return (fd);
}

-- 
Mark Andrews, ISC
1 Seymour St., Dundas Valley, NSW 2117, Australia
PHONE: +61 2 9871 4742                 INTERNET: marka at isc.org




More information about the NANOG mailing list