MySQL Select 1 in N rows / every Nth row UDF

I wrote this MySQL UDF because I need to create sub-samples of time series data in a logging application, and I need to brush up on my MySQL UDF skills for a code review of some older UDFs. The function sample() takes a single argument which is the limit of its internal counter. The return value is the current value of the internal counter.

SELECT country_name, sample(3) AS sample FROM countries

would return a result like

country_name          sample
Andorra               0
United Arab Emirate   1
Afghanistan           2
Antigua and Barbuda   0
Anguilla              1
Albania               2
Armenia               0

You can select every one out of every 3 rows like this:

SELECT country_name FROM countries WHERE sample(3, country_name) = 0

The second argument to the function is not used by the function: it’s a hack to get MySQL to execute the function on every row.

[Update March 2015 – to compile on Ubuntu you need header files from the libmysqlclient-dev package)]

Compile the UDF (on Ubuntu) like this:

g++ -fPIC -I/usr/include/mysql -o libsample.so -shared sample.cpp

mv or cp libsample.so to /usr/lib/mysql/plugin/, link it to /usr/lib, run ldconfig and in MySQL enter:

CREATE FUNCTION sample RETURNS INT SONAME 'libsample.so';

Here’s the code:

/* sample(N) returns a value that cycles from 0 to N - 1
 add a column to use in a WHERE clause to sample 1 row
 from every 10:
 select name from user where sample(10, name) = 1;
 The column isn't used by MySQL, it's a hack to get MySQL
 to invoke the function on every row:
 19th May 2013, sean@firtl.com
 */
#ifdef STANDARD
#include <string.h>
#ifdef __WIN__
typedef unsigned __int64 ulonglong;
typedef __int64 longlong;
#else
typedef unsigned long long ulonglong;
typedef long long longlong;
#endif /*__WIN__*/
#else
#include <my_global.h>
#include <my_sys.h>
#endif
#include <mysql.h>
#include <m_ctype.h>
#include <m_string.h>

#ifdef HAVE_DLOPEN

// http://dev.mysql.com/doc/refman/5.0/en/udf-calling.html
extern "C" {
	my_bool	sample_init(UDF_INIT *initid, UDF_ARGS *args, char *message);
	void sample_deinit(UDF_INIT *initid);
	longlong sample(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error);
}

my_bool sample_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
{
	// http://dev.mysql.com/doc/refman/5.0/en/udf-arguments.html
	if (args->arg_count < 1)
	{
		strcpy(message, "Requires at least one argument");
		return 1;
	}
	else if (args->arg_type[0] != INT_RESULT)
	{
		strcpy(message, "Requires a number argument");
		return 1;
	}

  int* workspace = new int[2];

	initid->ptr = (char*)workspace;
	workspace[0] = *((long long*) args->args[0]);
	workspace[1] = 0;
	return 0;
}

void sample_deinit(UDF_INIT *initid)
{
	if (initid->ptr != NULL)
		delete [] initid->ptr;
}

longlong sample(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error)
{
	int* d = (int*) initid->ptr;
	(d[1])++;
	if (d[1] >= d[0])
		d[1] = 0;
	return d[1];
}

#endif /* HAVE_DLOPEN */

Leave a Reply

Your email address will not be published. Required fields are marked *