Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
language: generic

services:
- docker

script:
- docker build .
93 changes: 17 additions & 76 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,96 +1,37 @@
FROM ubuntu:14.04
FROM openjdk:8

# Set version and github repo which you want to build from
ENV GITHUB_OWNER druid-io
ENV DRUID_VERSION 0.12.1
ENV ZOOKEEPER_VERSION 3.4.10
ENV DRUID_VERSION 0.15.1-incubating
ENV ZOOKEEPER_VERSION 3.4.14

# Java 8
RUN apt-get update \
&& apt-get install -y software-properties-common \
&& apt-add-repository -y ppa:webupd8team/java \
&& apt-get purge --auto-remove -y software-properties-common \
&& apt-get update \
&& echo oracle-java-8-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections \
&& apt-get install -y oracle-java8-installer oracle-java8-set-default \
mysql-server \
supervisor \
git \
&& apt-get clean \
&& rm -rf /var/cache/oracle-jdk8-installer \
&& rm -rf /var/lib/apt/lists/*
# Get Druid
RUN mkdir -p /tmp \
&& cd /tmp/ \
&& curl -fsLS "https://www.apache.org/dyn/closer.cgi?filename=/incubator/druid/$DRUID_VERSION/apache-druid-$DRUID_VERSION-bin.tar.gz&action=download" | tar xvz \
&& mv apache-druid-$DRUID_VERSION /opt/druid

# Maven
RUN wget -q -O - http://archive.apache.org/dist/maven/maven-3/3.2.5/binaries/apache-maven-3.2.5-bin.tar.gz | tar -xzf - -C /usr/local \
&& ln -s /usr/local/apache-maven-3.2.5 /usr/local/apache-maven \
&& ln -s /usr/local/apache-maven/bin/mvn /usr/local/bin/mvn
WORKDIR /opt/druid/

# Zookeeper
RUN wget -q -O - http://www.us.apache.org/dist/zookeeper/zookeeper-$ZOOKEEPER_VERSION/zookeeper-$ZOOKEEPER_VERSION.tar.gz | tar -xzf - -C /usr/local \
&& cp /usr/local/zookeeper-$ZOOKEEPER_VERSION/conf/zoo_sample.cfg /usr/local/zookeeper-$ZOOKEEPER_VERSION/conf/zoo.cfg \
&& ln -s /usr/local/zookeeper-$ZOOKEEPER_VERSION /usr/local/zookeeper
RUN curl -fsLS "https://www.apache.org/dyn/closer.cgi?filename=/zookeeper/zookeeper-$ZOOKEEPER_VERSION/zookeeper-$ZOOKEEPER_VERSION.tar.gz&action=download" | tar xvz \
&& mv zookeeper-$ZOOKEEPER_VERSION zk

# Druid system user
RUN adduser --system --group --no-create-home druid \
&& mkdir -p /var/lib/druid \
&& chown druid:druid /var/lib/druid
ADD config/common.runtime.properties conf/druid/single-server/micro-quickstart/_common/common.runtime.properties

# Druid (from source)
RUN mkdir -p /usr/local/druid/lib

# trigger rebuild only if branch changed
ADD https://api.github.com/repos/$GITHUB_OWNER/druid/git/refs/heads/$DRUID_VERSION druid-version.json
RUN git clone -q --branch $DRUID_VERSION --depth 1 https://github.com/$GITHUB_OWNER/druid.git /tmp/druid
WORKDIR /tmp/druid

# package and install Druid locally
# use versions-maven-plugin 2.1 to work around https://jira.codehaus.org/browse/MVERSIONS-285
RUN mvn -U -B org.codehaus.mojo:versions-maven-plugin:2.1:set -DgenerateBackupPoms=false -DnewVersion=$DRUID_VERSION \
&& mvn -U -B install -DskipTests=true -Dmaven.javadoc.skip=true \
&& cp services/target/druid-services-$DRUID_VERSION-selfcontained.jar /usr/local/druid/lib \
&& cp -r distribution/target/extensions /usr/local/druid/ \
&& cp -r distribution/target/hadoop-dependencies /usr/local/druid/ \
&& apt-get purge --auto-remove -y git \
&& apt-get clean \
&& rm -rf /tmp/* \
/var/tmp/* \
/usr/local/apache-maven-3.2.5 \
/usr/local/apache-maven \
/root/.m2

WORKDIR /

# Setup metadata store and add sample data
ADD sample-data.sql sample-data.sql
RUN find /var/lib/mysql -type f -exec touch {} \; \
&& /etc/init.d/mysql start \
&& mysql -u root -e "GRANT ALL ON druid.* TO 'druid'@'localhost' IDENTIFIED BY 'diurd'; CREATE database druid CHARACTER SET utf8;" \
&& java -cp /usr/local/druid/lib/druid-services-*-selfcontained.jar \
-Ddruid.extensions.directory=/usr/local/druid/extensions \
-Ddruid.extensions.loadList=[\"mysql-metadata-storage\"] \
-Ddruid.metadata.storage.type=mysql \
io.druid.cli.Main tools metadata-init \
--connectURI="jdbc:mysql://localhost:3306/druid" \
--user=druid --password=diurd \
&& mysql -u root druid < sample-data.sql \
&& /etc/init.d/mysql stop

# Setup supervisord
ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
RUN bash -c "./bin/start-micro-quickstart &" && \
./bin/post-index-task --file quickstart/tutorial/wikipedia-index.json --url http://localhost:8081 --submit-timeout 600

# Expose ports:
# - 8888: HTTP (router)
# - 8081: HTTP (coordinator)
# - 8082: HTTP (broker)
# - 8083: HTTP (historical)
# - 8090: HTTP (overlord)
# - 3306: MySQL
# - 2181 2888 3888: ZooKeeper
EXPOSE 8888
EXPOSE 8081
EXPOSE 8082
EXPOSE 8083
EXPOSE 8090
EXPOSE 3306
EXPOSE 2181 2888 3888

WORKDIR /var/lib/druid
ENTRYPOINT export HOSTIP="$(resolveip -s $HOSTNAME)" && find /var/lib/mysql -type f -exec touch {} \; && exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
ENTRYPOINT ./bin/start-micro-quickstart
72 changes: 21 additions & 51 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,78 +1,48 @@
# Druid Docker Image
[![Build Status](https://travis-ci.org/Fokko/docker-druid.svg?branch=master)](https://travis-ci.org/Fokko/docker-druid)

## Run a simple Druid cluster
# Apache Druid (Incubating) Docker Image

[Install Docker](docker-install.md)
[Install Docker](https://docs.docker.com/install/)

Download and launch the docker image
## Run a simple Apache Druid (Incubating) cluster

Download and launch the docker image:
```sh
docker pull druidio/example-cluster
docker run --rm -i -p 3000:8082 -p 3001:8081 druidio/example-cluster
docker run --rm -i -p 8888:8888 druidio/example-cluster
```

Wait a minute or so for Druid to start up and download the sample.

On OS X

- List datasources

```
curl http://$(docker-machine ip default):3000/druid/v2/datasources
```

- access the coordinator console

```
open http://$(docker-machine ip default):3001/
```

On Linux

- List datasources

```
curl http://localhost:3000/druid/v2/datasources
```

- access the coordinator console at http://localhost:3001/
Once the cluster has started, you can navigate to [http://localhost:8888](http://localhost:8888). The [Druid router process](https://druid.apache.org/docs/latest/design/router.html), which serves the Druid console, resides at this address.

## Build Druid Docker Image

To build the docker image yourself

```sh
git clone https://github.com/druid-io/docker-druid.git
docker build -t example-cluster docker-druid
cd docker-druid
docker build -t docker-druid .
docker run --rm -i -p 8888:8888 docker-druid
```

## Logging

You might want to look into the logs when debugging the Druid processes. This can be done by logging into the container using `docker ps`:
```
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
9e73cbfc5612 druidio/example-cluster "/bin/sh -c 'export H" 7 seconds ago Up 6 seconds 2181/tcp, 2888/tcp, 3306/tcp, 3888/tcp, 8083/tcp, 0.0.0.0:3001->8081/tcp, 0.0.0.0:3000->8082/tcp sick_lamport
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
5782c4d4fa40 docker-druid "/bin/sh -c ./bin/st…" 4 seconds ago Up 3 seconds 2181/tcp, 2888/tcp, 3888/tcp, 8081-8083/tcp, 8090/tcp, 0.0.0.0:8888->8888/tcp angry_banach
```

And attaching to the container using `docker exec -ti 9e73cbfc5612 bash` logs are written to `/tmp/`:
Run the `docker logs` command to fetch the logs.

```
root@d59a3d4a68c3:/tmp# ls -lah
total 224K
drwxrwxrwt 8 root root 4.0K Jan 18 20:38 .
drwxr-xr-x 61 root root 4.0K Jan 18 20:38 ..
-rw------- 1 root root 0 Jan 18 20:38 druid-broker-stderr---supervisor-az6WwP.log
-rw------- 1 root root 18K Jan 18 20:39 druid-broker-stdout---supervisor-D28zOC.log
-rw------- 1 root root 0 Jan 18 20:38 druid-coordinator-stderr---supervisor-RYMt5L.log
-rw------- 1 root root 100K Jan 18 21:14 druid-coordinator-stdout---supervisor-Jq4WCi.log
-rw------- 1 root root 0 Jan 18 20:38 druid-historical-stderr---supervisor-rmMHmF.log
-rw------- 1 root root 18K Jan 18 20:39 druid-historical-stdout---supervisor-AJ0SZX.log
-rw------- 1 root root 7.9K Jan 18 21:09 druid-indexing-service-stderr---supervisor-x3YNlo.log
-rw------- 1 root root 28K Jan 18 21:14 druid-indexing-service-stdout---supervisor-5uyV7u.log
-rw------- 1 root root 155 Jan 18 20:38 mysql-stderr---supervisor-NqN9nY.log
-rw------- 1 root root 153 Jan 18 20:38 mysql-stdout---supervisor-23izTf.log
-rw------- 1 root root 78 Jan 18 20:38 zookeeper-stderr---supervisor-Rm33j8.log
-rw------- 1 root root 7.4K Jan 18 20:39 zookeeper-stdout---supervisor-6AFVOR.log
$ docker logs -f 5782c4d4fa40
[Wed Aug 7 09:22:41 2019] Running command[zk], logging to[/opt/druid/var/sv/zk.log]: bin/run-zk conf
[Wed Aug 7 09:22:41 2019] Running command[coordinator-overlord], logging to[/opt/druid/var/sv/coordinator-overlord.log]: bin/run-druid coordinator-overlord conf/druid/single-server/micro-quickstart
[Wed Aug 7 09:22:41 2019] Running command[broker], logging to[/opt/druid/var/sv/broker.log]: bin/run-druid broker conf/druid/single-server/micro-quickstart
[Wed Aug 7 09:22:41 2019] Running command[router], logging to[/opt/druid/var/sv/router.log]: bin/run-druid router conf/druid/single-server/micro-quickstart
[Wed Aug 7 09:22:41 2019] Running command[historical], logging to[/opt/druid/var/sv/historical.log]: bin/run-druid historical conf/druid/single-server/micro-quickstart
[Wed Aug 7 09:22:41 2019] Running command[middleManager], logging to[/opt/druid/var/sv/middleManager.log]: bin/run-druid middleManager conf/druid/single-server/micro-quickstart
```

## Troubleshooting
Expand All @@ -91,4 +61,4 @@ The allocated resources are limited by default to 2 cpu's and 2gb of memory. Alt
```
2017-01-20T15:59:58,445 INFO [forking-task-runner-0-[index_transactions_2017-01-20T15:59:50.637Z]] io.druid.indexing.overlord.ForkingTaskRunner - Process exited with status[137] for task: index_transactions_2017-01-20T15:59:50.637Z
```
From the log we observe that the process receives an 137 (=128+9) SIGKILL signal. Because it hit the memory limit, the application is killed instantly. To avoid this you might want to give more resources to the Docker hypervisor under Docker > Preferences.
From the log we observe that the process receives a 137 (=128+9) SIGKILL signal. Because it hit the memory limit, the application is killed instantly. To avoid this you might want to give more resources to the Docker hypervisor under Docker > Preferences.
145 changes: 145 additions & 0 deletions config/common.runtime.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Extensions specified in the load list will be loaded by Druid
# We are using local fs for deep storage - not recommended for production - use S3, HDFS, or NFS instead
# We are using local derby for the metadata store - not recommended for production - use MySQL or Postgres instead

# If you specify `druid.extensions.loadList=[]`, Druid won't load any extension from file system.
# If you don't specify `druid.extensions.loadList`, Druid will load all the extensions under root extension directory.
# More info: https://druid.apache.org/docs/latest/operations/including-extensions.html
druid.extensions.loadList=["druid-hdfs-storage", "druid-kafka-indexing-service", "druid-datasketches", "druid-avro-extensions"]

# If you have a different version of Hadoop, place your Hadoop client jar files in your hadoop-dependencies directory
# and uncomment the line below to point to your directory.
#druid.extensions.hadoopDependenciesDir=/my/dir/hadoop-dependencies


#
# Hostname
#
druid.host=localhost

#
# Logging
#

# Log all runtime properties on startup. Disable to avoid logging properties on startup:
druid.startup.logging.logProperties=true

#
# Zookeeper
#

druid.zk.service.host=localhost
druid.zk.paths.base=/druid

#
# Metadata storage
#

# For Derby server on your Druid Coordinator (only viable in a cluster with a single Coordinator, no fail-over):
druid.metadata.storage.type=derby
druid.metadata.storage.connector.connectURI=jdbc:derby://localhost:1527/var/druid/metadata.db;create=true
druid.metadata.storage.connector.host=localhost
druid.metadata.storage.connector.port=1527

# For MySQL (make sure to include the MySQL JDBC driver on the classpath):
#druid.metadata.storage.type=mysql
#druid.metadata.storage.connector.connectURI=jdbc:mysql://db.example.com:3306/druid
#druid.metadata.storage.connector.user=...
#druid.metadata.storage.connector.password=...

# For PostgreSQL:
#druid.metadata.storage.type=postgresql
#druid.metadata.storage.connector.connectURI=jdbc:postgresql://db.example.com:5432/druid
#druid.metadata.storage.connector.user=...
#druid.metadata.storage.connector.password=...

#
# Deep storage
#

# For local disk (only viable in a cluster if this is a network mount):
druid.storage.type=local
druid.storage.storageDirectory=var/druid/segments

# For HDFS:
#druid.storage.type=hdfs
#druid.storage.storageDirectory=/druid/segments

# For S3:
#druid.storage.type=s3
#druid.storage.bucket=your-bucket
#druid.storage.baseKey=druid/segments
#druid.s3.accessKey=...
#druid.s3.secretKey=...

#
# Indexing service logs
#

# For local disk (only viable in a cluster if this is a network mount):
druid.indexer.logs.type=file
druid.indexer.logs.directory=var/druid/indexing-logs

# For HDFS:
#druid.indexer.logs.type=hdfs
#druid.indexer.logs.directory=/druid/indexing-logs

# For S3:
#druid.indexer.logs.type=s3
#druid.indexer.logs.s3Bucket=your-bucket
#druid.indexer.logs.s3Prefix=druid/indexing-logs

#
# Service discovery
#

druid.selectors.indexing.serviceName=druid/overlord
druid.selectors.coordinator.serviceName=druid/coordinator

#
# Monitoring
#

druid.monitoring.monitors=["org.apache.druid.java.util.metrics.JvmMonitor"]
druid.emitter=noop
druid.emitter.logging.logLevel=info

# Storage type of double columns
# Omitting this will lead to doubles being indexed as floats at the storage layer

druid.indexing.doubleStorage=double

#
# Security
#
druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.metadata.storage.connector.password"]


#
# SQL
#
druid.sql.enable=true

#
# Lookups
#
druid.lookup.enableLookupSyncOnStartup=false
Loading