ramcloud-ycsb/runYcsb at master · PlatformLab/ramcloud-ycsb · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#!/bin/bash
# This is the top-level script that runs one or more of the YCSB benchmarks
# on RAMCloud.  This script is somewhat brittle right now: not much error
# handling, hardwired configuration information, etc.

# Configuration information you may need to modify:

# Information about the experiments you'd like to run:
# TRANSPORTS:       list of RAMCloud transports to use
# SERVER_MEMORIES:  list of different log sizes to try on the servers, which
#                   determine memory utilization; 1680 supposedly means 75%
#                   utilization and 1450 supposedly means 90% utilization
# CLIENT_COUNTS:    different numbers of clients to try
# WORKLOADS:        YCSB workloads to run (combination of a, b, c, d, and f;
#                   RAMCloud can't currently run with e); must run workload f
#                   before d (not sure why)
# RUNS:             How many different runs to carry out with each setting

TRANSPORTS='infrc basic'
SERVER_MEMORIES='1450'
CLIENT_COUNTS='24'
WORKLOADS='a b c f'
RUNS=1

# Hosts taking part in the runs; an entry "xx" names machine "rcxx".  CLIENTS
# has to list at least as many hosts as the largest CLIENT_COUNTS value above.
# The host lists are newline-separated, one host number per line.
COORDINATOR='42'
SERVERS=$(printf '%s\n' {30..41})
FILL_CLIENTS=$(printf '%s\n' {43..52})
CLIENTS=$(printf '%s\n' {43..69})

# Uncomment the following line to start up an extra backup on the
# given servers.
# EXTRA_BACKUPS="69 70 71 72 73 74 75 76 77 78 79 80"

# SIGINT handler: stop the remote coordinator, every server (including any
# extra backups), and the java processes on all client machines, then exit
# with status 0.
# Globals read: COORDINATOR, SERVERS, EXTRA_BACKUPS, CLIENTS.  The host lists
# are expanded unquoted on purpose so that they split into one word per host.
sigint_handler() {
  local host

  printf '%s\n' "Stop coordinator: rc${COORDINATOR}"
  ssh "rc${COORDINATOR}" pkill coordinator

  for host in $SERVERS $EXTRA_BACKUPS; do
    printf '%s\n' "Stop server: rc$host"
    ssh "rc$host" pkill server
  done

  for host in $CLIENTS; do
    printf '%s\n' "Stop client: rc$host"
    ssh "rc$host" pkill java
  done

  exit 0
}
trap 'sigint_handler' INT

# Current directory, for passing to ssh programs
WD=$(pwd)

# Directory containing RAMCloud binaries
RAMCLOUD_BIN="$WD/ramcloud/bin"

# Top-level directory in which log subdirectories will be created.
TOP_LOG_DIR="$WD/logs"

# Every invocation logs into its own directory named after the start time;
# an optional first command-line argument is appended to that name.
TIME=$(date +%Y%m%d%H%M%S)
LOG_DIR="$TOP_LOG_DIR/${TIME}${1:-}"
if ! mkdir -p "$LOG_DIR"; then
  # Without a log directory none of the remote redirections below can work.
  echo "Cannot create log directory $LOG_DIR" >&2
  exit 1
fi

# Point the "latest" symlink at this invocation's directory.  -f keeps the
# very first run (when no link exists yet) from printing an error.
rm -f "$TOP_LOG_DIR/latest"
ln -s "$LOG_DIR" "$TOP_LOG_DIR/latest"

# Stop the coordinator and every server/backup process of the cluster.
# Globals read: COORDINATOR, SERVERS, EXTRA_BACKUPS (whitespace-separated
# "xx" host suffixes; left unquoted on purpose so they split per host).
stop_cluster() {
  local host
  ssh "rc$COORDINATOR" pkill coordinator
  for host in $SERVERS $EXTRA_BACKUPS; do
    ssh "rc$host" pkill server
  done
}

# Print the first $1 of the remaining arguments, each preceded by a single
# space.  If fewer than $1 hosts are supplied, all of them are printed and a
# warning goes to stderr, because the run would otherwise be carried out (and
# its log directory labelled) with a client count that was never reached.
select_clients() {
  local wanted=$1 chosen="" taken=0 host
  shift
  for host in "$@"; do
    [ "$taken" -lt "$wanted" ] || break
    chosen="$chosen $host"
    taken=$((taken + 1))
  done
  if [ "$taken" -lt "$wanted" ]; then
    echo "Warning: $wanted clients requested but only $taken available" >&2
  fi
  printf '%s' "$chosen"
}

# Run every combination of transport, server memory size, run number and
# client count.  Each combination gets a fresh cluster: start coordinator and
# servers, fill the store, run all workloads, then shut the cluster down.
for TRANSPORT in $TRANSPORTS; do
  # Choose the coordinator locator and the prefix of each server's locator
  # for this transport.
  if [ "$TRANSPORT" = "basic" ]; then
    COORD_LOCATOR="basic+udp:host=192.168.1.1$COORDINATOR,port=14246"
    LOCATOR_BASE="basic+infud:rttMicros=25,gbs=30,"
  elif [ "$TRANSPORT" = "infrc" ]; then
    COORD_LOCATOR="infrc:host=192.168.1.1$COORDINATOR,port=14246"
    LOCATOR_BASE="infrc:"
  else
    echo "Unsupported transport $TRANSPORT" >&2
    exit 1
  fi

  for SERVER_MEMORY in $SERVER_MEMORIES; do
    for ((RUN=1; RUN<=RUNS; RUN++)); do
      for CLIENT_COUNT in $CLIENT_COUNTS; do
        LOG_SUBDIR="$LOG_DIR/${TRANSPORT}_${SERVER_MEMORY}_${CLIENT_COUNT}clients_run$RUN"
        mkdir -p "$LOG_SUBDIR"

        # Start the coordinator.  The remote shell performs the redirection
        # into the log file; the local ssh process is silenced and left
        # running in the background.
        echo "Starting coordinator for run $RUN..."
        CMD="$RAMCLOUD_BIN/coordinator \
            -C $COORD_LOCATOR \
            -d 10000 \
            --clusterName=__unnamed__ \
            > $LOG_SUBDIR/coordinator.log 2>&1"
        ssh "rc$COORDINATOR" "$CMD" > /dev/null 2>&1 &

        # Start one server per host in SERVERS; each is counted both as a
        # master and as a backup for the ensureServers check below.
        echo "Starting servers for run $RUN..."
        NUM_BACKUPS="0"
        NUM_MASTERS="0"
        for s in $SERVERS; do
          CMD="$RAMCLOUD_BIN/server \
              -C $COORD_LOCATOR \
              -L ${LOCATOR_BASE}host=192.168.1.1$s,port=14242 \
              --clusterName=__unnamed__ \
              -f /dev/sda2 \
              --segmentFrames 11000 \
              --maxNonVolatileBuffers 20 \
              --detectFailures 0 \
              --timeout 10000 \
              -r 3 \
              -t $SERVER_MEMORY \
              -E 3 \
              -w 1 \
              --maxCores 4 \
              --logCleanerThreads 2 \
              --cleanerBalancer=tombstoneRatio:0.40 \
              --preferredIndex=$s \
              > $LOG_SUBDIR/server-rc$s.log 2>&1"
          ssh "rc$s" "$CMD" > /dev/null 2>&1 &
          NUM_MASTERS=$((NUM_MASTERS + 1))
          NUM_BACKUPS=$((NUM_BACKUPS + 1))
        done

        # Start secondary backups, if desired.  They use a different port
        # and device than the primary servers and are counted as backups
        # only.
        if [ "$EXTRA_BACKUPS" != "" ]; then
          echo "Starting extra backups for run $RUN..."
          for s in $EXTRA_BACKUPS; do
            CMD="$RAMCLOUD_BIN/server \
                -C $COORD_LOCATOR \
                -L ${LOCATOR_BASE}host=192.168.1.1$s,port=14243 \
                --clusterName=__unnamed__ \
                -f /dev/sdb2 \
                --segmentFrames 11000 \
                --maxNonVolatileBuffers 20 \
                --detectFailures 0 \
                --timeout 10000 \
                -r 3 \
                -t $SERVER_MEMORY \
                -E 3 \
                -w 1 \
                --maxCores 4 \
                --logCleanerThreads 2 \
                --cleanerBalancer=tombstoneRatio:0.40 \
                --preferredIndex=$((s+100)) \
                --backupOnly \
                > $LOG_SUBDIR/backup-rc$s.log 2>&1"
            ssh "rc$s" "$CMD" > /dev/null 2>&1 &
            NUM_BACKUPS=$((NUM_BACKUPS + 1))
          done
        fi

        # Check that the expected number of masters and backups came up;
        # otherwise tear down whatever did start and abort.
        if ! "$RAMCLOUD_BIN/ensureServers" -C "$COORD_LOCATOR" \
            --masters "$NUM_MASTERS" --backups "$NUM_BACKUPS" --wait 20 \
            > "$LOG_SUBDIR/ensureServers.log" 2>&1; then
          echo "Cluster didn't startup correctly; see $LOG_SUBDIR/ensureServers.log" >&2
          stop_cluster
          exit 1
        fi

        # Load the dataset for the run
        echo "Filling the key-value store for run $RUN..."
        ./helper "$COORD_LOCATOR" logMessage NOTICE \
            "**** Filling the key-value store for run $RUN"
        ./fill.sh "$WD" "$COORD_LOCATOR" "$LOG_SUBDIR" "$FILL_CLIENTS"
        ./helper "$COORD_LOCATOR" logMessage NOTICE \
            "**** Finished filling the key-value store"

        # Figure out which of the clients to use: the first CLIENT_COUNT
        # entries of CLIENTS (unquoted so that it splits into hosts).
        USE_CLIENTS=$(select_clients "$CLIENT_COUNT" $CLIENTS)

        # Run the measurement workloads, each logging into its own
        # subdirectory.
        for WORKLOAD in $WORKLOADS; do
          echo "Running workload $WORKLOAD for run $RUN..."
          ./helper "$COORD_LOCATOR" logMessage NOTICE \
              "**** Running workload $WORKLOAD for run $RUN"
          mkdir -p "$LOG_SUBDIR/workload$WORKLOAD"
          ./run.sh "$WD" "$LOG_SUBDIR/workload$WORKLOAD" "$COORD_LOCATOR" \
              "workload${WORKLOAD}" "$USE_CLIENTS"
        done
        ./helper "$COORD_LOCATOR" logMessage NOTICE "**** Workloads finished"

        # Kill all servers
        echo "Killing servers for run $RUN..."
        stop_cluster

        # Leave a bit of time for servers to really exit before the next run
        # (not sure why this is needed...)
        sleep 5
      done
    done
  done
done

# Print statistics: run printStats.py over this invocation's log directory,
# showing its output and keeping a copy in $LOG_DIR/stats.  The paths are
# quoted so a working directory containing whitespace does not split them.
./printStats.py "$LOG_DIR" | tee "$LOG_DIR/stats"