You asked for it! You might want to skip to Cassandra.save(....) ...

public class BuildCassandraDB {

    /** Maximum number of ContentCass rows buffered in memory before they are written out. */
    private static final int CONTENT_BATCH_SIZE = 1000;

    /**
     * Copies the content-to-interest relation from the relational database
     * into the "interests" super column of the matching Content rows.
     *
     * @param connection open JDBC connection to the source database
     * @throws Exception if the query fails or a row cannot be saved
     */
    private static void importContentInterests(Connection connection) throws Exception {
        /*
         * Select Contents/Categories Map, Categories/Interests Map,
         * Where categories are equal, order by Contents, save Interests to Contents
         */
        String sql = "select content, interest, name "
                + "from contents_attractions_map, attractions_categories_map, categories_interests_map, interests "
                + "where contents_attractions_map.attraction = attractions_categories_map.attraction and "
                + "attractions_categories_map.category = categories_interests_map.category and "
                + "categories_interests_map.interest = interests.id "
                + "order by contents_attractions_map.content";

        HashMap<Integer, ContentCass> contentMap = new HashMap<Integer, ContentCass>();

        // try/finally so the JDBC resources are released even when a save fails.
        Statement statement = connection.createStatement();
        try {
            ResultSet resultSet = statement.executeQuery(sql);
            try {
                while (resultSet.next()) {
                    int contentId = resultSet.getInt("CONTENT");
                    int interestId = resultSet.getInt("INTEREST");
                    String interestName = resultSet.getString("NAME");

                    ContentCass contentCass = contentMap.get(contentId);
                    if (contentCass == null) {
                        // Flush BEFORE starting a new content: the query is ordered by
                        // content, so every buffered content already has all its interests.
                        if (contentMap.size() >= CONTENT_BATCH_SIZE) {
                            saveAndClear(contentMap);
                        }
                        contentCass = new ContentCass(Integer.toString(contentId), false);
                        contentMap.put(contentId, contentCass);
                    }
                    contentCass.addInterest(Integer.toString(interestId), interestName);
                }
                // Write whatever is left in the last, partially filled batch.
                saveAndClear(contentMap);
            } finally {
                resultSet.close();
            }
        } finally {
            statement.close();
        }
    }

    /** Saves every buffered content row and then empties the buffer. */
    private static void saveAndClear(HashMap<Integer, ContentCass> contentMap) throws Exception {
        for (ContentCass contentCass : contentMap.values()) {
            contentCass.save();
        }
        contentMap.clear();
    }
}

/**
 * Entity for a row of the "Content" super column family. Scalar attributes
 * are stored under the "data" super column; the attractions, categories,
 * interests and geoSites relations each get a super column of their own,
 * keyed by id with the name as the value.
 */
public class ContentCass extends BaseCass {

    /**
     * @param rowKey         key of the Content row
     * @param populateEntity true to load the existing row, false to start empty
     * @throws CassException if loading the row fails
     */
    public ContentCass(String rowKey, boolean populateEntity) throws CassException {
        super(rowKey, populateEntity);
    }

    @Override
    public String getColumnFamily() {
        return "Content";
    }

    @Override
    public boolean isSuper() {
        return true;
    }

    /*
     * Setters
     */

    public void setDocument(String document) {
        setVal("data", "document", document);
    }

    public void setStrippedChecksum(String strippedChecksum) {
        setVal("data", "strippedChecksum", strippedChecksum);
    }

    public void setStrippedDuplicate(String strippedDuplicate) {
        setVal("data", "strippedDuplicate", strippedDuplicate);
    }

    public void setText(String text) {
        setVal("data", "text", text);
    }

    public void setAuthor(String author) {
        setVal("data", "author", author);
    }

    public void setReferringContent(String referringContent) {
        setVal("data", "referringContent", referringContent);
    }

    public void setStatus(String status) {
        setVal("data", "status", status);
    }

    public void setVersion(String version) {
        setVal("data", "version", version);
    }

    public void setSentiment(String sentiment) {
        setVal("data", "sentiment", sentiment);
    }

    public void setCreated(String created) {
        setVal("data", "created", created);
    }

    public void setSnippet(String snippet) {
        setVal("data", "snippet", snippet);
    }

    public void setTitle(String title) {
        setVal("data", "title", title);
    }

    public void setDocumentURL(String documentURL) {
        setVal("data", "documentURL", documentURL);
    }

    public void setSourceType(String sourceType) {
        setVal("data", "sourceType", sourceType);
    }

    public void addAttraction(String attractionId, String attractionName) {
        setVal("attractions", attractionId, attractionName);
    }

    public void addCategory(String categoryId, String categoryName) {
        setVal("categories", categoryId, categoryName);
    }

    public void addInterest(String interestId, String interestName) {
        setVal("interests", interestId, interestName);
    }

    public void addGeoSite(String geoSiteId, String geoSiteName) {
        setVal("geoSites", geoSiteId, geoSiteName);
    }

    /*
     * Getters
     */

    public String getDocument() {
        return (String) getVal("data", "document");
    }

    public String getStrippedChecksum() {
        return (String) getVal("data", "strippedChecksum");
    }

    public String getStrippedDuplicate() {
        return (String) getVal("data", "strippedDuplicate");
    }

    public String getText() {
        return (String) getVal("data", "text");
    }

    public String getAuthor() {
        return (String) getVal("data", "author");
    }

    public String getReferringContent() {
        return (String) getVal("data", "referringContent");
    }

    public String getStatus() {
        return (String) getVal("data", "status");
    }

    public String getVersion() {
        return (String) getVal("data", "version");
    }

    public String getSentiment() {
        return (String) getVal("data", "sentiment");
    }

    public String getCreated() {
        return (String) getVal("data", "created");
    }

    public String getSnippet() {
        return (String) getVal("data", "snippet");
    }

    public String getTitle() {
        return (String) getVal("data", "title");
    }

    public String getDocumentURL() {
        return (String) getVal("data", "documentURL");
    }

    public String getSourceType() {
        return (String) getVal("data", "sourceType");
    }

    public TupleSet<String> getAttractions() {
        return (TupleSet<String>) getVal("attractions");
    }

    public TupleSet<String> getCategories() {
        return (TupleSet<String>) getVal("categories");
    }

    public TupleSet<String> getInterests() {
        return (TupleSet<String>) getVal("interests");
    }

    public TupleSet<String> getGeoSites() {
        return (TupleSet<String>) getVal("geoSites");
    }
}

package com.lookin2.cassandra.entities;

import java.util.Collection;
import com.lookin2.cassandra.*;
import com.lookin2.cassandra.CAO.*;

/**
 * Base class for entities backed by one Cassandra row. The row is held as a
 * Tuple whose value is a TupleSet of columns (standard column family) or of
 * super columns, each of which in turn holds a TupleSet of columns (super
 * column family).
 */
public abstract class BaseCass {

    private final Tuple<String> rowTuple;

    /*
     * The name of the column family
     */
    public abstract String getColumnFamily();

    /*
     * Whether the column family is super
     */
    public abstract boolean isSuper();

    /**
     * @param rowKey         key of the row this entity represents
     * @param populateEntity true to load the row via {@link #populate(String)},
     *                       false to start with an empty column set
     * @throws CassException if loading the row fails
     */
    protected BaseCass(String rowKey, boolean populateEntity) throws CassException {
        // NOTE(review): populate() calls the overridable getColumnFamily()/isSuper()
        // from the constructor; this only works while subclasses return constants.
        if (populateEntity) {
            rowTuple = populate(rowKey);
        } else {
            rowTuple = new Tuple<String>(rowKey);
            rowTuple.setVal(new TupleSet<String>());
        }
    }

    /** Loads the row with the given key from this entity's column family. */
    protected Tuple<String> populate(String rowKey) throws CassException {
        return Cassandra.get(getColumnFamily(), rowKey, isSuper());
    }

    /** Writes this entity's row to its column family. */
    public void save() throws CassException {
        Cassandra.save(getColumnFamily(), rowTuple);
    }

    /**
     * Looks up a top-level entry of the row (a column, or a super column).
     *
     * @param create when true, a missing entry is created and added to the row
     * @return the entry, or null when it is missing and create is false
     */
    private Tuple<String> getTuple(String key, boolean create) {
        TupleSet<String> columnTupleSet = (TupleSet<String>) rowTuple.getVal();
        Tuple<String> columnTuple = (Tuple<String>) columnTupleSet.get(key);
        if (create && columnTuple == null) {
            columnTuple = new Tuple<String>(key);
            columnTupleSet.add(columnTuple);
        }
        return columnTuple;
    }

    /**
     * Looks up a column inside a super column.
     *
     * @param create when true, a missing super column and/or column is created
     * @return the column, or null when it is missing and create is false
     */
    private Tuple<String> getTuple(String superKey, String key, boolean create) {
        TupleSet<String> superColumnTupleSet = (TupleSet<String>) rowTuple.getVal();
        Tuple<String> superColumnTuple = (Tuple<String>) superColumnTupleSet.get(superKey);

        TupleSet<String> columnTupleSet;
        if (superColumnTuple == null) {
            if (!create) {
                return null;
            }
            columnTupleSet = new TupleSet<String>();
            superColumnTuple = new Tuple<String>(superKey, columnTupleSet);
            superColumnTupleSet.add(superColumnTuple);
        } else {
            columnTupleSet = (TupleSet<String>) superColumnTuple.getVal();
        }

        Tuple<String> columnTuple = (Tuple<String>) columnTupleSet.get(key);
        if (create && columnTuple == null) {
            columnTuple = new Tuple<String>(key);
            columnTupleSet.add(columnTuple);
        }
        return columnTuple;
    }

    /**
     * Adds all given tuples to the top level of the row. Does nothing when
     * either argument is null.
     */
    protected void setVal(String key, Collection<Tuple<? extends Comparable<String>>> tupleCollection) {
        if (key == null || tupleCollection == null) {
            return;
        }
        // NOTE(review): key is only null-checked and never used; the tuples are added
        // directly to the row rather than under key - confirm this is intended.
        TupleSet<String> superColumnTupleSet = (TupleSet<String>) rowTuple.getVal();
        superColumnTupleSet.addAll(tupleCollection);
    }

    /** Sets a column value, creating the column if needed; null arguments are ignored. */
    protected void setVal(String key, Object val) {
        if (key == null || val == null) {
            return;
        }
        getTuple(key, true).setVal(val);
    }

    /** Sets a column value inside a super column, creating both if needed; null arguments are ignored. */
    protected void setVal(String superKey, String key, Object val) {
        if (superKey == null || key == null || val == null) {
            return;
        }
        getTuple(superKey, key, true).setVal(val);
    }

    /**
     * Sets the column's value to null, creating the column first if it is
     * absent. Cassandra.save() issues a deletion for updated columns whose
     * value is null.
     */
    protected void delete(String key) {
        if (key == null) {
            return;
        }
        getTuple(key, true).setVal(null);
    }

    /** Same as {@link #delete(String)} for a column inside a super column. */
    protected void delete(String superKey, String key) {
        if (superKey == null || key == null) {
            return;
        }
        getTuple(superKey, key, true).setVal(null);
    }

    protected Tuple<String> getRow() {
        return rowTuple;
    }

    /**
     * @return the value of the top-level entry, or null when the entry does not exist
     */
    protected Object getVal(String key) {
        Tuple<String> columnTuple = getTuple(key, false);
        return columnTuple == null ? null : columnTuple.getVal();
    }

    /**
     * @return the value of the column inside the given super column, or null
     *         when the super column or the column does not exist
     */
    protected Object getVal(String superKey, String key) {
        // Must use the two-key lookup; the one-key overload searches the
        // super-column level and would never find a nested column.
        Tuple<String> columnTuple = getTuple(superKey, key, false);
        return columnTuple == null ? null : columnTuple.getVal();
    }

    @Override
    public String toString() {
        return rowTuple.toString();
    }
}

package com.lookin2.cassandra.CAO;

import java.util.*;
import
org.apache.cassandra.thrift.*; import me.prettyprint.cassandra.service.*; import com.lookin2.cassandra.*; public class Cassandra { private static final String KEYSPACE = "Lookin2"; private static final String URL_PORT = "localhost:9160"; private static CassandraClientPool clientPool = null; private static synchronized CassandraClientPool getClientPool() { if (clientPool == null) clientPool = CassandraClientPoolFactory.INSTANCE.get(); return clientPool; } private static Keyspace getKeyspace() throws CassException { try { if (clientPool == null) clientPool = getClientPool(); CassandraClient client = clientPool.borrowClient(URL_PORT); return client.getKeyspace(KEYSPACE, ConsistencyLevel.ONE); } catch (Exception e) { e.printStackTrace(); throw new CassException(e); } } private static void releaseKeyspace(Keyspace keyspace) throws CassException { try { clientPool.releaseKeyspace(keyspace); } catch (Exception e) { e.printStackTrace(); throw new CassException(e); } } public static Tuple<String> get(String columnFamily, String rowKey, boolean isSuper) throws CassException { Keyspace keyspace = getKeyspace(); ColumnParent columnParent = new ColumnParent(columnFamily); SliceRange sliceRange = new SliceRange(); sliceRange.setStart(new byte[]{}); sliceRange.setFinish(new byte[]{}); SlicePredicate slicePredicate = new SlicePredicate(); slicePredicate.setSlice_range(sliceRange); try { if (isSuper) { TupleSet<String> superColumnTupleSet = new TupleSet<String>(); List<SuperColumn> superColumnList = keyspace.getSuperSlice(rowKey, columnParent, slicePredicate); for (SuperColumn superColumn : superColumnList) { TupleSet<String> columnTupleSet = new TupleSet<String>(); List<Column> columnList = superColumn.getColumns(); for (Column column : columnList) columnTupleSet.add(new String(column.getName()), new String(column.getValue())); superColumnTupleSet.add(new String(superColumn.getName()), columnTupleSet); } return new Tuple<String>(rowKey, superColumnTupleSet); } else { 
TupleSet<String> columnTupleSet = new TupleSet<String>(); List<Column> columnList = keyspace.getSlice(rowKey, columnParent, slicePredicate); for (Column column : columnList) columnTupleSet.add(new String(column.getName()), new String(column.getValue())); return new Tuple<String>(rowKey, columnTupleSet); } } catch (Exception e) { throw new CassException(e); } } public static void save (String columnFamily, Tuple<?> rowTuple) throws CassException { save (columnFamily, null, rowTuple); } public static void save (String columnFamily, TupleSet<?> rowTupleSet) throws CassException { save (columnFamily, rowTupleSet, null); } private static void save(String columnFamily, TupleSet<?> rowTupleSet, Tuple<?> rowTuple) throws CassException { BatchMutation batch; if (rowTupleSet != null) batch = buildBatchMutation(columnFamily, rowTupleSet); else if (rowTuple != null) batch = buildBatchMutation(columnFamily, rowTuple); else throw new CassException("Both rowTupleSet and rowTuple are null"); try { Keyspace keyspace = getKeyspace(); keyspace.batchMutate(batch); batch = null; releaseKeyspace(keyspace); } catch (Exception e) { throw new CassException(e); } Runtime.getRuntime().gc(); } private static BatchMutation buildBatchMutation(String columnFamily, Tuple<?> rowTuple) { BatchMutation batch = new BatchMutation(); buildBatchMutation(batch, columnFamily, rowTuple); return batch; } private static BatchMutation buildBatchMutation(String columnFamily, TupleSet<?> rowTupleSet) { BatchMutation batch = new BatchMutation(); for (Tuple<?> rowTuple : rowTupleSet) buildBatchMutation(batch, columnFamily, rowTuple); return batch; } private static void buildBatchMutation(BatchMutation batch, String columnFamily, Tuple<?> rowTuple) { TupleSet<?> rowValTupleSet = (TupleSet<?>) rowTuple.getVal(); for (Tuple<?> superColumnTuple : rowValTupleSet) // Loop through the columns of the row { Object val = superColumnTuple.getVal(); if (val instanceof TupleSet<?>) // If the value is a TupleSet, then this is 
a supertable { TupleSet<?> superColumnTupleSet = (TupleSet<?>)val; for (Tuple<?> columnTuple : superColumnTupleSet) // Loop over the superColumns { buildBatchMutation(batch, columnFamily, rowTuple, superColumnTuple, columnTuple); } } else { Tuple<?> columnTuple = (Tuple<?>)val; buildBatchMutation(batch, columnFamily, rowTuple, null, columnTuple); } } } private static void buildBatchMutation(BatchMutation batch, String columnFamily, Tuple<?> rowTuple, Tuple<?> superColumnTuple, Tuple<?> columnTuple) { if (!columnTuple.isUpdated()) return; String key = rowTuple.getKeyString(); List<String> columnFamilies = new ArrayList<String>(); columnFamilies.add(columnFamily); if (columnTuple.getVal() == null) { Deletion deletion = new Deletion(); SlicePredicate slicePredicate = new SlicePredicate(); List<byte[]> columnList = new ArrayList<byte[]>(); columnList.add(columnTuple.getKeyBytes()); slicePredicate.setColumn_names(columnList); if (superColumnTuple != null) deletion.setSuper_column(superColumnTuple.getKeyBytes()); deletion.predicate = slicePredicate; batch.addDeletion(key, columnFamilies, deletion); } else { Column column = new Column(); column.setName(columnTuple.getKeyBytes()); column.setValue(columnTuple.getValBytes()); column.setTimestamp(System.currentTimeMillis() * 1000); if (superColumnTuple == null) batch.addInsertion(key, columnFamilies, column); else { SuperColumn superColumn = new SuperColumn(); List<Column> columns = new ArrayList<Column>(); columns.add(column); superColumn.setColumns(columns); superColumn.setName(superColumnTuple.getKeyBytes()); batch.addSuperInsertion(key, columnFamilies, superColumn); } } } } On Mon, May 10, 2010 at 11:27 AM, vd <vineetdan...@gmail.com> wrote: > What is the complete code string you are using to connect with cassandra > from Java code > > > > > On Mon, May 10, 2010 at 1:49 PM, David Boxenhorn <da...@lookin2.com>wrote: > >> I don't know what "TSocket or the buffered one" means. Maybe I should >> know? >> >> I'm using Hector. 
Does that explain anything? >> >> On Mon, May 10, 2010 at 11:15 AM, vd <vineetdan...@gmail.com> wrote: >> >>> >>> Hi >>> >>> what is it that you are using to connect with cassnadra TSocket or the >>> buffered one ? >>> >>> >>> ____________________________________ >>> >>> _______________________________________ >>> >>> >>> >>> >>> On Mon, May 10, 2010 at 1:29 PM, David Boxenhorn <da...@lookin2.com>wrote: >>> >>>> I'm running Java on the client, jdbc queries on Oracle, Hector on >>>> Cassandra. >>>> >>>> The Cassandra and Oracle database designs are radically different, as >>>> you might guess. >>>> >>>> I have no doubt that Cassandra can be tuned, in a multiple-server >>>> cluster, to have superior throughput (that's why I'm doing it!). But for >>>> now, it's really frustrating my development effort that Cassandra is so >>>> slow. Can't I get it up to twice as slow as Oracle in my configuration? >>>> >>>> On Mon, May 10, 2010 at 10:47 AM, vd <vineetdan...@gmail.com> wrote: >>>> >>>>> Hi David >>>>> >>>>> If I may ask...how do you plan to import data from oracle to cassandra >>>>> ? >>>>> As answer AFAIK cassandra's true ability comes into play when running >>>>> on more than one machine...and please share how you are making comparisons >>>>> like on writes or reads from cassandra. >>>>> >>>>> >>>>> >>>>> _______________________________________ >>>>> _______________________________________ >>>>> >>>>> >>>>> >>>>> >>>>> >>>>> On Mon, May 10, 2010 at 1:04 PM, David Boxenhorn <da...@lookin2.com>wrote: >>>>> >>>>>> I'm running Oracle and Cassandra on my machine, trying to import my >>>>>> data to Cassandra from Oracle. >>>>>> >>>>>> In my configuration Oracle is about ten times faster than Cassandra. >>>>>> Cassandra has out-of-the-box tuning. >>>>>> >>>>>> I am new to Cassandra. How do I begin trying to tune it? >>>>>> >>>>>> Thanks. >>>>>> >>>>> >>>>> >>>> >>> >> >