Part 2: Encrypt the sensitive data
In the previous part, we:
- Logged in to the
stash
CLI tool - Created a dataset
- Pushed the dataset configuration
Now we’re going change the database schema to support new columns.
In this part we will:
- Add and apply migrations to install Protect custom types and change the database structure to support the encrypted indexes
- Encrypt the sensitive data inside your existing database
1. Add and apply migrations
The first migration to run, is the install of the Protect custom types into your database.
This migration adds in the custom types ore_64_8_v1
and ore_64_8_v1_term
.
ore_64_8_v1
is used forstring
andtext
types.ore_64_8_v1_term
is used for non string types.
We do this by creating a Sequelize migration:
npx sequelize-cli migration:generate --name add-protect-database-extensions
This will generate a migration file under migrations/
.
Open up that migration file, and add this code to run the install/uninstall scripts packaged with @cipherstash/libpq
:
'use strict';
const { INSTALL_SQL, UNINSTALL_SQL } = require('@cipherstash/libpq/database-extensions/postgresql');
/** @type {import('sequelize-cli').Migration} */
module.exports = {
async up(queryInterface) {
await queryInterface.sequelize.query(INSTALL_SQL)
},
async down(queryInterface) {
await queryInterface.sequelize.query(UNINSTALL_SQL)
}
};
The CipherStash driver works by rewriting your app’s SQL queries to use the underlying encrypted columns.
To set up those encrypted columns, generate another Sequelize migration:
npx sequelize-cli migration:generate --name add-protect-columns-to-patients-table
Per the last step, this will generate another migration file under migrations/
.
Open up that new migration file, and add the following code that creates the CipherStash columns:
/** @type {import('sequelize-cli').Migration} */
module.exports = {
async up(q, { DataTypes }) {
// Add columns for sorting and encrypting
await q.addColumn("patients", "__full_name_encrypted", { type: DataTypes.TEXT });
await q.addColumn("patients", "__full_name_ore", { type: "ore_64_8_v1" });
await q.addColumn("patients", "__full_name_match", { type: DataTypes.ARRAY(DataTypes.INTEGER) });
await q.addColumn("patients", "__full_name_unique", { type: DataTypes.TEXT });
await q.addColumn("patients", "__email_encrypted", { type: DataTypes.TEXT });
await q.addColumn("patients", "__email_ore", { type: "ore_64_8_v1" });
await q.addColumn("patients", "__email_match", { type: DataTypes.ARRAY(DataTypes.INTEGER) });
await q.addColumn("patients", "__email_unique", { type: DataTypes.TEXT });
await q.addColumn("patients", "__dob_encrypted", { type: DataTypes.TEXT });
await q.addColumn("patients", "__dob_ore", { type: "ore_64_8_v1_term" });
await q.addColumn("patients", "__weight_encrypted", { type: DataTypes.TEXT });
await q.addColumn("patients", "__weight_ore", { type: "ore_64_8_v1_term" });
await q.addColumn("patients", "__allergies_encrypted", { type: DataTypes.TEXT });
await q.addColumn("patients", "__allergies_ore", { type: "ore_64_8_v1" });
await q.addColumn("patients", "__allergies_match", { type: DataTypes.ARRAY(DataTypes.INTEGER) });
await q.addColumn("patients", "__allergies_unique", { type: DataTypes.TEXT });
await q.addColumn("patients", "__medications_encrypted", { type: DataTypes.TEXT });
await q.addColumn("patients", "__medications_ore", { type: "ore_64_8_v1" });
await q.addColumn("patients", "__medications_match", { type: DataTypes.ARRAY(DataTypes.INTEGER) });
await q.addColumn("patients", "__medications_unique", { type: DataTypes.TEXT });
// Add indexes for all the ORE fields used for sorting and range queries
await q.addIndex("patients", ["__full_name_ore"]);
await q.addIndex("patients", ["__email_ore"]);
await q.addIndex("patients", ["__dob_ore"]);
await q.addIndex("patients", ["__weight_ore"]);
await q.addIndex("patients", ["__allergies_ore"]);
await q.addIndex("patients", ["__medications_ore"]);
// Add indexes for all the match fields used for full text searches
await q.addIndex("patients", ["__full_name_match"], { using: "GIN" });
await q.addIndex("patients", ["__email_match"], { using: "GIN" });
await q.addIndex("patients", ["__allergies_match"], { using: "GIN" });
await q.addIndex("patients", ["__medications_match"], { using: "GIN" });
}
}
The _encrypted
columns are the encrypted values, and the _match
and _ore
columns are the encrypted indexes.
Once the migrations have been created you can run the migrations using the following sequelize-cli
command:
npx sequelize-cli db:migrate
2. Encrypt the sensitive data
Now we have the necessary database structure in place, it’s time to encrypt your data.
This is done by iterating through all your data and saving it back to the database.
When the data is saved the @cipherstash/pg-native
will intercept and encrypt the updates.
There is a script provided at the root of the demo that uses a naive method to iterate through and save everything.
// encrypt-data.js
const { sequelize } = require("./models");
/**
* @type {import('sequelize').Sequelize['models']}
*/
const models = sequelize.models;
const PAGE_SIZE = 1000;
/**
* @param {import('sequelize').Model} model
*/
async function forceSave(model) {
// Since we want to encrypt all fields list them all here as changed (even if nothing was)
model.changed("full_name", true);
model.changed("email", true);
model.changed("dob", true);
model.changed("weight", true);
model.changed("allergies", true);
model.changed("medications", true);
return await model.save();
}
(async () => {
let offset = 0;
// Iterate through all the patients in batches and save them
while (true) {
const patients = await models.patient.findAll({
offset,
limit: PAGE_SIZE,
order: [[ 'id', 'ASC' ]]
})
if (patients.length === 0) {
break;
}
await Promise.all(patients.map(forceSave))
offset += PAGE_SIZE;
}
console.log("Migration successful!")
})();
You can run it with the following command:
node encrypt-data.js
In future @cipherstash/libpq
will provide a method that handles this in a more efficient way.
Now it’s time for the final part: query the newly encrypted data.