MySQL


Chapt.2 Select


1. Wildcard, regular expression, ORDER and LIMIT

SELECT *
FROM customers
-- WHERE (address LIKE '%trail%' OR address LIKE '%avenue%') AND phone LIKE '%4'                  # "%" is the early used wildcard
-- WHERE last_name REGEXP 'field|mac|rose' OR last_name REGEXP 'll$' OR last_name REGEXP '^boa'   # Regular expression
WHERE last_name REGEXP '[gim]e' OR last_name REGEXP 'a[n-w]'                                      # last_name that include "ge/ie/me/an-aw"
ORDER BY points DESC
LIMIT 3;                             # LIMIT must be placed in the end of snippet

SELECT first_name, last_name, 
		10 AS points         # Give a value and name it with "points" (has no relation with the "points" column)
FROM customers
ORDER BY birth_date DESC;            # Multiple columns can be set for one ordering

2. Null

SELECT *
FROM orders
WHERE shipped_date IS NULL;


Chapt.3 Join


1. Regular form of an inner join

SELECT *
FROM customers c                     # Cannot use its original name once the column has an alias
JOIN orders o
    ON o.customer_id = c.customer_id

2. Joining across databases

SELECT *
FROM order_items oi
JOIN sql_inventory.products p
    ON oi.product_id = p.product_id;

3. Self join

USE sql_hr;
SELECT e.employee_id, e.first_name, e.reports_to, m.first_name AS manager
FROM employees e
JOIN employees m                      # Self join, to find the manager
    ON e.reports_to = m.employee_id;

4. Outer join

  • The inner join only returns content that meets the join conditions, causing some content to be lost. To display these lost content, an outer join is required (left/right)
USE sql_invoicing;
SELECT *
FROM clients c
LEFT JOIN payments p
    USING (client_id)                       # If the column names are the same, use USING instead of ON
LEFT JOIN payment_methods pm                # Join multiple tables
    ON p.payment_method = pm.payment_method_id
LEFT JOIN invoices i            
    ON c.client_id = i.client_id            # Compound Join Conditions
    AND p.invoice_id = i.invoice_id         # In this case, the second condition is actually an inner join!!
ORDER BY c.client_id;

5. Union

  • With UNION, each query must contain the same columns, expressions and aggregation functions
  • This snippet is optimized in Chapt 7
SELECT customer_id, first_name, points, 'Bronze' AS type
FROM customers
WHERE points<2000
UNION
SELECT customer_id, first_name, points, 'Silver' AS type
FROM customers
WHERE points BETWEEN 2000 AND 3000
UNION
SELECT customer_id, first_name, points, 'Gold' AS type
FROM customers
WHERE points>3000
ORDER BY first_name;


Chapt.4 Column Operation


1. Insert

INSERT INTO customers
VALUE(
    DEFAULT,       # The customer_id column is Auto Incremental, use "last_insert_id()" to get the id of last insert
    'John',
    'Smith',
    '1998-07-15',
    DEFAULT,
    '8 Xinhui Road',
    'Chengdu',
    'CN',
    9980
    ),
    (              # Insert multiple rows
    DEFAULT,
    'Michael',
    'Jackson',
    '1978-12-15',
    DEFAULT,
    '1 Star Avenue',
    'Los Angeles',
    'US',
    9999
    );

2. Delete

  • Difference between DELETE, TRUNCATE and DROP:
    TRUNCATE and DELETE only delete data without deleting the structure of the table; DROP will delete the structure of the table, triggers, indexes, etc
DELETE FROM customers                 # Delete all the columns, but can be filtered by WHERE
WHERE first_name = 'Michael' AND last_name = 'Jackson';

3. Copy a table

USE sql_invoicing;
CREATE TABLE invoices_archived AS 
	SELECT i.invoice_id, i.number, c.name AS client, i.invoice_total, i.payment_total, i.invoice_date, i.due_date, i.payment_date
	FROM invoices i
	JOIN clients c USING (client_id)
	WHERE payment_date IS NOT NULL;

4. Update

UPDATE invoices
SET                                   # Becasue sql doesn't have "==", we must use SET to change values
    payment_total = invoice_total/2, 
    payment_date = due_date
-- WHERE client_id IN (3,5)           # Update multiple columns
WHERE client_id = (                   # If the result of nested query is not unique, change "=" to "IN" here
	SELECT client_id
	FROM clients
	WHERE name='Myworks'
    );


Chapt.5 Aggregation Function


1. Common AFs

USE sql_invoicing;
SELECT 
    MAX(invoice_total) AS highest,
    MIN(invoice_total) AS lowest,
    AVG(invoice_total) AS average,
    SUM(invoice_total) AS total,
    COUNT(invoice_total) AS number,                 # COUNT only counts for non-NUll
    COUNT(DISTINCT client_id) AS client_number      # DISTINCT for deduplication  
FROM invoices;

2. GROUP BY

  • Only useful for aggregation functions!
SELECT 
    p.date,
    pm.name AS payment_method,
    SUM(p.amount) AS total_payments
FROM payments p
JOIN payment_methods pm
	ON p.payment_method =  pm.payment_method_id
GROUP BY p.date, pm.name              # Each group is a combination of these 2 columns

3. HAVING

  • Filtering with HAVING after GROUP BY, and can only filter contents exist in SELECT
USE sql_store;
SELECT 
    c.customer_id, c.first_name, c.last_name, c.state,
    SUM(oi.quantity*oi.unit_price) AS spent_money
FROM customers c 
JOIN orders o       USING(customer_id)
JOIN order_items oi USING(order_id)
WHERE state='VA'                      # WHERE must be written before GROUP BY
GROUP BY c.customer_id, c.first_name, c.last_name, c.state
HAVING spent_money>100;


Chapt.6 Complex Query


1. Subquery example

USE sql_invoicing;
SELECT *
FROM clients
WHERE client_id NOT IN (
    SELECT DISTINCT client_id
    FROM invoices
    );

Another style with JOIN

SELECT *
FROM clients
LEFT JOIN invoices USING(client_id)
WHERE invoice_id IS NULL;

Another style with EXISTS

SELECT *
FROM clients c
WHERE NOT EXISTS (
    SELECT client_id
    FROM invoices
    WHERE client_id = c.client_id
    );

2. Correlated subquery

SELECT *
FROM invoices i
WHERE invoice_total > (
    SELECT AVG(invoice_total)
    FROM invoices
    WHERE client_id = i.client_id        #
    );         

3. FROM & SELECT subquery

SELECT *
FROM (                                           # FROM subquery. Only for single queries!
	SELECT 
		invoice_id, 
		invoice_total,
		(SELECT AVG(invoice_total)       # SELECT subquery. Direct using "AVG(invoice_total)" returns only 1 row
		 FROM invoices) AS invoices_avg,
		invoice_total - (SELECT invoices_avg) AS difference
	FROM invoices
    ) AS invoice_summary;                        # Must have alias


Chapt.7 Built-in Function


1. Number

SELECT ROUND(3.1415926, 4); 
SELECT TRUNCATE(4.24923, 2); 
SELECT CEILING(5.2);                          # Minimal integer >= input
SELECT FLOOR(5.2);                            # Maximal integer <= input
SELECT RAND();                                # Random in (0,1)

2. String

SELECT TRIM('  A-928 201 24  ');              # Remove leading and trailing spaces
SELECT SUBSTRING('Kindergarten', 2, 4);       # Extract 4 characters, start with character No.2 (1,2,3,...)
SELECT LOCATE('der', 'Kindergarten');
SELECT REPLACE('Kindergarten', 'e', 'de');
SELECT CONCAT('Zheng', ' ', 'Zhongyi');

3. Time

SELECT *
FROM orders
WHERE YEAR(order_date) = EXTRACT(YEAR FROM NOW());
SELECT DATEDIFF(NOW(), '1998-07-15');

4. IFNULL & COALESCE & IF

SELECT 
    order_id,
    order_date, 
    -- IFNULL(shipper_id, 'Not assigned') AS shipper,             # If the first parameter is NULL, return the second one
    COALESCE(shipper_id, comments, 'Not assigned') AS shipper,    # Replace NULL with the first non-NULL parameter
    IF(YEAR(order_date) = 2019, 'Active', 'Archive') AS status
FROM orders;

5. CASE

SELECT 
    order_id,
    order_date,
    CASE 
	WHEN YEAR(order_date)=2019 THEN 'Active'
        WHEN YEAR(order_date)=2018 THEN 'Last year'
        WHEN YEAR(order_date)<2018 THEN 'Archived'
        ELSE 'Future'
    END AS status
FROM orders;

Optimize the last snippet in Chapt 3

SELECT       
    customer_id, first_name, points, 
    CASE
	WHEN points<2000 THEN 'Bronze' 
	WHEN points BETWEEN 2000 AND 3000 THEN 'Silver'
	WHEN points>3000 THEN 'Gold'
    END AS type
FROM customers
ORDER BY first_name;


Chapt.8 View


1. Create view

  • Treat view as a snapshot of the query result of a table
USE sql_invoicing;
CREATE VIEW sales_by_client AS 
    SELECT 
	c.client_id,
	c.name,
	SUM(invoice_total) AS total_sales
    FROM clients c
    JOIN invoices i USING (client_id)
    GROUP BY c.client_id, c.name;

2. Delete view

DROP VIEW IF EXISTS sales_by_client;

3. Updatable view

  • View without DSTINCT, AF(GROUP BY, HAVING) and UNION can be used in UPDATE, INSERT and DELETE statements
  • Used in case you don’t have direct access to a table
CREATE OR REPLACE VIEW invoice_with_balance AS
    SELECT 
	invoice_id,
    	number,
	client_id,
        invoice_total,
	payment_total,
	invoice_total - payment_total AS balance,
	invoice_date,
	due_date,
	payment_date
    FROM invoices
    WHERE invoice_total - payment_total > 0
WITH CHECK OPTION;                              # Prevent some unexpected missing of values

DELETE FROM invoice_with_balance
WHERE invoice_id = 1;


Chapt.9 Stored Procedure


1. Define a temp delimiter

  • To avoid doing this, right click the “Stored Procedures” in the left navigator to create a SP
DELIMITER $$

2. Create SP

  • Treat SP as a storage of the query process of a table
DELIMITER $$
CREATE PROCEDURE get_clients()
BEGIN
    SELECT * FROM clients;
END$$
DELIMITER ;

3. Delete SP

DROP PROCEDURE IF EXISTS get_clients;

4. SP with parameters

DELIMITER $$ 
CREATE PROCEDURE get_unpaid_invoices_for_client(
    client_id INT,
    OUT invoices_count INT,                     # OUT means this parameter is used for output(try not to use it...)
    OUT invoices_total DECIMAL(9,2)             # 2 bits to store decimal, and the rest 7 to store integer
    )
BEGIN
    SELECT COUNT(*), SUM(invoice_total)
    INTO invoices_count, invoices_total
    FROM invoices i
    WHERE i.client_id = client_id AND payment_total = 0;
END$$
DELIMITER ;

5. Variables in SP

DELIMITER $$ 
CREATE PROCEDURE get_risk_factor()
BEGIN
    DECLARE risk_factor DECIMAL(9,2) DEFAULT 0;  # Declare a variable, must write before SELECT sentences
    DECLARE invoices_total DECIMAL(9,2);
    DECLARE invoices_count INT;
    
    SELECT COUNT(*), SUM(invoice_total)
    INTO invoices_count, invoices_total
    FROM invoices;
    
    SET risk_factor = invoices_total / invoices_count * 5;
    SELECT IFNULL(risk_factor, 0);
END$$
DELIMITER ;


Chapt.10 Trigger


1. Create trigger

  • Trigger is automatically executed BEFORE or AFTER the INSERT, DELETE and UPDATE
DELIMITER $$ 
CREATE TRIGGER payments_after_insert             # The INSERT trigger
    AFTER INSERT ON payments
    FOR EACH ROW
BEGIN
    UPDATE invoices
    SET payment_total = payment_total + NEW.amount
    WHERE invoice_id = NEW.invoice_id;
END$$

INSERT INTO payments
VALUES(DEFAULT, 5, 3, '2019-01-01', 10, 1)$$

CREATE TRIGGER payments_after_delete             # The DELETE trigger
    AFTER DELETE ON payments
    FOR EACH ROW
BEGIN
    UPDATE invoices
    SET payment_total = payment_total - OLD.amount
    WHERE invoice_id = OLD.invoice_id;
END$$

DELETE FROM payments
WHERE payment_id > 8$$
DELIMITER ;

2. Delete trigger

DROP TRIGGER IF EXISTS payments_after_delete;

3. Show trigger

SHOW TRIGGERS;

4. Create Event

  • Event is automatically executed according to a SCHEDULE
DELIMITER $$ 
CREATE EVENT yearly_delete_stale_audit_rows        # Create an event to yearly delete data of previous years
ON SCHEDULE
    EVERY 1 YEAR STARTS '2019-01-01' ENDS '2029-01-01'
DO
BEGIN
    DELETE FROM payments
    WHERE date < NOW() - INTERVAL 1 YEAR;
END$$
DELIMITER ;

5. Alter event

ALTER EVENT yearly_delete_stale_audit_rows DISABLE; 


Chapt.11 Transaction


1. ACID features

  • Atomicity: All changes to data are performed as if they are a single operation. That is, all the changes are performed, or none of them are
  • Consistency: Data is in a consistent state when a transaction starts and when it ends
  • Isolation: The intermediate state of a transaction is invisible to other transactions. As a result, transactions that run concurrently appear to be serialized
  • Durability: After a transaction successfully completes, changes to data persist and are not undone, even in the event of a system failure

2. Create transaction

  • MySQL uses autocommit mode by default, if you don’t explicitly use START TRANSACTION to start a transaction, each query will be treated as a transaction and automatically committed
START TRANSACTION;              # Actually this sentence and the COMMIT are not necessary
UPDATE customers
SET points = points + 10
WHERE customer_id = 1;
COMMIT;                         # Changes only take effect after the commit, otherwise they would be rollbacked

3. Isolation level

  • Use “lock” to prevent multiple transactions update the same content at the same time (cause lost update)
  • 4 levels are set to cope with 4 kinds of concurrency issues:
isolevel


SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
START TRANSACTION;              # Non-repeatable reads
SELECT points FROM customers WHERE customer_id = 1;
SELECT points FROM customers WHERE customer_id = 1;
COMMIT;

4. Deadlock

  • If 2 transactions are updating 2 records in a reverse order, it is likely to have deadlock


Chapt.12 Design A Database


1. ER Diagram

  • Entity-Relationship has 3 components: entities, attributes, and relationships, which are used for conceptual design of relational databases
eer


2. Normalization

  • 1NF: A table cannot have repeated columns and each cell of it should have a single value
  • 2NF: One table with its columns should represent only one entity
  • 3NF: A column in a table should not be derived from other columns
  • Tips1: Directly combine tables that are frequently joined together
  • Tips2: Use 2 one-many relationships to represent the many-many relationship

3. Create database

CREATE DATABASE IF NOT EXISTS sql_store2;
USE sql_store2;

4. Create & Delete table

DROP TABLE IF EXISTS orders;        # Because "orders" depends on "customers", we have to drop "orders" first
DROP TABLE IF EXISTS customers;

CREATE TABLE IF NOT EXISTS customers(
    customer_id INT PRIMARY KEY AUTO_INCREMENT,
    first_name  VARCHAR(50) NOT NULL,
    points      INT NOT NULL DEFAULT 0,
    email       VARCHAR(255) NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS orders(
    order_id    INT PRIMARY KEY,
    customer_id INT NOT NULL,
    FOREIGN KEY fk_orders_customers (customer_id)
	REFERENCES customers (customer_id)
        ON UPDATE CASCADE
        ON DELETE NO ACTION
);

5. Alter table

ALTER TABLE customers
    ADD last_name     VARCHAR(50) NOT NULL AFTER first_name,
    ADD city          VARCHAR(50) NOT NULL,
    MODIFY first_name VARCHAR(50) DEFAULT '',
    DROP points;
    -- DROP PRIMARY KEY;           # Don't need to specify column name

6. Engine

SHOW ENGINES;                      # Store engines: InnoDB & MyISAM


Chapt.13 Indexing


1. Create index

  • Design indexes based on the queries but not tables
  • BTree: A type of index structure
CREATE INDEX idx_points ON customers(points);
EXPLAIN SELECT customer_id FROM customers 
-- USE INDEX (idx_customers)                                    # Not required
WHERE points > 1000;                                            # rows: 1010 -> 529

2. Delete index

DROP INDEX idx_points ON customers;

3. Prefix indexes

  • Only for BLOB, TEXT and VARCHAR
CREATE INDEX idx_lastname ON customers(last_name(20));
SELECT COUNT(DISTINCT LEFT(last_name, 1)), 
       COUNT(DISTINCT LEFT(last_name, 5)),                      # Best prefix index value
       COUNT(DISTINCT LEFT(last_name, 10))
FROM customers;

4. Fulltext indexes

CREATE FULLTEXT INDEX idx_title_body ON posts(title, body);
SELECT *-- , MATCH(title, body) AGAINST ('javascript') AS scores
FROM posts 
WHERE MATCH(title, body) AGAINST ('javascript') OR
      MATCH(title, body) AGAINST ('"handling a form"' IN BOOLEAN MODE) OR 
      MATCH(title, body) AGAINST ('redux -react' IN BOOLEAN MODE);

5. Composite indexes

  • Put index columns with higher selectivity first
    Selectivity: unique index values/amount of records. The maximum value is 1, at which point each record has a unique index
CREATE INDEX idx_state_points ON customers(state, points);      
EXPLAIN SELECT customer_id FROM customers WHERE state = 'CA' AND points > 1000;
  • If we change the above AND to OR, indexes will be ignored, Mysql has to do a full index scan with 1010 rows
EXPLAIN SELECT customer_id FROM customers WHERE state = 'CA'    # Split OR by UNION
  UNION SELECT customer_id FROM customers WHERE points > 1000;  # Both queries use the "idx_state_points" which means this index is a covering index


Chapt.14 Securing Database


1. Create user

CREATE USER moon_app IDENTIFIED BY 'admin';

2. Delete user

DROP USER moon_app;

3. Grant privilege

GRANT SELECT, INSERT, UPDATE, EXECUTE, DELETE, CREATE VIEW
ON sql_store.*
TO moon_app;

SHOW GRANTS FOR moon_app;

REVOKE CREATE VIEW
ON sql_store.*
FROM moon_app;