MySQL

Chapt.2 Select

1. Wildcard, regular expression, ORDER and LIMIT

SELECT *
FROM customers
-- WHERE (address LIKE '%trail%' OR address LIKE '%avenue%') AND phone LIKE '%4'                  # "%" is the early used wildcard
-- WHERE last_name REGEXP 'field|mac|rose' OR last_name REGEXP 'll$' OR last_name REGEXP '^boa'   # Regular expression
WHERE last_name REGEXP '[gim]e' OR last_name REGEXP 'a[n-w]'                                      # last_name that include "ge/ie/me/an-aw"
ORDER BY points DESC
LIMIT 3;                             # LIMIT must be placed in the end of snippet

SELECT first_name, last_name, 
		10 AS points         # Give a value and name it with "points" (has no relation with the "points" column)
FROM customers
ORDER BY birth_date DESC;            # Multiple columns can be set for one ordering

2. Null

SELECT *
FROM orders
WHERE shipped_date IS NULL;

Chapt.3 Join

1. Regular form of an inner join

SELECT *
FROM customers c                     # Cannot use its original name once the column has an alias
JOIN orders o
    ON o.customer_id = c.customer_id

2. Joining across databases

SELECT *
FROM order_items oi
JOIN sql_inventory.products p
    ON oi.product_id = p.product_id;

3. Self join

USE sql_hr;
SELECT e.employee_id, e.first_name, e.reports_to, m.first_name AS manager
FROM employees e
JOIN employees m                      # Self join, to find the manager
    ON e.reports_to = m.employee_id;

4. Outer join

The inner join only returns content that meets the join conditions, causing some content to be lost. To display these lost content, an outer join is required (left/right)

USE sql_invoicing;
SELECT *
FROM clients c
LEFT JOIN payments p
    USING (client_id)                       # If the column names are the same, use USING instead of ON
LEFT JOIN payment_methods pm                # Join multiple tables
    ON p.payment_method = pm.payment_method_id
LEFT JOIN invoices i            
    ON c.client_id = i.client_id            # Compound Join Conditions
    AND p.invoice_id = i.invoice_id         # In this case, the second condition is actually an inner join!!
ORDER BY c.client_id;

5. Union

With UNION, each query must contain the same columns, expressions and aggregation functions
This snippet is optimized in Chapt 7

SELECT customer_id, first_name, points, 'Bronze' AS type
FROM customers
WHERE points<2000
UNION
SELECT customer_id, first_name, points, 'Silver' AS type
FROM customers
WHERE points BETWEEN 2000 AND 3000
UNION
SELECT customer_id, first_name, points, 'Gold' AS type
FROM customers
WHERE points>3000
ORDER BY first_name;

Chapt.4 Column Operation

1. Insert

INSERT INTO customers
VALUE(
    DEFAULT,       # The customer_id column is Auto Incremental, use "last_insert_id()" to get the id of last insert
    'John',
    'Smith',
    '1998-07-15',
    DEFAULT,
    '8 Xinhui Road',
    'Chengdu',
    'CN',
    9980
    ),
    (              # Insert multiple rows
    DEFAULT,
    'Michael',
    'Jackson',
    '1978-12-15',
    DEFAULT,
    '1 Star Avenue',
    'Los Angeles',
    'US',
    9999
    );

2. Delete

Difference between DELETE, TRUNCATE and DROP:
TRUNCATE and DELETE only delete data without deleting the structure of the table; DROP will delete the structure of the table, triggers, indexes, etc

DELETE FROM customers                 # Delete all the columns, but can be filtered by WHERE
WHERE first_name = 'Michael' AND last_name = 'Jackson';

3. Copy a table

USE sql_invoicing;
CREATE TABLE invoices_archived AS 
	SELECT i.invoice_id, i.number, c.name AS client, i.invoice_total, i.payment_total, i.invoice_date, i.due_date, i.payment_date
	FROM invoices i
	JOIN clients c USING (client_id)
	WHERE payment_date IS NOT NULL;

4. Update

UPDATE invoices
SET                                   # Becasue sql doesn't have "==", we must use SET to change values
    payment_total = invoice_total/2, 
    payment_date = due_date
-- WHERE client_id IN (3,5)           # Update multiple columns
WHERE client_id = (                   # If the result of nested query is not unique, change "=" to "IN" here
	SELECT client_id
	FROM clients
	WHERE name='Myworks'
    );

Chapt.5 Aggregation Function

1. Common AFs

USE sql_invoicing;
SELECT 
    MAX(invoice_total) AS highest,
    MIN(invoice_total) AS lowest,
    AVG(invoice_total) AS average,
    SUM(invoice_total) AS total,
    COUNT(invoice_total) AS number,                 # COUNT only counts for non-NUll
    COUNT(DISTINCT client_id) AS client_number      # DISTINCT for deduplication  
FROM invoices;

2. GROUP BY

Only useful for aggregation functions!

SELECT 
    p.date,
    pm.name AS payment_method,
    SUM(p.amount) AS total_payments
FROM payments p
JOIN payment_methods pm
	ON p.payment_method =  pm.payment_method_id
GROUP BY p.date, pm.name              # Each group is a combination of these 2 columns

3. HAVING

Filtering with HAVING after GROUP BY, and can only filter contents exist in SELECT

USE sql_store;
SELECT 
    c.customer_id, c.first_name, c.last_name, c.state,
    SUM(oi.quantity*oi.unit_price) AS spent_money
FROM customers c 
JOIN orders o       USING(customer_id)
JOIN order_items oi USING(order_id)
WHERE state='VA'                      # WHERE must be written before GROUP BY
GROUP BY c.customer_id, c.first_name, c.last_name, c.state
HAVING spent_money>100;

Chapt.6 Complex Query

1. Subquery example

USE sql_invoicing;
SELECT *
FROM clients
WHERE client_id NOT IN (
    SELECT DISTINCT client_id
    FROM invoices
    );

Another style with JOIN

SELECT *
FROM clients
LEFT JOIN invoices USING(client_id)
WHERE invoice_id IS NULL;

Another style with EXISTS

SELECT *
FROM clients c
WHERE NOT EXISTS (
    SELECT client_id
    FROM invoices
    WHERE client_id = c.client_id
    );

2. Correlated subquery

SELECT *
FROM invoices i
WHERE invoice_total > (
    SELECT AVG(invoice_total)
    FROM invoices
    WHERE client_id = i.client_id        #
    );         

3. FROM & SELECT subquery

SELECT *
FROM (                                           # FROM subquery. Only for single queries!
	SELECT 
		invoice_id, 
		invoice_total,
		(SELECT AVG(invoice_total)       # SELECT subquery. Direct using "AVG(invoice_total)" returns only 1 row
		 FROM invoices) AS invoices_avg,
		invoice_total - (SELECT invoices_avg) AS difference
	FROM invoices
    ) AS invoice_summary;                        # Must have alias

Chapt.7 Built-in Function

1. Number

SELECT ROUND(3.1415926, 4); 
SELECT TRUNCATE(4.24923, 2); 
SELECT CEILING(5.2);                          # Minimal integer >= input
SELECT FLOOR(5.2);                            # Maximal integer <= input
SELECT RAND();                                # Random in (0,1)

2. String

SELECT TRIM('  A-928 201 24  ');              # Remove leading and trailing spaces
SELECT SUBSTRING('Kindergarten', 2, 4);       # Extract 4 characters, start with character No.2 (1,2,3,...)
SELECT LOCATE('der', 'Kindergarten');
SELECT REPLACE('Kindergarten', 'e', 'de');
SELECT CONCAT('Zheng', ' ', 'Zhongyi');

3. Time

SELECT *
FROM orders
WHERE YEAR(order_date) = EXTRACT(YEAR FROM NOW());
SELECT DATEDIFF(NOW(), '1998-07-15');

4. IFNULL & COALESCE & IF

SELECT 
    order_id,
    order_date, 
    -- IFNULL(shipper_id, 'Not assigned') AS shipper,             # If the first parameter is NULL, return the second one
    COALESCE(shipper_id, comments, 'Not assigned') AS shipper,    # Replace NULL with the first non-NULL parameter
    IF(YEAR(order_date) = 2019, 'Active', 'Archive') AS status
FROM orders;

5. CASE

SELECT 
    order_id,
    order_date,
    CASE 
	WHEN YEAR(order_date)=2019 THEN 'Active'
        WHEN YEAR(order_date)=2018 THEN 'Last year'
        WHEN YEAR(order_date)<2018 THEN 'Archived'
        ELSE 'Future'
    END AS status
FROM orders;

Optimize the last snippet in Chapt 3

SELECT       
    customer_id, first_name, points, 
    CASE
	WHEN points<2000 THEN 'Bronze' 
	WHEN points BETWEEN 2000 AND 3000 THEN 'Silver'
	WHEN points>3000 THEN 'Gold'
    END AS type
FROM customers
ORDER BY first_name;

Chapt.8 View

1. Create view

Treat view as a snapshot of the query result of a table

USE sql_invoicing;
CREATE VIEW sales_by_client AS 
    SELECT 
	c.client_id,
	c.name,
	SUM(invoice_total) AS total_sales
    FROM clients c
    JOIN invoices i USING (client_id)
    GROUP BY c.client_id, c.name;

2. Delete view

DROP VIEW IF EXISTS sales_by_client;

3. Updatable view

View without DSTINCT, AF(GROUP BY, HAVING) and UNION can be used in UPDATE, INSERT and DELETE statements
Used in case you don’t have direct access to a table

CREATE OR REPLACE VIEW invoice_with_balance AS
    SELECT 
	invoice_id,
    	number,
	client_id,
        invoice_total,
	payment_total,
	invoice_total - payment_total AS balance,
	invoice_date,
	due_date,
	payment_date
    FROM invoices
    WHERE invoice_total - payment_total > 0
WITH CHECK OPTION;                              # Prevent some unexpected missing of values

DELETE FROM invoice_with_balance
WHERE invoice_id = 1;

Chapt.9 Stored Procedure

1. Define a temp delimiter

To avoid doing this, right click the “Stored Procedures” in the left navigator to create a SP

DELIMITER $$

2. Create SP

Treat SP as a storage of the query process of a table

DELIMITER $$
CREATE PROCEDURE get_clients()
BEGIN
    SELECT * FROM clients;
END$$
DELIMITER ;

3. Delete SP

DROP PROCEDURE IF EXISTS get_clients;

4. SP with parameters

DELIMITER $$ 
CREATE PROCEDURE get_unpaid_invoices_for_client(
    client_id INT,
    OUT invoices_count INT,                     # OUT means this parameter is used for output(try not to use it...)
    OUT invoices_total DECIMAL(9,2)             # 2 bits to store decimal, and the rest 7 to store integer
    )
BEGIN
    SELECT COUNT(*), SUM(invoice_total)
    INTO invoices_count, invoices_total
    FROM invoices i
    WHERE i.client_id = client_id AND payment_total = 0;
END$$
DELIMITER ;

5. Variables in SP

DELIMITER $$ 
CREATE PROCEDURE get_risk_factor()
BEGIN
    DECLARE risk_factor DECIMAL(9,2) DEFAULT 0;  # Declare a variable, must write before SELECT sentences
    DECLARE invoices_total DECIMAL(9,2);
    DECLARE invoices_count INT;
    
    SELECT COUNT(*), SUM(invoice_total)
    INTO invoices_count, invoices_total
    FROM invoices;
    
    SET risk_factor = invoices_total / invoices_count * 5;
    SELECT IFNULL(risk_factor, 0);
END$$
DELIMITER ;

Chapt.10 Trigger

1. Create trigger

Trigger is automatically executed BEFORE or AFTER the INSERT, DELETE and UPDATE

DELIMITER $$ 
CREATE TRIGGER payments_after_insert             # The INSERT trigger
    AFTER INSERT ON payments
    FOR EACH ROW
BEGIN
    UPDATE invoices
    SET payment_total = payment_total + NEW.amount
    WHERE invoice_id = NEW.invoice_id;
END$$

INSERT INTO payments
VALUES(DEFAULT, 5, 3, '2019-01-01', 10, 1)$$

CREATE TRIGGER payments_after_delete             # The DELETE trigger
    AFTER DELETE ON payments
    FOR EACH ROW
BEGIN
    UPDATE invoices
    SET payment_total = payment_total - OLD.amount
    WHERE invoice_id = OLD.invoice_id;
END$$

DELETE FROM payments
WHERE payment_id > 8$$
DELIMITER ;

2. Delete trigger

DROP TRIGGER IF EXISTS payments_after_delete;

3. Show trigger

SHOW TRIGGERS;

4. Create Event

Event is automatically executed according to a SCHEDULE

DELIMITER $$ 
CREATE EVENT yearly_delete_stale_audit_rows        # Create an event to yearly delete data of previous years
ON SCHEDULE
    EVERY 1 YEAR STARTS '2019-01-01' ENDS '2029-01-01'
DO
BEGIN
    DELETE FROM payments
    WHERE date < NOW() - INTERVAL 1 YEAR;
END$$
DELIMITER ;

5. Alter event

ALTER EVENT yearly_delete_stale_audit_rows DISABLE; 

Chapt.11 Transaction

1. ACID features

Atomicity: All changes to data are performed as if they are a single operation. That is, all the changes are performed, or none of them are
Consistency: Data is in a consistent state when a transaction starts and when it ends
Isolation: The intermediate state of a transaction is invisible to other transactions. As a result, transactions that run concurrently appear to be serialized
Durability: After a transaction successfully completes, changes to data persist and are not undone, even in the event of a system failure

2. Create transaction

MySQL uses autocommit mode by default, if you don’t explicitly use START TRANSACTION to start a transaction, each query will be treated as a transaction and automatically committed

START TRANSACTION;              # Actually this sentence and the COMMIT are not necessary
UPDATE customers
SET points = points + 10
WHERE customer_id = 1;
COMMIT;                         # Changes only take effect after the commit, otherwise they would be rollbacked

3. Isolation level

Use “lock” to prevent multiple transactions update the same content at the same time (cause lost update)
4 levels are set to cope with 4 kinds of concurrency issues:

SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
START TRANSACTION;              # Non-repeatable reads
SELECT points FROM customers WHERE customer_id = 1;
SELECT points FROM customers WHERE customer_id = 1;
COMMIT;

4. Deadlock

If 2 transactions are updating 2 records in a reverse order, it is likely to have deadlock

Chapt.12 Design A Database

1. ER Diagram

Entity-Relationship has 3 components: entities, attributes, and relationships, which are used for conceptual design of relational databases

2. Normalization

1NF: A table cannot have repeated columns and each cell of it should have a single value
2NF: One table with its columns should represent only one entity
3NF: A column in a table should not be derived from other columns
Tips1: Directly combine tables that are frequently joined together
Tips2: Use 2 one-many relationships to represent the many-many relationship

3. Create database

CREATE DATABASE IF NOT EXISTS sql_store2;
USE sql_store2;

4. Create & Delete table

DROP TABLE IF EXISTS orders;        # Because "orders" depends on "customers", we have to drop "orders" first
DROP TABLE IF EXISTS customers;

CREATE TABLE IF NOT EXISTS customers(
    customer_id INT PRIMARY KEY AUTO_INCREMENT,
    first_name  VARCHAR(50) NOT NULL,
    points      INT NOT NULL DEFAULT 0,
    email       VARCHAR(255) NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS orders(
    order_id    INT PRIMARY KEY,
    customer_id INT NOT NULL,
    FOREIGN KEY fk_orders_customers (customer_id)
	REFERENCES customers (customer_id)
        ON UPDATE CASCADE
        ON DELETE NO ACTION
);

5. Alter table

ALTER TABLE customers
    ADD last_name     VARCHAR(50) NOT NULL AFTER first_name,
    ADD city          VARCHAR(50) NOT NULL,
    MODIFY first_name VARCHAR(50) DEFAULT '',
    DROP points;
    -- DROP PRIMARY KEY;           # Don't need to specify column name

6. Engine

SHOW ENGINES;                      # Store engines: InnoDB & MyISAM

Chapt.13 Indexing

1. Create index

Design indexes based on the queries but not tables
BTree: A type of index structure

CREATE INDEX idx_points ON customers(points);
EXPLAIN SELECT customer_id FROM customers 
-- USE INDEX (idx_customers)                                    # Not required
WHERE points > 1000;                                            # rows: 1010 -> 529

2. Delete index

DROP INDEX idx_points ON customers;

3. Prefix indexes

Only for BLOB, TEXT and VARCHAR

CREATE INDEX idx_lastname ON customers(last_name(20));
SELECT COUNT(DISTINCT LEFT(last_name, 1)), 
       COUNT(DISTINCT LEFT(last_name, 5)),                      # Best prefix index value
       COUNT(DISTINCT LEFT(last_name, 10))
FROM customers;

4. Fulltext indexes

CREATE FULLTEXT INDEX idx_title_body ON posts(title, body);
SELECT *-- , MATCH(title, body) AGAINST ('javascript') AS scores
FROM posts 
WHERE MATCH(title, body) AGAINST ('javascript') OR
      MATCH(title, body) AGAINST ('"handling a form"' IN BOOLEAN MODE) OR 
      MATCH(title, body) AGAINST ('redux -react' IN BOOLEAN MODE);

5. Composite indexes

Put index columns with higher selectivity first
Selectivity: unique index values/amount of records. The maximum value is 1, at which point each record has a unique index

CREATE INDEX idx_state_points ON customers(state, points);      
EXPLAIN SELECT customer_id FROM customers WHERE state = 'CA' AND points > 1000;

If we change the above AND to OR, indexes will be ignored, Mysql has to do a full index scan with 1010 rows

EXPLAIN SELECT customer_id FROM customers WHERE state = 'CA'    # Split OR by UNION
  UNION SELECT customer_id FROM customers WHERE points > 1000;  # Both queries use the "idx_state_points" which means this index is a covering index

Chapt.14 Securing Database

1. Create user

CREATE USER moon_app IDENTIFIED BY 'admin';

2. Delete user

DROP USER moon_app;

3. Grant privilege

GRANT SELECT, INSERT, UPDATE, EXECUTE, DELETE, CREATE VIEW
ON sql_store.*
TO moon_app;

SHOW GRANTS FOR moon_app;

REVOKE CREATE VIEW
ON sql_store.*
FROM moon_app;