一. 插入1000万数据
1.建库
create database bigdata;
use bigdata;
2.建表
# 1.建部门表dept
create table dept(
id int unsigned primary key auto_increment,
deptno mediumint unsigned not null default 0,
dname varchar(20) not null default "",
loc varchar(13) not null default ""
) default charset=utf8;
# 2.建员工表emp
create table emp(
id int unsigned primary key auto_increment,
empno mediumint unsigned not null default 0, #编号
ename varchar(20) not null default "",
job varchar(9) not null default "", #上级编号
mgr mediumint unsigned not null default 0,
hiredate date not null, #入职时间
sal decimal(7,2) not null,
comm decimal(7,2) not null,
deptno mediumint unsigned not null default 0 #部门编号
) default charset=utf8;
3.设置参数
由于插入大数据时mysql会报错:
This function has none of DETERMINISTIC...
我们开启了bin-log, 我们就必须指定我们的函数是否是
1 DETERMINISTIC 不确定的
2 NO SQL 没有SQl语句,当然也不会修改数据
3 READS SQL DATA 只是读取数据,当然也不会修改数据
4 MODIFIES SQL DATA 要修改数据
5 CONTAINS SQL 包含了SQL语句
其中在function里面,只有 DETERMINISTIC, NO SQL 和 READS SQL DATA 被支持。如果我们开启了 bin-log, 我们就必须为我们的function指定一个参数。
在MySQL中创建函数时出现这种错误的解决方法:
set global log_bin_trust_function_creators=TRUE;
4.创建函数
创建函数,保证每条数据不同.
# 1.随机产生字符串
delimiter create function rand_string(n int) returns varchar(255) #创建有参返回函数
begin
declare chars_str varchar(100) default 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
declare return_str varchar(255) default "";
declare i int default 0;
while i
# 此处while语句作用为产生一个n位的随机字母字符串.
# floor(1+rand()*52) 取1-52中任意随机数
# substring(chars_str,floor(1+rand()*52),1)) #取chars_str的52个中任意一个字母.
-------------------------
# 2.随机产生部门编号
delimiter create function rand_num() returns int(5)
begin
declare i int default 0;
set i = floor(100+rand()*10);
return i;
end
# 创建一个100-110之间的随机部门编号
5.创建存储过程
# 1.创建往emp员工表中插入数据的存储过程
delimiter create procedure insert_emp(in start int(10),in max_num int(10))
begin
declare i int default 0;
set autocommit=0; repeat
set i=i+1;
insert into emp(empno,ename,job,mgr,hiredate,sal,comm,deptno) values((start+i),rand_string(6),'SALESMAN',0001,curdate(),2000,400,rand_num());
until i =max_num
end repeat;
commit;
end
# 2.创建往dept部门表中插入数据的存储过程
delimiter create procedure insert_dept(in start int(10),in max_num int(10))
begin
declare i int default 0;
set autocommit=0; repeat
set i=i+1;
insert into dept(deptno,dname,loc) values((start+i),rand_string(10),rand_string(8));
until i =max_num
end repeat;
commit;
end
6.调用存储过程
delimiter ;
#插入10个部门编号
call insert_dept(100,10);
#插入50万条数据(1-2分钟左右)
call insert_emp(100001,500000);
call insert_emp(600001,500000);
查看(比较慢)
select * from emp;
二. 查询调试
1.开启慢查询
show variables like 'profiling';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| profiling | OFF |
+---------------+-------+
set profiling=on;
show variables like 'profiling';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| profiling | ON |
+---------------+-------+
2.SQL语句查询测试
# 使用查询语句:
select * from emp group by id%10 limit 1500000;
+----+--------+--------+----------+-----+
| id | empno | ename | job | mgr |
+----+--------+--------+----------+-----+
| 10 | 100011 | cgzbKv | SALESMAN | 1 |
| 1 | 100002 | dMzoWQ | SALESMAN | 1 |
| 2 | 100003 | SBdLwb | SALESMAN | 1 |
| 3 | 100004 | aDQtxf | SALESMAN | 1 |
| 4 | 100005 | Ngrpxw | SALESMAN | 1 |
| 5 | 100006 | BqblYk | SALESMAN | 1 |
| 6 | 100007 | UiiqgH | SALESMAN | 1 |
| 7 | 100008 | pVLRbI | SALESMAN | 1 |
| 8 | 100009 | VdDJJr | SALESMAN | 1 |
| 9 | 100010 | sNNAlF | SALESMAN | 1 |
+----+--------+--------+----------+-----+
10 rows in set (0.59 sec)
select * from emp group by id%20 order by 5;
+----+--------+--------+----------+-----+
| id | empno | ename | job | mgr |
+----+--------+--------+----------+-----+
| 2 | 100003 | SBdLwb | SALESMAN | 1 |
| 4 | 100005 | Ngrpxw | SALESMAN | 1 |
| 6 | 100007 | UiiqgH | SALESMAN | 1 |
| 8 | 100009 | VdDJJr | SALESMAN | 1 |
| 10 | 100011 | cgzbKv | SALESMAN | 1 |
| 12 | 100013 | ixOzis | SALESMAN | 1 |
| 14 | 100015 | BQAgcU | SALESMAN | 1 |
| 16 | 100017 | Drbery | SALESMAN | 1 |
| 18 | 100019 | WtcjMg | SALESMAN | 1 |
| 20 | 100021 | vIbiMl | SALESMAN | 1 |
| 1 | 100002 | dMzoWQ | SALESMAN | 1 |
| 3 | 100004 | aDQtxf | SALESMAN | 1 |
| 5 | 100006 | BqblYk | SALESMAN | 1 |
| 7 | 100008 | pVLRbI | SALESMAN | 1 |
| 9 | 100010 | sNNAlF | SALESMAN | 1 |
| 11 | 100012 | HpDUQt | SALESMAN | 1 |
| 13 | 100014 | idUmAQ | SALESMAN | 1 |
| 15 | 100016 | NYEZiS | SALESMAN | 1 |
| 17 | 100018 | eEFqML | SALESMAN | 1 |
| 19 | 100020 | EPkIJw | SALESMAN | 1 |
+----+--------+--------+----------+-----+
20 rows in set (0.64 sec)
3.语句诊断show profile
mysql> show profiles;
+----------+------------+------------------------------------------------+
| Query_ID | Duration | Query |
+----------+------------+------------------------------------------------+
| 1 | 0.63810275 | select * from emp group by id%20 order by 5 |
| 2 | 0.62535500 | select * from emp group by id%10 limit 1500000 |
+----------+------------+------------------------------------------------+
2 rows in set, 1 warning (0.00 sec)
# 查看一条SQL语句的完整周期:
1.分析刚才2号查询语句的具体查询细节:
show profile cpu,block io for query 2;
+----------------------+----------+----------+
| Status | Duration | CPU_user |
+----------------------+----------+----------+
| starting | 0.000094 | 0.000088 |
| checking permissions | 0.000015 | 0.000015 |#检查权限
| Opening tables | 0.000033 | 0.000033 |
| init | 0.000047 | 0.000047 |#初始化
| System lock | 0.000019 | 0.000019 |#系统锁定
| optimizing | 0.000012 | 0.000011 |#优化
| statistics | 0.000039 | 0.000039 |#统计
| preparing | 0.000021 | 0.000021 |#准备
| Creating tmp table | 0.000066 | 0.000067 |#创建临时表
| Sorting result | 0.000011 | 0.000009 |#filesort
| executing | 0.000006 | 0.000006 |#执行
| Sending data | 0.604537 | 0.600846 |#发送数据
| Creating sort index | 0.000048 | 0.000043 |#创建排序索引
| end | 0.000004 | 0.000004 |
| removing tmp table | 0.000007 | 0.000007 |#删除临时表
| end | 0.000005 | 0.000004 |
| query end | 0.000007 | 0.000008 |#查询结束
| closing tables | 0.000012 | 0.000011 |
| freeing items | 0.000020 | 0.000020 |#释放项目
| cleaning up | 0.000011 | 0.000011 |
+----------------------+----------+----------+
###!!!!!!!!!!!!!!!###
# 结果分析
一般出现以下4种情况的就是有问题的sql:
1.converting HEAP to MyISAM 查询结果太大,内存不够需要写磁盘
2.Creating tmp table 创建临时表
removing tmp table 删除临时表
3.Copying to tmp table on disk 把内存中临时表复制到磁盘(严重)
4.locked
4.全局查询日志
# !!不要在生产环境开启这个功能(用于测试环境)
方式一:配置在my.cnf下
general_log=1 #开启记录所有的功能和sql语句.类似备份.
general_log_file=/path/logfile #日志文件路径
log_output=FILE #输出格式
方式二:
set global general_log=1;
set global log_output='table';
可以用select * from mysql.general_log 查看
Copyright © 2009 - Now . XPBag.com . All rights Reserved.
夜心的小站 »
MySQL 千万级批量数据脚本 测试实例