Commit 94daebe

Merge branch 'master' of github.com:ggerganov/llama.cpp into grammar-example

* 'master' of github.com:ggerganov/llama.cpp: (24 commits)
  convert : fix Baichuan2 models by using vocab size in config.json (ggml-org#3299)
  readme : add project status link
  ggml : fix build after ggml-org#3329
  llm : add Refact model (ggml-org#3329)
  sync : ggml (conv 1d + 2d updates, UB fixes) (ggml-org#3468)
  finetune : readme fix typo (ggml-org#3465)
  ggml : add RISC-V Vector Support for K-Quants and improved the existing intrinsics (ggml-org#3453)
  main : consistent prefix/suffix coloring (ggml-org#3425)
  llama : fix session saving/loading (ggml-org#3400)
  llama : expose model's rope_freq_scale in the API (ggml-org#3418)
  metal : alibi for arbitrary number of heads (ggml-org#3426)
  cmake : make LLAMA_NATIVE flag actually use the instructions supported by the processor (ggml-org#3273)
  Work on the BPE tokenizer (ggml-org#3252)
  convert : fix vocab size when not defined in hparams (ggml-org#3421)
  cmake : increase minimum version for add_link_options (ggml-org#3444)
  CLBlast: Add broadcast support for matrix multiplication (ggml-org#3402)
  gguf : add BERT, MPT, and GPT-J arch info (ggml-org#3408)
  gguf : general usability improvements (ggml-org#3409)
  cmake : make CUDA flags more similar to the Makefile (ggml-org#3420)
  finetune : fix ggml-org#3404 (ggml-org#3437)
  ...

2 parents e5c4193 + 019ba1d; commit 94daebe

50 files changed: +4692 additions, −937 deletions


‎.dockerignore‎

Lines changed: 3 additions & 0 deletions

@@ -1,6 +1,9 @@
 *.o
 *.a
 .cache/
+.git/
+.github/
+.gitignore
 .vs/
 .vscode/
 .DS_Store

‎.github/workflows/build.yml‎

Lines changed: 8 additions & 8 deletions

@@ -188,7 +188,7 @@ jobs:
          sysctl -a
          mkdir build
          cd build
-         cmake -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF ..
+         cmake ..
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
 
     - name: Test
@@ -265,17 +265,17 @@ jobs:
       matrix:
         include:
           - build: 'noavx'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx2'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'avx'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx512'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'clblast'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
           - build: 'openblas'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
 
     steps:
       - name: Clone
@@ -414,7 +414,7 @@ jobs:
        run: |
          mkdir build
          cd build
-         cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
+         cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
 
     - name: Determine tag name

‎.gitignore‎

Lines changed: 3 additions & 1 deletion

@@ -40,6 +40,7 @@ models-mnt
 /embedding
 /gguf
 /gguf-llama-simple
+/infill
 /libllama.so
 /llama-bench
 /main
@@ -90,4 +91,5 @@ tests/test-quantize-perf
 tests/test-sampling
 tests/test-tokenizer-0-llama
 tests/test-tokenizer-0-falcon
-tests/test-tokenizer-1
+tests/test-tokenizer-1-llama
+tests/test-tokenizer-1-bpe

‎CMakeLists.txt‎

Lines changed: 43 additions & 23 deletions

@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason
+cmake_minimum_required(VERSION 3.13) # for add_link_options
 project("llama.cpp" C CXX)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -44,7 +44,7 @@ endif()
 
 # general
 option(LLAMA_STATIC "llama: static link libraries" OFF)
-option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
+option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
 option(LLAMA_LTO "llama: enable link time optimization" OFF)
 
 # debug
@@ -58,15 +58,21 @@ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer"
 option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
 
 # instruction set specific
-option(LLAMA_AVX "llama: enable AVX" ON)
-option(LLAMA_AVX2 "llama: enable AVX2" ON)
-option(LLAMA_AVX512 "llama: enable AVX512" OFF)
-option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
-option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
-option(LLAMA_FMA "llama: enable FMA" ON)
+if (LLAMA_NATIVE)
+    set(INS_ENB OFF)
+else()
+    set(INS_ENB ON)
+endif()
+
+option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
+option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
+option(LLAMA_AVX512 "llama: enable AVX512" OFF)
+option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
+option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
+option(LLAMA_FMA "llama: enable FMA" ${INS_ENB})
 # in MSVC F16C is implied with AVX2/AVX512
 if (NOT MSVC)
-    option(LLAMA_F16C "llama: enable F16C" ON)
+    option(LLAMA_F16C "llama: enable F16C" ${INS_ENB})
 endif()
 
 # 3rd party libs
@@ -343,8 +349,9 @@ if (LLAMA_MPI)
     set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
     add_compile_definitions(GGML_USE_MPI)
     add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
-    set(cxx_flags ${cxx_flags} -Wno-cast-qual)
-    set(c_flags ${c_flags} -Wno-cast-qual)
+    if (NOT MSVC)
+        add_compile_options(-Wno-cast-qual)
+    endif()
     set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
     set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
     # Even if you're only using the C header, C++ programs may bring in MPI
@@ -418,10 +425,11 @@ if (LLAMA_ALL_WARNINGS)
     set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
                 -Werror=implicit-function-declaration)
     set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
+    set(host_cxx_flags "")
 
     if (CMAKE_C_COMPILER_ID MATCHES "Clang")
         set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
-        set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)
+        set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi)
 
         if (
             (CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
@@ -431,27 +439,38 @@ if (LLAMA_ALL_WARNINGS)
         endif()
     elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
         set(c_flags ${c_flags} -Wdouble-promotion)
-        set(cxx_flags ${cxx_flags} -Wno-array-bounds)
+        set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds)
 
         if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
-            set(cxx_flags ${cxx_flags} -Wno-format-truncation)
+            set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation)
         endif()
         if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
-            set(cxx_flags ${cxx_flags} -Wextra-semi)
+            set(host_cxx_flags ${host_cxx_flags} -Wextra-semi)
         endif()
     endif()
 else()
     # todo : msvc
 endif()
 
-add_compile_options(
-    ${warning_flags}
-    "$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
-    "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
-)
+set(c_flags ${c_flags} ${warning_flags})
+set(cxx_flags ${cxx_flags} ${warning_flags})
+add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
+                    "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>")
 
 endif()
 
+if (NOT MSVC)
+    set(cuda_flags -Wno-pedantic)
+endif()
+set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
+
+list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
+if (NOT cuda_host_flags STREQUAL "")
+    set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
+endif()
+
+add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
+
 if (WIN32)
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
 
@@ -491,9 +510,6 @@ if (NOT MSVC)
     if (LLAMA_GPROF)
         add_compile_options(-pg)
     endif()
-    if (LLAMA_NATIVE)
-        add_compile_options(-march=native)
-    endif()
 endif()
 
 if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
@@ -548,6 +564,9 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
         add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
     endif()
 else()
+    if (LLAMA_NATIVE)
+        add_compile_options(-march=native)
+    endif()
     if (LLAMA_F16C)
         add_compile_options(-mf16c)
     endif()
@@ -705,6 +724,7 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR}
 set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
 set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
 set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
+get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
 
 configure_package_config_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in

‎Makefile‎

Lines changed: 11 additions & 3 deletions

@@ -1,8 +1,8 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative benchmark-matmult parallel finetune export-lora tests/test-c.o
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
 
 # Binaries only useful for tests
-TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
+TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
 
 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -62,9 +62,11 @@ test: $(TEST_TARGETS)
 		if [ "$$test_target" = "tests/test-tokenizer-0-llama" ]; then \
 			./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
 		elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
-			continue; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
 		elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
 			continue; \
+		elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
+			continue; \
 		else \
 			echo "Running test $$test_target..."; \
 			./$$test_target; \
@@ -543,6 +545,9 @@ main: examples/main/main.cpp build-info.h ggml.
 	@echo '==== Run ./main -h for help. ===='
 	@echo
 
+infill: examples/infill/infill.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 simple: examples/simple/simple.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
@@ -667,6 +672,9 @@ tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp build-info.h gg
 tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 

‎README.md‎

Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@
 [![Actions Status](https://github.com/ggerganov/llama.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/llama.cpp/actions)
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 
-[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
+[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
 
 Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
 

‎common/common.cpp‎

Lines changed: 3 additions & 0 deletions

@@ -389,6 +389,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
         params.interactive_first = true;
     } else if (arg == "-ins" || arg == "--instruct") {
         params.instruct = true;
+    } else if (arg == "--infill") {
+        params.infill = true;
     } else if (arg == "--multiline-input") {
         params.multiline_input = true;
     } else if (arg == "--simple-io") {
@@ -921,6 +923,7 @@ std::string llama_detokenize_bpe(llama_context * ctx, const std::vector<llama_to
         result += piece;
     }
 
+    // NOTE: the original tokenizer decodes bytes after collecting the pieces.
     return result;
 }
 
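For context on the NOTE above: reference byte-level BPE tokenizers (GPT-2 style, the family targeted by the BPE work in ggml-org#3252) detokenize by first concatenating all pieces and only then mapping the piece characters back to raw bytes, while the loop above appends piece by piece. A minimal Python sketch of that reference behavior, for comparison only — this is not llama.cpp code, and the helper names are illustrative:

    def bytes_to_unicode() -> dict[int, str]:
        # GPT-2's reversible byte <-> unicode mapping: printable bytes map to
        # themselves, the remaining bytes are shifted to unused code points >= 256.
        bs = list(range(ord("!"), ord("~") + 1)) + \
             list(range(ord("\xa1"), ord("\xac") + 1)) + \
             list(range(ord("\xae"), ord("\xff") + 1))
        cs = bs[:]
        n = 0
        for b in range(256):
            if b not in bs:
                bs.append(b)
                cs.append(256 + n)
                n += 1
        return dict(zip(bs, (chr(c) for c in cs)))

    def detokenize_bpe(pieces: list[str]) -> str:
        byte_decoder = {c: b for b, c in bytes_to_unicode().items()}
        text = "".join(pieces)                        # 1. collect the pieces
        raw = bytes(byte_decoder[ch] for ch in text)  # 2. then decode bytes
        return raw.decode("utf-8", errors="replace")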

‎common/common.h‎

Lines changed: 1 addition & 0 deletions

@@ -120,6 +120,7 @@ struct gpt_params {
     bool use_mlock = false; // use mlock to keep model in memory
     bool numa = false; // attempt optimizations that help on some NUMA systems
     bool verbose_prompt = false; // print prompt tokens before generation
+    bool infill = false; // use infill mode
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

‎convert-baichuan-hf-to-gguf.py‎

Lines changed: 8 additions & 2 deletions

@@ -11,11 +11,14 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 import itertools
-import gguf
 import numpy as np
 import torch
 from sentencepiece import SentencePieceProcessor  # type: ignore[import]
 
+if 'NO_LOCAL_GGUF' not in os.environ:
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+import gguf
+
 
 if TYPE_CHECKING:
     from typing import TypeAlias
@@ -174,8 +177,11 @@ def parse_args() -> argparse.Namespace:
 print("gguf: get sentencepiece tokenizer vocab, scores and token types")
 
 tokenizer = SentencePieceProcessor(str(tokenizer_model_file))
+vocab_size = hparams.get('vocab_size')
+if vocab_size is None:
+    vocab_size = tokenizer.vocab_size()
 
-for i in range(tokenizer.vocab_size()):
+for i in range(vocab_size):
     text: bytes
     score: float
 
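Why the vocab_size change matters: Baichuan2 checkpoints pad the token-embedding matrix to the vocab_size declared in config.json, which can exceed what the SentencePiece model reports, so iterating with the tokenizer's own count would write fewer vocab entries than the tensors expect. A standalone Python sketch of the pattern — the file layout and the get_vocab_size helper are illustrative, not the script's actual interface:

    import json
    from pathlib import Path

    from sentencepiece import SentencePieceProcessor  # type: ignore[import]

    def get_vocab_size(model_dir: Path) -> int:
        # Prefer the vocab size declared in config.json: the checkpoint's
        # embedding tensors are padded to this value (e.g. Baichuan2).
        hparams = json.loads((model_dir / "config.json").read_text())
        vocab_size = hparams.get("vocab_size")
        if vocab_size is not None:
            return vocab_size
        # Fall back to the SentencePiece model when config.json omits it.
        tokenizer = SentencePieceProcessor(str(model_dir / "tokenizer.model"))
        return tokenizer.vocab_size()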
